b214e9c01fee2fab78d3777396b77486aace1a9d
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "linker/CacheFlush.h"
32 #include "linker/SymbolExtras.h"
33 #include "PathUtils.h"
34
35 #if !defined(mingw32_HOST_OS)
36 #include "posix/Signals.h"
37 #endif
38
39 // get protos for is*()
40 #include <ctype.h>
41
42 #ifdef HAVE_SYS_TYPES_H
43 #include <sys/types.h>
44 #endif
45
46 #include <inttypes.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <assert.h>
51
52 #ifdef HAVE_SYS_STAT_H
53 #include <sys/stat.h>
54 #endif
55
56 #if defined(HAVE_DLFCN_H)
57 #include <dlfcn.h>
58 #endif
59
60 #if defined(OBJFORMAT_ELF)
61 # include "linker/Elf.h"
62 # include <regex.h> // regex is already used by dlopen() so this is OK
63 // to use here without requiring an additional lib
64 #elif defined(OBJFORMAT_PEi386)
65 # include "linker/PEi386.h"
66 # include <windows.h>
67 #elif defined(OBJFORMAT_MACHO)
68 # include "linker/MachO.h"
69 # include <regex.h>
70 # include <mach/machine.h>
71 # include <mach-o/fat.h>
72 #endif
73
74 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
75 #define ALWAYS_PIC
76 #endif
77
78 #if defined(dragonfly_HOST_OS)
79 #include <sys/tls.h>
80 #endif
81 /*
82 Note [runtime-linker-support]
83 -----------------------------
84 When adding support for a new platform to the runtime linker please
85 update `$TOP/configure.ac` under heading `Does target have runtime
86 linker support?`.
87 */
88 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
89 This hashtable will contain information on all symbols
90 that we know of, however the .o they are in may not be loaded.
91
92 Until the ObjectCode the symbol belongs to is actually
93 loaded this symbol may be replaced. So do not rely on
94 addresses of unloaded symbols.
95
96 Note [runtime-linker-phases]
97 --------------------------------------
98 Broadly the behavior of the runtime linker can be
99 split into the following four phases:
100
101 - Indexing (e.g. ocVerifyImage and ocGetNames)
102 - Initialization (e.g. ocResolve and ocRunInit)
103 - Resolve (e.g. resolveObjs())
104 - Lookup (e.g. lookupSymbol)
105
106 This is to enable lazy loading of symbols. Eager loading is problematic
107 as it means that all symbols must be available, even those which we will
108 never use. This is especially painful on Windows, where the number of
109 libraries required to link things like mingwex grows to be quite high.
110
111 We proceed through these stages as follows,
112
113 * During Indexing we verify and open the ObjectCode and
114 perform a quick scan/indexing of the ObjectCode. All the work
115 required to actually load the ObjectCode is done.
116
117 All symbols from the ObjectCode are also inserted into
118 `symhash`, where possible duplicates are handled via the semantics
119 described in `ghciInsertSymbolTable`.
120
121 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
122 depending on whether they are an archive member or not.
123
124 * During initialization we load ObjectCode, perform relocations, execute
125 static constructors etc. This phase may trigger other ObjectCodes to
126 be loaded because of the calls to lookupSymbol.
127
128 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
129 previous status was `OBJECT_LOADED`.
130
131 * During resolve we attempt to resolve all the symbols needed for the
132 initial link. This essentially means, that for any ObjectCode given
133 directly to the command-line we perform lookupSymbols on the required
134 symbols. lookupSymbols may trigger the loading of additional ObjectCode
135 if required.
136
137 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
138 the previous status was `OBJECT_NEEDED`.
139
140 * lookupSymbols is used to lookup any symbols required, both during initial
141 link and during statement and expression compilations in the REPL.
142 Declaration of e.g. a foreign import, will eventually call lookupSymbol
143 which will either fail (symbol unknown) or succeed (and possibly trigger a
144 load).
145
146 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
147
148 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
149 by calling unloadObj and starting over.
150 When a new declaration or statement is performed ultimately lookupSymbol is called
151 without doing a re-link.
152
153 The goal of these different phases is to allow the linker to be able to perform
154 "lazy loading" of ObjectCode. The reason for this is that we want to only link
155 in symbols that are actually required for the link. This reduces:
156
157 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
158 then we don't need to load libB. This means the dependency chain for libraries
159 such as mingw32 and mingwex can be broken down.
160
161 2) The number of duplicate symbols, since now only symbols that are
162 true duplicates will display the error.
163 */
164 /*Str*/HashTable *symhash;
165
166 /* List of currently loaded objects */
167 ObjectCode *objects = NULL; /* initially empty */
168
169 /* List of objects that have been unloaded via unloadObj(), but are waiting
170 to be actually freed via checkUnload() */
171 ObjectCode *unloaded_objects = NULL; /* initially empty */
172
173 #ifdef THREADED_RTS
174 /* This protects all the Linker's global state except unloaded_objects */
175 Mutex linker_mutex;
176 /*
177 * This protects unloaded_objects. We have a separate mutex for this, because
178 * the GC needs to access unloaded_objects in checkUnload, while the linker only
179 * needs to access unloaded_objects in unloadObj(), so this allows most linker
180 * operations proceed concurrently with the GC.
181 */
182 Mutex linker_unloaded_mutex;
183 #endif
184
185 /* Generic wrapper function to try and Resolve and RunInit oc files */
186 int ocTryLoad( ObjectCode* oc );
187
188 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
189 * small memory model on this architecture (see gcc docs,
190 * -mcmodel=small).
191 *
192 * MAP_32BIT not available on OpenBSD/amd64
193 */
194 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
195 #define TRY_MAP_32BIT MAP_32BIT
196 #else
197 #define TRY_MAP_32BIT 0
198 #endif
199
200 /*
201 * Due to the small memory model (see above), on x86_64 we have to map
202 * all our non-PIC object files into the low 2Gb of the address space
203 * (why 2Gb and not 4Gb? Because all addresses must be reachable
204 * using a 32-bit signed PC-relative offset). On Linux we can do this
205 * using the MAP_32BIT flag to mmap(), however on other OSs
206 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
207 * can't do this. So on these systems, we have to pick a base address
208 * in the low 2Gb of the address space and try to allocate memory from
209 * there.
210 *
211 * We pick a default address based on the OS, but also make this
212 * configurable via an RTS flag (+RTS -xm)
213 */
214 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
215
216 #if defined(MAP_32BIT)
217 // Try to use MAP_32BIT
218 #define MMAP_32BIT_BASE_DEFAULT 0
219 #else
220 // A guess: 1Gb.
221 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
222 #endif
223
224 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
225 #endif
226
227 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
228 ObjectCode *owner)
229 {
230 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
231 if (!pinfo || owner != pinfo->owner) return;
232 removeStrHashTable(table, key, NULL);
233 stgFree(pinfo);
234 }
235
236 /* -----------------------------------------------------------------------------
237 * Insert symbols into hash tables, checking for duplicates.
238 *
239 * Returns: 0 on failure, nonzero on success
240 */
241 /*
242 Note [weak-symbols-support]
243 -------------------------------------
244 While ghciInsertSymbolTable does implement extensive
245 logic for weak symbol support, weak symbols are not currently
246 fully supported by the RTS. This code is mostly here for COMDAT
247 support which uses the weak symbols support.
248
249 Linking weak symbols defined purely in C code with other C code
250 should also work, probably. Observing weak symbols in Haskell
251 won't.
252
253 Some test have been written for weak symbols but have been disabled
254 mostly because it's unsure how the weak symbols support should look.
255 See Trac #11223
256 */
257 int ghciInsertSymbolTable(
258 pathchar* obj_name,
259 HashTable *table,
260 const SymbolName* key,
261 SymbolAddr* data,
262 HsBool weak,
263 ObjectCode *owner)
264 {
265 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
266 if (!pinfo) /* new entry */
267 {
268 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
269 pinfo->value = data;
270 pinfo->owner = owner;
271 pinfo->weak = weak;
272 insertStrHashTable(table, key, pinfo);
273 return 1;
274 }
275 else if (weak && data && pinfo->weak && !pinfo->value)
276 {
277 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
278 pinfo->value = data;
279 pinfo->owner = owner;
280 return 1;
281 }
282 else if (weak)
283 {
284 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
285 already know of another copy throw this one away.
286
287 or both weak symbols have a nonzero value. Keep the existing one.
288
289 This also preserves the semantics of linking against
290 the first symbol we find. */
291 }
292 else if (pinfo->weak && !weak) /* weak symbol is in the table */
293 {
294 /* override the weak definition with the non-weak one */
295 pinfo->value = data;
296 pinfo->owner = owner;
297 pinfo->weak = HS_BOOL_FALSE;
298 return 1;
299 }
300 else if ( pinfo->owner
301 && pinfo->owner->status != OBJECT_RESOLVED
302 && pinfo->owner->status != OBJECT_NEEDED)
303 {
304 /* If the other symbol hasn't been loaded or will be loaded and we want to
305 explicitly load the new one, we can just swap it out and load the one
306 that has been requested. If not, just keep the first one encountered.
307
308 Because the `symHash' table consists symbols we've also not loaded but
309 found during the initial scan this is safe to do. If however the existing
310 symbol has been loaded then it means we have a duplicate.
311
312 This is essentially emulating the behavior of a linker wherein it will always
313 link in object files that are .o file arguments, but only take object files
314 from archives as needed. */
315 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
316 pinfo->value = data;
317 pinfo->owner = owner;
318 pinfo->weak = weak;
319 }
320
321 return 1;
322 }
323 else if (pinfo->owner == owner)
324 {
325 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
326 return 1;
327 }
328 else if (owner && owner->status == OBJECT_LOADED)
329 {
330 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
331 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
332 call this function again to trigger the duplicate error. */
333 return 1;
334 }
335
336 pathchar* archiveName = NULL;
337 debugBelch(
338 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
339 " %s\n"
340 "whilst processing object file\n"
341 " %" PATH_FMT "\n"
342 "The symbol was previously defined in\n"
343 " %" PATH_FMT "\n"
344 "This could be caused by:\n"
345 " * Loading two different object files which export the same symbol\n"
346 " * Specifying the same object file twice on the GHCi command line\n"
347 " * An incorrect `package.conf' entry, causing some object to be\n"
348 " loaded twice.\n",
349 (char*)key,
350 obj_name,
351 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
352 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
353 : pinfo->owner->fileName
354 );
355
356 if (archiveName)
357 {
358 stgFree(archiveName);
359 archiveName = NULL;
360 }
361 return 0;
362 }
363
364 /* -----------------------------------------------------------------------------
365 * Looks up symbols into hash tables.
366 *
367 * Returns: 0 on failure and result is not set,
368 * nonzero on success and result set to nonzero pointer
369 */
370 HsBool ghciLookupSymbolInfo(HashTable *table,
371 const SymbolName* key, RtsSymbolInfo **result)
372 {
373 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
374 if (!pinfo) {
375 *result = NULL;
376 return HS_BOOL_FALSE;
377 }
378 if (pinfo->weak)
379 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
380 /* Once it's looked up, it can no longer be overridden */
381 pinfo->weak = HS_BOOL_FALSE;
382
383 *result = pinfo;
384 return HS_BOOL_TRUE;
385 }
386
387 /* -----------------------------------------------------------------------------
388 * initialize the object linker
389 */
390
391
392 static int linker_init_done = 0 ;
393
394 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
395 static void *dl_prog_handle;
396 static regex_t re_invalid;
397 static regex_t re_realso;
398 #ifdef THREADED_RTS
399 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
400 #endif
401 #endif
402
403 void initLinker (void)
404 {
405 // default to retaining CAFs for backwards compatibility. Most
406 // users will want initLinker_(0): otherwise unloadObj() will not
407 // be able to unload object files when they contain CAFs.
408 initLinker_(1);
409 }
410
411 void
412 initLinker_ (int retain_cafs)
413 {
414 RtsSymbolVal *sym;
415 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
416 int compileResult;
417 #endif
418
419 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
420
421 /* Make initLinker idempotent, so we can call it
422 before every relevant operation; that means we
423 don't need to initialise the linker separately */
424 if (linker_init_done == 1) {
425 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
426 return;
427 } else {
428 linker_init_done = 1;
429 }
430
431 objects = NULL;
432 unloaded_objects = NULL;
433
434 #if defined(THREADED_RTS)
435 initMutex(&linker_mutex);
436 initMutex(&linker_unloaded_mutex);
437 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
438 initMutex(&dl_mutex);
439 #endif
440 #endif
441
442 symhash = allocStrHashTable();
443
444 /* populate the symbol table with stuff from the RTS */
445 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
446 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
447 symhash, sym->lbl, sym->addr,
448 sym->weak, NULL)) {
449 barf("ghciInsertSymbolTable failed");
450 }
451 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
452 }
453 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
454 machoInitSymbolsWithoutUnderscore();
455 # endif
456 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
457 referenced from a statically linked module. We need to mimic this, but
458 we cannot use NULL because we use it to mean nonexistent symbols. So we
459 use an arbitrary (hopefully unique) address here.
460 */
461 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
462 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
463 barf("ghciInsertSymbolTable failed");
464 }
465
466 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
467 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
468 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
469 retain_cafs ? newRetainedCAF : newGCdCAF,
470 HS_BOOL_FALSE, NULL)) {
471 barf("ghciInsertSymbolTable failed");
472 }
473
474 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
475 # if defined(RTLD_DEFAULT)
476 dl_prog_handle = RTLD_DEFAULT;
477 # else
478 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
479 # endif /* RTLD_DEFAULT */
480
481 compileResult = regcomp(&re_invalid,
482 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
483 REG_EXTENDED);
484 if (compileResult != 0) {
485 barf("Compiling re_invalid failed");
486 }
487 compileResult = regcomp(&re_realso,
488 "(GROUP|INPUT) *\\( *([^ )]+)",
489 REG_EXTENDED);
490 if (compileResult != 0) {
491 barf("Compiling re_realso failed");
492 }
493 # endif
494
495 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
496 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
497 // User-override for mmap_32bit_base
498 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
499 }
500 #endif
501
502 if (RTS_LINKER_USE_MMAP)
503 m32_allocator_init();
504
505 #if defined(OBJFORMAT_PEi386)
506 initLinker_PEi386();
507 #endif
508
509 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
510 return;
511 }
512
513 void
514 exitLinker( void ) {
515 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
516 if (linker_init_done == 1) {
517 regfree(&re_invalid);
518 regfree(&re_realso);
519 #ifdef THREADED_RTS
520 closeMutex(&dl_mutex);
521 #endif
522 }
523 #endif
524 if (linker_init_done == 1) {
525 freeHashTable(symhash, free);
526 }
527 #ifdef THREADED_RTS
528 closeMutex(&linker_mutex);
529 #endif
530 }
531
532 /* -----------------------------------------------------------------------------
533 * Loading DLL or .so dynamic libraries
534 * -----------------------------------------------------------------------------
535 *
536 * Add a DLL from which symbols may be found. In the ELF case, just
537 * do RTLD_GLOBAL-style add, so no further messing around needs to
538 * happen in order that symbols in the loaded .so are findable --
539 * lookupSymbol() will subsequently see them by dlsym on the program's
540 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
541 *
542 * In the PEi386 case, open the DLLs and put handles to them in a
543 * linked list. When looking for a symbol, try all handles in the
544 * list. This means that we need to load even DLLs that are guaranteed
545 * to be in the ghc.exe image already, just so we can get a handle
546 * to give to loadSymbol, so that we can find the symbols. For such
547 * libraries, the LoadLibrary call should be a no-op except for returning
548 * the handle.
549 *
550 */
551
552 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
553
554 /* Suppose in ghci we load a temporary SO for a module containing
555 f = 1
556 and then modify the module, recompile, and load another temporary
557 SO with
558 f = 2
559 Then as we don't unload the first SO, dlsym will find the
560 f = 1
561 symbol whereas we want the
562 f = 2
563 symbol. We therefore need to keep our own SO handle list, and
564 try SOs in the right order. */
565
566 typedef
567 struct _OpenedSO {
568 struct _OpenedSO* next;
569 void *handle;
570 }
571 OpenedSO;
572
573 /* A list thereof. */
574 static OpenedSO* openedSOs = NULL;
575
576 static const char *
577 internal_dlopen(const char *dll_name)
578 {
579 OpenedSO* o_so;
580 void *hdl;
581 const char *errmsg;
582 char *errmsg_copy;
583
584 // omitted: RTLD_NOW
585 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
586 IF_DEBUG(linker,
587 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
588
589 //-------------- Begin critical section ------------------
590 // This critical section is necessary because dlerror() is not
591 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
592 // Also, the error message returned must be copied to preserve it
593 // (see POSIX also)
594
595 ACQUIRE_LOCK(&dl_mutex);
596 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
597
598 errmsg = NULL;
599 if (hdl == NULL) {
600 /* dlopen failed; return a ptr to the error msg. */
601 errmsg = dlerror();
602 if (errmsg == NULL) errmsg = "addDLL: unknown error";
603 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
604 strcpy(errmsg_copy, errmsg);
605 errmsg = errmsg_copy;
606 } else {
607 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
608 o_so->handle = hdl;
609 o_so->next = openedSOs;
610 openedSOs = o_so;
611 }
612
613 RELEASE_LOCK(&dl_mutex);
614 //--------------- End critical section -------------------
615
616 return errmsg;
617 }
618
619 /*
620 Note [RTLD_LOCAL]
621
622 In GHCi we want to be able to override previous .so's with newly
623 loaded .so's when we recompile something. This further implies that
624 when we look up a symbol in internal_dlsym() we have to iterate
625 through the loaded libraries (in order from most recently loaded to
626 oldest) looking up the symbol in each one until we find it.
627
628 However, this can cause problems for some symbols that are copied
629 by the linker into the executable image at runtime - see #8935 for a
630 lengthy discussion. To solve that problem we need to look up
631 symbols in the main executable *first*, before attempting to look
632 them up in the loaded .so's. But in order to make that work, we
633 have to always call dlopen with RTLD_LOCAL, so that the loaded
634 libraries don't populate the global symbol table.
635 */
636
637 static void *
638 internal_dlsym(const char *symbol) {
639 OpenedSO* o_so;
640 void *v;
641
642 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
643 ACQUIRE_LOCK(&dl_mutex);
644 dlerror();
645 // look in program first
646 v = dlsym(dl_prog_handle, symbol);
647 if (dlerror() == NULL) {
648 RELEASE_LOCK(&dl_mutex);
649 return v;
650 }
651
652 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
653 v = dlsym(o_so->handle, symbol);
654 if (dlerror() == NULL) {
655 RELEASE_LOCK(&dl_mutex);
656 return v;
657 }
658 }
659 RELEASE_LOCK(&dl_mutex);
660 return v;
661 }
662 # endif
663
664 const char *
665 addDLL( pathchar *dll_name )
666 {
667 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
668 /* ------------------- ELF DLL loader ------------------- */
669
670 #define NMATCH 5
671 regmatch_t match[NMATCH];
672 const char *errmsg;
673 FILE* fp;
674 size_t match_length;
675 #define MAXLINE 1000
676 char line[MAXLINE];
677 int result;
678
679 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
680 errmsg = internal_dlopen(dll_name);
681
682 if (errmsg == NULL) {
683 return NULL;
684 }
685
686 // GHC Trac ticket #2615
687 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
688 // contain linker scripts rather than ELF-format object code. This
689 // code handles the situation by recognizing the real object code
690 // file name given in the linker script.
691 //
692 // If an "invalid ELF header" error occurs, it is assumed that the
693 // .so file contains a linker script instead of ELF object code.
694 // In this case, the code looks for the GROUP ( ... ) linker
695 // directive. If one is found, the first file name inside the
696 // parentheses is treated as the name of a dynamic library and the
697 // code attempts to dlopen that file. If this is also unsuccessful,
698 // an error message is returned.
699
700 // see if the error message is due to an invalid ELF header
701 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
702 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
703 IF_DEBUG(linker, debugBelch("result = %i\n", result));
704 if (result == 0) {
705 // success -- try to read the named file as a linker script
706 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
707 MAXLINE-1);
708 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
709 line[match_length] = '\0'; // make sure string is null-terminated
710 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
711 if ((fp = fopen(line, "r")) == NULL) {
712 return errmsg; // return original error if open fails
713 }
714 // try to find a GROUP or INPUT ( ... ) command
715 while (fgets(line, MAXLINE, fp) != NULL) {
716 IF_DEBUG(linker, debugBelch("input line = %s", line));
717 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
718 // success -- try to dlopen the first named file
719 IF_DEBUG(linker, debugBelch("match%s\n",""));
720 line[match[2].rm_eo] = '\0';
721 stgFree((void*)errmsg); // Free old message before creating new one
722 errmsg = internal_dlopen(line+match[2].rm_so);
723 break;
724 }
725 // if control reaches here, no GROUP or INPUT ( ... ) directive
726 // was found and the original error message is returned to the
727 // caller
728 }
729 fclose(fp);
730 }
731 return errmsg;
732
733 # elif defined(OBJFORMAT_PEi386)
734 return addDLL_PEi386(dll_name);
735
736 # else
737 barf("addDLL: not implemented on this platform");
738 # endif
739 }
740
741 /* -----------------------------------------------------------------------------
742 * Searches the system directories to determine if there is a system DLL that
743 * satisfies the given name. This prevent GHCi from linking against a static
744 * library if a DLL is available.
745 *
746 * Returns: NULL on failure or no DLL found, else the full path to the DLL
747 * that can be loaded.
748 */
749 pathchar* findSystemLibrary(pathchar* dll_name)
750 {
751 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));
752
753 #if defined(OBJFORMAT_PEi386)
754 return findSystemLibrary_PEi386(dll_name);
755 #else
756 (void)(dll_name); // Function not implemented for other platforms.
757 return NULL;
758 #endif
759 }
760
761 /* -----------------------------------------------------------------------------
762 * Emits a warning determining that the system is missing a required security
763 * update that we need to get access to the proper APIs
764 */
765 void warnMissingKBLibraryPaths( void )
766 {
767 static HsBool missing_update_warn = HS_BOOL_FALSE;
768 if (!missing_update_warn) {
769 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
770 missing_update_warn = HS_BOOL_TRUE;
771 }
772 }
773
774 /* -----------------------------------------------------------------------------
775 * appends a directory to the process DLL Load path so LoadLibrary can find it
776 *
777 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
778 * restore the search path to what it was before this call.
779 */
780 HsPtr addLibrarySearchPath(pathchar* dll_path)
781 {
782 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));
783
784 #if defined(OBJFORMAT_PEi386)
785 return addLibrarySearchPath_PEi386(dll_path);
786 #else
787 (void)(dll_path); // Function not implemented for other platforms.
788 return NULL;
789 #endif
790 }
791
792 /* -----------------------------------------------------------------------------
793 * removes a directory from the process DLL Load path
794 *
795 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
796 */
797 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
798 {
799 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n", dll_path_index));
800
801 #if defined(OBJFORMAT_PEi386)
802 return removeLibrarySearchPath_PEi386(dll_path_index);
803 #else
804 (void)(dll_path_index); // Function not implemented for other platforms.
805 return HS_BOOL_FALSE;
806 #endif
807 }
808
809 /* -----------------------------------------------------------------------------
810 * insert a symbol in the hash table
811 *
812 * Returns: 0 on failure, nozero on success
813 */
814 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
815 {
816 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
817 }
818
819 /* -----------------------------------------------------------------------------
820 * lookup a symbol in the hash table
821 */
822 #if defined(OBJFORMAT_PEi386)
823 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
824 {
825 return lookupSymbol_PEi386(lbl);
826 }
827
828 #else
829
830 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
831 {
832 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
833
834 ASSERT(symhash != NULL);
835 RtsSymbolInfo *pinfo;
836
837 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
838 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
839
840 # if defined(OBJFORMAT_ELF)
841 return internal_dlsym(lbl);
842 # elif defined(OBJFORMAT_MACHO)
843
844 /* HACK: On OS X, all symbols are prefixed with an underscore.
845 However, dlsym wants us to omit the leading underscore from the
846 symbol name -- the dlsym routine puts it back on before searching
847 for the symbol. For now, we simply strip it off here (and ONLY
848 here).
849 */
850 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
851 ASSERT(lbl[0] == '_');
852 return internal_dlsym(lbl + 1);
853
854 # else
855 ASSERT(2+2 == 5);
856 return NULL;
857 # endif
858 } else {
859 return loadSymbol(lbl, pinfo);
860 }
861 }
862 #endif /* OBJFORMAT_PEi386 */
863
864 /*
865 * Load and relocate the object code for a symbol as necessary.
866 * Symbol name only used for diagnostics output.
867 */
868 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
869 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, pinfo->value));
870 ObjectCode* oc = pinfo->owner;
871
872 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
873 See Note [runtime-linker-phases] */
874 if (oc && lbl && oc->status == OBJECT_LOADED) {
875 oc->status = OBJECT_NEEDED;
876 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand loading symbol '%s'\n", lbl));
877 int r = ocTryLoad(oc);
878 if (!r) {
879 return NULL;
880 }
881
882 #ifdef PROFILING
883 // collect any new cost centres & CCSs
884 // that were defined during runInit
885 initProfiling2();
886 #endif
887 }
888
889 return pinfo->value;
890 }
891
892 SymbolAddr* lookupSymbol( SymbolName* lbl )
893 {
894 ACQUIRE_LOCK(&linker_mutex);
895 SymbolAddr* r = lookupSymbol_(lbl);
896 if (!r) {
897 errorBelch("^^ Could not load '%s', dependency unresolved. See top entry above.\n", lbl);
898 fflush(stderr);
899 }
900 RELEASE_LOCK(&linker_mutex);
901 return r;
902 }
903
904 /* -----------------------------------------------------------------------------
905 Create a StablePtr for a foreign export. This is normally called by
906 a C function with __attribute__((constructor)), which is generated
907 by GHC and linked into the module.
908
909 If the object code is being loaded dynamically, then we remember
910 which StablePtrs were allocated by the constructors and free them
911 again in unloadObj().
912 -------------------------------------------------------------------------- */
913
914 static ObjectCode *loading_obj = NULL;
915
916 StgStablePtr foreignExportStablePtr (StgPtr p)
917 {
918 ForeignExportStablePtr *fe_sptr;
919 StgStablePtr *sptr;
920
921 sptr = getStablePtr(p);
922
923 if (loading_obj != NULL) {
924 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
925 "foreignExportStablePtr");
926 fe_sptr->stable_ptr = sptr;
927 fe_sptr->next = loading_obj->stable_ptrs;
928 loading_obj->stable_ptrs = fe_sptr;
929 }
930
931 return sptr;
932 }
933
934
935 /* -----------------------------------------------------------------------------
936 * Debugging aid: look in GHCi's object symbol tables for symbols
937 * within DELTA bytes of the specified address, and show their names.
938 */
939 #ifdef DEBUG
940 void ghci_enquire ( SymbolAddr* addr );
941
942 void ghci_enquire(SymbolAddr* addr)
943 {
944 int i;
945 SymbolName* sym;
946 RtsSymbolInfo* a;
947 const int DELTA = 64;
948 ObjectCode* oc;
949
950 for (oc = objects; oc; oc = oc->next) {
951 for (i = 0; i < oc->n_symbols; i++) {
952 sym = oc->symbols[i];
953 if (sym == NULL) continue;
954 a = NULL;
955 if (a == NULL) {
956 ghciLookupSymbolInfo(symhash, sym, &a);
957 }
958 if (a == NULL) {
959 // debugBelch("ghci_enquire: can't find %s\n", sym);
960 }
961 else if ( a->value
962 && (char*)addr-DELTA <= (char*)a->value
963 && (char*)a->value <= (char*)addr+DELTA) {
964 debugBelch("%p + %3d == `%s'\n", addr, (int)((char*)a->value - (char*)addr), sym);
965 }
966 }
967 }
968 }
969 #endif
970
971 #if RTS_LINKER_USE_MMAP
972 //
973 // Returns NULL on failure.
974 //
975 void *
976 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
977 {
978 void *map_addr = NULL;
979 void *result;
980 size_t size;
981 static uint32_t fixed = 0;
982
983 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
984 size = roundUpToPage(bytes);
985
986 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
987 mmap_again:
988
989 if (mmap_32bit_base != 0) {
990 map_addr = mmap_32bit_base;
991 }
992 #endif
993
994 IF_DEBUG(linker,
995 debugBelch("mmapForLinker: \tprotection %#0x\n",
996 PROT_EXEC | PROT_READ | PROT_WRITE));
997 IF_DEBUG(linker,
998 debugBelch("mmapForLinker: \tflags %#0x\n",
999 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1000
1001 result = mmap(map_addr, size,
1002 PROT_EXEC|PROT_READ|PROT_WRITE,
1003 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1004
1005 if (result == MAP_FAILED) {
1006 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1007 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1008 return NULL;
1009 }
1010
1011 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1012 if (mmap_32bit_base != 0) {
1013 if (result == map_addr) {
1014 mmap_32bit_base = (StgWord8*)map_addr + size;
1015 } else {
1016 if ((W_)result > 0x80000000) {
1017 // oops, we were given memory over 2Gb
1018 munmap(result,size);
1019 #if defined(freebsd_HOST_OS) || \
1020 defined(kfreebsdgnu_HOST_OS) || \
1021 defined(dragonfly_HOST_OS)
1022 // Some platforms require MAP_FIXED. This is normally
1023 // a bad idea, because MAP_FIXED will overwrite
1024 // existing mappings.
1025 fixed = MAP_FIXED;
1026 goto mmap_again;
1027 #else
1028 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1029 "asked for %lu bytes at %p. "
1030 "Try specifying an address with +RTS -xm<addr> -RTS",
1031 size, map_addr);
1032 return NULL;
1033 #endif
1034 } else {
1035 // hmm, we were given memory somewhere else, but it's
1036 // still under 2Gb so we can use it. Next time, ask
1037 // for memory right after the place we just got some
1038 mmap_32bit_base = (StgWord8*)result + size;
1039 }
1040 }
1041 } else {
1042 if ((W_)result > 0x80000000) {
1043 // oops, we were given memory over 2Gb
1044 // ... try allocating memory somewhere else?;
1045 debugTrace(DEBUG_linker,
1046 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1047 bytes, result);
1048 munmap(result, size);
1049
1050 // Set a base address and try again... (guess: 1Gb)
1051 mmap_32bit_base = (void*)0x40000000;
1052 goto mmap_again;
1053 }
1054 }
1055 #endif
1056
1057 IF_DEBUG(linker,
1058 debugBelch("mmapForLinker: mapped %" FMT_Word
1059 " bytes starting at %p\n", (W_)size, result));
1060 IF_DEBUG(linker,
1061 debugBelch("mmapForLinker: done\n"));
1062
1063 return result;
1064 }
1065 #endif
1066
1067 /*
1068 * Remove symbols from the symbol table, and free oc->symbols.
1069 * This operation is idempotent.
1070 */
1071 static void removeOcSymbols (ObjectCode *oc)
1072 {
1073 if (oc->symbols == NULL) return;
1074
1075 // Remove all the mappings for the symbols within this object..
1076 int i;
1077 for (i = 0; i < oc->n_symbols; i++) {
1078 if (oc->symbols[i] != NULL) {
1079 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1080 }
1081 }
1082
1083 stgFree(oc->symbols);
1084 oc->symbols = NULL;
1085 }
1086
1087 /*
1088 * Release StablePtrs and free oc->stable_ptrs.
1089 * This operation is idempotent.
1090 */
1091 static void freeOcStablePtrs (ObjectCode *oc)
1092 {
1093 // Release any StablePtrs that were created when this
1094 // object module was initialized.
1095 ForeignExportStablePtr *fe_ptr, *next;
1096
1097 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1098 next = fe_ptr->next;
1099 freeStablePtr(fe_ptr->stable_ptr);
1100 stgFree(fe_ptr);
1101 }
1102 oc->stable_ptrs = NULL;
1103 }
1104
1105 static void
1106 freePreloadObjectFile (ObjectCode *oc)
1107 {
1108 #if defined(mingw32_HOST_OS)
1109 freePreloadObjectFile_PEi386(oc);
1110 #else
1111
1112 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1113 munmap(oc->image, oc->fileSize);
1114 }
1115 else {
1116 stgFree(oc->image);
1117 }
1118
1119 #endif
1120
1121 oc->image = NULL;
1122 oc->fileSize = 0;
1123 }
1124
1125 /*
1126 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1127 * the GC when a previously unloaded ObjectCode has been determined to be
1128 * unused, and when an error occurs during loadObj().
1129 */
1130 void freeObjectCode (ObjectCode *oc)
1131 {
1132 freePreloadObjectFile(oc);
1133
1134 if (oc->symbols != NULL) {
1135 stgFree(oc->symbols);
1136 oc->symbols = NULL;
1137 }
1138
1139 if (oc->extraInfos != NULL) {
1140 freeHashTable(oc->extraInfos, NULL);
1141 oc->extraInfos = NULL;
1142 }
1143
1144 if (oc->sections != NULL) {
1145 int i;
1146 for (i=0; i < oc->n_sections; i++) {
1147 if (oc->sections[i].start != NULL) {
1148 switch(oc->sections[i].alloc){
1149 #if RTS_LINKER_USE_MMAP
1150 case SECTION_MMAP:
1151 munmap(oc->sections[i].mapped_start,
1152 oc->sections[i].mapped_size);
1153 break;
1154 case SECTION_M32:
1155 m32_free(oc->sections[i].start,
1156 oc->sections[i].size);
1157 break;
1158 #endif
1159 case SECTION_MALLOC:
1160 stgFree(oc->sections[i].start);
1161 break;
1162 default:
1163 break;
1164 }
1165 }
1166 if (oc->sections[i].info) {
1167 stgFree(oc->sections[i].info);
1168 }
1169 }
1170 stgFree(oc->sections);
1171 }
1172
1173 freeProddableBlocks(oc);
1174
1175 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1176 * alongside the image, so we don't need to free. */
1177 #if defined(NEED_SYMBOL_EXTRAS) && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
1178 if (RTS_LINKER_USE_MMAP) {
1179 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1180 m32_free(oc->symbol_extras,
1181 sizeof(SymbolExtra) * oc->n_symbol_extras);
1182 }
1183 }
1184 else {
1185 stgFree(oc->symbol_extras);
1186 }
1187 #endif
1188
1189 #if defined(OBJECTFORMAT_MACHO)
1190 ocDeinit_MachO(oc);
1191 #endif
1192
1193 stgFree(oc->fileName);
1194 stgFree(oc->archiveMemberName);
1195
1196 stgFree(oc);
1197 }
1198
1199 /* -----------------------------------------------------------------------------
1200 * Sets the initial status of a fresh ObjectCode
1201 */
1202 static void setOcInitialStatus(ObjectCode* oc) {
1203 if (oc->archiveMemberName == NULL) {
1204 oc->status = OBJECT_NEEDED;
1205 } else {
1206 oc->status = OBJECT_LOADED;
1207 }
1208 }
1209
1210 ObjectCode*
1211 mkOc( pathchar *path, char *image, int imageSize,
1212 bool mapped, char *archiveMemberName, int misalignment ) {
1213 ObjectCode* oc;
1214
1215 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1216 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1217
1218 oc->info = NULL;
1219
1220 # if defined(OBJFORMAT_ELF)
1221 oc->formatName = "ELF";
1222 # elif defined(OBJFORMAT_PEi386)
1223 oc->formatName = "PEi386";
1224 # elif defined(OBJFORMAT_MACHO)
1225 oc->formatName = "Mach-O";
1226 # else
1227 stgFree(oc);
1228 barf("loadObj: not implemented on this platform");
1229 # endif
1230
1231 oc->image = image;
1232 oc->fileName = pathdup(path);
1233
1234 if (archiveMemberName) {
1235 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1236 strcpy(oc->archiveMemberName, archiveMemberName);
1237 } else {
1238 oc->archiveMemberName = NULL;
1239 }
1240
1241 setOcInitialStatus( oc );
1242
1243 oc->fileSize = imageSize;
1244 oc->symbols = NULL;
1245 oc->n_sections = 0;
1246 oc->sections = NULL;
1247 oc->proddables = NULL;
1248 oc->stable_ptrs = NULL;
1249 #if defined(NEED_SYMBOL_EXTRAS)
1250 oc->symbol_extras = NULL;
1251 #endif
1252 oc->imageMapped = mapped;
1253
1254 oc->misalignment = misalignment;
1255 oc->extraInfos = NULL;
1256
1257 /* chain it onto the list of objects */
1258 oc->next = NULL;
1259
1260 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1261 return oc;
1262 }
1263
1264 /* -----------------------------------------------------------------------------
1265 * Check if an object or archive is already loaded.
1266 *
1267 * Returns: 1 if the path is already loaded, 0 otherwise.
1268 */
1269 HsInt
1270 isAlreadyLoaded( pathchar *path )
1271 {
1272 ObjectCode *o;
1273 for (o = objects; o; o = o->next) {
1274 if (0 == pathcmp(o->fileName, path)) {
1275 return 1; /* already loaded */
1276 }
1277 }
1278 return 0; /* not loaded yet */
1279 }
1280
1281 //
1282 // Load the object file into memory. This will not be its final resting place,
1283 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
1284 // address space, properly aligned.
1285 //
1286 static ObjectCode *
1287 preloadObjectFile (pathchar *path)
1288 {
1289 int fileSize;
1290 struct_stat st;
1291 int r;
1292 void *image;
1293 ObjectCode *oc;
1294 int misalignment = 0;
1295
1296 r = pathstat(path, &st);
1297 if (r == -1) {
1298 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
1299 return NULL;
1300 }
1301
1302 fileSize = st.st_size;
1303
1304 #if RTS_LINKER_USE_MMAP
1305 int fd;
1306
1307 /* On many architectures malloc'd memory isn't executable, so we need to use
1308 * mmap. */
1309
1310 #if defined(openbsd_HOST_OS)
1311 fd = open(path, O_RDONLY, S_IRUSR);
1312 #else
1313 fd = open(path, O_RDONLY);
1314 #endif
1315 if (fd == -1) {
1316 errorBelch("loadObj: can't open %s", path);
1317 return NULL;
1318 }
1319
1320 /* iOS does not permit to mmap with r+w+x, however while the comment for
1321 * this function says this is not the final resting place, for some
1322 * architectures / hosts (at least mach-o non-iOS -- see ocGetNames_MachO)
1323 * the image mmaped here in fact ends up being the final resting place for
1324 * the sections. And hence we need to leave r+w+x here for other hosts
1325 * until all hosts have been made aware of the initial image being r+w only.
1326 *
1327 * See also the misalignment logic for darwin below.
1328 */
1329 #if defined(ios_HOST_OS)
1330 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
1331 #else
1332 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
1333 MAP_PRIVATE, fd, 0);
1334 #endif
1335
1336 if (image == MAP_FAILED) {
1337 errorBelch("mmap: failed. errno = %d", errno);
1338 }
1339 // not 32-bit yet, we'll remap later
1340 close(fd);
1341
1342 #else /* !RTS_LINKER_USE_MMAP */
1343 FILE *f;
1344
1345 /* load the image into memory */
1346 /* coverity[toctou] */
1347 f = pathopen(path, WSTR("rb"));
1348 if (!f) {
1349 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
1350 return NULL;
1351 }
1352
1353 # if defined(mingw32_HOST_OS)
1354
1355 // TODO: We would like to use allocateExec here, but allocateExec
1356 // cannot currently allocate blocks large enough.
1357 image = allocateImageAndTrampolines(path, "itself", f, fileSize,
1358 HS_BOOL_FALSE);
1359 if (image == NULL) {
1360 fclose(f);
1361 return NULL;
1362 }
1363
1364 # elif defined(darwin_HOST_OS)
1365
1366 // In a Mach-O .o file, all sections can and will be misaligned
1367 // if the total size of the headers is not a multiple of the
1368 // desired alignment. This is fine for .o files that only serve
1369 // as input for the static linker, but it's not fine for us,
1370 // as SSE (used by gcc for floating point) and Altivec require
1371 // 16-byte alignment.
1372 // We calculate the correct alignment from the header before
1373 // reading the file, and then we misalign image on purpose so
1374 // that the actual sections end up aligned again.
1375 misalignment = machoGetMisalignment(f);
1376 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
1377 image += misalignment;
1378
1379 # else /* !defined(mingw32_HOST_OS) */
1380
1381 image = stgMallocBytes(fileSize, "loadObj(image)");
1382
1383 #endif
1384
1385 int n;
1386 n = fread ( image, 1, fileSize, f );
1387 fclose(f);
1388 if (n != fileSize) {
1389 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
1390 stgFree(image);
1391 return NULL;
1392 }
1393
1394 #endif /* RTS_LINKER_USE_MMAP */
1395
1396 oc = mkOc(path, image, fileSize, true, NULL, misalignment);
1397
1398 #ifdef OBJFORMAT_MACHO
1399 if (ocVerifyImage_MachO( oc ))
1400 ocInit_MachO( oc );
1401 #endif
1402
1403 return oc;
1404 }
1405
1406 /* -----------------------------------------------------------------------------
1407 * Load an obj (populate the global symbol table, but don't resolve yet)
1408 *
1409 * Returns: 1 if ok, 0 on error.
1410 */
1411 static HsInt loadObj_ (pathchar *path)
1412 {
1413 ObjectCode* oc;
1414 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
1415
1416 /* debugBelch("loadObj %s\n", path ); */
1417
1418 /* Check that we haven't already loaded this object.
1419 Ignore requests to load multiple times */
1420
1421 if (isAlreadyLoaded(path)) {
1422 IF_DEBUG(linker,
1423 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1424 return 1; /* success */
1425 }
1426
1427 oc = preloadObjectFile(path);
1428 if (oc == NULL) return 0;
1429
1430 if (! loadOc(oc)) {
1431 // failed; free everything we've allocated
1432 removeOcSymbols(oc);
1433 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
1434 freeObjectCode(oc);
1435 return 0;
1436 }
1437
1438 oc->next = objects;
1439 objects = oc;
1440 return 1;
1441 }
1442
1443 HsInt loadObj (pathchar *path)
1444 {
1445 ACQUIRE_LOCK(&linker_mutex);
1446 HsInt r = loadObj_(path);
1447 RELEASE_LOCK(&linker_mutex);
1448 return r;
1449 }
1450
1451 HsInt loadOc (ObjectCode* oc)
1452 {
1453 int r;
1454
1455 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
1456
1457 /* verify the in-memory image */
1458 # if defined(OBJFORMAT_ELF)
1459 r = ocVerifyImage_ELF ( oc );
1460 # elif defined(OBJFORMAT_PEi386)
1461 r = ocVerifyImage_PEi386 ( oc );
1462 # elif defined(OBJFORMAT_MACHO)
1463 r = ocVerifyImage_MachO ( oc );
1464 # else
1465 barf("loadObj: no verify method");
1466 # endif
1467 if (!r) {
1468 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
1469 return r;
1470 }
1471
1472 #if defined(NEED_SYMBOL_EXTRAS)
1473 # if defined(OBJFORMAT_MACHO)
1474 r = ocAllocateSymbolExtras_MachO ( oc );
1475 if (!r) {
1476 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
1477 return r;
1478 }
1479 # elif defined(OBJFORMAT_ELF)
1480 r = ocAllocateSymbolExtras_ELF ( oc );
1481 if (!r) {
1482 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
1483 return r;
1484 }
1485 # elif defined(OBJFORMAT_PEi386)
1486 ocAllocateSymbolExtras_PEi386 ( oc );
1487 # endif
1488 #endif
1489
1490 /* build the symbol list for this image */
1491 # if defined(OBJFORMAT_ELF)
1492 r = ocGetNames_ELF ( oc );
1493 # elif defined(OBJFORMAT_PEi386)
1494 r = ocGetNames_PEi386 ( oc );
1495 # elif defined(OBJFORMAT_MACHO)
1496 r = ocGetNames_MachO ( oc );
1497 # else
1498 barf("loadObj: no getNames method");
1499 # endif
1500 if (!r) {
1501 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
1502 return r;
1503 }
1504
1505 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
1506 setOcInitialStatus( oc );
1507 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
1508
1509 return 1;
1510 }
1511
1512 /* -----------------------------------------------------------------------------
1513 * try to load and initialize an ObjectCode into memory
1514 *
1515 * Returns: 1 if ok, 0 on error.
1516 */
1517 int ocTryLoad (ObjectCode* oc) {
1518 int r;
1519
1520 if (oc->status != OBJECT_NEEDED) {
1521 return 1;
1522 }
1523
1524 /* Check for duplicate symbols by looking into `symhash`.
1525 Duplicate symbols are any symbols which exist
1526 in different ObjectCodes that have both been loaded, or
1527 are to be loaded by this call.
1528
1529 This call is intended to have no side-effects when a non-duplicate
1530 symbol is re-inserted.
1531
1532 We set the Address to NULL since that is not used to distinguish
1533 symbols. Duplicate symbols are distinguished by name and oc.
1534 */
1535 int x;
1536 SymbolName* symbol;
1537 for (x = 0; x < oc->n_symbols; x++) {
1538 symbol = oc->symbols[x];
1539 if ( symbol
1540 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL, isSymbolWeak(oc, symbol), oc)) {
1541 return 0;
1542 }
1543 }
1544
1545 # if defined(OBJFORMAT_ELF)
1546 r = ocResolve_ELF ( oc );
1547 # elif defined(OBJFORMAT_PEi386)
1548 r = ocResolve_PEi386 ( oc );
1549 # elif defined(OBJFORMAT_MACHO)
1550 r = ocResolve_MachO ( oc );
1551 # else
1552 barf("ocTryLoad: not implemented on this platform");
1553 # endif
1554 if (!r) { return r; }
1555
1556 // run init/init_array/ctors/mod_init_func
1557
1558 loading_obj = oc; // tells foreignExportStablePtr what to do
1559 #if defined(OBJFORMAT_ELF)
1560 r = ocRunInit_ELF ( oc );
1561 #elif defined(OBJFORMAT_PEi386)
1562 r = ocRunInit_PEi386 ( oc );
1563 #elif defined(OBJFORMAT_MACHO)
1564 r = ocRunInit_MachO ( oc );
1565 #else
1566 barf("ocTryLoad: initializers not implemented on this platform");
1567 #endif
1568 loading_obj = NULL;
1569
1570 if (!r) { return r; }
1571
1572 oc->status = OBJECT_RESOLVED;
1573
1574 return 1;
1575 }
1576
1577 /* -----------------------------------------------------------------------------
1578 * resolve all the currently unlinked objects in memory
1579 *
1580 * Returns: 1 if ok, 0 on error.
1581 */
1582 static HsInt resolveObjs_ (void)
1583 {
1584 ObjectCode *oc;
1585 int r;
1586
1587 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
1588
1589 for (oc = objects; oc; oc = oc->next) {
1590 r = ocTryLoad(oc);
1591 if (!r)
1592 {
1593 return r;
1594 }
1595 }
1596
1597 #ifdef PROFILING
1598 // collect any new cost centres & CCSs that were defined during runInit
1599 initProfiling2();
1600 #endif
1601
1602 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
1603 return 1;
1604 }
1605
1606 HsInt resolveObjs (void)
1607 {
1608 ACQUIRE_LOCK(&linker_mutex);
1609 HsInt r = resolveObjs_();
1610 RELEASE_LOCK(&linker_mutex);
1611 return r;
1612 }
1613
1614 /* -----------------------------------------------------------------------------
1615 * delete an object from the pool
1616 */
1617 static HsInt unloadObj_ (pathchar *path, bool just_purge)
1618 {
1619 ObjectCode *oc, *prev, *next;
1620 HsBool unloadedAnyObj = HS_BOOL_FALSE;
1621
1622 ASSERT(symhash != NULL);
1623 ASSERT(objects != NULL);
1624
1625 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
1626
1627 prev = NULL;
1628 for (oc = objects; oc; oc = next) {
1629 next = oc->next; // oc might be freed
1630
1631 if (!pathcmp(oc->fileName,path)) {
1632
1633 // these are both idempotent, so in just_purge mode we can
1634 // later call unloadObj() to really unload the object.
1635 removeOcSymbols(oc);
1636 freeOcStablePtrs(oc);
1637
1638 if (!just_purge) {
1639 if (prev == NULL) {
1640 objects = oc->next;
1641 } else {
1642 prev->next = oc->next;
1643 }
1644 ACQUIRE_LOCK(&linker_unloaded_mutex);
1645 oc->next = unloaded_objects;
1646 unloaded_objects = oc;
1647 oc->status = OBJECT_UNLOADED;
1648 RELEASE_LOCK(&linker_unloaded_mutex);
1649 // We do not own oc any more; it can be released at any time by
1650 // the GC in checkUnload().
1651 } else {
1652 prev = oc;
1653 }
1654
1655 /* This could be a member of an archive so continue
1656 * unloading other members. */
1657 unloadedAnyObj = HS_BOOL_TRUE;
1658 } else {
1659 prev = oc;
1660 }
1661 }
1662
1663 if (unloadedAnyObj) {
1664 return 1;
1665 }
1666 else {
1667 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
1668 return 0;
1669 }
1670 }
1671
1672 HsInt unloadObj (pathchar *path)
1673 {
1674 ACQUIRE_LOCK(&linker_mutex);
1675 HsInt r = unloadObj_(path, false);
1676 RELEASE_LOCK(&linker_mutex);
1677 return r;
1678 }
1679
1680 HsInt purgeObj (pathchar *path)
1681 {
1682 ACQUIRE_LOCK(&linker_mutex);
1683 HsInt r = unloadObj_(path, true);
1684 RELEASE_LOCK(&linker_mutex);
1685 return r;
1686 }
1687
1688 /* -----------------------------------------------------------------------------
1689 * Sanity checking. For each ObjectCode, maintain a list of address ranges
1690 * which may be prodded during relocation, and abort if we try and write
1691 * outside any of these.
1692 */
1693 void
1694 addProddableBlock ( ObjectCode* oc, void* start, int size )
1695 {
1696 ProddableBlock* pb
1697 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
1698
1699 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
1700 ASSERT(size > 0);
1701 pb->start = start;
1702 pb->size = size;
1703 pb->next = oc->proddables;
1704 oc->proddables = pb;
1705 }
1706
1707 void
1708 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
1709 {
1710 ProddableBlock* pb;
1711
1712 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
1713 char* s = (char*)(pb->start);
1714 char* e = s + pb->size;
1715 char* a = (char*)addr;
1716 if (a >= s && (a+size) <= e) return;
1717 }
1718 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
1719 }
1720
1721 void freeProddableBlocks (ObjectCode *oc)
1722 {
1723 ProddableBlock *pb, *next;
1724
1725 for (pb = oc->proddables; pb != NULL; pb = next) {
1726 next = pb->next;
1727 stgFree(pb);
1728 }
1729 oc->proddables = NULL;
1730 }
1731
1732 /* -----------------------------------------------------------------------------
1733 * Section management.
1734 */
1735 void
1736 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
1737 void* start, StgWord size, StgWord mapped_offset,
1738 void* mapped_start, StgWord mapped_size)
1739 {
1740 s->start = start; /* actual start of section in memory */
1741 s->size = size; /* actual size of section in memory */
1742 s->kind = kind;
1743 s->alloc = alloc;
1744 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
1745
1746 s->mapped_start = mapped_start; /* start of mmap() block */
1747 s->mapped_size = mapped_size; /* size of mmap() block */
1748
1749 s->info = (SectionFormatInfo*)stgCallocBytes(1, sizeof(SectionFormatInfo),
1750 "addSection(SectionFormatInfo)");
1751
1752 IF_DEBUG(linker,
1753 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
1754 start, (void*)((StgWord)start + size),
1755 size, kind ));
1756 }
1757