UNREG: use __builtin___clear_cache where available
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "linker/CacheFlush.h"
32 #include "linker/SymbolExtras.h"
33 #include "PathUtils.h"
34
35 #if !defined(mingw32_HOST_OS)
36 #include "posix/Signals.h"
37 #endif
38
39 // get protos for is*()
40 #include <ctype.h>
41
42 #if defined(HAVE_SYS_TYPES_H)
43 #include <sys/types.h>
44 #endif
45
46 #include <inttypes.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <assert.h>
51
52 #if defined(HAVE_SYS_STAT_H)
53 #include <sys/stat.h>
54 #endif
55
56 #if defined(HAVE_DLFCN_H)
57 #include <dlfcn.h>
58 #endif
59
60 #if defined(OBJFORMAT_ELF)
61 # include "linker/Elf.h"
62 # include <regex.h> // regex is already used by dlopen() so this is OK
63 // to use here without requiring an additional lib
64 #elif defined(OBJFORMAT_PEi386)
65 # include "linker/PEi386.h"
66 # include <windows.h>
67 #elif defined(OBJFORMAT_MACHO)
68 # include "linker/MachO.h"
69 # include <regex.h>
70 # include <mach/machine.h>
71 # include <mach-o/fat.h>
72 #endif
73
74 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
75 #define ALWAYS_PIC
76 #endif
77
78 #if defined(dragonfly_HOST_OS)
79 #include <sys/tls.h>
80 #endif
81 /*
82 Note [runtime-linker-support]
83 -----------------------------
84 When adding support for a new platform to the runtime linker please
85 update `$TOP/configure.ac` under heading `Does target have runtime
86 linker support?`.
87 */
88 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
89 This hashtable will contain information on all symbols
90 that we know of, however the .o they are in may not be loaded.
91
92 Until the ObjectCode the symbol belongs to is actually
93 loaded this symbol may be replaced. So do not rely on
94 addresses of unloaded symbols.
95
96 Note [runtime-linker-phases]
97 --------------------------------------
98 Broadly the behavior of the runtime linker can be
99 split into the following four phases:
100
101 - Indexing (e.g. ocVerifyImage and ocGetNames)
102 - Initialization (e.g. ocResolve and ocRunInit)
103 - Resolve (e.g. resolveObjs())
104 - Lookup (e.g. lookupSymbol)
105
106 This is to enable lazy loading of symbols. Eager loading is problematic
107 as it means that all symbols must be available, even those which we will
108 never use. This is especially painful on Windows, where the number of
109 libraries required to link things like mingwex grows to be quite high.
110
111 We proceed through these stages as follows,
112
113 * During Indexing we verify and open the ObjectCode and
114 perform a quick scan/indexing of the ObjectCode. All the work
115 required to actually load the ObjectCode is done.
116
117 All symbols from the ObjectCode are also inserted into
118 `symhash`, where possible duplicates are handled via the semantics
119 described in `ghciInsertSymbolTable`.
120
121 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
122 depending on whether they are an archive member or not.
123
124 * During initialization we load ObjectCode, perform relocations, execute
125 static constructors etc. This phase may trigger other ObjectCodes to
126 be loaded because of the calls to lookupSymbol.
127
128 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
129 previous status was `OBJECT_LOADED`.
130
131 * During resolve we attempt to resolve all the symbols needed for the
132 initial link. This essentially means, that for any ObjectCode given
133 directly to the command-line we perform lookupSymbols on the required
134 symbols. lookupSymbols may trigger the loading of additional ObjectCode
135 if required.
136
137 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
138 the previous status was `OBJECT_NEEDED`.
139
140 * lookupSymbols is used to lookup any symbols required, both during initial
141 link and during statement and expression compilations in the REPL.
142 Declaration of e.g. a foreign import, will eventually call lookupSymbol
143 which will either fail (symbol unknown) or succeed (and possibly trigger a
144 load).
145
146 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
147
148 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
149 by calling unloadObj and starting over.
150 When a new declaration or statement is performed ultimately lookupSymbol is called
151 without doing a re-link.
152
153 The goal of these different phases is to allow the linker to be able to perform
154 "lazy loading" of ObjectCode. The reason for this is that we want to only link
155 in symbols that are actually required for the link. This reduces:
156
157 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
158 then we don't need to load libB. This means the dependency chain for libraries
159 such as mingw32 and mingwex can be broken down.
160
161 2) The number of duplicate symbols, since now only symbols that are
162 true duplicates will display the error.
163 */
164 /*Str*/HashTable *symhash;
165
166 /* List of currently loaded objects */
167 ObjectCode *objects = NULL; /* initially empty */
168
169 /* List of objects that have been unloaded via unloadObj(), but are waiting
170 to be actually freed via checkUnload() */
171 ObjectCode *unloaded_objects = NULL; /* initially empty */
172
173 #if defined(THREADED_RTS)
174 /* This protects all the Linker's global state except unloaded_objects */
175 Mutex linker_mutex;
176 /*
177 * This protects unloaded_objects. We have a separate mutex for this, because
178 * the GC needs to access unloaded_objects in checkUnload, while the linker only
179 * needs to access unloaded_objects in unloadObj(), so this allows most linker
180 * operations proceed concurrently with the GC.
181 */
182 Mutex linker_unloaded_mutex;
183 #endif
184
185 /* Generic wrapper function to try and Resolve and RunInit oc files */
186 int ocTryLoad( ObjectCode* oc );
187
188 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
189 * small memory model on this architecture (see gcc docs,
190 * -mcmodel=small).
191 *
192 * MAP_32BIT not available on OpenBSD/amd64
193 */
194 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
195 #define TRY_MAP_32BIT MAP_32BIT
196 #else
197 #define TRY_MAP_32BIT 0
198 #endif
199
200 /*
201 * Due to the small memory model (see above), on x86_64 we have to map
202 * all our non-PIC object files into the low 2Gb of the address space
203 * (why 2Gb and not 4Gb? Because all addresses must be reachable
204 * using a 32-bit signed PC-relative offset). On Linux we can do this
205 * using the MAP_32BIT flag to mmap(), however on other OSs
206 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
207 * can't do this. So on these systems, we have to pick a base address
208 * in the low 2Gb of the address space and try to allocate memory from
209 * there.
210 *
211 * We pick a default address based on the OS, but also make this
212 * configurable via an RTS flag (+RTS -xm)
213 */
214 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
215
216 #if defined(MAP_32BIT)
217 // Try to use MAP_32BIT
218 #define MMAP_32BIT_BASE_DEFAULT 0
219 #else
220 // A guess: 1Gb.
221 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
222 #endif
223
224 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
225 #endif
226
227 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
228 ObjectCode *owner)
229 {
230 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
231 if (!pinfo || owner != pinfo->owner) return;
232 removeStrHashTable(table, key, NULL);
233 if (isSymbolImport (owner, key))
234 stgFree(pinfo->value);
235
236 stgFree(pinfo);
237 }
238
239 /* -----------------------------------------------------------------------------
240 * Insert symbols into hash tables, checking for duplicates.
241 *
242 * Returns: 0 on failure, nonzero on success
243 */
244 /*
245 Note [weak-symbols-support]
246 -------------------------------------
247 While ghciInsertSymbolTable does implement extensive
248 logic for weak symbol support, weak symbols are not currently
249 fully supported by the RTS. This code is mostly here for COMDAT
250 support which uses the weak symbols support.
251
252 Linking weak symbols defined purely in C code with other C code
253 should also work, probably. Observing weak symbols in Haskell
254 won't.
255
256 Some test have been written for weak symbols but have been disabled
257 mostly because it's unsure how the weak symbols support should look.
258 See Trac #11223
259 */
260 int ghciInsertSymbolTable(
261 pathchar* obj_name,
262 HashTable *table,
263 const SymbolName* key,
264 SymbolAddr* data,
265 HsBool weak,
266 ObjectCode *owner)
267 {
268 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
269 if (!pinfo) /* new entry */
270 {
271 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
272 pinfo->value = data;
273 pinfo->owner = owner;
274 pinfo->weak = weak;
275 insertStrHashTable(table, key, pinfo);
276 return 1;
277 }
278 else if (weak && data && pinfo->weak && !pinfo->value)
279 {
280 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
281 pinfo->value = data;
282 pinfo->owner = owner;
283 return 1;
284 }
285 else if (weak)
286 {
287 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
288 already know of another copy throw this one away.
289
290 or both weak symbols have a nonzero value. Keep the existing one.
291
292 This also preserves the semantics of linking against
293 the first symbol we find. */
294 }
295 else if (pinfo->weak && !weak) /* weak symbol is in the table */
296 {
297 /* override the weak definition with the non-weak one */
298 pinfo->value = data;
299 pinfo->owner = owner;
300 pinfo->weak = HS_BOOL_FALSE;
301 return 1;
302 }
303 else if ( pinfo->owner
304 && pinfo->owner->status != OBJECT_RESOLVED
305 && pinfo->owner->status != OBJECT_NEEDED)
306 {
307 /* If the other symbol hasn't been loaded or will be loaded and we want to
308 explicitly load the new one, we can just swap it out and load the one
309 that has been requested. If not, just keep the first one encountered.
310
311 Because the `symHash' table consists symbols we've also not loaded but
312 found during the initial scan this is safe to do. If however the existing
313 symbol has been loaded then it means we have a duplicate.
314
315 This is essentially emulating the behavior of a linker wherein it will always
316 link in object files that are .o file arguments, but only take object files
317 from archives as needed. */
318 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
319 pinfo->value = data;
320 pinfo->owner = owner;
321 pinfo->weak = weak;
322 }
323
324 return 1;
325 }
326 else if (pinfo->owner == owner)
327 {
328 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
329 return 1;
330 }
331 else if (owner && owner->status == OBJECT_LOADED)
332 {
333 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
334 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
335 call this function again to trigger the duplicate error. */
336 return 1;
337 }
338
339 pathchar* archiveName = NULL;
340 debugBelch(
341 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
342 " %s\n"
343 "whilst processing object file\n"
344 " %" PATH_FMT "\n"
345 "The symbol was previously defined in\n"
346 " %" PATH_FMT "\n"
347 "This could be caused by:\n"
348 " * Loading two different object files which export the same symbol\n"
349 " * Specifying the same object file twice on the GHCi command line\n"
350 " * An incorrect `package.conf' entry, causing some object to be\n"
351 " loaded twice.\n",
352 (char*)key,
353 obj_name,
354 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
355 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
356 : pinfo->owner->fileName
357 );
358
359 if (archiveName)
360 {
361 stgFree(archiveName);
362 archiveName = NULL;
363 }
364 return 0;
365 }
366
367 /* -----------------------------------------------------------------------------
368 * Looks up symbols into hash tables.
369 *
370 * Returns: 0 on failure and result is not set,
371 * nonzero on success and result set to nonzero pointer
372 */
373 HsBool ghciLookupSymbolInfo(HashTable *table,
374 const SymbolName* key, RtsSymbolInfo **result)
375 {
376 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
377 if (!pinfo) {
378 *result = NULL;
379 return HS_BOOL_FALSE;
380 }
381 if (pinfo->weak)
382 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
383 /* Once it's looked up, it can no longer be overridden */
384 pinfo->weak = HS_BOOL_FALSE;
385
386 *result = pinfo;
387 return HS_BOOL_TRUE;
388 }
389
390 /* -----------------------------------------------------------------------------
391 * initialize the object linker
392 */
393
394
395 static int linker_init_done = 0 ;
396
397 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
398 static void *dl_prog_handle;
399 static regex_t re_invalid;
400 static regex_t re_realso;
401 #if defined(THREADED_RTS)
402 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
403 #endif
404 #endif
405
406 void initLinker (void)
407 {
408 // default to retaining CAFs for backwards compatibility. Most
409 // users will want initLinker_(0): otherwise unloadObj() will not
410 // be able to unload object files when they contain CAFs.
411 initLinker_(1);
412 }
413
414 void
415 initLinker_ (int retain_cafs)
416 {
417 RtsSymbolVal *sym;
418 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
419 int compileResult;
420 #endif
421
422 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
423
424 /* Make initLinker idempotent, so we can call it
425 before every relevant operation; that means we
426 don't need to initialise the linker separately */
427 if (linker_init_done == 1) {
428 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
429 return;
430 } else {
431 linker_init_done = 1;
432 }
433
434 objects = NULL;
435 unloaded_objects = NULL;
436
437 #if defined(THREADED_RTS)
438 initMutex(&linker_mutex);
439 initMutex(&linker_unloaded_mutex);
440 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
441 initMutex(&dl_mutex);
442 #endif
443 #endif
444
445 symhash = allocStrHashTable();
446
447 /* populate the symbol table with stuff from the RTS */
448 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
449 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
450 symhash, sym->lbl, sym->addr,
451 sym->weak, NULL)) {
452 barf("ghciInsertSymbolTable failed");
453 }
454 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
455 }
456 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
457 machoInitSymbolsWithoutUnderscore();
458 # endif
459 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
460 referenced from a statically linked module. We need to mimic this, but
461 we cannot use NULL because we use it to mean nonexistent symbols. So we
462 use an arbitrary (hopefully unique) address here.
463 */
464 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
465 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
466 barf("ghciInsertSymbolTable failed");
467 }
468
469 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
470 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
471 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
472 retain_cafs ? newRetainedCAF : newGCdCAF,
473 HS_BOOL_FALSE, NULL)) {
474 barf("ghciInsertSymbolTable failed");
475 }
476
477 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
478 # if defined(RTLD_DEFAULT)
479 dl_prog_handle = RTLD_DEFAULT;
480 # else
481 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
482 # endif /* RTLD_DEFAULT */
483
484 compileResult = regcomp(&re_invalid,
485 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
486 REG_EXTENDED);
487 if (compileResult != 0) {
488 barf("Compiling re_invalid failed");
489 }
490 compileResult = regcomp(&re_realso,
491 "(GROUP|INPUT) *\\( *([^ )]+)",
492 REG_EXTENDED);
493 if (compileResult != 0) {
494 barf("Compiling re_realso failed");
495 }
496 # endif
497
498 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
499 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
500 // User-override for mmap_32bit_base
501 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
502 }
503 #endif
504
505 if (RTS_LINKER_USE_MMAP)
506 m32_allocator_init();
507
508 #if defined(OBJFORMAT_PEi386)
509 initLinker_PEi386();
510 #endif
511
512 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
513 return;
514 }
515
516 void
517 exitLinker( void ) {
518 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
519 if (linker_init_done == 1) {
520 regfree(&re_invalid);
521 regfree(&re_realso);
522 #if defined(THREADED_RTS)
523 closeMutex(&dl_mutex);
524 #endif
525 }
526 #endif
527 if (linker_init_done == 1) {
528 freeHashTable(symhash, free);
529 }
530 #if defined(THREADED_RTS)
531 closeMutex(&linker_mutex);
532 #endif
533 }
534
535 /* -----------------------------------------------------------------------------
536 * Loading DLL or .so dynamic libraries
537 * -----------------------------------------------------------------------------
538 *
539 * Add a DLL from which symbols may be found. In the ELF case, just
540 * do RTLD_GLOBAL-style add, so no further messing around needs to
541 * happen in order that symbols in the loaded .so are findable --
542 * lookupSymbol() will subsequently see them by dlsym on the program's
543 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
544 *
545 * In the PEi386 case, open the DLLs and put handles to them in a
546 * linked list. When looking for a symbol, try all handles in the
547 * list. This means that we need to load even DLLs that are guaranteed
548 * to be in the ghc.exe image already, just so we can get a handle
549 * to give to loadSymbol, so that we can find the symbols. For such
550 * libraries, the LoadLibrary call should be a no-op except for returning
551 * the handle.
552 *
553 */
554
555 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
556
557 /* Suppose in ghci we load a temporary SO for a module containing
558 f = 1
559 and then modify the module, recompile, and load another temporary
560 SO with
561 f = 2
562 Then as we don't unload the first SO, dlsym will find the
563 f = 1
564 symbol whereas we want the
565 f = 2
566 symbol. We therefore need to keep our own SO handle list, and
567 try SOs in the right order. */
568
569 typedef
570 struct _OpenedSO {
571 struct _OpenedSO* next;
572 void *handle;
573 }
574 OpenedSO;
575
576 /* A list thereof. */
577 static OpenedSO* openedSOs = NULL;
578
579 static const char *
580 internal_dlopen(const char *dll_name)
581 {
582 OpenedSO* o_so;
583 void *hdl;
584 const char *errmsg;
585 char *errmsg_copy;
586
587 // omitted: RTLD_NOW
588 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
589 IF_DEBUG(linker,
590 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
591
592 //-------------- Begin critical section ------------------
593 // This critical section is necessary because dlerror() is not
594 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
595 // Also, the error message returned must be copied to preserve it
596 // (see POSIX also)
597
598 ACQUIRE_LOCK(&dl_mutex);
599 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
600
601 errmsg = NULL;
602 if (hdl == NULL) {
603 /* dlopen failed; return a ptr to the error msg. */
604 errmsg = dlerror();
605 if (errmsg == NULL) errmsg = "addDLL: unknown error";
606 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
607 strcpy(errmsg_copy, errmsg);
608 errmsg = errmsg_copy;
609 } else {
610 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
611 o_so->handle = hdl;
612 o_so->next = openedSOs;
613 openedSOs = o_so;
614 }
615
616 RELEASE_LOCK(&dl_mutex);
617 //--------------- End critical section -------------------
618
619 return errmsg;
620 }
621
622 /*
623 Note [RTLD_LOCAL]
624
625 In GHCi we want to be able to override previous .so's with newly
626 loaded .so's when we recompile something. This further implies that
627 when we look up a symbol in internal_dlsym() we have to iterate
628 through the loaded libraries (in order from most recently loaded to
629 oldest) looking up the symbol in each one until we find it.
630
631 However, this can cause problems for some symbols that are copied
632 by the linker into the executable image at runtime - see #8935 for a
633 lengthy discussion. To solve that problem we need to look up
634 symbols in the main executable *first*, before attempting to look
635 them up in the loaded .so's. But in order to make that work, we
636 have to always call dlopen with RTLD_LOCAL, so that the loaded
637 libraries don't populate the global symbol table.
638 */
639
640 static void *
641 internal_dlsym(const char *symbol) {
642 OpenedSO* o_so;
643 void *v;
644
645 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
646 ACQUIRE_LOCK(&dl_mutex);
647 dlerror();
648 // look in program first
649 v = dlsym(dl_prog_handle, symbol);
650 if (dlerror() == NULL) {
651 RELEASE_LOCK(&dl_mutex);
652 return v;
653 }
654
655 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
656 v = dlsym(o_so->handle, symbol);
657 if (dlerror() == NULL) {
658 RELEASE_LOCK(&dl_mutex);
659 return v;
660 }
661 }
662 RELEASE_LOCK(&dl_mutex);
663 return v;
664 }
665 # endif
666
667 const char *
668 addDLL( pathchar *dll_name )
669 {
670 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
671 /* ------------------- ELF DLL loader ------------------- */
672
673 #define NMATCH 5
674 regmatch_t match[NMATCH];
675 const char *errmsg;
676 FILE* fp;
677 size_t match_length;
678 #define MAXLINE 1000
679 char line[MAXLINE];
680 int result;
681
682 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
683 errmsg = internal_dlopen(dll_name);
684
685 if (errmsg == NULL) {
686 return NULL;
687 }
688
689 // GHC Trac ticket #2615
690 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
691 // contain linker scripts rather than ELF-format object code. This
692 // code handles the situation by recognizing the real object code
693 // file name given in the linker script.
694 //
695 // If an "invalid ELF header" error occurs, it is assumed that the
696 // .so file contains a linker script instead of ELF object code.
697 // In this case, the code looks for the GROUP ( ... ) linker
698 // directive. If one is found, the first file name inside the
699 // parentheses is treated as the name of a dynamic library and the
700 // code attempts to dlopen that file. If this is also unsuccessful,
701 // an error message is returned.
702
703 // see if the error message is due to an invalid ELF header
704 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
705 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
706 IF_DEBUG(linker, debugBelch("result = %i\n", result));
707 if (result == 0) {
708 // success -- try to read the named file as a linker script
709 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
710 MAXLINE-1);
711 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
712 line[match_length] = '\0'; // make sure string is null-terminated
713 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
714 if ((fp = fopen(line, "r")) == NULL) {
715 return errmsg; // return original error if open fails
716 }
717 // try to find a GROUP or INPUT ( ... ) command
718 while (fgets(line, MAXLINE, fp) != NULL) {
719 IF_DEBUG(linker, debugBelch("input line = %s", line));
720 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
721 // success -- try to dlopen the first named file
722 IF_DEBUG(linker, debugBelch("match%s\n",""));
723 line[match[2].rm_eo] = '\0';
724 stgFree((void*)errmsg); // Free old message before creating new one
725 errmsg = internal_dlopen(line+match[2].rm_so);
726 break;
727 }
728 // if control reaches here, no GROUP or INPUT ( ... ) directive
729 // was found and the original error message is returned to the
730 // caller
731 }
732 fclose(fp);
733 }
734 return errmsg;
735
736 # elif defined(OBJFORMAT_PEi386)
737 return addDLL_PEi386(dll_name, NULL);
738
739 # else
740 barf("addDLL: not implemented on this platform");
741 # endif
742 }
743
744 /* -----------------------------------------------------------------------------
745 * Searches the system directories to determine if there is a system DLL that
746 * satisfies the given name. This prevent GHCi from linking against a static
747 * library if a DLL is available.
748 *
749 * Returns: NULL on failure or no DLL found, else the full path to the DLL
750 * that can be loaded.
751 */
752 pathchar* findSystemLibrary(pathchar* dll_name)
753 {
754 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%"
755 PATH_FMT "'\n", dll_name));
756
757 #if defined(OBJFORMAT_PEi386)
758 return findSystemLibrary_PEi386(dll_name);
759 #else
760 (void)(dll_name); // Function not implemented for other platforms.
761 return NULL;
762 #endif
763 }
764
765 /* -----------------------------------------------------------------------------
766 * Emits a warning determining that the system is missing a required security
767 * update that we need to get access to the proper APIs
768 */
769 void warnMissingKBLibraryPaths( void )
770 {
771 static HsBool missing_update_warn = HS_BOOL_FALSE;
772 if (!missing_update_warn) {
773 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
774 missing_update_warn = HS_BOOL_TRUE;
775 }
776 }
777
778 /* -----------------------------------------------------------------------------
779 * appends a directory to the process DLL Load path so LoadLibrary can find it
780 *
781 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
782 * restore the search path to what it was before this call.
783 */
784 HsPtr addLibrarySearchPath(pathchar* dll_path)
785 {
786 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%"
787 PATH_FMT "'\n", dll_path));
788
789 #if defined(OBJFORMAT_PEi386)
790 return addLibrarySearchPath_PEi386(dll_path);
791 #else
792 (void)(dll_path); // Function not implemented for other platforms.
793 return NULL;
794 #endif
795 }
796
797 /* -----------------------------------------------------------------------------
798 * removes a directory from the process DLL Load path
799 *
800 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
801 */
802 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
803 {
804 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n",
805 dll_path_index));
806
807 #if defined(OBJFORMAT_PEi386)
808 return removeLibrarySearchPath_PEi386(dll_path_index);
809 #else
810 (void)(dll_path_index); // Function not implemented for other platforms.
811 return HS_BOOL_FALSE;
812 #endif
813 }
814
815 /* -----------------------------------------------------------------------------
816 * insert a symbol in the hash table
817 *
818 * Returns: 0 on failure, nozero on success
819 */
820 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
821 {
822 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE,
823 NULL);
824 }
825
826 /* -----------------------------------------------------------------------------
827 * lookup a symbol in the hash table
828 */
829 #if defined(OBJFORMAT_PEi386)
830 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
831 {
832 return lookupSymbol_PEi386(lbl);
833 }
834
835 #else
836
837 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
838 {
839 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
840
841 ASSERT(symhash != NULL);
842 RtsSymbolInfo *pinfo;
843
844 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
845 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
846
847 # if defined(OBJFORMAT_ELF)
848 return internal_dlsym(lbl);
849 # elif defined(OBJFORMAT_MACHO)
850
851 /* HACK: On OS X, all symbols are prefixed with an underscore.
852 However, dlsym wants us to omit the leading underscore from the
853 symbol name -- the dlsym routine puts it back on before
854 searching for the symbol. For now, we simply strip it off here
855 (and ONLY here).
856 */
857 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n",
858 lbl));
859 ASSERT(lbl[0] == '_');
860 return internal_dlsym(lbl + 1);
861
862 # else
863 ASSERT(2+2 == 5);
864 return NULL;
865 # endif
866 } else {
867 return loadSymbol(lbl, pinfo);
868 }
869 }
870 #endif /* OBJFORMAT_PEi386 */
871
872 /*
873 * Load and relocate the object code for a symbol as necessary.
874 * Symbol name only used for diagnostics output.
875 */
876 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
877 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl,
878 pinfo->value));
879 ObjectCode* oc = pinfo->owner;
880
881 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
882 See Note [runtime-linker-phases] */
883 if (oc && lbl && oc->status == OBJECT_LOADED) {
884 oc->status = OBJECT_NEEDED;
885 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand "
886 "loading symbol '%s'\n", lbl));
887 int r = ocTryLoad(oc);
888 if (!r) {
889 return NULL;
890 }
891
892 #if defined(PROFILING)
893 // collect any new cost centres & CCSs
894 // that were defined during runInit
895 initProfiling2();
896 #endif
897 }
898
899 return pinfo->value;
900 }
901
902 SymbolAddr* lookupSymbol( SymbolName* lbl )
903 {
904 ACQUIRE_LOCK(&linker_mutex);
905 SymbolAddr* r = lookupSymbol_(lbl);
906 if (!r) {
907 errorBelch("^^ Could not load '%s', dependency unresolved. "
908 "See top entry above.\n", lbl);
909 fflush(stderr);
910 }
911 RELEASE_LOCK(&linker_mutex);
912 return r;
913 }
914
915 /* -----------------------------------------------------------------------------
916 Create a StablePtr for a foreign export. This is normally called by
917 a C function with __attribute__((constructor)), which is generated
918 by GHC and linked into the module.
919
920 If the object code is being loaded dynamically, then we remember
921 which StablePtrs were allocated by the constructors and free them
922 again in unloadObj().
923 -------------------------------------------------------------------------- */
924
925 static ObjectCode *loading_obj = NULL;
926
927 StgStablePtr foreignExportStablePtr (StgPtr p)
928 {
929 ForeignExportStablePtr *fe_sptr;
930 StgStablePtr *sptr;
931
932 sptr = getStablePtr(p);
933
934 if (loading_obj != NULL) {
935 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
936 "foreignExportStablePtr");
937 fe_sptr->stable_ptr = sptr;
938 fe_sptr->next = loading_obj->stable_ptrs;
939 loading_obj->stable_ptrs = fe_sptr;
940 }
941
942 return sptr;
943 }
944
945
946 /* -----------------------------------------------------------------------------
947 * Debugging aid: look in GHCi's object symbol tables for symbols
948 * within DELTA bytes of the specified address, and show their names.
949 */
950 #if defined(DEBUG)
951 void ghci_enquire ( SymbolAddr* addr );
952
953 void ghci_enquire(SymbolAddr* addr)
954 {
955 int i;
956 SymbolName* sym;
957 RtsSymbolInfo* a;
958 const int DELTA = 64;
959 ObjectCode* oc;
960
961 for (oc = objects; oc; oc = oc->next) {
962 for (i = 0; i < oc->n_symbols; i++) {
963 sym = oc->symbols[i];
964 if (sym == NULL) continue;
965 a = NULL;
966 if (a == NULL) {
967 ghciLookupSymbolInfo(symhash, sym, &a);
968 }
969 if (a == NULL) {
970 // debugBelch("ghci_enquire: can't find %s\n", sym);
971 }
972 else if ( a->value
973 && (char*)addr-DELTA <= (char*)a->value
974 && (char*)a->value <= (char*)addr+DELTA) {
975 debugBelch("%p + %3d == `%s'\n", addr,
976 (int)((char*)a->value - (char*)addr), sym);
977 }
978 }
979 }
980 }
981 #endif
982
983 #if RTS_LINKER_USE_MMAP
984 //
985 // Returns NULL on failure.
986 //
987 void *
988 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
989 {
990 void *map_addr = NULL;
991 void *result;
992 size_t size;
993 static uint32_t fixed = 0;
994
995 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
996 size = roundUpToPage(bytes);
997
998 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
999 mmap_again:
1000
1001 if (mmap_32bit_base != 0) {
1002 map_addr = mmap_32bit_base;
1003 }
1004 #endif
1005
1006 IF_DEBUG(linker,
1007 debugBelch("mmapForLinker: \tprotection %#0x\n",
1008 PROT_EXEC | PROT_READ | PROT_WRITE));
1009 IF_DEBUG(linker,
1010 debugBelch("mmapForLinker: \tflags %#0x\n",
1011 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1012
1013 result = mmap(map_addr, size,
1014 PROT_EXEC|PROT_READ|PROT_WRITE,
1015 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1016
1017 if (result == MAP_FAILED) {
1018 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1019 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1020 return NULL;
1021 }
1022
1023 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1024 if (mmap_32bit_base != 0) {
1025 if (result == map_addr) {
1026 mmap_32bit_base = (StgWord8*)map_addr + size;
1027 } else {
1028 if ((W_)result > 0x80000000) {
1029 // oops, we were given memory over 2Gb
1030 munmap(result,size);
1031 #if defined(freebsd_HOST_OS) || \
1032 defined(kfreebsdgnu_HOST_OS) || \
1033 defined(dragonfly_HOST_OS)
1034 // Some platforms require MAP_FIXED. This is normally
1035 // a bad idea, because MAP_FIXED will overwrite
1036 // existing mappings.
1037 fixed = MAP_FIXED;
1038 goto mmap_again;
1039 #else
1040 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1041 "asked for %lu bytes at %p. "
1042 "Try specifying an address with +RTS -xm<addr> -RTS",
1043 size, map_addr);
1044 return NULL;
1045 #endif
1046 } else {
1047 // hmm, we were given memory somewhere else, but it's
1048 // still under 2Gb so we can use it. Next time, ask
1049 // for memory right after the place we just got some
1050 mmap_32bit_base = (StgWord8*)result + size;
1051 }
1052 }
1053 } else {
1054 if ((W_)result > 0x80000000) {
1055 // oops, we were given memory over 2Gb
1056 // ... try allocating memory somewhere else?;
1057 debugTrace(DEBUG_linker,
1058 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1059 bytes, result);
1060 munmap(result, size);
1061
1062 // Set a base address and try again... (guess: 1Gb)
1063 mmap_32bit_base = (void*)0x40000000;
1064 goto mmap_again;
1065 }
1066 }
1067 #endif
1068
1069 IF_DEBUG(linker,
1070 debugBelch("mmapForLinker: mapped %" FMT_Word
1071 " bytes starting at %p\n", (W_)size, result));
1072 IF_DEBUG(linker,
1073 debugBelch("mmapForLinker: done\n"));
1074
1075 return result;
1076 }
1077 #endif
1078
1079 /*
1080 * Remove symbols from the symbol table, and free oc->symbols.
1081 * This operation is idempotent.
1082 */
1083 static void removeOcSymbols (ObjectCode *oc)
1084 {
1085 if (oc->symbols == NULL) return;
1086
1087 // Remove all the mappings for the symbols within this object..
1088 int i;
1089 for (i = 0; i < oc->n_symbols; i++) {
1090 if (oc->symbols[i] != NULL) {
1091 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1092 }
1093 }
1094
1095 stgFree(oc->symbols);
1096 oc->symbols = NULL;
1097 }
1098
1099 /*
1100 * Release StablePtrs and free oc->stable_ptrs.
1101 * This operation is idempotent.
1102 */
1103 static void freeOcStablePtrs (ObjectCode *oc)
1104 {
1105 // Release any StablePtrs that were created when this
1106 // object module was initialized.
1107 ForeignExportStablePtr *fe_ptr, *next;
1108
1109 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1110 next = fe_ptr->next;
1111 freeStablePtr(fe_ptr->stable_ptr);
1112 stgFree(fe_ptr);
1113 }
1114 oc->stable_ptrs = NULL;
1115 }
1116
1117 static void
1118 freePreloadObjectFile (ObjectCode *oc)
1119 {
1120 #if defined(mingw32_HOST_OS)
1121 freePreloadObjectFile_PEi386(oc);
1122 #else
1123
1124 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1125 munmap(oc->image, oc->fileSize);
1126 }
1127 else {
1128 stgFree(oc->image);
1129 }
1130
1131 #endif
1132
1133 oc->image = NULL;
1134 oc->fileSize = 0;
1135 }
1136
1137 /*
1138 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1139 * the GC when a previously unloaded ObjectCode has been determined to be
1140 * unused, and when an error occurs during loadObj().
1141 */
1142 void freeObjectCode (ObjectCode *oc)
1143 {
1144 freePreloadObjectFile(oc);
1145
1146 if (oc->symbols != NULL) {
1147 stgFree(oc->symbols);
1148 oc->symbols = NULL;
1149 }
1150
1151 if (oc->extraInfos != NULL) {
1152 freeHashTable(oc->extraInfos, NULL);
1153 oc->extraInfos = NULL;
1154 }
1155
1156 if (oc->sections != NULL) {
1157 int i;
1158 for (i=0; i < oc->n_sections; i++) {
1159 if (oc->sections[i].start != NULL) {
1160 switch(oc->sections[i].alloc){
1161 #if RTS_LINKER_USE_MMAP
1162 case SECTION_MMAP:
1163 munmap(oc->sections[i].mapped_start,
1164 oc->sections[i].mapped_size);
1165 break;
1166 case SECTION_M32:
1167 m32_free(oc->sections[i].start,
1168 oc->sections[i].size);
1169 break;
1170 #endif
1171 case SECTION_MALLOC:
1172 stgFree(oc->sections[i].start);
1173 break;
1174 default:
1175 break;
1176 }
1177 }
1178 if (oc->sections[i].info) {
1179 stgFree(oc->sections[i].info);
1180 }
1181 }
1182 stgFree(oc->sections);
1183 }
1184
1185 freeProddableBlocks(oc);
1186
1187 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1188 * alongside the image, so we don't need to free. */
1189 #if defined(NEED_SYMBOL_EXTRAS) && (!defined(x86_64_HOST_ARCH) \
1190 || !defined(mingw32_HOST_OS))
1191 if (RTS_LINKER_USE_MMAP) {
1192 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1193 m32_free(oc->symbol_extras,
1194 sizeof(SymbolExtra) * oc->n_symbol_extras);
1195 }
1196 }
1197 else {
1198 stgFree(oc->symbol_extras);
1199 }
1200 #endif
1201
1202 #if defined(OBJECTFORMAT_MACHO)
1203 ocDeinit_MachO(oc);
1204 #endif
1205 #if defined(OBJFORMAT_ELF)
1206 ocDeinit_ELF(oc);
1207 #endif
1208
1209 stgFree(oc->fileName);
1210 stgFree(oc->archiveMemberName);
1211
1212 stgFree(oc);
1213 }
1214
1215 /* -----------------------------------------------------------------------------
1216 * Sets the initial status of a fresh ObjectCode
1217 */
1218 static void setOcInitialStatus(ObjectCode* oc) {
1219 if (oc->archiveMemberName == NULL) {
1220 oc->status = OBJECT_NEEDED;
1221 } else {
1222 oc->status = OBJECT_LOADED;
1223 }
1224 }
1225
1226 ObjectCode*
1227 mkOc( pathchar *path, char *image, int imageSize,
1228 bool mapped, char *archiveMemberName, int misalignment ) {
1229 ObjectCode* oc;
1230
1231 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1232 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1233
1234 oc->info = NULL;
1235
1236 # if defined(OBJFORMAT_ELF)
1237 oc->formatName = "ELF";
1238 # elif defined(OBJFORMAT_PEi386)
1239 oc->formatName = "PEi386";
1240 # elif defined(OBJFORMAT_MACHO)
1241 oc->formatName = "Mach-O";
1242 # else
1243 stgFree(oc);
1244 barf("loadObj: not implemented on this platform");
1245 # endif
1246
1247 oc->image = image;
1248 oc->fileName = pathdup(path);
1249
1250 if (archiveMemberName) {
1251 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1,
1252 "loadObj" );
1253 strcpy(oc->archiveMemberName, archiveMemberName);
1254 } else {
1255 oc->archiveMemberName = NULL;
1256 }
1257
1258 setOcInitialStatus( oc );
1259
1260 oc->fileSize = imageSize;
1261 oc->symbols = NULL;
1262 oc->n_sections = 0;
1263 oc->sections = NULL;
1264 oc->proddables = NULL;
1265 oc->stable_ptrs = NULL;
1266 #if defined(NEED_SYMBOL_EXTRAS)
1267 oc->symbol_extras = NULL;
1268 #endif
1269 oc->imageMapped = mapped;
1270
1271 oc->misalignment = misalignment;
1272 oc->extraInfos = NULL;
1273
1274 /* chain it onto the list of objects */
1275 oc->next = NULL;
1276
1277 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1278 return oc;
1279 }
1280
1281 /* -----------------------------------------------------------------------------
1282 * Check if an object or archive is already loaded.
1283 *
1284 * Returns: 1 if the path is already loaded, 0 otherwise.
1285 */
1286 HsInt
1287 isAlreadyLoaded( pathchar *path )
1288 {
1289 ObjectCode *o;
1290 for (o = objects; o; o = o->next) {
1291 if (0 == pathcmp(o->fileName, path)) {
1292 return 1; /* already loaded */
1293 }
1294 }
1295 return 0; /* not loaded yet */
1296 }
1297
1298 //
1299 // Load the object file into memory. This will not be its final resting place,
1300 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
1301 // address space, properly aligned.
1302 //
1303 static ObjectCode *
1304 preloadObjectFile (pathchar *path)
1305 {
1306 int fileSize;
1307 struct_stat st;
1308 int r;
1309 void *image;
1310 ObjectCode *oc;
1311 int misalignment = 0;
1312
1313 r = pathstat(path, &st);
1314 if (r == -1) {
1315 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
1316 return NULL;
1317 }
1318
1319 fileSize = st.st_size;
1320
1321 #if RTS_LINKER_USE_MMAP
1322 int fd;
1323
1324 /* On many architectures malloc'd memory isn't executable, so we need to use
1325 * mmap. */
1326
1327 #if defined(openbsd_HOST_OS)
1328 fd = open(path, O_RDONLY, S_IRUSR);
1329 #else
1330 fd = open(path, O_RDONLY);
1331 #endif
1332 if (fd == -1) {
1333 errorBelch("loadObj: can't open %s", path);
1334 return NULL;
1335 }
1336
1337 /* iOS does not permit to mmap with r+w+x, however while the comment for
1338 * this function says this is not the final resting place, for some
1339 * architectures / hosts (at least mach-o non-iOS -- see ocGetNames_MachO)
1340 * the image mmaped here in fact ends up being the final resting place for
1341 * the sections. And hence we need to leave r+w+x here for other hosts
1342 * until all hosts have been made aware of the initial image being r+w only.
1343 *
1344 * See also the misalignment logic for darwin below.
1345 */
1346 #if defined(ios_HOST_OS)
1347 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
1348 #else
1349 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
1350 MAP_PRIVATE, fd, 0);
1351 #endif
1352
1353 if (image == MAP_FAILED) {
1354 errorBelch("mmap: failed. errno = %d", errno);
1355 }
1356 // not 32-bit yet, we'll remap later
1357 close(fd);
1358
1359 #else /* !RTS_LINKER_USE_MMAP */
1360 FILE *f;
1361
1362 /* load the image into memory */
1363 /* coverity[toctou] */
1364 f = pathopen(path, WSTR("rb"));
1365 if (!f) {
1366 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
1367 return NULL;
1368 }
1369
1370 # if defined(mingw32_HOST_OS)
1371
1372 // TODO: We would like to use allocateExec here, but allocateExec
1373 // cannot currently allocate blocks large enough.
1374 image = allocateImageAndTrampolines(path, "itself", f, fileSize,
1375 HS_BOOL_FALSE);
1376 if (image == NULL) {
1377 fclose(f);
1378 return NULL;
1379 }
1380
1381 # elif defined(darwin_HOST_OS)
1382
1383 // In a Mach-O .o file, all sections can and will be misaligned
1384 // if the total size of the headers is not a multiple of the
1385 // desired alignment. This is fine for .o files that only serve
1386 // as input for the static linker, but it's not fine for us,
1387 // as SSE (used by gcc for floating point) and Altivec require
1388 // 16-byte alignment.
1389 // We calculate the correct alignment from the header before
1390 // reading the file, and then we misalign image on purpose so
1391 // that the actual sections end up aligned again.
1392 misalignment = machoGetMisalignment(f);
1393 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
1394 image += misalignment;
1395
1396 # else /* !defined(mingw32_HOST_OS) */
1397
1398 image = stgMallocBytes(fileSize, "loadObj(image)");
1399
1400 #endif
1401
1402 int n;
1403 n = fread ( image, 1, fileSize, f );
1404 fclose(f);
1405 if (n != fileSize) {
1406 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
1407 stgFree(image);
1408 return NULL;
1409 }
1410
1411 #endif /* RTS_LINKER_USE_MMAP */
1412
1413 oc = mkOc(path, image, fileSize, true, NULL, misalignment);
1414
1415 #if defined(OBJFORMAT_MACHO)
1416 if (ocVerifyImage_MachO( oc ))
1417 ocInit_MachO( oc );
1418 #endif
1419 #if defined(OBJFORMAT_ELF)
1420 if(ocVerifyImage_ELF( oc ))
1421 ocInit_ELF( oc );
1422 #endif
1423 return oc;
1424 }
1425
1426 /* -----------------------------------------------------------------------------
1427 * Load an obj (populate the global symbol table, but don't resolve yet)
1428 *
1429 * Returns: 1 if ok, 0 on error.
1430 */
1431 static HsInt loadObj_ (pathchar *path)
1432 {
1433 ObjectCode* oc;
1434 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
1435
1436 /* debugBelch("loadObj %s\n", path ); */
1437
1438 /* Check that we haven't already loaded this object.
1439 Ignore requests to load multiple times */
1440
1441 if (isAlreadyLoaded(path)) {
1442 IF_DEBUG(linker,
1443 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1444 return 1; /* success */
1445 }
1446
1447 oc = preloadObjectFile(path);
1448 if (oc == NULL) return 0;
1449
1450 if (! loadOc(oc)) {
1451 // failed; free everything we've allocated
1452 removeOcSymbols(oc);
1453 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
1454 freeObjectCode(oc);
1455 return 0;
1456 }
1457
1458 oc->next = objects;
1459 objects = oc;
1460 return 1;
1461 }
1462
1463 HsInt loadObj (pathchar *path)
1464 {
1465 ACQUIRE_LOCK(&linker_mutex);
1466 HsInt r = loadObj_(path);
1467 RELEASE_LOCK(&linker_mutex);
1468 return r;
1469 }
1470
1471 HsInt loadOc (ObjectCode* oc)
1472 {
1473 int r;
1474
1475 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
1476
1477 /* verify the in-memory image */
1478 # if defined(OBJFORMAT_ELF)
1479 r = ocVerifyImage_ELF ( oc );
1480 # elif defined(OBJFORMAT_PEi386)
1481 r = ocVerifyImage_PEi386 ( oc );
1482 # elif defined(OBJFORMAT_MACHO)
1483 r = ocVerifyImage_MachO ( oc );
1484 # else
1485 barf("loadObj: no verify method");
1486 # endif
1487 if (!r) {
1488 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
1489 return r;
1490 }
1491
1492 #if defined(NEED_SYMBOL_EXTRAS)
1493 # if defined(OBJFORMAT_MACHO)
1494 r = ocAllocateSymbolExtras_MachO ( oc );
1495 if (!r) {
1496 IF_DEBUG(linker,
1497 debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
1498 return r;
1499 }
1500 # elif defined(OBJFORMAT_ELF)
1501 r = ocAllocateSymbolExtras_ELF ( oc );
1502 if (!r) {
1503 IF_DEBUG(linker,
1504 debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
1505 return r;
1506 }
1507 # elif defined(OBJFORMAT_PEi386)
1508 ocAllocateSymbolExtras_PEi386 ( oc );
1509 # endif
1510 #endif
1511
1512 /* build the symbol list for this image */
1513 # if defined(OBJFORMAT_ELF)
1514 r = ocGetNames_ELF ( oc );
1515 # elif defined(OBJFORMAT_PEi386)
1516 r = ocGetNames_PEi386 ( oc );
1517 # elif defined(OBJFORMAT_MACHO)
1518 r = ocGetNames_MachO ( oc );
1519 # else
1520 barf("loadObj: no getNames method");
1521 # endif
1522 if (!r) {
1523 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
1524 return r;
1525 }
1526
1527 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
1528 setOcInitialStatus( oc );
1529 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
1530
1531 return 1;
1532 }
1533
1534 /* -----------------------------------------------------------------------------
1535 * try to load and initialize an ObjectCode into memory
1536 *
1537 * Returns: 1 if ok, 0 on error.
1538 */
1539 int ocTryLoad (ObjectCode* oc) {
1540 int r;
1541
1542 if (oc->status != OBJECT_NEEDED) {
1543 return 1;
1544 }
1545
1546 /* Check for duplicate symbols by looking into `symhash`.
1547 Duplicate symbols are any symbols which exist
1548 in different ObjectCodes that have both been loaded, or
1549 are to be loaded by this call.
1550
1551 This call is intended to have no side-effects when a non-duplicate
1552 symbol is re-inserted.
1553
1554 We set the Address to NULL since that is not used to distinguish
1555 symbols. Duplicate symbols are distinguished by name and oc.
1556 */
1557 int x;
1558 SymbolName* symbol;
1559 for (x = 0; x < oc->n_symbols; x++) {
1560 symbol = oc->symbols[x];
1561 if ( symbol
1562 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL,
1563 isSymbolWeak(oc, symbol), oc)) {
1564 return 0;
1565 }
1566 }
1567
1568 # if defined(OBJFORMAT_ELF)
1569 r = ocResolve_ELF ( oc );
1570 # elif defined(OBJFORMAT_PEi386)
1571 r = ocResolve_PEi386 ( oc );
1572 # elif defined(OBJFORMAT_MACHO)
1573 r = ocResolve_MachO ( oc );
1574 # else
1575 barf("ocTryLoad: not implemented on this platform");
1576 # endif
1577 if (!r) { return r; }
1578
1579 // run init/init_array/ctors/mod_init_func
1580
1581 loading_obj = oc; // tells foreignExportStablePtr what to do
1582 #if defined(OBJFORMAT_ELF)
1583 r = ocRunInit_ELF ( oc );
1584 #elif defined(OBJFORMAT_PEi386)
1585 r = ocRunInit_PEi386 ( oc );
1586 #elif defined(OBJFORMAT_MACHO)
1587 r = ocRunInit_MachO ( oc );
1588 #else
1589 barf("ocTryLoad: initializers not implemented on this platform");
1590 #endif
1591 loading_obj = NULL;
1592
1593 if (!r) { return r; }
1594
1595 oc->status = OBJECT_RESOLVED;
1596
1597 return 1;
1598 }
1599
1600 /* -----------------------------------------------------------------------------
1601 * resolve all the currently unlinked objects in memory
1602 *
1603 * Returns: 1 if ok, 0 on error.
1604 */
1605 static HsInt resolveObjs_ (void)
1606 {
1607 ObjectCode *oc;
1608 int r;
1609
1610 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
1611
1612 for (oc = objects; oc; oc = oc->next) {
1613 r = ocTryLoad(oc);
1614 if (!r)
1615 {
1616 return r;
1617 }
1618 }
1619
1620 #if defined(PROFILING)
1621 // collect any new cost centres & CCSs that were defined during runInit
1622 initProfiling2();
1623 #endif
1624
1625 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
1626 return 1;
1627 }
1628
1629 HsInt resolveObjs (void)
1630 {
1631 ACQUIRE_LOCK(&linker_mutex);
1632 HsInt r = resolveObjs_();
1633 RELEASE_LOCK(&linker_mutex);
1634 return r;
1635 }
1636
1637 /* -----------------------------------------------------------------------------
1638 * delete an object from the pool
1639 */
1640 static HsInt unloadObj_ (pathchar *path, bool just_purge)
1641 {
1642 ObjectCode *oc, *prev, *next;
1643 HsBool unloadedAnyObj = HS_BOOL_FALSE;
1644
1645 ASSERT(symhash != NULL);
1646 ASSERT(objects != NULL);
1647
1648 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
1649
1650 prev = NULL;
1651 for (oc = objects; oc; oc = next) {
1652 next = oc->next; // oc might be freed
1653
1654 if (!pathcmp(oc->fileName,path)) {
1655
1656 // these are both idempotent, so in just_purge mode we can
1657 // later call unloadObj() to really unload the object.
1658 removeOcSymbols(oc);
1659 freeOcStablePtrs(oc);
1660
1661 if (!just_purge) {
1662 if (prev == NULL) {
1663 objects = oc->next;
1664 } else {
1665 prev->next = oc->next;
1666 }
1667 ACQUIRE_LOCK(&linker_unloaded_mutex);
1668 oc->next = unloaded_objects;
1669 unloaded_objects = oc;
1670 oc->status = OBJECT_UNLOADED;
1671 RELEASE_LOCK(&linker_unloaded_mutex);
1672 // We do not own oc any more; it can be released at any time by
1673 // the GC in checkUnload().
1674 } else {
1675 prev = oc;
1676 }
1677
1678 /* This could be a member of an archive so continue
1679 * unloading other members. */
1680 unloadedAnyObj = HS_BOOL_TRUE;
1681 } else {
1682 prev = oc;
1683 }
1684 }
1685
1686 if (unloadedAnyObj) {
1687 return 1;
1688 }
1689 else {
1690 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
1691 return 0;
1692 }
1693 }
1694
1695 HsInt unloadObj (pathchar *path)
1696 {
1697 ACQUIRE_LOCK(&linker_mutex);
1698 HsInt r = unloadObj_(path, false);
1699 RELEASE_LOCK(&linker_mutex);
1700 return r;
1701 }
1702
1703 HsInt purgeObj (pathchar *path)
1704 {
1705 ACQUIRE_LOCK(&linker_mutex);
1706 HsInt r = unloadObj_(path, true);
1707 RELEASE_LOCK(&linker_mutex);
1708 return r;
1709 }
1710
1711 /* -----------------------------------------------------------------------------
1712 * Sanity checking. For each ObjectCode, maintain a list of address ranges
1713 * which may be prodded during relocation, and abort if we try and write
1714 * outside any of these.
1715 */
1716 void
1717 addProddableBlock ( ObjectCode* oc, void* start, int size )
1718 {
1719 ProddableBlock* pb
1720 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
1721
1722 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
1723 ASSERT(size > 0);
1724 pb->start = start;
1725 pb->size = size;
1726 pb->next = oc->proddables;
1727 oc->proddables = pb;
1728 }
1729
1730 void
1731 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
1732 {
1733 ProddableBlock* pb;
1734
1735 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
1736 char* s = (char*)(pb->start);
1737 char* e = s + pb->size;
1738 char* a = (char*)addr;
1739 if (a >= s && (a+size) <= e) return;
1740 }
1741 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
1742 }
1743
1744 void freeProddableBlocks (ObjectCode *oc)
1745 {
1746 ProddableBlock *pb, *next;
1747
1748 for (pb = oc->proddables; pb != NULL; pb = next) {
1749 next = pb->next;
1750 stgFree(pb);
1751 }
1752 oc->proddables = NULL;
1753 }
1754
1755 /* -----------------------------------------------------------------------------
1756 * Section management.
1757 */
1758 void
1759 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
1760 void* start, StgWord size, StgWord mapped_offset,
1761 void* mapped_start, StgWord mapped_size)
1762 {
1763 s->start = start; /* actual start of section in memory */
1764 s->size = size; /* actual size of section in memory */
1765 s->kind = kind;
1766 s->alloc = alloc;
1767 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
1768
1769 s->mapped_start = mapped_start; /* start of mmap() block */
1770 s->mapped_size = mapped_size; /* size of mmap() block */
1771
1772 s->info = (struct SectionFormatInfo*)stgCallocBytes(1, sizeof *s->info,
1773 "addSection(SectionFormatInfo)");
1774
1775 IF_DEBUG(linker,
1776 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
1777 start, (void*)((StgWord)start + size),
1778 size, kind ));
1779 }
1780