hadrian: eliminate most of the remaining big rule enumerations
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "StablePtr.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "linker/CacheFlush.h"
32 #include "linker/SymbolExtras.h"
33 #include "PathUtils.h"
34
35 #if !defined(mingw32_HOST_OS)
36 #include "posix/Signals.h"
37 #endif
38
39 // get protos for is*()
40 #include <ctype.h>
41
42 #if defined(HAVE_SYS_TYPES_H)
43 #include <sys/types.h>
44 #endif
45
46 #include <inttypes.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <assert.h>
51 #include <fs_rts.h>
52
53 #if defined(HAVE_SYS_STAT_H)
54 #include <sys/stat.h>
55 #endif
56
57 #if defined(HAVE_DLFCN_H)
58 #include <dlfcn.h>
59 #endif
60
61 #if defined(OBJFORMAT_ELF)
62 # include "linker/Elf.h"
63 # include <regex.h> // regex is already used by dlopen() so this is OK
64 // to use here without requiring an additional lib
65 #elif defined(OBJFORMAT_PEi386)
66 # include "linker/PEi386.h"
67 # include <windows.h>
68 #elif defined(OBJFORMAT_MACHO)
69 # include "linker/MachO.h"
70 # include <regex.h>
71 # include <mach/machine.h>
72 # include <mach-o/fat.h>
73 #endif
74
75 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
76 #define ALWAYS_PIC
77 #endif
78
79 #if defined(dragonfly_HOST_OS)
80 #include <sys/tls.h>
81 #endif
82 /*
83 Note [runtime-linker-support]
84 -----------------------------
85 When adding support for a new platform to the runtime linker please
86 update `$TOP/configure.ac` under heading `Does target have runtime
87 linker support?`.
88 */
89 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
90 This hashtable will contain information on all symbols
91 that we know of, however the .o they are in may not be loaded.
92
93 Until the ObjectCode the symbol belongs to is actually
94 loaded this symbol may be replaced. So do not rely on
95 addresses of unloaded symbols.
96
97 Note [runtime-linker-phases]
98 --------------------------------------
99 Broadly the behavior of the runtime linker can be
100 split into the following four phases:
101
102 - Indexing (e.g. ocVerifyImage and ocGetNames)
103 - Initialization (e.g. ocResolve and ocRunInit)
104 - Resolve (e.g. resolveObjs())
105 - Lookup (e.g. lookupSymbol)
106
107 This is to enable lazy loading of symbols. Eager loading is problematic
108 as it means that all symbols must be available, even those which we will
109 never use. This is especially painful on Windows, where the number of
110 libraries required to link things like mingwex grows to be quite high.
111
112 We proceed through these stages as follows,
113
114 * During Indexing we verify and open the ObjectCode and
115 perform a quick scan/indexing of the ObjectCode. All the work
116 required to actually load the ObjectCode is done.
117
118 All symbols from the ObjectCode are also inserted into
119 `symhash`, where possible duplicates are handled via the semantics
120 described in `ghciInsertSymbolTable`.
121
122 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
123 depending on whether they are an archive member or not.
124
125 * During initialization we load ObjectCode, perform relocations, execute
126 static constructors etc. This phase may trigger other ObjectCodes to
127 be loaded because of the calls to lookupSymbol.
128
129 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
130 previous status was `OBJECT_LOADED`.
131
132 * During resolve we attempt to resolve all the symbols needed for the
133 initial link. This essentially means, that for any ObjectCode given
134 directly to the command-line we perform lookupSymbols on the required
135 symbols. lookupSymbols may trigger the loading of additional ObjectCode
136 if required.
137
138 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
139 the previous status was `OBJECT_NEEDED`.
140
141 * lookupSymbols is used to lookup any symbols required, both during initial
142 link and during statement and expression compilations in the REPL.
143 Declaration of e.g. a foreign import, will eventually call lookupSymbol
144 which will either fail (symbol unknown) or succeed (and possibly trigger a
145 load).
146
147 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
148
149 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
150 by calling unloadObj and starting over.
151 When a new declaration or statement is performed ultimately lookupSymbol is called
152 without doing a re-link.
153
154 The goal of these different phases is to allow the linker to be able to perform
155 "lazy loading" of ObjectCode. The reason for this is that we want to only link
156 in symbols that are actually required for the link. This reduces:
157
158 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
159 then we don't need to load libB. This means the dependency chain for libraries
160 such as mingw32 and mingwex can be broken down.
161
162 2) The number of duplicate symbols, since now only symbols that are
163 true duplicates will display the error.
164 */
165 /*Str*/HashTable *symhash;
166
167 /* List of currently loaded objects */
168 ObjectCode *objects = NULL; /* initially empty */
169
170 /* List of objects that have been unloaded via unloadObj(), but are waiting
171 to be actually freed via checkUnload() */
172 ObjectCode *unloaded_objects = NULL; /* initially empty */
173
174 #if defined(THREADED_RTS)
175 /* This protects all the Linker's global state except unloaded_objects */
176 Mutex linker_mutex;
177 /*
178 * This protects unloaded_objects. We have a separate mutex for this, because
179 * the GC needs to access unloaded_objects in checkUnload, while the linker only
180 * needs to access unloaded_objects in unloadObj(), so this allows most linker
181 * operations proceed concurrently with the GC.
182 */
183 Mutex linker_unloaded_mutex;
184 #endif
185
186 /* Generic wrapper function to try and Resolve and RunInit oc files */
187 int ocTryLoad( ObjectCode* oc );
188
189 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
190 * small memory model on this architecture (see gcc docs,
191 * -mcmodel=small).
192 *
193 * MAP_32BIT not available on OpenBSD/amd64
194 */
195 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
196 #define TRY_MAP_32BIT MAP_32BIT
197 #else
198 #define TRY_MAP_32BIT 0
199 #endif
200
201 /*
202 * Due to the small memory model (see above), on x86_64 we have to map
203 * all our non-PIC object files into the low 2Gb of the address space
204 * (why 2Gb and not 4Gb? Because all addresses must be reachable
205 * using a 32-bit signed PC-relative offset). On Linux we can do this
206 * using the MAP_32BIT flag to mmap(), however on other OSs
207 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
208 * can't do this. So on these systems, we have to pick a base address
209 * in the low 2Gb of the address space and try to allocate memory from
210 * there.
211 *
212 * We pick a default address based on the OS, but also make this
213 * configurable via an RTS flag (+RTS -xm)
214 */
215 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
216
217 #if defined(MAP_32BIT)
218 // Try to use MAP_32BIT
219 #define MMAP_32BIT_BASE_DEFAULT 0
220 #else
221 // A guess: 1Gb.
222 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
223 #endif
224
225 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
226 #endif
227
228 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
229 ObjectCode *owner)
230 {
231 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
232 if (!pinfo || owner != pinfo->owner) return;
233 removeStrHashTable(table, key, NULL);
234 if (isSymbolImport (owner, key))
235 stgFree(pinfo->value);
236
237 stgFree(pinfo);
238 }
239
240 /* -----------------------------------------------------------------------------
241 * Insert symbols into hash tables, checking for duplicates.
242 *
243 * Returns: 0 on failure, nonzero on success
244 */
245 /*
246 Note [weak-symbols-support]
247 -------------------------------------
248 While ghciInsertSymbolTable does implement extensive
249 logic for weak symbol support, weak symbols are not currently
250 fully supported by the RTS. This code is mostly here for COMDAT
251 support which uses the weak symbols support.
252
253 Linking weak symbols defined purely in C code with other C code
254 should also work, probably. Observing weak symbols in Haskell
255 won't.
256
257 Some test have been written for weak symbols but have been disabled
258 mostly because it's unsure how the weak symbols support should look.
259 See Trac #11223
260 */
261 int ghciInsertSymbolTable(
262 pathchar* obj_name,
263 HashTable *table,
264 const SymbolName* key,
265 SymbolAddr* data,
266 HsBool weak,
267 ObjectCode *owner)
268 {
269 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
270 if (!pinfo) /* new entry */
271 {
272 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
273 pinfo->value = data;
274 pinfo->owner = owner;
275 pinfo->weak = weak;
276 insertStrHashTable(table, key, pinfo);
277 return 1;
278 }
279 else if (weak && data && pinfo->weak && !pinfo->value)
280 {
281 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
282 pinfo->value = data;
283 pinfo->owner = owner;
284 return 1;
285 }
286 else if (weak)
287 {
288 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
289 already know of another copy throw this one away.
290
291 or both weak symbols have a nonzero value. Keep the existing one.
292
293 This also preserves the semantics of linking against
294 the first symbol we find. */
295 }
296 else if (pinfo->weak && !weak) /* weak symbol is in the table */
297 {
298 /* override the weak definition with the non-weak one */
299 pinfo->value = data;
300 pinfo->owner = owner;
301 pinfo->weak = HS_BOOL_FALSE;
302 return 1;
303 }
304 else if ( pinfo->owner
305 && pinfo->owner->status != OBJECT_RESOLVED
306 && pinfo->owner->status != OBJECT_NEEDED)
307 {
308 /* If the other symbol hasn't been loaded or will be loaded and we want to
309 explicitly load the new one, we can just swap it out and load the one
310 that has been requested. If not, just keep the first one encountered.
311
312 Because the `symHash' table consists symbols we've also not loaded but
313 found during the initial scan this is safe to do. If however the existing
314 symbol has been loaded then it means we have a duplicate.
315
316 This is essentially emulating the behavior of a linker wherein it will always
317 link in object files that are .o file arguments, but only take object files
318 from archives as needed. */
319 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
320 pinfo->value = data;
321 pinfo->owner = owner;
322 pinfo->weak = weak;
323 }
324
325 return 1;
326 }
327 else if (pinfo->owner == owner)
328 {
329 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
330 return 1;
331 }
332 else if (owner && owner->status == OBJECT_LOADED)
333 {
334 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
335 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
336 call this function again to trigger the duplicate error. */
337 return 1;
338 }
339
340 pathchar* archiveName = NULL;
341 debugBelch(
342 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
343 " %s\n"
344 "whilst processing object file\n"
345 " %" PATH_FMT "\n"
346 "The symbol was previously defined in\n"
347 " %" PATH_FMT "\n"
348 "This could be caused by:\n"
349 " * Loading two different object files which export the same symbol\n"
350 " * Specifying the same object file twice on the GHCi command line\n"
351 " * An incorrect `package.conf' entry, causing some object to be\n"
352 " loaded twice.\n",
353 (char*)key,
354 obj_name,
355 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
356 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
357 : pinfo->owner->fileName
358 );
359
360 if (archiveName)
361 {
362 stgFree(archiveName);
363 archiveName = NULL;
364 }
365 return 0;
366 }
367
368 /* -----------------------------------------------------------------------------
369 * Looks up symbols into hash tables.
370 *
371 * Returns: 0 on failure and result is not set,
372 * nonzero on success and result set to nonzero pointer
373 */
374 HsBool ghciLookupSymbolInfo(HashTable *table,
375 const SymbolName* key, RtsSymbolInfo **result)
376 {
377 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
378 if (!pinfo) {
379 *result = NULL;
380 return HS_BOOL_FALSE;
381 }
382 if (pinfo->weak)
383 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
384 /* Once it's looked up, it can no longer be overridden */
385 pinfo->weak = HS_BOOL_FALSE;
386
387 *result = pinfo;
388 return HS_BOOL_TRUE;
389 }
390
391 /* -----------------------------------------------------------------------------
392 * initialize the object linker
393 */
394
395
396 static int linker_init_done = 0 ;
397
398 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
399 static void *dl_prog_handle;
400 static regex_t re_invalid;
401 static regex_t re_realso;
402 #if defined(THREADED_RTS)
403 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
404 #endif
405 #endif
406
407 void initLinker (void)
408 {
409 // default to retaining CAFs for backwards compatibility. Most
410 // users will want initLinker_(0): otherwise unloadObj() will not
411 // be able to unload object files when they contain CAFs.
412 initLinker_(1);
413 }
414
415 void
416 initLinker_ (int retain_cafs)
417 {
418 RtsSymbolVal *sym;
419 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
420 int compileResult;
421 #endif
422
423 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
424
425 /* Make initLinker idempotent, so we can call it
426 before every relevant operation; that means we
427 don't need to initialise the linker separately */
428 if (linker_init_done == 1) {
429 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
430 return;
431 } else {
432 linker_init_done = 1;
433 }
434
435 objects = NULL;
436 unloaded_objects = NULL;
437
438 #if defined(THREADED_RTS)
439 initMutex(&linker_mutex);
440 initMutex(&linker_unloaded_mutex);
441 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
442 initMutex(&dl_mutex);
443 #endif
444 #endif
445
446 symhash = allocStrHashTable();
447
448 /* populate the symbol table with stuff from the RTS */
449 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
450 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
451 symhash, sym->lbl, sym->addr,
452 sym->weak, NULL)) {
453 barf("ghciInsertSymbolTable failed");
454 }
455 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
456 }
457 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
458 machoInitSymbolsWithoutUnderscore();
459 # endif
460 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
461 referenced from a statically linked module. We need to mimic this, but
462 we cannot use NULL because we use it to mean nonexistent symbols. So we
463 use an arbitrary (hopefully unique) address here.
464 */
465 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
466 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
467 barf("ghciInsertSymbolTable failed");
468 }
469
470 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
471 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
472 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
473 retain_cafs ? newRetainedCAF : newGCdCAF,
474 HS_BOOL_FALSE, NULL)) {
475 barf("ghciInsertSymbolTable failed");
476 }
477
478 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
479 # if defined(RTLD_DEFAULT)
480 dl_prog_handle = RTLD_DEFAULT;
481 # else
482 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
483 # endif /* RTLD_DEFAULT */
484
485 compileResult = regcomp(&re_invalid,
486 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short|invalid file format)",
487 REG_EXTENDED);
488 if (compileResult != 0) {
489 barf("Compiling re_invalid failed");
490 }
491 compileResult = regcomp(&re_realso,
492 "(GROUP|INPUT) *\\( *([^ )]+)",
493 REG_EXTENDED);
494 if (compileResult != 0) {
495 barf("Compiling re_realso failed");
496 }
497 # endif
498
499 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
500 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
501 // User-override for mmap_32bit_base
502 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
503 }
504 #endif
505
506 if (RTS_LINKER_USE_MMAP)
507 m32_allocator_init();
508
509 #if defined(OBJFORMAT_PEi386)
510 initLinker_PEi386();
511 #endif
512
513 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
514 return;
515 }
516
517 void
518 exitLinker( void ) {
519 #if defined(OBJFORMAT_PEi386)
520 exitLinker_PEi386();
521 #endif
522 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
523 if (linker_init_done == 1) {
524 regfree(&re_invalid);
525 regfree(&re_realso);
526 #if defined(THREADED_RTS)
527 closeMutex(&dl_mutex);
528 #endif
529 }
530 #endif
531 if (linker_init_done == 1) {
532 freeHashTable(symhash, free);
533 }
534 #if defined(THREADED_RTS)
535 closeMutex(&linker_mutex);
536 #endif
537 }
538
539 /* -----------------------------------------------------------------------------
540 * Loading DLL or .so dynamic libraries
541 * -----------------------------------------------------------------------------
542 *
543 * Add a DLL from which symbols may be found. In the ELF case, just
544 * do RTLD_GLOBAL-style add, so no further messing around needs to
545 * happen in order that symbols in the loaded .so are findable --
546 * lookupSymbol() will subsequently see them by dlsym on the program's
547 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
548 *
549 * In the PEi386 case, open the DLLs and put handles to them in a
550 * linked list. When looking for a symbol, try all handles in the
551 * list. This means that we need to load even DLLs that are guaranteed
552 * to be in the ghc.exe image already, just so we can get a handle
553 * to give to loadSymbol, so that we can find the symbols. For such
554 * libraries, the LoadLibrary call should be a no-op except for returning
555 * the handle.
556 *
557 */
558
559 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
560
561 /* Suppose in ghci we load a temporary SO for a module containing
562 f = 1
563 and then modify the module, recompile, and load another temporary
564 SO with
565 f = 2
566 Then as we don't unload the first SO, dlsym will find the
567 f = 1
568 symbol whereas we want the
569 f = 2
570 symbol. We therefore need to keep our own SO handle list, and
571 try SOs in the right order. */
572
573 typedef
574 struct _OpenedSO {
575 struct _OpenedSO* next;
576 void *handle;
577 }
578 OpenedSO;
579
580 /* A list thereof. */
581 static OpenedSO* openedSOs = NULL;
582
583 static const char *
584 internal_dlopen(const char *dll_name)
585 {
586 OpenedSO* o_so;
587 void *hdl;
588 const char *errmsg;
589 char *errmsg_copy;
590
591 // omitted: RTLD_NOW
592 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
593 IF_DEBUG(linker,
594 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
595
596 //-------------- Begin critical section ------------------
597 // This critical section is necessary because dlerror() is not
598 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
599 // Also, the error message returned must be copied to preserve it
600 // (see POSIX also)
601
602 ACQUIRE_LOCK(&dl_mutex);
603 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
604
605 errmsg = NULL;
606 if (hdl == NULL) {
607 /* dlopen failed; return a ptr to the error msg. */
608 errmsg = dlerror();
609 if (errmsg == NULL) errmsg = "addDLL: unknown error";
610 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
611 strcpy(errmsg_copy, errmsg);
612 errmsg = errmsg_copy;
613 } else {
614 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
615 o_so->handle = hdl;
616 o_so->next = openedSOs;
617 openedSOs = o_so;
618 }
619
620 RELEASE_LOCK(&dl_mutex);
621 //--------------- End critical section -------------------
622
623 return errmsg;
624 }
625
626 /*
627 Note [RTLD_LOCAL]
628
629 In GHCi we want to be able to override previous .so's with newly
630 loaded .so's when we recompile something. This further implies that
631 when we look up a symbol in internal_dlsym() we have to iterate
632 through the loaded libraries (in order from most recently loaded to
633 oldest) looking up the symbol in each one until we find it.
634
635 However, this can cause problems for some symbols that are copied
636 by the linker into the executable image at runtime - see #8935 for a
637 lengthy discussion. To solve that problem we need to look up
638 symbols in the main executable *first*, before attempting to look
639 them up in the loaded .so's. But in order to make that work, we
640 have to always call dlopen with RTLD_LOCAL, so that the loaded
641 libraries don't populate the global symbol table.
642 */
643
644 static void *
645 internal_dlsym(const char *symbol) {
646 OpenedSO* o_so;
647 void *v;
648
649 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
650 ACQUIRE_LOCK(&dl_mutex);
651 dlerror();
652 // look in program first
653 v = dlsym(dl_prog_handle, symbol);
654 if (dlerror() == NULL) {
655 RELEASE_LOCK(&dl_mutex);
656 return v;
657 }
658
659 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
660 v = dlsym(o_so->handle, symbol);
661 if (dlerror() == NULL) {
662 RELEASE_LOCK(&dl_mutex);
663 return v;
664 }
665 }
666 RELEASE_LOCK(&dl_mutex);
667 return v;
668 }
669 # endif
670
671 const char *
672 addDLL( pathchar *dll_name )
673 {
674 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
675 /* ------------------- ELF DLL loader ------------------- */
676
677 #define NMATCH 5
678 regmatch_t match[NMATCH];
679 const char *errmsg;
680 FILE* fp;
681 size_t match_length;
682 #define MAXLINE 1000
683 char line[MAXLINE];
684 int result;
685
686 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
687 errmsg = internal_dlopen(dll_name);
688
689 if (errmsg == NULL) {
690 return NULL;
691 }
692
693 // GHC Trac ticket #2615
694 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
695 // contain linker scripts rather than ELF-format object code. This
696 // code handles the situation by recognizing the real object code
697 // file name given in the linker script.
698 //
699 // If an "invalid ELF header" error occurs, it is assumed that the
700 // .so file contains a linker script instead of ELF object code.
701 // In this case, the code looks for the GROUP ( ... ) linker
702 // directive. If one is found, the first file name inside the
703 // parentheses is treated as the name of a dynamic library and the
704 // code attempts to dlopen that file. If this is also unsuccessful,
705 // an error message is returned.
706
707 // see if the error message is due to an invalid ELF header
708 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
709 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
710 IF_DEBUG(linker, debugBelch("result = %i\n", result));
711 if (result == 0) {
712 // success -- try to read the named file as a linker script
713 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
714 MAXLINE-1);
715 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
716 line[match_length] = '\0'; // make sure string is null-terminated
717 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
718 if ((fp = __rts_fopen(line, "r")) == NULL) {
719 return errmsg; // return original error if open fails
720 }
721 // try to find a GROUP or INPUT ( ... ) command
722 while (fgets(line, MAXLINE, fp) != NULL) {
723 IF_DEBUG(linker, debugBelch("input line = %s", line));
724 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
725 // success -- try to dlopen the first named file
726 IF_DEBUG(linker, debugBelch("match%s\n",""));
727 line[match[2].rm_eo] = '\0';
728 stgFree((void*)errmsg); // Free old message before creating new one
729 errmsg = internal_dlopen(line+match[2].rm_so);
730 break;
731 }
732 // if control reaches here, no GROUP or INPUT ( ... ) directive
733 // was found and the original error message is returned to the
734 // caller
735 }
736 fclose(fp);
737 }
738 return errmsg;
739
740 # elif defined(OBJFORMAT_PEi386)
741 return addDLL_PEi386(dll_name, NULL);
742
743 # else
744 barf("addDLL: not implemented on this platform");
745 # endif
746 }
747
748 /* -----------------------------------------------------------------------------
749 * Searches the system directories to determine if there is a system DLL that
750 * satisfies the given name. This prevent GHCi from linking against a static
751 * library if a DLL is available.
752 *
753 * Returns: NULL on failure or no DLL found, else the full path to the DLL
754 * that can be loaded.
755 */
756 pathchar* findSystemLibrary(pathchar* dll_name)
757 {
758 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%"
759 PATH_FMT "'\n", dll_name));
760
761 #if defined(OBJFORMAT_PEi386)
762 return findSystemLibrary_PEi386(dll_name);
763 #else
764 (void)(dll_name); // Function not implemented for other platforms.
765 return NULL;
766 #endif
767 }
768
769 /* -----------------------------------------------------------------------------
770 * Emits a warning determining that the system is missing a required security
771 * update that we need to get access to the proper APIs
772 */
773 void warnMissingKBLibraryPaths( void )
774 {
775 static HsBool missing_update_warn = HS_BOOL_FALSE;
776 if (!missing_update_warn) {
777 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
778 missing_update_warn = HS_BOOL_TRUE;
779 }
780 }
781
782 /* -----------------------------------------------------------------------------
783 * appends a directory to the process DLL Load path so LoadLibrary can find it
784 *
785 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
786 * restore the search path to what it was before this call.
787 */
788 HsPtr addLibrarySearchPath(pathchar* dll_path)
789 {
790 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%"
791 PATH_FMT "'\n", dll_path));
792
793 #if defined(OBJFORMAT_PEi386)
794 return addLibrarySearchPath_PEi386(dll_path);
795 #else
796 (void)(dll_path); // Function not implemented for other platforms.
797 return NULL;
798 #endif
799 }
800
801 /* -----------------------------------------------------------------------------
802 * removes a directory from the process DLL Load path
803 *
804 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
805 */
806 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
807 {
808 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n",
809 dll_path_index));
810
811 #if defined(OBJFORMAT_PEi386)
812 return removeLibrarySearchPath_PEi386(dll_path_index);
813 #else
814 (void)(dll_path_index); // Function not implemented for other platforms.
815 return HS_BOOL_FALSE;
816 #endif
817 }
818
819 /* -----------------------------------------------------------------------------
820 * insert a symbol in the hash table
821 *
822 * Returns: 0 on failure, nozero on success
823 */
824 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
825 {
826 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE,
827 NULL);
828 }
829
830 /* -----------------------------------------------------------------------------
831 * lookup a symbol in the hash table
832 */
833 #if defined(OBJFORMAT_PEi386)
834 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
835 {
836 return lookupSymbol_PEi386(lbl);
837 }
838
839 #else
840
841 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
842 {
843 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
844
845 ASSERT(symhash != NULL);
846 RtsSymbolInfo *pinfo;
847
848 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
849 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
850
851 # if defined(OBJFORMAT_ELF)
852 return internal_dlsym(lbl);
853 # elif defined(OBJFORMAT_MACHO)
854
855 /* HACK: On OS X, all symbols are prefixed with an underscore.
856 However, dlsym wants us to omit the leading underscore from the
857 symbol name -- the dlsym routine puts it back on before
858 searching for the symbol. For now, we simply strip it off here
859 (and ONLY here).
860 */
861 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n",
862 lbl));
863 ASSERT(lbl[0] == '_');
864 return internal_dlsym(lbl + 1);
865
866 # else
867 ASSERT(2+2 == 5);
868 return NULL;
869 # endif
870 } else {
871 return loadSymbol(lbl, pinfo);
872 }
873 }
874 #endif /* OBJFORMAT_PEi386 */
875
876 /*
877 * Load and relocate the object code for a symbol as necessary.
878 * Symbol name only used for diagnostics output.
879 */
880 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
881 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl,
882 pinfo->value));
883 ObjectCode* oc = pinfo->owner;
884
885 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
886 See Note [runtime-linker-phases] */
887 if (oc && lbl && oc->status == OBJECT_LOADED) {
888 oc->status = OBJECT_NEEDED;
889 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand "
890 "loading symbol '%s'\n", lbl));
891 int r = ocTryLoad(oc);
892 if (!r) {
893 return NULL;
894 }
895
896 #if defined(PROFILING)
897 // collect any new cost centres & CCSs
898 // that were defined during runInit
899 initProfiling2();
900 #endif
901 }
902
903 return pinfo->value;
904 }
905
906 SymbolAddr* lookupSymbol( SymbolName* lbl )
907 {
908 ACQUIRE_LOCK(&linker_mutex);
909 SymbolAddr* r = lookupSymbol_(lbl);
910 if (!r) {
911 errorBelch("^^ Could not load '%s', dependency unresolved. "
912 "See top entry above.\n", lbl);
913 fflush(stderr);
914 }
915 RELEASE_LOCK(&linker_mutex);
916 return r;
917 }
918
919 /* -----------------------------------------------------------------------------
920 Create a StablePtr for a foreign export. This is normally called by
921 a C function with __attribute__((constructor)), which is generated
922 by GHC and linked into the module.
923
924 If the object code is being loaded dynamically, then we remember
925 which StablePtrs were allocated by the constructors and free them
926 again in unloadObj().
927 -------------------------------------------------------------------------- */
928
929 static ObjectCode *loading_obj = NULL;
930
931 StgStablePtr foreignExportStablePtr (StgPtr p)
932 {
933 ForeignExportStablePtr *fe_sptr;
934 StgStablePtr *sptr;
935
936 sptr = getStablePtr(p);
937
938 if (loading_obj != NULL) {
939 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
940 "foreignExportStablePtr");
941 fe_sptr->stable_ptr = sptr;
942 fe_sptr->next = loading_obj->stable_ptrs;
943 loading_obj->stable_ptrs = fe_sptr;
944 }
945
946 return sptr;
947 }
948
949
950 /* -----------------------------------------------------------------------------
951 * Debugging aid: look in GHCi's object symbol tables for symbols
952 * within DELTA bytes of the specified address, and show their names.
953 */
954 #if defined(DEBUG)
955 void ghci_enquire ( SymbolAddr* addr );
956
957 void ghci_enquire(SymbolAddr* addr)
958 {
959 int i;
960 SymbolName* sym;
961 RtsSymbolInfo* a;
962 const int DELTA = 64;
963 ObjectCode* oc;
964
965 for (oc = objects; oc; oc = oc->next) {
966 for (i = 0; i < oc->n_symbols; i++) {
967 sym = oc->symbols[i].name;
968 if (sym == NULL) continue;
969 a = NULL;
970 if (a == NULL) {
971 ghciLookupSymbolInfo(symhash, sym, &a);
972 }
973 if (a == NULL) {
974 // debugBelch("ghci_enquire: can't find %s\n", sym);
975 }
976 else if ( a->value
977 && (char*)addr-DELTA <= (char*)a->value
978 && (char*)a->value <= (char*)addr+DELTA) {
979 debugBelch("%p + %3d == `%s'\n", addr,
980 (int)((char*)a->value - (char*)addr), sym);
981 }
982 }
983 }
984 }
985 #endif
986
987 pathchar*
988 resolveSymbolAddr (pathchar* buffer, int size,
989 SymbolAddr* symbol, uintptr_t* top)
990 {
991 #if defined(OBJFORMAT_PEi386)
992 return resolveSymbolAddr_PEi386 (buffer, size, symbol, top);
993 #else /* OBJFORMAT_PEi386 */
994 (void)buffer;
995 (void)size;
996 (void)symbol;
997 (void)top;
998 return NULL;
999 #endif /* OBJFORMAT_PEi386 */
1000 }
1001
1002 #if RTS_LINKER_USE_MMAP
1003 //
1004 // Returns NULL on failure.
1005 //
1006 void *
1007 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1008 {
1009 void *map_addr = NULL;
1010 void *result;
1011 size_t size;
1012 static uint32_t fixed = 0;
1013
1014 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1015 size = roundUpToPage(bytes);
1016
1017 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1018 mmap_again:
1019
1020 if (mmap_32bit_base != 0) {
1021 map_addr = mmap_32bit_base;
1022 }
1023 #endif
1024
1025 IF_DEBUG(linker,
1026 debugBelch("mmapForLinker: \tprotection %#0x\n",
1027 PROT_EXEC | PROT_READ | PROT_WRITE));
1028 IF_DEBUG(linker,
1029 debugBelch("mmapForLinker: \tflags %#0x\n",
1030 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1031
1032 result = mmap(map_addr, size,
1033 PROT_EXEC|PROT_READ|PROT_WRITE,
1034 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1035
1036 if (result == MAP_FAILED) {
1037 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1038 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1039 return NULL;
1040 }
1041
1042 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1043 if (mmap_32bit_base != 0) {
1044 if (result == map_addr) {
1045 mmap_32bit_base = (StgWord8*)map_addr + size;
1046 } else {
1047 if ((W_)result > 0x80000000) {
1048 // oops, we were given memory over 2Gb
1049 munmap(result,size);
1050 #if defined(freebsd_HOST_OS) || \
1051 defined(kfreebsdgnu_HOST_OS) || \
1052 defined(dragonfly_HOST_OS)
1053 // Some platforms require MAP_FIXED. This is normally
1054 // a bad idea, because MAP_FIXED will overwrite
1055 // existing mappings.
1056 fixed = MAP_FIXED;
1057 goto mmap_again;
1058 #else
1059 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1060 "asked for %lu bytes at %p. "
1061 "Try specifying an address with +RTS -xm<addr> -RTS",
1062 size, map_addr);
1063 return NULL;
1064 #endif
1065 } else {
1066 // hmm, we were given memory somewhere else, but it's
1067 // still under 2Gb so we can use it. Next time, ask
1068 // for memory right after the place we just got some
1069 mmap_32bit_base = (StgWord8*)result + size;
1070 }
1071 }
1072 } else {
1073 if ((W_)result > 0x80000000) {
1074 // oops, we were given memory over 2Gb
1075 // ... try allocating memory somewhere else?;
1076 debugTrace(DEBUG_linker,
1077 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1078 bytes, result);
1079 munmap(result, size);
1080
1081 // Set a base address and try again... (guess: 1Gb)
1082 mmap_32bit_base = (void*)0x40000000;
1083 goto mmap_again;
1084 }
1085 }
1086 #endif
1087
1088 IF_DEBUG(linker,
1089 debugBelch("mmapForLinker: mapped %" FMT_Word
1090 " bytes starting at %p\n", (W_)size, result));
1091 IF_DEBUG(linker,
1092 debugBelch("mmapForLinker: done\n"));
1093
1094 return result;
1095 }
1096 #endif
1097
1098 /*
1099 * Remove symbols from the symbol table, and free oc->symbols.
1100 * This operation is idempotent.
1101 */
1102 static void removeOcSymbols (ObjectCode *oc)
1103 {
1104 if (oc->symbols == NULL) return;
1105
1106 // Remove all the mappings for the symbols within this object..
1107 int i;
1108 for (i = 0; i < oc->n_symbols; i++) {
1109 if (oc->symbols[i].name != NULL) {
1110 ghciRemoveSymbolTable(symhash, oc->symbols[i].name, oc);
1111 }
1112 }
1113
1114 stgFree(oc->symbols);
1115 oc->symbols = NULL;
1116 }
1117
1118 /*
1119 * Release StablePtrs and free oc->stable_ptrs.
1120 * This operation is idempotent.
1121 */
1122 static void freeOcStablePtrs (ObjectCode *oc)
1123 {
1124 // Release any StablePtrs that were created when this
1125 // object module was initialized.
1126 ForeignExportStablePtr *fe_ptr, *next;
1127
1128 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1129 next = fe_ptr->next;
1130 freeStablePtr(fe_ptr->stable_ptr);
1131 stgFree(fe_ptr);
1132 }
1133 oc->stable_ptrs = NULL;
1134 }
1135
1136 static void
1137 freePreloadObjectFile (ObjectCode *oc)
1138 {
1139 #if defined(mingw32_HOST_OS)
1140 freePreloadObjectFile_PEi386(oc);
1141 #else
1142
1143 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1144 munmap(oc->image, oc->fileSize);
1145 }
1146 else {
1147 stgFree(oc->image);
1148 }
1149
1150 #endif
1151
1152 oc->image = NULL;
1153 oc->fileSize = 0;
1154 }
1155
1156 /*
1157 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1158 * the GC when a previously unloaded ObjectCode has been determined to be
1159 * unused, and when an error occurs during loadObj().
1160 */
1161 void freeObjectCode (ObjectCode *oc)
1162 {
1163 freePreloadObjectFile(oc);
1164
1165 if (oc->symbols != NULL) {
1166 stgFree(oc->symbols);
1167 oc->symbols = NULL;
1168 }
1169
1170 if (oc->extraInfos != NULL) {
1171 freeHashTable(oc->extraInfos, NULL);
1172 oc->extraInfos = NULL;
1173 }
1174
1175 if (oc->sections != NULL) {
1176 int i;
1177 for (i=0; i < oc->n_sections; i++) {
1178 if (oc->sections[i].start != NULL) {
1179 switch(oc->sections[i].alloc){
1180 #if RTS_LINKER_USE_MMAP
1181 case SECTION_MMAP:
1182 munmap(oc->sections[i].mapped_start,
1183 oc->sections[i].mapped_size);
1184 break;
1185 case SECTION_M32:
1186 m32_free(oc->sections[i].start,
1187 oc->sections[i].size);
1188 break;
1189 #endif
1190 case SECTION_MALLOC:
1191 stgFree(oc->sections[i].start);
1192 break;
1193 default:
1194 break;
1195 }
1196 }
1197 if (oc->sections[i].info) {
1198 stgFree(oc->sections[i].info);
1199 }
1200 }
1201 stgFree(oc->sections);
1202 }
1203
1204 freeProddableBlocks(oc);
1205
1206 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1207 * alongside the image, so we don't need to free. */
1208 #if defined(NEED_SYMBOL_EXTRAS) && (!defined(x86_64_HOST_ARCH) \
1209 || !defined(mingw32_HOST_OS))
1210 if (RTS_LINKER_USE_MMAP) {
1211 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1212 m32_free(oc->symbol_extras,
1213 sizeof(SymbolExtra) * oc->n_symbol_extras);
1214 }
1215 }
1216 else {
1217 stgFree(oc->symbol_extras);
1218 }
1219 #endif
1220
1221 #if defined(OBJECTFORMAT_MACHO)
1222 ocDeinit_MachO(oc);
1223 #endif
1224 #if defined(OBJFORMAT_ELF)
1225 ocDeinit_ELF(oc);
1226 #endif
1227
1228 stgFree(oc->fileName);
1229 stgFree(oc->archiveMemberName);
1230
1231 stgFree(oc);
1232 }
1233
1234 /* -----------------------------------------------------------------------------
1235 * Sets the initial status of a fresh ObjectCode
1236 */
1237 static void setOcInitialStatus(ObjectCode* oc) {
1238 /* If a target has requested the ObjectCode not to be resolved then
1239 honor this requests. Usually this means the ObjectCode has not been
1240 initialized and can't be. */
1241 if (oc->status == OBJECT_DONT_RESOLVE)
1242 return;
1243
1244 if (oc->archiveMemberName == NULL) {
1245 oc->status = OBJECT_NEEDED;
1246 } else {
1247 oc->status = OBJECT_LOADED;
1248 }
1249 }
1250
1251 ObjectCode*
1252 mkOc( pathchar *path, char *image, int imageSize,
1253 bool mapped, char *archiveMemberName, int misalignment ) {
1254 ObjectCode* oc;
1255
1256 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1257 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1258
1259 oc->info = NULL;
1260
1261 # if defined(OBJFORMAT_ELF)
1262 oc->formatName = "ELF";
1263 # elif defined(OBJFORMAT_PEi386)
1264 oc->formatName = "PEi386";
1265 # elif defined(OBJFORMAT_MACHO)
1266 oc->formatName = "Mach-O";
1267 # else
1268 stgFree(oc);
1269 barf("loadObj: not implemented on this platform");
1270 # endif
1271
1272 oc->image = image;
1273 oc->fileName = pathdup(path);
1274
1275 if (archiveMemberName) {
1276 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1,
1277 "loadObj" );
1278 strcpy(oc->archiveMemberName, archiveMemberName);
1279 } else {
1280 oc->archiveMemberName = NULL;
1281 }
1282
1283 setOcInitialStatus( oc );
1284
1285 oc->fileSize = imageSize;
1286 oc->symbols = NULL;
1287 oc->n_sections = 0;
1288 oc->sections = NULL;
1289 oc->proddables = NULL;
1290 oc->stable_ptrs = NULL;
1291 #if defined(NEED_SYMBOL_EXTRAS)
1292 oc->symbol_extras = NULL;
1293 #endif
1294 oc->imageMapped = mapped;
1295
1296 oc->misalignment = misalignment;
1297 oc->extraInfos = NULL;
1298
1299 /* chain it onto the list of objects */
1300 oc->next = NULL;
1301
1302 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1303 return oc;
1304 }
1305
1306 /* -----------------------------------------------------------------------------
1307 * Check if an object or archive is already loaded.
1308 *
1309 * Returns: 1 if the path is already loaded, 0 otherwise.
1310 */
1311 HsInt
1312 isAlreadyLoaded( pathchar *path )
1313 {
1314 ObjectCode *o;
1315 for (o = objects; o; o = o->next) {
1316 if (0 == pathcmp(o->fileName, path)) {
1317 return 1; /* already loaded */
1318 }
1319 }
1320 return 0; /* not loaded yet */
1321 }
1322
1323 //
1324 // Load the object file into memory. This will not be its final resting place,
1325 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
1326 // address space, properly aligned.
1327 //
1328 static ObjectCode *
1329 preloadObjectFile (pathchar *path)
1330 {
1331 int fileSize;
1332 struct_stat st;
1333 int r;
1334 void *image;
1335 ObjectCode *oc;
1336 int misalignment = 0;
1337
1338 r = pathstat(path, &st);
1339 if (r == -1) {
1340 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
1341 return NULL;
1342 }
1343
1344 fileSize = st.st_size;
1345
1346 #if RTS_LINKER_USE_MMAP
1347 int fd;
1348
1349 /* On many architectures malloc'd memory isn't executable, so we need to use
1350 * mmap. */
1351
1352 #if defined(openbsd_HOST_OS)
1353 fd = open(path, O_RDONLY, S_IRUSR);
1354 #else
1355 fd = open(path, O_RDONLY);
1356 #endif
1357 if (fd == -1) {
1358 errorBelch("loadObj: can't open %s", path);
1359 return NULL;
1360 }
1361
1362 /* iOS does not permit to mmap with r+w+x, however while the comment for
1363 * this function says this is not the final resting place, for some
1364 * architectures / hosts (at least mach-o non-iOS -- see ocGetNames_MachO)
1365 * the image mmaped here in fact ends up being the final resting place for
1366 * the sections. And hence we need to leave r+w+x here for other hosts
1367 * until all hosts have been made aware of the initial image being r+w only.
1368 *
1369 * See also the misalignment logic for darwin below.
1370 */
1371 #if defined(ios_HOST_OS)
1372 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
1373 #else
1374 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
1375 MAP_PRIVATE, fd, 0);
1376 #endif
1377
1378 if (image == MAP_FAILED) {
1379 errorBelch("mmap: failed. errno = %d", errno);
1380 }
1381 // not 32-bit yet, we'll remap later
1382 close(fd);
1383
1384 #else /* !RTS_LINKER_USE_MMAP */
1385 FILE *f;
1386
1387 /* load the image into memory */
1388 /* coverity[toctou] */
1389 f = pathopen(path, WSTR("rb"));
1390 if (!f) {
1391 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
1392 return NULL;
1393 }
1394
1395 # if defined(darwin_HOST_OS)
1396
1397 // In a Mach-O .o file, all sections can and will be misaligned
1398 // if the total size of the headers is not a multiple of the
1399 // desired alignment. This is fine for .o files that only serve
1400 // as input for the static linker, but it's not fine for us,
1401 // as SSE (used by gcc for floating point) and Altivec require
1402 // 16-byte alignment.
1403 // We calculate the correct alignment from the header before
1404 // reading the file, and then we misalign image on purpose so
1405 // that the actual sections end up aligned again.
1406 misalignment = machoGetMisalignment(f);
1407 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
1408 image += misalignment;
1409
1410 # else /* !defined(darwin_HOST_OS) */
1411
1412 image = stgMallocBytes(fileSize, "loadObj(image)");
1413
1414 #endif
1415
1416 int n;
1417 n = fread ( image, 1, fileSize, f );
1418 fclose(f);
1419 if (n != fileSize) {
1420 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
1421 stgFree(image);
1422 return NULL;
1423 }
1424
1425 #endif /* RTS_LINKER_USE_MMAP */
1426
1427 oc = mkOc(path, image, fileSize, true, NULL, misalignment);
1428
1429 #if defined(OBJFORMAT_MACHO)
1430 if (ocVerifyImage_MachO( oc ))
1431 ocInit_MachO( oc );
1432 #endif
1433 #if defined(OBJFORMAT_ELF)
1434 if(ocVerifyImage_ELF( oc ))
1435 ocInit_ELF( oc );
1436 #endif
1437 return oc;
1438 }
1439
1440 /* -----------------------------------------------------------------------------
1441 * Load an obj (populate the global symbol table, but don't resolve yet)
1442 *
1443 * Returns: 1 if ok, 0 on error.
1444 */
1445 static HsInt loadObj_ (pathchar *path)
1446 {
1447 ObjectCode* oc;
1448 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
1449
1450 /* debugBelch("loadObj %s\n", path ); */
1451
1452 /* Check that we haven't already loaded this object.
1453 Ignore requests to load multiple times */
1454
1455 if (isAlreadyLoaded(path)) {
1456 IF_DEBUG(linker,
1457 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1458 return 1; /* success */
1459 }
1460
1461 oc = preloadObjectFile(path);
1462 if (oc == NULL) return 0;
1463
1464 if (! loadOc(oc)) {
1465 // failed; free everything we've allocated
1466 removeOcSymbols(oc);
1467 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
1468 freeObjectCode(oc);
1469 return 0;
1470 }
1471
1472 oc->next = objects;
1473 objects = oc;
1474 return 1;
1475 }
1476
1477 HsInt loadObj (pathchar *path)
1478 {
1479 ACQUIRE_LOCK(&linker_mutex);
1480 HsInt r = loadObj_(path);
1481 RELEASE_LOCK(&linker_mutex);
1482 return r;
1483 }
1484
1485 HsInt loadOc (ObjectCode* oc)
1486 {
1487 int r;
1488
1489 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
1490
1491 /* verify the in-memory image */
1492 # if defined(OBJFORMAT_ELF)
1493 r = ocVerifyImage_ELF ( oc );
1494 # elif defined(OBJFORMAT_PEi386)
1495 r = ocVerifyImage_PEi386 ( oc );
1496 # elif defined(OBJFORMAT_MACHO)
1497 r = ocVerifyImage_MachO ( oc );
1498 # else
1499 barf("loadObj: no verify method");
1500 # endif
1501 if (!r) {
1502 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
1503 return r;
1504 }
1505
1506 /* Note [loadOc orderings]
1507 ocAllocateSymbolsExtras has only two pre-requisites, it must run after
1508 preloadObjectFile and ocVerify. Neither have changed. On most targets
1509 allocating the extras is independent on parsing the section data, so the
1510 order between these two never mattered.
1511
1512 On Windows, when we have an import library we (for now, as we don't honor
1513 the lazy loading semantics of the library and instead GHCi is already
1514 lazy) don't use the library after ocGetNames as it just populates the
1515 symbol table. Allocating space for jump tables in ocAllocateSymbolExtras
1516 would just be a waste then as we'll be stopping further processing of the
1517 library in the next few steps. */
1518
1519 /* build the symbol list for this image */
1520 # if defined(OBJFORMAT_ELF)
1521 r = ocGetNames_ELF ( oc );
1522 # elif defined(OBJFORMAT_PEi386)
1523 r = ocGetNames_PEi386 ( oc );
1524 # elif defined(OBJFORMAT_MACHO)
1525 r = ocGetNames_MachO ( oc );
1526 # else
1527 barf("loadObj: no getNames method");
1528 # endif
1529 if (!r) {
1530 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
1531 return r;
1532 }
1533
1534 #if defined(NEED_SYMBOL_EXTRAS)
1535 # if defined(OBJFORMAT_MACHO)
1536 r = ocAllocateSymbolExtras_MachO ( oc );
1537 if (!r) {
1538 IF_DEBUG(linker,
1539 debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
1540 return r;
1541 }
1542 # elif defined(OBJFORMAT_ELF)
1543 r = ocAllocateSymbolExtras_ELF ( oc );
1544 if (!r) {
1545 IF_DEBUG(linker,
1546 debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
1547 return r;
1548 }
1549 # elif defined(OBJFORMAT_PEi386)
1550 ocAllocateSymbolExtras_PEi386 ( oc );
1551 # endif
1552 #endif
1553
1554 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
1555 setOcInitialStatus( oc );
1556 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
1557
1558 return 1;
1559 }
1560
1561 /* -----------------------------------------------------------------------------
1562 * try to load and initialize an ObjectCode into memory
1563 *
1564 * Returns: 1 if ok, 0 on error.
1565 */
1566 int ocTryLoad (ObjectCode* oc) {
1567 int r;
1568
1569 if (oc->status != OBJECT_NEEDED) {
1570 return 1;
1571 }
1572
1573 /* Check for duplicate symbols by looking into `symhash`.
1574 Duplicate symbols are any symbols which exist
1575 in different ObjectCodes that have both been loaded, or
1576 are to be loaded by this call.
1577
1578 This call is intended to have no side-effects when a non-duplicate
1579 symbol is re-inserted. A symbol is only a duplicate if the object file
1580 it is defined in has had it's relocations resolved. A resolved object
1581 file means the symbols inside it are required.
1582
1583 The symbol address is not used to distinguish symbols. Duplicate symbols
1584 are distinguished by name, oc and attributes (weak symbols etc).
1585 */
1586 int x;
1587 Symbol_t symbol;
1588 for (x = 0; x < oc->n_symbols; x++) {
1589 symbol = oc->symbols[x];
1590 if ( symbol.name
1591 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol.name,
1592 symbol.addr,
1593 isSymbolWeak(oc, symbol.name), oc)) {
1594 return 0;
1595 }
1596 }
1597
1598 # if defined(OBJFORMAT_ELF)
1599 r = ocResolve_ELF ( oc );
1600 # elif defined(OBJFORMAT_PEi386)
1601 r = ocResolve_PEi386 ( oc );
1602 # elif defined(OBJFORMAT_MACHO)
1603 r = ocResolve_MachO ( oc );
1604 # else
1605 barf("ocTryLoad: not implemented on this platform");
1606 # endif
1607 if (!r) { return r; }
1608
1609 // run init/init_array/ctors/mod_init_func
1610
1611 loading_obj = oc; // tells foreignExportStablePtr what to do
1612 #if defined(OBJFORMAT_ELF)
1613 r = ocRunInit_ELF ( oc );
1614 #elif defined(OBJFORMAT_PEi386)
1615 r = ocRunInit_PEi386 ( oc );
1616 #elif defined(OBJFORMAT_MACHO)
1617 r = ocRunInit_MachO ( oc );
1618 #else
1619 barf("ocTryLoad: initializers not implemented on this platform");
1620 #endif
1621 loading_obj = NULL;
1622
1623 if (!r) { return r; }
1624
1625 oc->status = OBJECT_RESOLVED;
1626
1627 return 1;
1628 }
1629
1630 /* -----------------------------------------------------------------------------
1631 * resolve all the currently unlinked objects in memory
1632 *
1633 * Returns: 1 if ok, 0 on error.
1634 */
1635 static HsInt resolveObjs_ (void)
1636 {
1637 ObjectCode *oc;
1638 int r;
1639
1640 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
1641
1642 for (oc = objects; oc; oc = oc->next) {
1643 r = ocTryLoad(oc);
1644 if (!r)
1645 {
1646 return r;
1647 }
1648 }
1649
1650 #if defined(PROFILING)
1651 // collect any new cost centres & CCSs that were defined during runInit
1652 initProfiling2();
1653 #endif
1654
1655 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
1656 return 1;
1657 }
1658
1659 HsInt resolveObjs (void)
1660 {
1661 ACQUIRE_LOCK(&linker_mutex);
1662 HsInt r = resolveObjs_();
1663 RELEASE_LOCK(&linker_mutex);
1664 return r;
1665 }
1666
1667 /* -----------------------------------------------------------------------------
1668 * delete an object from the pool
1669 */
1670 static HsInt unloadObj_ (pathchar *path, bool just_purge)
1671 {
1672 ObjectCode *oc, *prev, *next;
1673 HsBool unloadedAnyObj = HS_BOOL_FALSE;
1674
1675 ASSERT(symhash != NULL);
1676 ASSERT(objects != NULL);
1677
1678 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
1679
1680 prev = NULL;
1681 for (oc = objects; oc; oc = next) {
1682 next = oc->next; // oc might be freed
1683
1684 if (!pathcmp(oc->fileName,path)) {
1685
1686 // these are both idempotent, so in just_purge mode we can
1687 // later call unloadObj() to really unload the object.
1688 removeOcSymbols(oc);
1689 freeOcStablePtrs(oc);
1690
1691 if (!just_purge) {
1692 if (prev == NULL) {
1693 objects = oc->next;
1694 } else {
1695 prev->next = oc->next;
1696 }
1697 ACQUIRE_LOCK(&linker_unloaded_mutex);
1698 oc->next = unloaded_objects;
1699 unloaded_objects = oc;
1700 oc->status = OBJECT_UNLOADED;
1701 RELEASE_LOCK(&linker_unloaded_mutex);
1702 // We do not own oc any more; it can be released at any time by
1703 // the GC in checkUnload().
1704 } else {
1705 prev = oc;
1706 }
1707
1708 /* This could be a member of an archive so continue
1709 * unloading other members. */
1710 unloadedAnyObj = HS_BOOL_TRUE;
1711 } else {
1712 prev = oc;
1713 }
1714 }
1715
1716 if (unloadedAnyObj) {
1717 return 1;
1718 }
1719 else {
1720 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
1721 return 0;
1722 }
1723 }
1724
1725 HsInt unloadObj (pathchar *path)
1726 {
1727 ACQUIRE_LOCK(&linker_mutex);
1728 HsInt r = unloadObj_(path, false);
1729 RELEASE_LOCK(&linker_mutex);
1730 return r;
1731 }
1732
1733 HsInt purgeObj (pathchar *path)
1734 {
1735 ACQUIRE_LOCK(&linker_mutex);
1736 HsInt r = unloadObj_(path, true);
1737 RELEASE_LOCK(&linker_mutex);
1738 return r;
1739 }
1740
1741 static OStatus getObjectLoadStatus_ (pathchar *path)
1742 {
1743 ObjectCode *o;
1744 for (o = objects; o; o = o->next) {
1745 if (0 == pathcmp(o->fileName, path)) {
1746 return o->status;
1747 }
1748 }
1749 for (o = unloaded_objects; o; o = o->next) {
1750 if (0 == pathcmp(o->fileName, path)) {
1751 return o->status;
1752 }
1753 }
1754 return OBJECT_NOT_LOADED;
1755 }
1756
1757 OStatus getObjectLoadStatus (pathchar *path)
1758 {
1759 ACQUIRE_LOCK(&linker_mutex);
1760 OStatus r = getObjectLoadStatus_(path);
1761 RELEASE_LOCK(&linker_mutex);
1762 return r;
1763 }
1764
1765 /* -----------------------------------------------------------------------------
1766 * Sanity checking. For each ObjectCode, maintain a list of address ranges
1767 * which may be prodded during relocation, and abort if we try and write
1768 * outside any of these.
1769 */
1770 void
1771 addProddableBlock ( ObjectCode* oc, void* start, int size )
1772 {
1773 ProddableBlock* pb
1774 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
1775
1776 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
1777 ASSERT(size > 0);
1778 pb->start = start;
1779 pb->size = size;
1780 pb->next = oc->proddables;
1781 oc->proddables = pb;
1782 }
1783
1784 void
1785 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
1786 {
1787 ProddableBlock* pb;
1788
1789 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
1790 char* s = (char*)(pb->start);
1791 char* e = s + pb->size;
1792 char* a = (char*)addr;
1793 if (a >= s && (a+size) <= e) return;
1794 }
1795 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
1796 }
1797
1798 void freeProddableBlocks (ObjectCode *oc)
1799 {
1800 ProddableBlock *pb, *next;
1801
1802 for (pb = oc->proddables; pb != NULL; pb = next) {
1803 next = pb->next;
1804 stgFree(pb);
1805 }
1806 oc->proddables = NULL;
1807 }
1808
1809 /* -----------------------------------------------------------------------------
1810 * Section management.
1811 */
1812 void
1813 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
1814 void* start, StgWord size, StgWord mapped_offset,
1815 void* mapped_start, StgWord mapped_size)
1816 {
1817 s->start = start; /* actual start of section in memory */
1818 s->size = size; /* actual size of section in memory */
1819 s->kind = kind;
1820 s->alloc = alloc;
1821 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
1822
1823 s->mapped_start = mapped_start; /* start of mmap() block */
1824 s->mapped_size = mapped_size; /* size of mmap() block */
1825
1826 if (!s->info)
1827 s->info
1828 = (struct SectionFormatInfo*)stgCallocBytes(1, sizeof *s->info,
1829 "addSection(SectionFormatInfo)");
1830
1831 IF_DEBUG(linker,
1832 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
1833 start, (void*)((StgWord)start + size),
1834 size, kind ));
1835 }
1836