9fb91bf3ca21998adf0d1d329fa506da1b7c0ad7
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "StablePtr.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "linker/CacheFlush.h"
32 #include "linker/SymbolExtras.h"
33 #include "PathUtils.h"
34
35 #if !defined(mingw32_HOST_OS)
36 #include "posix/Signals.h"
37 #endif
38
39 // get protos for is*()
40 #include <ctype.h>
41
42 #if defined(HAVE_SYS_TYPES_H)
43 #include <sys/types.h>
44 #endif
45
46 #include <inttypes.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <assert.h>
51 #include <fs_rts.h>
52
53 #if defined(HAVE_SYS_STAT_H)
54 #include <sys/stat.h>
55 #endif
56
57 #if defined(HAVE_DLFCN_H)
58 #include <dlfcn.h>
59 #endif
60
61 #if defined(OBJFORMAT_ELF)
62 # include "linker/Elf.h"
63 # include <regex.h> // regex is already used by dlopen() so this is OK
64 // to use here without requiring an additional lib
65 #elif defined(OBJFORMAT_PEi386)
66 # include "linker/PEi386.h"
67 # include <windows.h>
68 #elif defined(OBJFORMAT_MACHO)
69 # include "linker/MachO.h"
70 # include <regex.h>
71 # include <mach/machine.h>
72 # include <mach-o/fat.h>
73 #endif
74
75 #if defined(dragonfly_HOST_OS)
76 #include <sys/tls.h>
77 #endif
78 /*
79 Note [runtime-linker-support]
80 -----------------------------
81 When adding support for a new platform to the runtime linker please
82 update `$TOP/configure.ac` under heading `Does target have runtime
83 linker support?`.
84 */
85 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
86 This hashtable will contain information on all symbols
87 that we know of, however the .o they are in may not be loaded.
88
89 Until the ObjectCode the symbol belongs to is actually
90 loaded this symbol may be replaced. So do not rely on
91 addresses of unloaded symbols.
92
93 Note [runtime-linker-phases]
94 --------------------------------------
95 Broadly the behavior of the runtime linker can be
96 split into the following four phases:
97
98 - Indexing (e.g. ocVerifyImage and ocGetNames)
99 - Initialization (e.g. ocResolve and ocRunInit)
100 - Resolve (e.g. resolveObjs())
101 - Lookup (e.g. lookupSymbol)
102
103 This is to enable lazy loading of symbols. Eager loading is problematic
104 as it means that all symbols must be available, even those which we will
105 never use. This is especially painful on Windows, where the number of
106 libraries required to link things like mingwex grows to be quite high.
107
108 We proceed through these stages as follows,
109
110 * During Indexing we verify and open the ObjectCode and
111 perform a quick scan/indexing of the ObjectCode. All the work
112 required to actually load the ObjectCode is done.
113
114 All symbols from the ObjectCode are also inserted into
115 `symhash`, where possible duplicates are handled via the semantics
116 described in `ghciInsertSymbolTable`.
117
118 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
119 depending on whether they are an archive member or not.
120
121 * During initialization we load ObjectCode, perform relocations, execute
122 static constructors etc. This phase may trigger other ObjectCodes to
123 be loaded because of the calls to lookupSymbol.
124
125 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
126 previous status was `OBJECT_LOADED`.
127
128 * During resolve we attempt to resolve all the symbols needed for the
129 initial link. This essentially means, that for any ObjectCode given
130 directly to the command-line we perform lookupSymbols on the required
131 symbols. lookupSymbols may trigger the loading of additional ObjectCode
132 if required.
133
134 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
135 the previous status was `OBJECT_NEEDED`.
136
137 * lookupSymbols is used to lookup any symbols required, both during initial
138 link and during statement and expression compilations in the REPL.
139 Declaration of e.g. a foreign import, will eventually call lookupSymbol
140 which will either fail (symbol unknown) or succeed (and possibly trigger a
141 load).
142
143 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
144
145 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
146 by calling unloadObj and starting over.
147 When a new declaration or statement is performed ultimately lookupSymbol is called
148 without doing a re-link.
149
150 The goal of these different phases is to allow the linker to be able to perform
151 "lazy loading" of ObjectCode. The reason for this is that we want to only link
152 in symbols that are actually required for the link. This reduces:
153
154 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
155 then we don't need to load libB. This means the dependency chain for libraries
156 such as mingw32 and mingwex can be broken down.
157
158 2) The number of duplicate symbols, since now only symbols that are
159 true duplicates will display the error.
160 */
161 /*Str*/HashTable *symhash;
162
163 /* List of currently loaded objects */
164 ObjectCode *objects = NULL; /* initially empty */
165
166 /* List of objects that have been unloaded via unloadObj(), but are waiting
167 to be actually freed via checkUnload() */
168 ObjectCode *unloaded_objects = NULL; /* initially empty */
169
170 #if defined(THREADED_RTS)
171 /* This protects all the Linker's global state except unloaded_objects */
172 Mutex linker_mutex;
173 /*
174 * This protects unloaded_objects. We have a separate mutex for this, because
175 * the GC needs to access unloaded_objects in checkUnload, while the linker only
176 * needs to access unloaded_objects in unloadObj(), so this allows most linker
177 * operations proceed concurrently with the GC.
178 */
179 Mutex linker_unloaded_mutex;
180 #endif
181
182 /* Generic wrapper function to try and Resolve and RunInit oc files */
183 int ocTryLoad( ObjectCode* oc );
184
185 /* Link objects into the lower 2Gb on x86_64 and AArch64. GHC assumes the
186 * small memory model on this architecture (see gcc docs,
187 * -mcmodel=small).
188 *
189 * MAP_32BIT not available on OpenBSD/amd64
190 */
191 #if defined(MAP_32BIT) && defined(x86_64_HOST_ARCH)
192 #define MAP_LOW_MEM
193 #define TRY_MAP_32BIT MAP_32BIT
194 #else
195 #define TRY_MAP_32BIT 0
196 #endif
197
198 #if defined(aarch64_HOST_ARCH)
199 // On AArch64 MAP_32BIT is not available but we are still bound by the small
200 // memory model. Consequently we still try using the MAP_LOW_MEM allocation
201 // strategy.
202 #define MAP_LOW_MEM
203 #endif
204
205 /*
206 * Note [MAP_LOW_MEM]
207 * ~~~~~~~~~~~~~~~~~~
208 * Due to the small memory model (see above), on x86_64 and AArch64 we have to
209 * map all our non-PIC object files into the low 2Gb of the address space (why
210 * 2Gb and not 4Gb? Because all addresses must be reachable using a 32-bit
211 * signed PC-relative offset). On x86_64 Linux we can do this using the
212 * MAP_32BIT flag to mmap(), however on other OSs (e.g. *BSD, see #2063, and
213 * also on Linux inside Xen, see #2512), we can't do this. So on these
214 * systems, we have to pick a base address in the low 2Gb of the address space
215 * and try to allocate memory from there.
216 *
217 * We pick a default address based on the OS, but also make this
218 * configurable via an RTS flag (+RTS -xm)
219 */
220 #if defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
221 // Try to use MAP_32BIT
222 #define MMAP_32BIT_BASE_DEFAULT 0
223 #else
224 // A guess: 1Gb.
225 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
226 #endif
227
228 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
229
230 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
231 ObjectCode *owner)
232 {
233 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
234 if (!pinfo || owner != pinfo->owner) return;
235 removeStrHashTable(table, key, NULL);
236 if (isSymbolImport (owner, key))
237 stgFree(pinfo->value);
238
239 stgFree(pinfo);
240 }
241
242 /* -----------------------------------------------------------------------------
243 * Insert symbols into hash tables, checking for duplicates.
244 *
245 * Returns: 0 on failure, nonzero on success
246 */
247 /*
248 Note [weak-symbols-support]
249 -------------------------------------
250 While ghciInsertSymbolTable does implement extensive
251 logic for weak symbol support, weak symbols are not currently
252 fully supported by the RTS. This code is mostly here for COMDAT
253 support which uses the weak symbols support.
254
255 Linking weak symbols defined purely in C code with other C code
256 should also work, probably. Observing weak symbols in Haskell
257 won't.
258
259 Some test have been written for weak symbols but have been disabled
260 mostly because it's unsure how the weak symbols support should look.
261 See #11223
262 */
263 int ghciInsertSymbolTable(
264 pathchar* obj_name,
265 HashTable *table,
266 const SymbolName* key,
267 SymbolAddr* data,
268 HsBool weak,
269 ObjectCode *owner)
270 {
271 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
272 if (!pinfo) /* new entry */
273 {
274 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
275 pinfo->value = data;
276 pinfo->owner = owner;
277 pinfo->weak = weak;
278 insertStrHashTable(table, key, pinfo);
279 return 1;
280 }
281 else if (weak && data && pinfo->weak && !pinfo->value)
282 {
283 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
284 pinfo->value = data;
285 pinfo->owner = owner;
286 return 1;
287 }
288 else if (weak)
289 {
290 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
291 already know of another copy throw this one away.
292
293 or both weak symbols have a nonzero value. Keep the existing one.
294
295 This also preserves the semantics of linking against
296 the first symbol we find. */
297 }
298 else if (pinfo->weak && !weak) /* weak symbol is in the table */
299 {
300 /* override the weak definition with the non-weak one */
301 pinfo->value = data;
302 pinfo->owner = owner;
303 pinfo->weak = HS_BOOL_FALSE;
304 return 1;
305 }
306 else if ( pinfo->owner
307 && pinfo->owner->status != OBJECT_RESOLVED
308 && pinfo->owner->status != OBJECT_NEEDED)
309 {
310 /* If the other symbol hasn't been loaded or will be loaded and we want to
311 explicitly load the new one, we can just swap it out and load the one
312 that has been requested. If not, just keep the first one encountered.
313
314 Because the `symHash' table consists symbols we've also not loaded but
315 found during the initial scan this is safe to do. If however the existing
316 symbol has been loaded then it means we have a duplicate.
317
318 This is essentially emulating the behavior of a linker wherein it will always
319 link in object files that are .o file arguments, but only take object files
320 from archives as needed. */
321 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
322 pinfo->value = data;
323 pinfo->owner = owner;
324 pinfo->weak = weak;
325 }
326
327 return 1;
328 }
329 else if (pinfo->owner == owner)
330 {
331 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
332 return 1;
333 }
334 else if (owner && owner->status == OBJECT_LOADED)
335 {
336 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
337 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
338 call this function again to trigger the duplicate error. */
339 return 1;
340 }
341
342 pathchar* archiveName = NULL;
343 debugBelch(
344 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
345 " %s\n"
346 "whilst processing object file\n"
347 " %" PATH_FMT "\n"
348 "The symbol was previously defined in\n"
349 " %" PATH_FMT "\n"
350 "This could be caused by:\n"
351 " * Loading two different object files which export the same symbol\n"
352 " * Specifying the same object file twice on the GHCi command line\n"
353 " * An incorrect `package.conf' entry, causing some object to be\n"
354 " loaded twice.\n",
355 (char*)key,
356 obj_name,
357 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
358 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
359 : pinfo->owner->fileName
360 );
361
362 if (archiveName)
363 {
364 stgFree(archiveName);
365 archiveName = NULL;
366 }
367 return 0;
368 }
369
370 /* -----------------------------------------------------------------------------
371 * Looks up symbols into hash tables.
372 *
373 * Returns: 0 on failure and result is not set,
374 * nonzero on success and result set to nonzero pointer
375 */
376 HsBool ghciLookupSymbolInfo(HashTable *table,
377 const SymbolName* key, RtsSymbolInfo **result)
378 {
379 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
380 if (!pinfo) {
381 *result = NULL;
382 return HS_BOOL_FALSE;
383 }
384 if (pinfo->weak)
385 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
386 /* Once it's looked up, it can no longer be overridden */
387 pinfo->weak = HS_BOOL_FALSE;
388
389 *result = pinfo;
390 return HS_BOOL_TRUE;
391 }
392
393 /* -----------------------------------------------------------------------------
394 * initialize the object linker
395 */
396
397
398 static int linker_init_done = 0 ;
399
400 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
401 static void *dl_prog_handle;
402 static regex_t re_invalid;
403 static regex_t re_realso;
404 #if defined(THREADED_RTS)
405 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
406 #endif
407 #endif
408
409 void initLinker (void)
410 {
411 // default to retaining CAFs for backwards compatibility. Most
412 // users will want initLinker_(0): otherwise unloadObj() will not
413 // be able to unload object files when they contain CAFs.
414 initLinker_(1);
415 }
416
417 void
418 initLinker_ (int retain_cafs)
419 {
420 RtsSymbolVal *sym;
421 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
422 int compileResult;
423 #endif
424
425 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
426
427 /* Make initLinker idempotent, so we can call it
428 before every relevant operation; that means we
429 don't need to initialise the linker separately */
430 if (linker_init_done == 1) {
431 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
432 return;
433 } else {
434 linker_init_done = 1;
435 }
436
437 objects = NULL;
438 unloaded_objects = NULL;
439
440 #if defined(THREADED_RTS)
441 initMutex(&linker_mutex);
442 initMutex(&linker_unloaded_mutex);
443 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
444 initMutex(&dl_mutex);
445 #endif
446 #endif
447
448 symhash = allocStrHashTable();
449
450 /* populate the symbol table with stuff from the RTS */
451 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
452 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
453 symhash, sym->lbl, sym->addr,
454 sym->weak, NULL)) {
455 barf("ghciInsertSymbolTable failed");
456 }
457 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
458 }
459
460 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
461 referenced from a statically linked module. We need to mimic this, but
462 we cannot use NULL because we use it to mean nonexistent symbols. So we
463 use an arbitrary (hopefully unique) address here.
464 */
465 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
466 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
467 barf("ghciInsertSymbolTable failed");
468 }
469
470 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
471 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
472 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
473 retain_cafs ? newRetainedCAF : newGCdCAF,
474 HS_BOOL_FALSE, NULL)) {
475 barf("ghciInsertSymbolTable failed");
476 }
477
478 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
479 # if defined(RTLD_DEFAULT)
480 dl_prog_handle = RTLD_DEFAULT;
481 # else
482 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
483 # endif /* RTLD_DEFAULT */
484
485 compileResult = regcomp(&re_invalid,
486 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short|invalid file format|Exec format error)",
487 REG_EXTENDED);
488 if (compileResult != 0) {
489 barf("Compiling re_invalid failed");
490 }
491 compileResult = regcomp(&re_realso,
492 "(GROUP|INPUT) *\\( *([^ )]+)",
493 REG_EXTENDED);
494 if (compileResult != 0) {
495 barf("Compiling re_realso failed");
496 }
497 # endif
498
499 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
500 // User-override for mmap_32bit_base
501 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
502 }
503
504 if (RTS_LINKER_USE_MMAP)
505 m32_allocator_init();
506
507 #if defined(OBJFORMAT_PEi386)
508 initLinker_PEi386();
509 #endif
510
511 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
512 return;
513 }
514
515 void
516 exitLinker( void ) {
517 #if defined(OBJFORMAT_PEi386)
518 exitLinker_PEi386();
519 #endif
520 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
521 if (linker_init_done == 1) {
522 regfree(&re_invalid);
523 regfree(&re_realso);
524 #if defined(THREADED_RTS)
525 closeMutex(&dl_mutex);
526 #endif
527 }
528 #endif
529 if (linker_init_done == 1) {
530 freeHashTable(symhash, free);
531 }
532 #if defined(THREADED_RTS)
533 closeMutex(&linker_mutex);
534 #endif
535 }
536
537 /* -----------------------------------------------------------------------------
538 * Loading DLL or .so dynamic libraries
539 * -----------------------------------------------------------------------------
540 *
541 * Add a DLL from which symbols may be found. In the ELF case, just
542 * do RTLD_GLOBAL-style add, so no further messing around needs to
543 * happen in order that symbols in the loaded .so are findable --
544 * lookupSymbol() will subsequently see them by dlsym on the program's
545 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
546 *
547 * In the PEi386 case, open the DLLs and put handles to them in a
548 * linked list. When looking for a symbol, try all handles in the
549 * list. This means that we need to load even DLLs that are guaranteed
550 * to be in the ghc.exe image already, just so we can get a handle
551 * to give to loadSymbol, so that we can find the symbols. For such
552 * libraries, the LoadLibrary call should be a no-op except for returning
553 * the handle.
554 *
555 */
556
557 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
558
559 /* Suppose in ghci we load a temporary SO for a module containing
560 f = 1
561 and then modify the module, recompile, and load another temporary
562 SO with
563 f = 2
564 Then as we don't unload the first SO, dlsym will find the
565 f = 1
566 symbol whereas we want the
567 f = 2
568 symbol. We therefore need to keep our own SO handle list, and
569 try SOs in the right order. */
570
571 typedef
572 struct _OpenedSO {
573 struct _OpenedSO* next;
574 void *handle;
575 }
576 OpenedSO;
577
578 /* A list thereof. */
579 static OpenedSO* openedSOs = NULL;
580
581 static const char *
582 internal_dlopen(const char *dll_name)
583 {
584 OpenedSO* o_so;
585 void *hdl;
586 const char *errmsg;
587 char *errmsg_copy;
588
589 // omitted: RTLD_NOW
590 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
591 IF_DEBUG(linker,
592 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
593
594 //-------------- Begin critical section ------------------
595 // This critical section is necessary because dlerror() is not
596 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
597 // Also, the error message returned must be copied to preserve it
598 // (see POSIX also)
599
600 ACQUIRE_LOCK(&dl_mutex);
601 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
602
603 errmsg = NULL;
604 if (hdl == NULL) {
605 /* dlopen failed; return a ptr to the error msg. */
606 errmsg = dlerror();
607 if (errmsg == NULL) errmsg = "addDLL: unknown error";
608 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
609 strcpy(errmsg_copy, errmsg);
610 errmsg = errmsg_copy;
611 } else {
612 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
613 o_so->handle = hdl;
614 o_so->next = openedSOs;
615 openedSOs = o_so;
616 }
617
618 RELEASE_LOCK(&dl_mutex);
619 //--------------- End critical section -------------------
620
621 return errmsg;
622 }
623
624 /*
625 Note [RTLD_LOCAL]
626
627 In GHCi we want to be able to override previous .so's with newly
628 loaded .so's when we recompile something. This further implies that
629 when we look up a symbol in internal_dlsym() we have to iterate
630 through the loaded libraries (in order from most recently loaded to
631 oldest) looking up the symbol in each one until we find it.
632
633 However, this can cause problems for some symbols that are copied
634 by the linker into the executable image at runtime - see #8935 for a
635 lengthy discussion. To solve that problem we need to look up
636 symbols in the main executable *first*, before attempting to look
637 them up in the loaded .so's. But in order to make that work, we
638 have to always call dlopen with RTLD_LOCAL, so that the loaded
639 libraries don't populate the global symbol table.
640 */
641
642 static void *
643 internal_dlsym(const char *symbol) {
644 OpenedSO* o_so;
645 void *v;
646
647 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
648 ACQUIRE_LOCK(&dl_mutex);
649 dlerror();
650 // look in program first
651 v = dlsym(dl_prog_handle, symbol);
652 if (dlerror() == NULL) {
653 RELEASE_LOCK(&dl_mutex);
654 return v;
655 }
656
657 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
658 v = dlsym(o_so->handle, symbol);
659 if (dlerror() == NULL) {
660 RELEASE_LOCK(&dl_mutex);
661 return v;
662 }
663 }
664 RELEASE_LOCK(&dl_mutex);
665 return v;
666 }
667 # endif
668
669 const char *
670 addDLL( pathchar *dll_name )
671 {
672 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
673 /* ------------------- ELF DLL loader ------------------- */
674
675 #define NMATCH 5
676 regmatch_t match[NMATCH];
677 const char *errmsg;
678 FILE* fp;
679 size_t match_length;
680 #define MAXLINE 1000
681 char line[MAXLINE];
682 int result;
683
684 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
685 errmsg = internal_dlopen(dll_name);
686
687 if (errmsg == NULL) {
688 return NULL;
689 }
690
691 // GHC #2615
692 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
693 // contain linker scripts rather than ELF-format object code. This
694 // code handles the situation by recognizing the real object code
695 // file name given in the linker script.
696 //
697 // If an "invalid ELF header" error occurs, it is assumed that the
698 // .so file contains a linker script instead of ELF object code.
699 // In this case, the code looks for the GROUP ( ... ) linker
700 // directive. If one is found, the first file name inside the
701 // parentheses is treated as the name of a dynamic library and the
702 // code attempts to dlopen that file. If this is also unsuccessful,
703 // an error message is returned.
704
705 // see if the error message is due to an invalid ELF header
706 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
707 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
708 IF_DEBUG(linker, debugBelch("result = %i\n", result));
709 if (result == 0) {
710 // success -- try to read the named file as a linker script
711 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
712 MAXLINE-1);
713 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
714 line[match_length] = '\0'; // make sure string is null-terminated
715 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
716 if ((fp = __rts_fopen(line, "r")) == NULL) {
717 return errmsg; // return original error if open fails
718 }
719 // try to find a GROUP or INPUT ( ... ) command
720 while (fgets(line, MAXLINE, fp) != NULL) {
721 IF_DEBUG(linker, debugBelch("input line = %s", line));
722 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
723 // success -- try to dlopen the first named file
724 IF_DEBUG(linker, debugBelch("match%s\n",""));
725 line[match[2].rm_eo] = '\0';
726 stgFree((void*)errmsg); // Free old message before creating new one
727 errmsg = internal_dlopen(line+match[2].rm_so);
728 break;
729 }
730 // if control reaches here, no GROUP or INPUT ( ... ) directive
731 // was found and the original error message is returned to the
732 // caller
733 }
734 fclose(fp);
735 }
736 return errmsg;
737
738 # elif defined(OBJFORMAT_PEi386)
739 return addDLL_PEi386(dll_name, NULL);
740
741 # else
742 barf("addDLL: not implemented on this platform");
743 # endif
744 }
745
746 /* -----------------------------------------------------------------------------
747 * Searches the system directories to determine if there is a system DLL that
748 * satisfies the given name. This prevent GHCi from linking against a static
749 * library if a DLL is available.
750 *
751 * Returns: NULL on failure or no DLL found, else the full path to the DLL
752 * that can be loaded.
753 */
754 pathchar* findSystemLibrary(pathchar* dll_name)
755 {
756 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%"
757 PATH_FMT "'\n", dll_name));
758
759 #if defined(OBJFORMAT_PEi386)
760 return findSystemLibrary_PEi386(dll_name);
761 #else
762 (void)(dll_name); // Function not implemented for other platforms.
763 return NULL;
764 #endif
765 }
766
767 /* -----------------------------------------------------------------------------
768 * Emits a warning determining that the system is missing a required security
769 * update that we need to get access to the proper APIs
770 */
771 void warnMissingKBLibraryPaths( void )
772 {
773 static HsBool missing_update_warn = HS_BOOL_FALSE;
774 if (!missing_update_warn) {
775 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
776 missing_update_warn = HS_BOOL_TRUE;
777 }
778 }
779
780 /* -----------------------------------------------------------------------------
781 * appends a directory to the process DLL Load path so LoadLibrary can find it
782 *
783 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
784 * restore the search path to what it was before this call.
785 */
786 HsPtr addLibrarySearchPath(pathchar* dll_path)
787 {
788 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%"
789 PATH_FMT "'\n", dll_path));
790
791 #if defined(OBJFORMAT_PEi386)
792 return addLibrarySearchPath_PEi386(dll_path);
793 #else
794 (void)(dll_path); // Function not implemented for other platforms.
795 return NULL;
796 #endif
797 }
798
799 /* -----------------------------------------------------------------------------
800 * removes a directory from the process DLL Load path
801 *
802 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
803 */
804 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
805 {
806 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n",
807 dll_path_index));
808
809 #if defined(OBJFORMAT_PEi386)
810 return removeLibrarySearchPath_PEi386(dll_path_index);
811 #else
812 (void)(dll_path_index); // Function not implemented for other platforms.
813 return HS_BOOL_FALSE;
814 #endif
815 }
816
817 /* -----------------------------------------------------------------------------
818 * insert a symbol in the hash table
819 *
820 * Returns: 0 on failure, nozero on success
821 */
822 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
823 {
824 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE,
825 NULL);
826 }
827
828 /* -----------------------------------------------------------------------------
829 * lookup a symbol in the hash table
830 */
831 #if defined(OBJFORMAT_PEi386)
832 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
833 {
834 return lookupSymbol_PEi386(lbl);
835 }
836
837 #else
838
839 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
840 {
841 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
842
843 ASSERT(symhash != NULL);
844 RtsSymbolInfo *pinfo;
845
846 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
847 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
848
849 # if defined(OBJFORMAT_ELF)
850 return internal_dlsym(lbl);
851 # elif defined(OBJFORMAT_MACHO)
852
853 /* HACK: On OS X, all symbols are prefixed with an underscore.
854 However, dlsym wants us to omit the leading underscore from the
855 symbol name -- the dlsym routine puts it back on before
856 searching for the symbol. For now, we simply strip it off here
857 (and ONLY here).
858 */
859 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n",
860 lbl));
861 ASSERT(lbl[0] == '_');
862 return internal_dlsym(lbl + 1);
863
864 # else
865 ASSERT(2+2 == 5);
866 return NULL;
867 # endif
868 } else {
869 return loadSymbol(lbl, pinfo);
870 }
871 }
872 #endif /* OBJFORMAT_PEi386 */
873
874 /*
875 * Load and relocate the object code for a symbol as necessary.
876 * Symbol name only used for diagnostics output.
877 */
878 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
879 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl,
880 pinfo->value));
881 ObjectCode* oc = pinfo->owner;
882
883 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
884 See Note [runtime-linker-phases] */
885 if (oc && lbl && oc->status == OBJECT_LOADED) {
886 oc->status = OBJECT_NEEDED;
887 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand "
888 "loading symbol '%s'\n", lbl));
889 int r = ocTryLoad(oc);
890 if (!r) {
891 return NULL;
892 }
893
894 #if defined(PROFILING)
895 // collect any new cost centres & CCSs
896 // that were defined during runInit
897 initProfiling2();
898 #endif
899 }
900
901 return pinfo->value;
902 }
903
904 SymbolAddr* lookupSymbol( SymbolName* lbl )
905 {
906 ACQUIRE_LOCK(&linker_mutex);
907 SymbolAddr* r = lookupSymbol_(lbl);
908 if (!r) {
909 errorBelch("^^ Could not load '%s', dependency unresolved. "
910 "See top entry above.\n", lbl);
911 fflush(stderr);
912 }
913 RELEASE_LOCK(&linker_mutex);
914 return r;
915 }
916
917 /* -----------------------------------------------------------------------------
918 Create a StablePtr for a foreign export. This is normally called by
919 a C function with __attribute__((constructor)), which is generated
920 by GHC and linked into the module.
921
922 If the object code is being loaded dynamically, then we remember
923 which StablePtrs were allocated by the constructors and free them
924 again in unloadObj().
925 -------------------------------------------------------------------------- */
926
927 static ObjectCode *loading_obj = NULL;
928
929 StgStablePtr foreignExportStablePtr (StgPtr p)
930 {
931 ForeignExportStablePtr *fe_sptr;
932 StgStablePtr *sptr;
933
934 sptr = getStablePtr(p);
935
936 if (loading_obj != NULL) {
937 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
938 "foreignExportStablePtr");
939 fe_sptr->stable_ptr = sptr;
940 fe_sptr->next = loading_obj->stable_ptrs;
941 loading_obj->stable_ptrs = fe_sptr;
942 }
943
944 return sptr;
945 }
946
947
948 /* -----------------------------------------------------------------------------
949 * Debugging aid: look in GHCi's object symbol tables for symbols
950 * within DELTA bytes of the specified address, and show their names.
951 */
952 #if defined(DEBUG)
953 void ghci_enquire ( SymbolAddr* addr );
954
955 void ghci_enquire(SymbolAddr* addr)
956 {
957 int i;
958 SymbolName* sym;
959 RtsSymbolInfo* a;
960 const int DELTA = 64;
961 ObjectCode* oc;
962
963 for (oc = objects; oc; oc = oc->next) {
964 for (i = 0; i < oc->n_symbols; i++) {
965 sym = oc->symbols[i].name;
966 if (sym == NULL) continue;
967 a = NULL;
968 if (a == NULL) {
969 ghciLookupSymbolInfo(symhash, sym, &a);
970 }
971 if (a == NULL) {
972 // debugBelch("ghci_enquire: can't find %s\n", sym);
973 }
974 else if ( a->value
975 && (char*)addr-DELTA <= (char*)a->value
976 && (char*)a->value <= (char*)addr+DELTA) {
977 debugBelch("%p + %3d == `%s'\n", addr,
978 (int)((char*)a->value - (char*)addr), sym);
979 }
980 }
981 }
982 }
983 #endif
984
985 pathchar*
986 resolveSymbolAddr (pathchar* buffer, int size,
987 SymbolAddr* symbol, uintptr_t* top)
988 {
989 #if defined(OBJFORMAT_PEi386)
990 return resolveSymbolAddr_PEi386 (buffer, size, symbol, top);
991 #else /* OBJFORMAT_PEi386 */
992 (void)buffer;
993 (void)size;
994 (void)symbol;
995 (void)top;
996 return NULL;
997 #endif /* OBJFORMAT_PEi386 */
998 }
999
1000 #if RTS_LINKER_USE_MMAP
1001 //
1002 // Returns NULL on failure.
1003 //
1004 void *
1005 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1006 {
1007 void *map_addr = NULL;
1008 void *result;
1009 size_t size;
1010 uint32_t tryMap32Bit = RtsFlags.MiscFlags.linkerAlwaysPic
1011 ? 0
1012 : TRY_MAP_32BIT;
1013 static uint32_t fixed = 0;
1014
1015 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1016 size = roundUpToPage(bytes);
1017
1018 #if defined(MAP_LOW_MEM)
1019 mmap_again:
1020 #endif
1021
1022 if (mmap_32bit_base != 0) {
1023 map_addr = mmap_32bit_base;
1024 }
1025
1026 IF_DEBUG(linker,
1027 debugBelch("mmapForLinker: \tprotection %#0x\n",
1028 PROT_EXEC | PROT_READ | PROT_WRITE));
1029 IF_DEBUG(linker,
1030 debugBelch("mmapForLinker: \tflags %#0x\n",
1031 MAP_PRIVATE | tryMap32Bit | fixed | flags));
1032
1033 result = mmap(map_addr, size,
1034 PROT_EXEC|PROT_READ|PROT_WRITE,
1035 MAP_PRIVATE|tryMap32Bit|fixed|flags, fd, offset);
1036
1037 if (result == MAP_FAILED) {
1038 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1039 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1040 return NULL;
1041 }
1042
1043 #if defined(MAP_LOW_MEM)
1044 if (RtsFlags.MiscFlags.linkerAlwaysPic) {
1045 } else if (mmap_32bit_base != 0) {
1046 if (result == map_addr) {
1047 mmap_32bit_base = (StgWord8*)map_addr + size;
1048 } else {
1049 if ((W_)result > 0x80000000) {
1050 // oops, we were given memory over 2Gb
1051 munmap(result,size);
1052 #if defined(freebsd_HOST_OS) || \
1053 defined(kfreebsdgnu_HOST_OS) || \
1054 defined(dragonfly_HOST_OS)
1055 // Some platforms require MAP_FIXED. This is normally
1056 // a bad idea, because MAP_FIXED will overwrite
1057 // existing mappings.
1058 fixed = MAP_FIXED;
1059 goto mmap_again;
1060 #else
1061 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1062 "asked for %lu bytes at %p. "
1063 "Try specifying an address with +RTS -xm<addr> -RTS",
1064 size, map_addr);
1065 return NULL;
1066 #endif
1067 } else {
1068 // hmm, we were given memory somewhere else, but it's
1069 // still under 2Gb so we can use it. Next time, ask
1070 // for memory right after the place we just got some
1071 mmap_32bit_base = (StgWord8*)result + size;
1072 }
1073 }
1074 } else {
1075 if ((W_)result > 0x80000000) {
1076 // oops, we were given memory over 2Gb
1077 // ... try allocating memory somewhere else?;
1078 debugTrace(DEBUG_linker,
1079 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1080 bytes, result);
1081 munmap(result, size);
1082
1083 // Set a base address and try again... (guess: 1Gb)
1084 mmap_32bit_base = (void*)0x40000000;
1085 goto mmap_again;
1086 }
1087 }
1088 #endif
1089
1090 IF_DEBUG(linker,
1091 debugBelch("mmapForLinker: mapped %" FMT_Word
1092 " bytes starting at %p\n", (W_)size, result));
1093 IF_DEBUG(linker,
1094 debugBelch("mmapForLinker: done\n"));
1095
1096 return result;
1097 }
1098 #endif
1099
1100 /*
1101 * Remove symbols from the symbol table, and free oc->symbols.
1102 * This operation is idempotent.
1103 */
1104 static void removeOcSymbols (ObjectCode *oc)
1105 {
1106 if (oc->symbols == NULL) return;
1107
1108 // Remove all the mappings for the symbols within this object..
1109 int i;
1110 for (i = 0; i < oc->n_symbols; i++) {
1111 if (oc->symbols[i].name != NULL) {
1112 ghciRemoveSymbolTable(symhash, oc->symbols[i].name, oc);
1113 }
1114 }
1115
1116 stgFree(oc->symbols);
1117 oc->symbols = NULL;
1118 }
1119
1120 /*
1121 * Release StablePtrs and free oc->stable_ptrs.
1122 * This operation is idempotent.
1123 */
1124 static void freeOcStablePtrs (ObjectCode *oc)
1125 {
1126 // Release any StablePtrs that were created when this
1127 // object module was initialized.
1128 ForeignExportStablePtr *fe_ptr, *next;
1129
1130 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1131 next = fe_ptr->next;
1132 freeStablePtr(fe_ptr->stable_ptr);
1133 stgFree(fe_ptr);
1134 }
1135 oc->stable_ptrs = NULL;
1136 }
1137
1138 static void
1139 freePreloadObjectFile (ObjectCode *oc)
1140 {
1141 #if defined(mingw32_HOST_OS)
1142 freePreloadObjectFile_PEi386(oc);
1143 #else
1144
1145 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1146 munmap(oc->image, oc->fileSize);
1147 }
1148 else {
1149 stgFree(oc->image);
1150 }
1151
1152 #endif
1153
1154 oc->image = NULL;
1155 oc->fileSize = 0;
1156 }
1157
1158 /*
1159 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1160 * the GC when a previously unloaded ObjectCode has been determined to be
1161 * unused, and when an error occurs during loadObj().
1162 */
1163 void freeObjectCode (ObjectCode *oc)
1164 {
1165 freePreloadObjectFile(oc);
1166
1167 if (oc->symbols != NULL) {
1168 stgFree(oc->symbols);
1169 oc->symbols = NULL;
1170 }
1171
1172 if (oc->extraInfos != NULL) {
1173 freeHashTable(oc->extraInfos, NULL);
1174 oc->extraInfos = NULL;
1175 }
1176
1177 if (oc->sections != NULL) {
1178 int i;
1179 for (i=0; i < oc->n_sections; i++) {
1180 if (oc->sections[i].start != NULL) {
1181 switch(oc->sections[i].alloc){
1182 #if RTS_LINKER_USE_MMAP
1183 case SECTION_MMAP:
1184 munmap(oc->sections[i].mapped_start,
1185 oc->sections[i].mapped_size);
1186 break;
1187 case SECTION_M32:
1188 IF_DEBUG(sanity,
1189 memset(oc->sections[i].start,
1190 0x00, oc->sections[i].size));
1191 m32_free(oc->sections[i].start,
1192 oc->sections[i].size);
1193 break;
1194 #endif
1195 case SECTION_MALLOC:
1196 IF_DEBUG(sanity,
1197 memset(oc->sections[i].start,
1198 0x00, oc->sections[i].size));
1199 stgFree(oc->sections[i].start);
1200 break;
1201 default:
1202 break;
1203 }
1204 }
1205 if (oc->sections[i].info) {
1206 stgFree(oc->sections[i].info);
1207 }
1208 }
1209 stgFree(oc->sections);
1210 }
1211
1212 freeProddableBlocks(oc);
1213 freeSegments(oc);
1214
1215 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1216 * alongside the image, so we don't need to free. */
1217 #if defined(NEED_SYMBOL_EXTRAS) && (!defined(x86_64_HOST_ARCH) \
1218 || !defined(mingw32_HOST_OS))
1219 if (RTS_LINKER_USE_MMAP) {
1220 if (!USE_CONTIGUOUS_MMAP && !RtsFlags.MiscFlags.linkerAlwaysPic &&
1221 oc->symbol_extras != NULL) {
1222 m32_free(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
1223 }
1224 }
1225 else {
1226 stgFree(oc->symbol_extras);
1227 }
1228 #endif
1229
1230 #if defined(OBJECTFORMAT_MACHO)
1231 ocDeinit_MachO(oc);
1232 #endif
1233 #if defined(OBJFORMAT_ELF)
1234 ocDeinit_ELF(oc);
1235 #endif
1236
1237 stgFree(oc->fileName);
1238 stgFree(oc->archiveMemberName);
1239
1240 stgFree(oc);
1241 }
1242
1243 /* -----------------------------------------------------------------------------
1244 * Sets the initial status of a fresh ObjectCode
1245 */
1246 static void setOcInitialStatus(ObjectCode* oc) {
1247 /* If a target has requested the ObjectCode not to be resolved then
1248 honor this requests. Usually this means the ObjectCode has not been
1249 initialized and can't be. */
1250 if (oc->status == OBJECT_DONT_RESOLVE)
1251 return;
1252
1253 if (oc->archiveMemberName == NULL) {
1254 oc->status = OBJECT_NEEDED;
1255 } else {
1256 oc->status = OBJECT_LOADED;
1257 }
1258 }
1259
1260 ObjectCode*
1261 mkOc( pathchar *path, char *image, int imageSize,
1262 bool mapped, char *archiveMemberName, int misalignment ) {
1263 ObjectCode* oc;
1264
1265 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1266 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1267
1268 oc->info = NULL;
1269
1270 # if defined(OBJFORMAT_ELF)
1271 oc->formatName = "ELF";
1272 # elif defined(OBJFORMAT_PEi386)
1273 oc->formatName = "PEi386";
1274 # elif defined(OBJFORMAT_MACHO)
1275 oc->formatName = "Mach-O";
1276 # else
1277 stgFree(oc);
1278 barf("loadObj: not implemented on this platform");
1279 # endif
1280
1281 oc->image = image;
1282 oc->fileName = pathdup(path);
1283
1284 if (archiveMemberName) {
1285 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1,
1286 "loadObj" );
1287 strcpy(oc->archiveMemberName, archiveMemberName);
1288 } else {
1289 oc->archiveMemberName = NULL;
1290 }
1291
1292 setOcInitialStatus( oc );
1293
1294 oc->fileSize = imageSize;
1295 oc->symbols = NULL;
1296 oc->n_sections = 0;
1297 oc->sections = NULL;
1298 oc->n_segments = 0;
1299 oc->segments = NULL;
1300 oc->proddables = NULL;
1301 oc->stable_ptrs = NULL;
1302 #if defined(NEED_SYMBOL_EXTRAS)
1303 oc->symbol_extras = NULL;
1304 #endif
1305 oc->bssBegin = NULL;
1306 oc->bssEnd = NULL;
1307 oc->imageMapped = mapped;
1308
1309 oc->misalignment = misalignment;
1310 oc->extraInfos = NULL;
1311
1312 /* chain it onto the list of objects */
1313 oc->next = NULL;
1314
1315 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1316 return oc;
1317 }
1318
1319 /* -----------------------------------------------------------------------------
1320 * Check if an object or archive is already loaded.
1321 *
1322 * Returns: 1 if the path is already loaded, 0 otherwise.
1323 */
1324 HsInt
1325 isAlreadyLoaded( pathchar *path )
1326 {
1327 ObjectCode *o;
1328 for (o = objects; o; o = o->next) {
1329 if (0 == pathcmp(o->fileName, path)) {
1330 return 1; /* already loaded */
1331 }
1332 }
1333 return 0; /* not loaded yet */
1334 }
1335
1336 //
1337 // Load the object file into memory. This will not be its final resting place,
1338 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
1339 // address space, properly aligned.
1340 //
1341 static ObjectCode *
1342 preloadObjectFile (pathchar *path)
1343 {
1344 int fileSize;
1345 struct_stat st;
1346 int r;
1347 void *image;
1348 ObjectCode *oc;
1349 int misalignment = 0;
1350
1351 r = pathstat(path, &st);
1352 if (r == -1) {
1353 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
1354 return NULL;
1355 }
1356
1357 fileSize = st.st_size;
1358
1359 #if RTS_LINKER_USE_MMAP
1360 int fd;
1361
1362 /* On many architectures malloc'd memory isn't executable, so we need to use
1363 * mmap. */
1364
1365 #if defined(openbsd_HOST_OS)
1366 fd = open(path, O_RDONLY, S_IRUSR);
1367 #else
1368 fd = open(path, O_RDONLY);
1369 #endif
1370 if (fd == -1) {
1371 errorBelch("loadObj: can't open %s", path);
1372 return NULL;
1373 }
1374
1375 /* iOS does not permit to mmap with r+w+x, however while the comment for
1376 * this function says this is not the final resting place, for some
1377 * architectures / hosts (at least mach-o non-iOS -- see ocGetNames_MachO)
1378 * the image mmaped here in fact ends up being the final resting place for
1379 * the sections. And hence we need to leave r+w+x here for other hosts
1380 * until all hosts have been made aware of the initial image being r+w only.
1381 *
1382 * See also the misalignment logic for darwin below.
1383 */
1384 #if defined(ios_HOST_OS)
1385 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
1386 #else
1387 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
1388 MAP_PRIVATE, fd, 0);
1389 #endif
1390
1391 if (image == MAP_FAILED) {
1392 errorBelch("mmap: failed. errno = %d", errno);
1393 }
1394 // not 32-bit yet, we'll remap later
1395 close(fd);
1396
1397 #else /* !RTS_LINKER_USE_MMAP */
1398 FILE *f;
1399
1400 /* load the image into memory */
1401 /* coverity[toctou] */
1402 f = pathopen(path, WSTR("rb"));
1403 if (!f) {
1404 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
1405 return NULL;
1406 }
1407
1408 # if defined(darwin_HOST_OS)
1409
1410 // In a Mach-O .o file, all sections can and will be misaligned
1411 // if the total size of the headers is not a multiple of the
1412 // desired alignment. This is fine for .o files that only serve
1413 // as input for the static linker, but it's not fine for us,
1414 // as SSE (used by gcc for floating point) and Altivec require
1415 // 16-byte alignment.
1416 // We calculate the correct alignment from the header before
1417 // reading the file, and then we misalign image on purpose so
1418 // that the actual sections end up aligned again.
1419 misalignment = machoGetMisalignment(f);
1420 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
1421 image += misalignment;
1422
1423 # else /* !defined(darwin_HOST_OS) */
1424
1425 image = stgMallocBytes(fileSize, "loadObj(image)");
1426
1427 #endif
1428
1429 int n;
1430 n = fread ( image, 1, fileSize, f );
1431 fclose(f);
1432 if (n != fileSize) {
1433 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
1434 stgFree(image);
1435 return NULL;
1436 }
1437
1438 #endif /* RTS_LINKER_USE_MMAP */
1439
1440 IF_DEBUG(linker, debugBelch("loadObj: preloaded image at %p\n", (void *) image));
1441
1442 /* FIXME (AP): =mapped= parameter unconditionally set to true */
1443 oc = mkOc(path, image, fileSize, true, NULL, misalignment);
1444
1445 #if defined(OBJFORMAT_MACHO)
1446 if (ocVerifyImage_MachO( oc ))
1447 ocInit_MachO( oc );
1448 #endif
1449 #if defined(OBJFORMAT_ELF)
1450 if(ocVerifyImage_ELF( oc ))
1451 ocInit_ELF( oc );
1452 #endif
1453 return oc;
1454 }
1455
1456 /* -----------------------------------------------------------------------------
1457 * Load an obj (populate the global symbol table, but don't resolve yet)
1458 *
1459 * Returns: 1 if ok, 0 on error.
1460 */
1461 static HsInt loadObj_ (pathchar *path)
1462 {
1463 ObjectCode* oc;
1464 IF_DEBUG(linker, debugBelch("loadObj: %" PATH_FMT "\n", path));
1465
1466 /* debugBelch("loadObj %s\n", path ); */
1467
1468 /* Check that we haven't already loaded this object.
1469 Ignore requests to load multiple times */
1470
1471 if (isAlreadyLoaded(path)) {
1472 IF_DEBUG(linker,
1473 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1474 return 1; /* success */
1475 }
1476
1477 oc = preloadObjectFile(path);
1478 if (oc == NULL) return 0;
1479
1480 if (! loadOc(oc)) {
1481 // failed; free everything we've allocated
1482 removeOcSymbols(oc);
1483 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
1484 freeObjectCode(oc);
1485 return 0;
1486 }
1487
1488 oc->next = objects;
1489 objects = oc;
1490 return 1;
1491 }
1492
1493 HsInt loadObj (pathchar *path)
1494 {
1495 ACQUIRE_LOCK(&linker_mutex);
1496 HsInt r = loadObj_(path);
1497 RELEASE_LOCK(&linker_mutex);
1498 return r;
1499 }
1500
1501 HsInt loadOc (ObjectCode* oc)
1502 {
1503 int r;
1504
1505 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
1506
1507 /* verify the in-memory image */
1508 # if defined(OBJFORMAT_ELF)
1509 r = ocVerifyImage_ELF ( oc );
1510 # elif defined(OBJFORMAT_PEi386)
1511 r = ocVerifyImage_PEi386 ( oc );
1512 # elif defined(OBJFORMAT_MACHO)
1513 r = ocVerifyImage_MachO ( oc );
1514 # else
1515 barf("loadObj: no verify method");
1516 # endif
1517 if (!r) {
1518 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
1519 return r;
1520 }
1521
1522 /* Note [loadOc orderings]
1523 The order of `ocAllocateExtras` and `ocGetNames` matters. For MachO
1524 and ELF, `ocInit` and `ocGetNames` initialize a bunch of pointers based
1525 on the offset to `oc->image`, but `ocAllocateExtras` may relocate
1526 the address of `oc->image` and invalidate those pointers. So we must
1527 compute or recompute those pointers after `ocAllocateExtras`.
1528
1529 On Windows, when we have an import library we (for now, as we don't honor
1530 the lazy loading semantics of the library and instead GHCi is already
1531 lazy) don't use the library after ocGetNames as it just populates the
1532 symbol table. Allocating space for jump tables in ocAllocateExtras
1533 would just be a waste then as we'll be stopping further processing of the
1534 library in the next few steps. If necessary, the actual allocation
1535 happens in `ocGetNames_PEi386` and `ocAllocateExtras_PEi386` simply
1536 set the correct pointers.
1537 */
1538
1539 #if defined(NEED_SYMBOL_EXTRAS)
1540 # if defined(OBJFORMAT_MACHO)
1541 r = ocAllocateExtras_MachO ( oc );
1542 if (!r) {
1543 IF_DEBUG(linker,
1544 debugBelch("loadOc: ocAllocateExtras_MachO failed\n"));
1545 return r;
1546 }
1547 # elif defined(OBJFORMAT_ELF)
1548 r = ocAllocateExtras_ELF ( oc );
1549 if (!r) {
1550 IF_DEBUG(linker,
1551 debugBelch("loadOc: ocAllocateExtras_ELF failed\n"));
1552 return r;
1553 }
1554 # endif
1555 #endif
1556
1557 /* build the symbol list for this image */
1558 # if defined(OBJFORMAT_ELF)
1559 r = ocGetNames_ELF ( oc );
1560 # elif defined(OBJFORMAT_PEi386)
1561 r = ocGetNames_PEi386 ( oc );
1562 # elif defined(OBJFORMAT_MACHO)
1563 r = ocGetNames_MachO ( oc );
1564 # else
1565 barf("loadObj: no getNames method");
1566 # endif
1567 if (!r) {
1568 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
1569 return r;
1570 }
1571
1572 #if defined(NEED_SYMBOL_EXTRAS)
1573 # if defined(OBJFORMAT_PEi386)
1574 ocAllocateExtras_PEi386 ( oc );
1575 # endif
1576 #endif
1577
1578 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
1579 setOcInitialStatus( oc );
1580 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
1581
1582 return 1;
1583 }
1584
1585 /* -----------------------------------------------------------------------------
1586 * try to load and initialize an ObjectCode into memory
1587 *
1588 * Returns: 1 if ok, 0 on error.
1589 */
1590 int ocTryLoad (ObjectCode* oc) {
1591 int r;
1592
1593 if (oc->status != OBJECT_NEEDED) {
1594 return 1;
1595 }
1596
1597 /* Check for duplicate symbols by looking into `symhash`.
1598 Duplicate symbols are any symbols which exist
1599 in different ObjectCodes that have both been loaded, or
1600 are to be loaded by this call.
1601
1602 This call is intended to have no side-effects when a non-duplicate
1603 symbol is re-inserted. A symbol is only a duplicate if the object file
1604 it is defined in has had it's relocations resolved. A resolved object
1605 file means the symbols inside it are required.
1606
1607 The symbol address is not used to distinguish symbols. Duplicate symbols
1608 are distinguished by name, oc and attributes (weak symbols etc).
1609 */
1610 int x;
1611 Symbol_t symbol;
1612 for (x = 0; x < oc->n_symbols; x++) {
1613 symbol = oc->symbols[x];
1614 if ( symbol.name
1615 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol.name,
1616 symbol.addr,
1617 isSymbolWeak(oc, symbol.name), oc)) {
1618 return 0;
1619 }
1620 }
1621
1622 # if defined(OBJFORMAT_ELF)
1623 r = ocResolve_ELF ( oc );
1624 # elif defined(OBJFORMAT_PEi386)
1625 r = ocResolve_PEi386 ( oc );
1626 # elif defined(OBJFORMAT_MACHO)
1627 r = ocResolve_MachO ( oc );
1628 # else
1629 barf("ocTryLoad: not implemented on this platform");
1630 # endif
1631 if (!r) { return r; }
1632
1633 // run init/init_array/ctors/mod_init_func
1634
1635 IF_DEBUG(linker, debugBelch("ocTryLoad: ocRunInit start\n"));
1636
1637 loading_obj = oc; // tells foreignExportStablePtr what to do
1638 #if defined(OBJFORMAT_ELF)
1639 r = ocRunInit_ELF ( oc );
1640 #elif defined(OBJFORMAT_PEi386)
1641 r = ocRunInit_PEi386 ( oc );
1642 #elif defined(OBJFORMAT_MACHO)
1643 r = ocRunInit_MachO ( oc );
1644 #else
1645 barf("ocTryLoad: initializers not implemented on this platform");
1646 #endif
1647 loading_obj = NULL;
1648
1649 if (!r) { return r; }
1650
1651 oc->status = OBJECT_RESOLVED;
1652
1653 return 1;
1654 }
1655
1656 /* -----------------------------------------------------------------------------
1657 * resolve all the currently unlinked objects in memory
1658 *
1659 * Returns: 1 if ok, 0 on error.
1660 */
1661 static HsInt resolveObjs_ (void)
1662 {
1663 ObjectCode *oc;
1664 int r;
1665
1666 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
1667
1668 for (oc = objects; oc; oc = oc->next) {
1669 r = ocTryLoad(oc);
1670 if (!r)
1671 {
1672 return r;
1673 }
1674 }
1675
1676 #if defined(PROFILING)
1677 // collect any new cost centres & CCSs that were defined during runInit
1678 initProfiling2();
1679 #endif
1680
1681 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
1682 return 1;
1683 }
1684
1685 HsInt resolveObjs (void)
1686 {
1687 ACQUIRE_LOCK(&linker_mutex);
1688 HsInt r = resolveObjs_();
1689 RELEASE_LOCK(&linker_mutex);
1690 return r;
1691 }
1692
1693 /* -----------------------------------------------------------------------------
1694 * delete an object from the pool
1695 */
1696 static HsInt unloadObj_ (pathchar *path, bool just_purge)
1697 {
1698 ObjectCode *oc, *prev, *next;
1699 HsBool unloadedAnyObj = HS_BOOL_FALSE;
1700
1701 ASSERT(symhash != NULL);
1702 ASSERT(objects != NULL);
1703
1704 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
1705
1706 prev = NULL;
1707 for (oc = objects; oc; oc = next) {
1708 next = oc->next; // oc might be freed
1709
1710 if (!pathcmp(oc->fileName,path)) {
1711
1712 // these are both idempotent, so in just_purge mode we can
1713 // later call unloadObj() to really unload the object.
1714 removeOcSymbols(oc);
1715 freeOcStablePtrs(oc);
1716
1717 if (!just_purge) {
1718 if (prev == NULL) {
1719 objects = oc->next;
1720 } else {
1721 prev->next = oc->next;
1722 }
1723 ACQUIRE_LOCK(&linker_unloaded_mutex);
1724 oc->next = unloaded_objects;
1725 unloaded_objects = oc;
1726 oc->status = OBJECT_UNLOADED;
1727 RELEASE_LOCK(&linker_unloaded_mutex);
1728 // We do not own oc any more; it can be released at any time by
1729 // the GC in checkUnload().
1730 } else {
1731 prev = oc;
1732 }
1733
1734 /* This could be a member of an archive so continue
1735 * unloading other members. */
1736 unloadedAnyObj = HS_BOOL_TRUE;
1737 } else {
1738 prev = oc;
1739 }
1740 }
1741
1742 if (unloadedAnyObj) {
1743 return 1;
1744 }
1745 else {
1746 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
1747 return 0;
1748 }
1749 }
1750
1751 HsInt unloadObj (pathchar *path)
1752 {
1753 ACQUIRE_LOCK(&linker_mutex);
1754 HsInt r = unloadObj_(path, false);
1755 RELEASE_LOCK(&linker_mutex);
1756 return r;
1757 }
1758
1759 HsInt purgeObj (pathchar *path)
1760 {
1761 ACQUIRE_LOCK(&linker_mutex);
1762 HsInt r = unloadObj_(path, true);
1763 RELEASE_LOCK(&linker_mutex);
1764 return r;
1765 }
1766
1767 static OStatus getObjectLoadStatus_ (pathchar *path)
1768 {
1769 ObjectCode *o;
1770 for (o = objects; o; o = o->next) {
1771 if (0 == pathcmp(o->fileName, path)) {
1772 return o->status;
1773 }
1774 }
1775 for (o = unloaded_objects; o; o = o->next) {
1776 if (0 == pathcmp(o->fileName, path)) {
1777 return o->status;
1778 }
1779 }
1780 return OBJECT_NOT_LOADED;
1781 }
1782
1783 OStatus getObjectLoadStatus (pathchar *path)
1784 {
1785 ACQUIRE_LOCK(&linker_mutex);
1786 OStatus r = getObjectLoadStatus_(path);
1787 RELEASE_LOCK(&linker_mutex);
1788 return r;
1789 }
1790
1791 /* -----------------------------------------------------------------------------
1792 * Sanity checking. For each ObjectCode, maintain a list of address ranges
1793 * which may be prodded during relocation, and abort if we try and write
1794 * outside any of these.
1795 */
1796 void
1797 addProddableBlock ( ObjectCode* oc, void* start, int size )
1798 {
1799 ProddableBlock* pb
1800 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
1801
1802 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
1803 ASSERT(size > 0);
1804 pb->start = start;
1805 pb->size = size;
1806 pb->next = oc->proddables;
1807 oc->proddables = pb;
1808 }
1809
1810 void
1811 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
1812 {
1813 ProddableBlock* pb;
1814
1815 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
1816 char* s = (char*)(pb->start);
1817 char* e = s + pb->size;
1818 char* a = (char*)addr;
1819 if (a >= s && (a+size) <= e) return;
1820 }
1821 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
1822 }
1823
1824 void freeProddableBlocks (ObjectCode *oc)
1825 {
1826 ProddableBlock *pb, *next;
1827
1828 for (pb = oc->proddables; pb != NULL; pb = next) {
1829 next = pb->next;
1830 stgFree(pb);
1831 }
1832 oc->proddables = NULL;
1833 }
1834
1835 /* -----------------------------------------------------------------------------
1836 * Section management.
1837 */
1838 void
1839 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
1840 void* start, StgWord size,
1841 StgWord mapped_offset, void* mapped_start, StgWord mapped_size)
1842 {
1843 s->start = start; /* actual start of section in memory */
1844 s->size = size; /* actual size of section in memory */
1845 s->kind = kind;
1846 s->alloc = alloc;
1847 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
1848
1849 s->mapped_start = mapped_start; /* start of mmap() block */
1850 s->mapped_size = mapped_size; /* size of mmap() block */
1851
1852 if (!s->info)
1853 s->info
1854 = (struct SectionFormatInfo*)stgCallocBytes(1, sizeof *s->info,
1855 "addSection(SectionFormatInfo)");
1856
1857 IF_DEBUG(linker,
1858 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
1859 start, (void*)((StgWord)start + size),
1860 size, kind ));
1861 }
1862
1863 /* -----------------------------------------------------------------------------
1864 * Segment management
1865 */
1866 void
1867 initSegment (Segment *s, void *start, size_t size, SegmentProt prot, int n_sections)
1868 {
1869 s->start = start;
1870 s->size = size;
1871 s->prot = prot;
1872 s->sections_idx = (int *)stgCallocBytes(n_sections, sizeof(int),
1873 "initSegment(segment)");
1874 s->n_sections = n_sections;
1875 }
1876
1877 void freeSegments (ObjectCode *oc)
1878 {
1879 if (oc->segments != NULL) {
1880 IF_DEBUG(linker, debugBelch("freeSegments: freeing %d segments\n", oc->n_segments));
1881
1882 for (int i = 0; i < oc->n_segments; i++) {
1883 Segment *s = &oc->segments[i];
1884
1885 IF_DEBUG(linker, debugBelch("freeSegments: freeing segment %d at %p size %zu\n",
1886 i, s->start, s->size));
1887
1888 stgFree(s->sections_idx);
1889 s->sections_idx = NULL;
1890
1891 if (0 == s->size) {
1892 IF_DEBUG(linker, debugBelch("freeSegment: skipping segment of 0 size\n"));
1893 continue;
1894 } else {
1895 #if RTS_LINKER_USE_MMAP
1896 CHECKM(0 == munmap(s->start, s->size), "freeSegments: failed to unmap memory");
1897 #else
1898 stgFree(s->start);
1899 #endif
1900 }
1901 s->start = NULL;
1902 }
1903
1904 stgFree(oc->segments);
1905 oc->segments = NULL;
1906 }
1907 }
1908