b6f1de9be65cb14f8e2328c84f14532407016261
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "StablePtr.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "linker/CacheFlush.h"
32 #include "linker/SymbolExtras.h"
33 #include "PathUtils.h"
34
35 #if !defined(mingw32_HOST_OS)
36 #include "posix/Signals.h"
37 #endif
38
39 // get protos for is*()
40 #include <ctype.h>
41
42 #if defined(HAVE_SYS_TYPES_H)
43 #include <sys/types.h>
44 #endif
45
46 #include <inttypes.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <assert.h>
51 #include <fs_rts.h>
52
53 #if defined(HAVE_SYS_STAT_H)
54 #include <sys/stat.h>
55 #endif
56
57 #if defined(HAVE_DLFCN_H)
58 #include <dlfcn.h>
59 #endif
60
61 #if defined(OBJFORMAT_ELF)
62 # include "linker/Elf.h"
63 # include <regex.h> // regex is already used by dlopen() so this is OK
64 // to use here without requiring an additional lib
65 #elif defined(OBJFORMAT_PEi386)
66 # include "linker/PEi386.h"
67 # include <windows.h>
68 #elif defined(OBJFORMAT_MACHO)
69 # include "linker/MachO.h"
70 # include <regex.h>
71 # include <mach/machine.h>
72 # include <mach-o/fat.h>
73 #endif
74
75 #if defined(dragonfly_HOST_OS)
76 #include <sys/tls.h>
77 #endif
78 /*
79 Note [runtime-linker-support]
80 -----------------------------
81 When adding support for a new platform to the runtime linker please
82 update `$TOP/configure.ac` under heading `Does target have runtime
83 linker support?`.
84 */
85 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
86 This hashtable will contain information on all symbols
87 that we know of, however the .o they are in may not be loaded.
88
89 Until the ObjectCode the symbol belongs to is actually
90 loaded this symbol may be replaced. So do not rely on
91 addresses of unloaded symbols.
92
93 Note [runtime-linker-phases]
94 --------------------------------------
95 Broadly the behavior of the runtime linker can be
96 split into the following four phases:
97
98 - Indexing (e.g. ocVerifyImage and ocGetNames)
99 - Initialization (e.g. ocResolve and ocRunInit)
100 - Resolve (e.g. resolveObjs())
101 - Lookup (e.g. lookupSymbol)
102
103 This is to enable lazy loading of symbols. Eager loading is problematic
104 as it means that all symbols must be available, even those which we will
105 never use. This is especially painful on Windows, where the number of
106 libraries required to link things like mingwex grows to be quite high.
107
108 We proceed through these stages as follows,
109
110 * During Indexing we verify and open the ObjectCode and
111 perform a quick scan/indexing of the ObjectCode. All the work
112 required to actually load the ObjectCode is done.
113
114 All symbols from the ObjectCode are also inserted into
115 `symhash`, where possible duplicates are handled via the semantics
116 described in `ghciInsertSymbolTable`.
117
118 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
119 depending on whether they are an archive member or not.
120
121 * During initialization we load ObjectCode, perform relocations, execute
122 static constructors etc. This phase may trigger other ObjectCodes to
123 be loaded because of the calls to lookupSymbol.
124
125 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
126 previous status was `OBJECT_LOADED`.
127
128 * During resolve we attempt to resolve all the symbols needed for the
129 initial link. This essentially means, that for any ObjectCode given
130 directly to the command-line we perform lookupSymbols on the required
131 symbols. lookupSymbols may trigger the loading of additional ObjectCode
132 if required.
133
134 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
135 the previous status was `OBJECT_NEEDED`.
136
137 * lookupSymbols is used to lookup any symbols required, both during initial
138 link and during statement and expression compilations in the REPL.
139 Declaration of e.g. a foreign import, will eventually call lookupSymbol
140 which will either fail (symbol unknown) or succeed (and possibly trigger a
141 load).
142
143 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
144
145 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
146 by calling unloadObj and starting over.
147 When a new declaration or statement is performed ultimately lookupSymbol is called
148 without doing a re-link.
149
150 The goal of these different phases is to allow the linker to be able to perform
151 "lazy loading" of ObjectCode. The reason for this is that we want to only link
152 in symbols that are actually required for the link. This reduces:
153
154 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
155 then we don't need to load libB. This means the dependency chain for libraries
156 such as mingw32 and mingwex can be broken down.
157
158 2) The number of duplicate symbols, since now only symbols that are
159 true duplicates will display the error.
160 */
161 /*Str*/HashTable *symhash;
162
163 /* List of currently loaded objects */
164 ObjectCode *objects = NULL; /* initially empty */
165
166 /* List of objects that have been unloaded via unloadObj(), but are waiting
167 to be actually freed via checkUnload() */
168 ObjectCode *unloaded_objects = NULL; /* initially empty */
169
170 #if defined(THREADED_RTS)
171 /* This protects all the Linker's global state except unloaded_objects */
172 Mutex linker_mutex;
173 /*
174 * This protects unloaded_objects. We have a separate mutex for this, because
175 * the GC needs to access unloaded_objects in checkUnload, while the linker only
176 * needs to access unloaded_objects in unloadObj(), so this allows most linker
177 * operations proceed concurrently with the GC.
178 */
179 Mutex linker_unloaded_mutex;
180 #endif
181
182 /* Generic wrapper function to try and Resolve and RunInit oc files */
183 int ocTryLoad( ObjectCode* oc );
184
185 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
186 * small memory model on this architecture (see gcc docs,
187 * -mcmodel=small).
188 *
189 * MAP_32BIT not available on OpenBSD/amd64
190 */
191 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
192 #define TRY_MAP_32BIT MAP_32BIT
193 #else
194 #define TRY_MAP_32BIT 0
195 #endif
196
197 /*
198 * Due to the small memory model (see above), on x86_64 we have to map
199 * all our non-PIC object files into the low 2Gb of the address space
200 * (why 2Gb and not 4Gb? Because all addresses must be reachable
201 * using a 32-bit signed PC-relative offset). On Linux we can do this
202 * using the MAP_32BIT flag to mmap(), however on other OSs
203 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
204 * can't do this. So on these systems, we have to pick a base address
205 * in the low 2Gb of the address space and try to allocate memory from
206 * there.
207 *
208 * We pick a default address based on the OS, but also make this
209 * configurable via an RTS flag (+RTS -xm)
210 */
211 #if defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
212 // Try to use MAP_32BIT
213 #define MMAP_32BIT_BASE_DEFAULT 0
214 #else
215 // A guess: 1Gb.
216 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
217 #endif
218
219 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
220
221 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
222 ObjectCode *owner)
223 {
224 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
225 if (!pinfo || owner != pinfo->owner) return;
226 removeStrHashTable(table, key, NULL);
227 if (isSymbolImport (owner, key))
228 stgFree(pinfo->value);
229
230 stgFree(pinfo);
231 }
232
233 /* -----------------------------------------------------------------------------
234 * Insert symbols into hash tables, checking for duplicates.
235 *
236 * Returns: 0 on failure, nonzero on success
237 */
238 /*
239 Note [weak-symbols-support]
240 -------------------------------------
241 While ghciInsertSymbolTable does implement extensive
242 logic for weak symbol support, weak symbols are not currently
243 fully supported by the RTS. This code is mostly here for COMDAT
244 support which uses the weak symbols support.
245
246 Linking weak symbols defined purely in C code with other C code
247 should also work, probably. Observing weak symbols in Haskell
248 won't.
249
250 Some test have been written for weak symbols but have been disabled
251 mostly because it's unsure how the weak symbols support should look.
252 See Trac #11223
253 */
254 int ghciInsertSymbolTable(
255 pathchar* obj_name,
256 HashTable *table,
257 const SymbolName* key,
258 SymbolAddr* data,
259 HsBool weak,
260 ObjectCode *owner)
261 {
262 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
263 if (!pinfo) /* new entry */
264 {
265 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
266 pinfo->value = data;
267 pinfo->owner = owner;
268 pinfo->weak = weak;
269 insertStrHashTable(table, key, pinfo);
270 return 1;
271 }
272 else if (weak && data && pinfo->weak && !pinfo->value)
273 {
274 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
275 pinfo->value = data;
276 pinfo->owner = owner;
277 return 1;
278 }
279 else if (weak)
280 {
281 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
282 already know of another copy throw this one away.
283
284 or both weak symbols have a nonzero value. Keep the existing one.
285
286 This also preserves the semantics of linking against
287 the first symbol we find. */
288 }
289 else if (pinfo->weak && !weak) /* weak symbol is in the table */
290 {
291 /* override the weak definition with the non-weak one */
292 pinfo->value = data;
293 pinfo->owner = owner;
294 pinfo->weak = HS_BOOL_FALSE;
295 return 1;
296 }
297 else if ( pinfo->owner
298 && pinfo->owner->status != OBJECT_RESOLVED
299 && pinfo->owner->status != OBJECT_NEEDED)
300 {
301 /* If the other symbol hasn't been loaded or will be loaded and we want to
302 explicitly load the new one, we can just swap it out and load the one
303 that has been requested. If not, just keep the first one encountered.
304
305 Because the `symHash' table consists symbols we've also not loaded but
306 found during the initial scan this is safe to do. If however the existing
307 symbol has been loaded then it means we have a duplicate.
308
309 This is essentially emulating the behavior of a linker wherein it will always
310 link in object files that are .o file arguments, but only take object files
311 from archives as needed. */
312 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
313 pinfo->value = data;
314 pinfo->owner = owner;
315 pinfo->weak = weak;
316 }
317
318 return 1;
319 }
320 else if (pinfo->owner == owner)
321 {
322 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
323 return 1;
324 }
325 else if (owner && owner->status == OBJECT_LOADED)
326 {
327 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
328 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
329 call this function again to trigger the duplicate error. */
330 return 1;
331 }
332
333 pathchar* archiveName = NULL;
334 debugBelch(
335 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
336 " %s\n"
337 "whilst processing object file\n"
338 " %" PATH_FMT "\n"
339 "The symbol was previously defined in\n"
340 " %" PATH_FMT "\n"
341 "This could be caused by:\n"
342 " * Loading two different object files which export the same symbol\n"
343 " * Specifying the same object file twice on the GHCi command line\n"
344 " * An incorrect `package.conf' entry, causing some object to be\n"
345 " loaded twice.\n",
346 (char*)key,
347 obj_name,
348 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
349 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
350 : pinfo->owner->fileName
351 );
352
353 if (archiveName)
354 {
355 stgFree(archiveName);
356 archiveName = NULL;
357 }
358 return 0;
359 }
360
361 /* -----------------------------------------------------------------------------
362 * Looks up symbols into hash tables.
363 *
364 * Returns: 0 on failure and result is not set,
365 * nonzero on success and result set to nonzero pointer
366 */
367 HsBool ghciLookupSymbolInfo(HashTable *table,
368 const SymbolName* key, RtsSymbolInfo **result)
369 {
370 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
371 if (!pinfo) {
372 *result = NULL;
373 return HS_BOOL_FALSE;
374 }
375 if (pinfo->weak)
376 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
377 /* Once it's looked up, it can no longer be overridden */
378 pinfo->weak = HS_BOOL_FALSE;
379
380 *result = pinfo;
381 return HS_BOOL_TRUE;
382 }
383
384 /* -----------------------------------------------------------------------------
385 * initialize the object linker
386 */
387
388
389 static int linker_init_done = 0 ;
390
391 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
392 static void *dl_prog_handle;
393 static regex_t re_invalid;
394 static regex_t re_realso;
395 #if defined(THREADED_RTS)
396 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
397 #endif
398 #endif
399
400 void initLinker (void)
401 {
402 // default to retaining CAFs for backwards compatibility. Most
403 // users will want initLinker_(0): otherwise unloadObj() will not
404 // be able to unload object files when they contain CAFs.
405 initLinker_(1);
406 }
407
408 void
409 initLinker_ (int retain_cafs)
410 {
411 RtsSymbolVal *sym;
412 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
413 int compileResult;
414 #endif
415
416 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
417
418 /* Make initLinker idempotent, so we can call it
419 before every relevant operation; that means we
420 don't need to initialise the linker separately */
421 if (linker_init_done == 1) {
422 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
423 return;
424 } else {
425 linker_init_done = 1;
426 }
427
428 objects = NULL;
429 unloaded_objects = NULL;
430
431 #if defined(THREADED_RTS)
432 initMutex(&linker_mutex);
433 initMutex(&linker_unloaded_mutex);
434 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
435 initMutex(&dl_mutex);
436 #endif
437 #endif
438
439 symhash = allocStrHashTable();
440
441 /* populate the symbol table with stuff from the RTS */
442 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
443 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
444 symhash, sym->lbl, sym->addr,
445 sym->weak, NULL)) {
446 barf("ghciInsertSymbolTable failed");
447 }
448 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
449 }
450 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
451 machoInitSymbolsWithoutUnderscore();
452 # endif
453 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
454 referenced from a statically linked module. We need to mimic this, but
455 we cannot use NULL because we use it to mean nonexistent symbols. So we
456 use an arbitrary (hopefully unique) address here.
457 */
458 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
459 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
460 barf("ghciInsertSymbolTable failed");
461 }
462
463 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
464 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
465 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
466 retain_cafs ? newRetainedCAF : newGCdCAF,
467 HS_BOOL_FALSE, NULL)) {
468 barf("ghciInsertSymbolTable failed");
469 }
470
471 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
472 # if defined(RTLD_DEFAULT)
473 dl_prog_handle = RTLD_DEFAULT;
474 # else
475 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
476 # endif /* RTLD_DEFAULT */
477
478 compileResult = regcomp(&re_invalid,
479 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short|invalid file format|Exec format error)",
480 REG_EXTENDED);
481 if (compileResult != 0) {
482 barf("Compiling re_invalid failed");
483 }
484 compileResult = regcomp(&re_realso,
485 "(GROUP|INPUT) *\\( *([^ )]+)",
486 REG_EXTENDED);
487 if (compileResult != 0) {
488 barf("Compiling re_realso failed");
489 }
490 # endif
491
492 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
493 // User-override for mmap_32bit_base
494 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
495 }
496
497 if (RTS_LINKER_USE_MMAP)
498 m32_allocator_init();
499
500 #if defined(OBJFORMAT_PEi386)
501 initLinker_PEi386();
502 #endif
503
504 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
505 return;
506 }
507
508 void
509 exitLinker( void ) {
510 #if defined(OBJFORMAT_PEi386)
511 exitLinker_PEi386();
512 #endif
513 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
514 if (linker_init_done == 1) {
515 regfree(&re_invalid);
516 regfree(&re_realso);
517 #if defined(THREADED_RTS)
518 closeMutex(&dl_mutex);
519 #endif
520 }
521 #endif
522 if (linker_init_done == 1) {
523 freeHashTable(symhash, free);
524 }
525 #if defined(THREADED_RTS)
526 closeMutex(&linker_mutex);
527 #endif
528 }
529
530 /* -----------------------------------------------------------------------------
531 * Loading DLL or .so dynamic libraries
532 * -----------------------------------------------------------------------------
533 *
534 * Add a DLL from which symbols may be found. In the ELF case, just
535 * do RTLD_GLOBAL-style add, so no further messing around needs to
536 * happen in order that symbols in the loaded .so are findable --
537 * lookupSymbol() will subsequently see them by dlsym on the program's
538 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
539 *
540 * In the PEi386 case, open the DLLs and put handles to them in a
541 * linked list. When looking for a symbol, try all handles in the
542 * list. This means that we need to load even DLLs that are guaranteed
543 * to be in the ghc.exe image already, just so we can get a handle
544 * to give to loadSymbol, so that we can find the symbols. For such
545 * libraries, the LoadLibrary call should be a no-op except for returning
546 * the handle.
547 *
548 */
549
550 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
551
552 /* Suppose in ghci we load a temporary SO for a module containing
553 f = 1
554 and then modify the module, recompile, and load another temporary
555 SO with
556 f = 2
557 Then as we don't unload the first SO, dlsym will find the
558 f = 1
559 symbol whereas we want the
560 f = 2
561 symbol. We therefore need to keep our own SO handle list, and
562 try SOs in the right order. */
563
564 typedef
565 struct _OpenedSO {
566 struct _OpenedSO* next;
567 void *handle;
568 }
569 OpenedSO;
570
571 /* A list thereof. */
572 static OpenedSO* openedSOs = NULL;
573
574 static const char *
575 internal_dlopen(const char *dll_name)
576 {
577 OpenedSO* o_so;
578 void *hdl;
579 const char *errmsg;
580 char *errmsg_copy;
581
582 // omitted: RTLD_NOW
583 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
584 IF_DEBUG(linker,
585 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
586
587 //-------------- Begin critical section ------------------
588 // This critical section is necessary because dlerror() is not
589 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
590 // Also, the error message returned must be copied to preserve it
591 // (see POSIX also)
592
593 ACQUIRE_LOCK(&dl_mutex);
594 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
595
596 errmsg = NULL;
597 if (hdl == NULL) {
598 /* dlopen failed; return a ptr to the error msg. */
599 errmsg = dlerror();
600 if (errmsg == NULL) errmsg = "addDLL: unknown error";
601 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
602 strcpy(errmsg_copy, errmsg);
603 errmsg = errmsg_copy;
604 } else {
605 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
606 o_so->handle = hdl;
607 o_so->next = openedSOs;
608 openedSOs = o_so;
609 }
610
611 RELEASE_LOCK(&dl_mutex);
612 //--------------- End critical section -------------------
613
614 return errmsg;
615 }
616
617 /*
618 Note [RTLD_LOCAL]
619
620 In GHCi we want to be able to override previous .so's with newly
621 loaded .so's when we recompile something. This further implies that
622 when we look up a symbol in internal_dlsym() we have to iterate
623 through the loaded libraries (in order from most recently loaded to
624 oldest) looking up the symbol in each one until we find it.
625
626 However, this can cause problems for some symbols that are copied
627 by the linker into the executable image at runtime - see #8935 for a
628 lengthy discussion. To solve that problem we need to look up
629 symbols in the main executable *first*, before attempting to look
630 them up in the loaded .so's. But in order to make that work, we
631 have to always call dlopen with RTLD_LOCAL, so that the loaded
632 libraries don't populate the global symbol table.
633 */
634
635 static void *
636 internal_dlsym(const char *symbol) {
637 OpenedSO* o_so;
638 void *v;
639
640 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
641 ACQUIRE_LOCK(&dl_mutex);
642 dlerror();
643 // look in program first
644 v = dlsym(dl_prog_handle, symbol);
645 if (dlerror() == NULL) {
646 RELEASE_LOCK(&dl_mutex);
647 return v;
648 }
649
650 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
651 v = dlsym(o_so->handle, symbol);
652 if (dlerror() == NULL) {
653 RELEASE_LOCK(&dl_mutex);
654 return v;
655 }
656 }
657 RELEASE_LOCK(&dl_mutex);
658 return v;
659 }
660 # endif
661
662 const char *
663 addDLL( pathchar *dll_name )
664 {
665 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
666 /* ------------------- ELF DLL loader ------------------- */
667
668 #define NMATCH 5
669 regmatch_t match[NMATCH];
670 const char *errmsg;
671 FILE* fp;
672 size_t match_length;
673 #define MAXLINE 1000
674 char line[MAXLINE];
675 int result;
676
677 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
678 errmsg = internal_dlopen(dll_name);
679
680 if (errmsg == NULL) {
681 return NULL;
682 }
683
684 // GHC Trac ticket #2615
685 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
686 // contain linker scripts rather than ELF-format object code. This
687 // code handles the situation by recognizing the real object code
688 // file name given in the linker script.
689 //
690 // If an "invalid ELF header" error occurs, it is assumed that the
691 // .so file contains a linker script instead of ELF object code.
692 // In this case, the code looks for the GROUP ( ... ) linker
693 // directive. If one is found, the first file name inside the
694 // parentheses is treated as the name of a dynamic library and the
695 // code attempts to dlopen that file. If this is also unsuccessful,
696 // an error message is returned.
697
698 // see if the error message is due to an invalid ELF header
699 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
700 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
701 IF_DEBUG(linker, debugBelch("result = %i\n", result));
702 if (result == 0) {
703 // success -- try to read the named file as a linker script
704 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
705 MAXLINE-1);
706 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
707 line[match_length] = '\0'; // make sure string is null-terminated
708 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
709 if ((fp = __rts_fopen(line, "r")) == NULL) {
710 return errmsg; // return original error if open fails
711 }
712 // try to find a GROUP or INPUT ( ... ) command
713 while (fgets(line, MAXLINE, fp) != NULL) {
714 IF_DEBUG(linker, debugBelch("input line = %s", line));
715 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
716 // success -- try to dlopen the first named file
717 IF_DEBUG(linker, debugBelch("match%s\n",""));
718 line[match[2].rm_eo] = '\0';
719 stgFree((void*)errmsg); // Free old message before creating new one
720 errmsg = internal_dlopen(line+match[2].rm_so);
721 break;
722 }
723 // if control reaches here, no GROUP or INPUT ( ... ) directive
724 // was found and the original error message is returned to the
725 // caller
726 }
727 fclose(fp);
728 }
729 return errmsg;
730
731 # elif defined(OBJFORMAT_PEi386)
732 return addDLL_PEi386(dll_name, NULL);
733
734 # else
735 barf("addDLL: not implemented on this platform");
736 # endif
737 }
738
739 /* -----------------------------------------------------------------------------
740 * Searches the system directories to determine if there is a system DLL that
741 * satisfies the given name. This prevent GHCi from linking against a static
742 * library if a DLL is available.
743 *
744 * Returns: NULL on failure or no DLL found, else the full path to the DLL
745 * that can be loaded.
746 */
747 pathchar* findSystemLibrary(pathchar* dll_name)
748 {
749 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%"
750 PATH_FMT "'\n", dll_name));
751
752 #if defined(OBJFORMAT_PEi386)
753 return findSystemLibrary_PEi386(dll_name);
754 #else
755 (void)(dll_name); // Function not implemented for other platforms.
756 return NULL;
757 #endif
758 }
759
760 /* -----------------------------------------------------------------------------
761 * Emits a warning determining that the system is missing a required security
762 * update that we need to get access to the proper APIs
763 */
764 void warnMissingKBLibraryPaths( void )
765 {
766 static HsBool missing_update_warn = HS_BOOL_FALSE;
767 if (!missing_update_warn) {
768 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
769 missing_update_warn = HS_BOOL_TRUE;
770 }
771 }
772
773 /* -----------------------------------------------------------------------------
774 * appends a directory to the process DLL Load path so LoadLibrary can find it
775 *
776 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
777 * restore the search path to what it was before this call.
778 */
779 HsPtr addLibrarySearchPath(pathchar* dll_path)
780 {
781 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%"
782 PATH_FMT "'\n", dll_path));
783
784 #if defined(OBJFORMAT_PEi386)
785 return addLibrarySearchPath_PEi386(dll_path);
786 #else
787 (void)(dll_path); // Function not implemented for other platforms.
788 return NULL;
789 #endif
790 }
791
792 /* -----------------------------------------------------------------------------
793 * removes a directory from the process DLL Load path
794 *
795 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
796 */
797 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
798 {
799 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n",
800 dll_path_index));
801
802 #if defined(OBJFORMAT_PEi386)
803 return removeLibrarySearchPath_PEi386(dll_path_index);
804 #else
805 (void)(dll_path_index); // Function not implemented for other platforms.
806 return HS_BOOL_FALSE;
807 #endif
808 }
809
810 /* -----------------------------------------------------------------------------
811 * insert a symbol in the hash table
812 *
813 * Returns: 0 on failure, nozero on success
814 */
815 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
816 {
817 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE,
818 NULL);
819 }
820
821 /* -----------------------------------------------------------------------------
822 * lookup a symbol in the hash table
823 */
824 #if defined(OBJFORMAT_PEi386)
825 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
826 {
827 return lookupSymbol_PEi386(lbl);
828 }
829
830 #else
831
832 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
833 {
834 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
835
836 ASSERT(symhash != NULL);
837 RtsSymbolInfo *pinfo;
838
839 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
840 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
841
842 # if defined(OBJFORMAT_ELF)
843 return internal_dlsym(lbl);
844 # elif defined(OBJFORMAT_MACHO)
845
846 /* HACK: On OS X, all symbols are prefixed with an underscore.
847 However, dlsym wants us to omit the leading underscore from the
848 symbol name -- the dlsym routine puts it back on before
849 searching for the symbol. For now, we simply strip it off here
850 (and ONLY here).
851 */
852 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n",
853 lbl));
854 ASSERT(lbl[0] == '_');
855 return internal_dlsym(lbl + 1);
856
857 # else
858 ASSERT(2+2 == 5);
859 return NULL;
860 # endif
861 } else {
862 return loadSymbol(lbl, pinfo);
863 }
864 }
865 #endif /* OBJFORMAT_PEi386 */
866
867 /*
868 * Load and relocate the object code for a symbol as necessary.
869 * Symbol name only used for diagnostics output.
870 */
871 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
872 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl,
873 pinfo->value));
874 ObjectCode* oc = pinfo->owner;
875
876 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
877 See Note [runtime-linker-phases] */
878 if (oc && lbl && oc->status == OBJECT_LOADED) {
879 oc->status = OBJECT_NEEDED;
880 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand "
881 "loading symbol '%s'\n", lbl));
882 int r = ocTryLoad(oc);
883 if (!r) {
884 return NULL;
885 }
886
887 #if defined(PROFILING)
888 // collect any new cost centres & CCSs
889 // that were defined during runInit
890 initProfiling2();
891 #endif
892 }
893
894 return pinfo->value;
895 }
896
897 SymbolAddr* lookupSymbol( SymbolName* lbl )
898 {
899 ACQUIRE_LOCK(&linker_mutex);
900 SymbolAddr* r = lookupSymbol_(lbl);
901 if (!r) {
902 errorBelch("^^ Could not load '%s', dependency unresolved. "
903 "See top entry above.\n", lbl);
904 fflush(stderr);
905 }
906 RELEASE_LOCK(&linker_mutex);
907 return r;
908 }
909
910 /* -----------------------------------------------------------------------------
911 Create a StablePtr for a foreign export. This is normally called by
912 a C function with __attribute__((constructor)), which is generated
913 by GHC and linked into the module.
914
915 If the object code is being loaded dynamically, then we remember
916 which StablePtrs were allocated by the constructors and free them
917 again in unloadObj().
918 -------------------------------------------------------------------------- */
919
920 static ObjectCode *loading_obj = NULL;
921
922 StgStablePtr foreignExportStablePtr (StgPtr p)
923 {
924 ForeignExportStablePtr *fe_sptr;
925 StgStablePtr *sptr;
926
927 sptr = getStablePtr(p);
928
929 if (loading_obj != NULL) {
930 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
931 "foreignExportStablePtr");
932 fe_sptr->stable_ptr = sptr;
933 fe_sptr->next = loading_obj->stable_ptrs;
934 loading_obj->stable_ptrs = fe_sptr;
935 }
936
937 return sptr;
938 }
939
940
941 /* -----------------------------------------------------------------------------
942 * Debugging aid: look in GHCi's object symbol tables for symbols
943 * within DELTA bytes of the specified address, and show their names.
944 */
945 #if defined(DEBUG)
946 void ghci_enquire ( SymbolAddr* addr );
947
948 void ghci_enquire(SymbolAddr* addr)
949 {
950 int i;
951 SymbolName* sym;
952 RtsSymbolInfo* a;
953 const int DELTA = 64;
954 ObjectCode* oc;
955
956 for (oc = objects; oc; oc = oc->next) {
957 for (i = 0; i < oc->n_symbols; i++) {
958 sym = oc->symbols[i].name;
959 if (sym == NULL) continue;
960 a = NULL;
961 if (a == NULL) {
962 ghciLookupSymbolInfo(symhash, sym, &a);
963 }
964 if (a == NULL) {
965 // debugBelch("ghci_enquire: can't find %s\n", sym);
966 }
967 else if ( a->value
968 && (char*)addr-DELTA <= (char*)a->value
969 && (char*)a->value <= (char*)addr+DELTA) {
970 debugBelch("%p + %3d == `%s'\n", addr,
971 (int)((char*)a->value - (char*)addr), sym);
972 }
973 }
974 }
975 }
976 #endif
977
978 pathchar*
979 resolveSymbolAddr (pathchar* buffer, int size,
980 SymbolAddr* symbol, uintptr_t* top)
981 {
982 #if defined(OBJFORMAT_PEi386)
983 return resolveSymbolAddr_PEi386 (buffer, size, symbol, top);
984 #else /* OBJFORMAT_PEi386 */
985 (void)buffer;
986 (void)size;
987 (void)symbol;
988 (void)top;
989 return NULL;
990 #endif /* OBJFORMAT_PEi386 */
991 }
992
993 #if RTS_LINKER_USE_MMAP
994 //
995 // Returns NULL on failure.
996 //
997 void *
998 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
999 {
1000 void *map_addr = NULL;
1001 void *result;
1002 size_t size;
1003 uint32_t tryMap32Bit = RtsFlags.MiscFlags.linkerAlwaysPic
1004 ? 0
1005 : TRY_MAP_32BIT;
1006 static uint32_t fixed = 0;
1007
1008 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1009 size = roundUpToPage(bytes);
1010
1011 #if defined(x86_64_HOST_ARCH)
1012 mmap_again:
1013 #endif
1014
1015 if (mmap_32bit_base != 0) {
1016 map_addr = mmap_32bit_base;
1017 }
1018
1019 IF_DEBUG(linker,
1020 debugBelch("mmapForLinker: \tprotection %#0x\n",
1021 PROT_EXEC | PROT_READ | PROT_WRITE));
1022 IF_DEBUG(linker,
1023 debugBelch("mmapForLinker: \tflags %#0x\n",
1024 MAP_PRIVATE | tryMap32Bit | fixed | flags));
1025
1026 result = mmap(map_addr, size,
1027 PROT_EXEC|PROT_READ|PROT_WRITE,
1028 MAP_PRIVATE|tryMap32Bit|fixed|flags, fd, offset);
1029
1030 if (result == MAP_FAILED) {
1031 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1032 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1033 return NULL;
1034 }
1035
1036 #if defined(x86_64_HOST_ARCH)
1037 if (RtsFlags.MiscFlags.linkerAlwaysPic) {
1038 } else if (mmap_32bit_base != 0) {
1039 if (result == map_addr) {
1040 mmap_32bit_base = (StgWord8*)map_addr + size;
1041 } else {
1042 if ((W_)result > 0x80000000) {
1043 // oops, we were given memory over 2Gb
1044 munmap(result,size);
1045 #if defined(freebsd_HOST_OS) || \
1046 defined(kfreebsdgnu_HOST_OS) || \
1047 defined(dragonfly_HOST_OS)
1048 // Some platforms require MAP_FIXED. This is normally
1049 // a bad idea, because MAP_FIXED will overwrite
1050 // existing mappings.
1051 fixed = MAP_FIXED;
1052 goto mmap_again;
1053 #else
1054 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1055 "asked for %lu bytes at %p. "
1056 "Try specifying an address with +RTS -xm<addr> -RTS",
1057 size, map_addr);
1058 return NULL;
1059 #endif
1060 } else {
1061 // hmm, we were given memory somewhere else, but it's
1062 // still under 2Gb so we can use it. Next time, ask
1063 // for memory right after the place we just got some
1064 mmap_32bit_base = (StgWord8*)result + size;
1065 }
1066 }
1067 } else {
1068 if ((W_)result > 0x80000000) {
1069 // oops, we were given memory over 2Gb
1070 // ... try allocating memory somewhere else?;
1071 debugTrace(DEBUG_linker,
1072 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1073 bytes, result);
1074 munmap(result, size);
1075
1076 // Set a base address and try again... (guess: 1Gb)
1077 mmap_32bit_base = (void*)0x40000000;
1078 goto mmap_again;
1079 }
1080 }
1081 #endif
1082
1083 IF_DEBUG(linker,
1084 debugBelch("mmapForLinker: mapped %" FMT_Word
1085 " bytes starting at %p\n", (W_)size, result));
1086 IF_DEBUG(linker,
1087 debugBelch("mmapForLinker: done\n"));
1088
1089 return result;
1090 }
1091 #endif
1092
1093 /*
1094 * Remove symbols from the symbol table, and free oc->symbols.
1095 * This operation is idempotent.
1096 */
1097 static void removeOcSymbols (ObjectCode *oc)
1098 {
1099 if (oc->symbols == NULL) return;
1100
1101 // Remove all the mappings for the symbols within this object..
1102 int i;
1103 for (i = 0; i < oc->n_symbols; i++) {
1104 if (oc->symbols[i].name != NULL) {
1105 ghciRemoveSymbolTable(symhash, oc->symbols[i].name, oc);
1106 }
1107 }
1108
1109 stgFree(oc->symbols);
1110 oc->symbols = NULL;
1111 }
1112
1113 /*
1114 * Release StablePtrs and free oc->stable_ptrs.
1115 * This operation is idempotent.
1116 */
1117 static void freeOcStablePtrs (ObjectCode *oc)
1118 {
1119 // Release any StablePtrs that were created when this
1120 // object module was initialized.
1121 ForeignExportStablePtr *fe_ptr, *next;
1122
1123 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1124 next = fe_ptr->next;
1125 freeStablePtr(fe_ptr->stable_ptr);
1126 stgFree(fe_ptr);
1127 }
1128 oc->stable_ptrs = NULL;
1129 }
1130
1131 static void
1132 freePreloadObjectFile (ObjectCode *oc)
1133 {
1134 #if defined(mingw32_HOST_OS)
1135 freePreloadObjectFile_PEi386(oc);
1136 #else
1137
1138 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1139 munmap(oc->image, oc->fileSize);
1140 }
1141 else {
1142 stgFree(oc->image);
1143 }
1144
1145 #endif
1146
1147 oc->image = NULL;
1148 oc->fileSize = 0;
1149 }
1150
1151 /*
1152 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1153 * the GC when a previously unloaded ObjectCode has been determined to be
1154 * unused, and when an error occurs during loadObj().
1155 */
1156 void freeObjectCode (ObjectCode *oc)
1157 {
1158 freePreloadObjectFile(oc);
1159
1160 if (oc->symbols != NULL) {
1161 stgFree(oc->symbols);
1162 oc->symbols = NULL;
1163 }
1164
1165 if (oc->extraInfos != NULL) {
1166 freeHashTable(oc->extraInfos, NULL);
1167 oc->extraInfos = NULL;
1168 }
1169
1170 if (oc->sections != NULL) {
1171 int i;
1172 for (i=0; i < oc->n_sections; i++) {
1173 if (oc->sections[i].start != NULL) {
1174 switch(oc->sections[i].alloc){
1175 #if RTS_LINKER_USE_MMAP
1176 case SECTION_MMAP:
1177 munmap(oc->sections[i].mapped_start,
1178 oc->sections[i].mapped_size);
1179 break;
1180 case SECTION_M32:
1181 m32_free(oc->sections[i].start,
1182 oc->sections[i].size);
1183 break;
1184 #endif
1185 case SECTION_MALLOC:
1186 stgFree(oc->sections[i].start);
1187 break;
1188 default:
1189 break;
1190 }
1191 }
1192 if (oc->sections[i].info) {
1193 stgFree(oc->sections[i].info);
1194 }
1195 }
1196 stgFree(oc->sections);
1197 }
1198
1199 freeProddableBlocks(oc);
1200
1201 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1202 * alongside the image, so we don't need to free. */
1203 #if defined(NEED_SYMBOL_EXTRAS) && (!defined(x86_64_HOST_ARCH) \
1204 || !defined(mingw32_HOST_OS))
1205 if (RTS_LINKER_USE_MMAP) {
1206 if (!USE_CONTIGUOUS_MMAP && !RtsFlags.MiscFlags.linkerAlwaysPic &&
1207 oc->symbol_extras != NULL) {
1208 m32_free(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
1209 }
1210 }
1211 else {
1212 stgFree(oc->symbol_extras);
1213 }
1214 #endif
1215
1216 #if defined(OBJECTFORMAT_MACHO)
1217 ocDeinit_MachO(oc);
1218 #endif
1219 #if defined(OBJFORMAT_ELF)
1220 ocDeinit_ELF(oc);
1221 #endif
1222
1223 stgFree(oc->fileName);
1224 stgFree(oc->archiveMemberName);
1225
1226 stgFree(oc);
1227 }
1228
1229 /* -----------------------------------------------------------------------------
1230 * Sets the initial status of a fresh ObjectCode
1231 */
1232 static void setOcInitialStatus(ObjectCode* oc) {
1233 /* If a target has requested the ObjectCode not to be resolved then
1234 honor this requests. Usually this means the ObjectCode has not been
1235 initialized and can't be. */
1236 if (oc->status == OBJECT_DONT_RESOLVE)
1237 return;
1238
1239 if (oc->archiveMemberName == NULL) {
1240 oc->status = OBJECT_NEEDED;
1241 } else {
1242 oc->status = OBJECT_LOADED;
1243 }
1244 }
1245
1246 ObjectCode*
1247 mkOc( pathchar *path, char *image, int imageSize,
1248 bool mapped, char *archiveMemberName, int misalignment ) {
1249 ObjectCode* oc;
1250
1251 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1252 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1253
1254 oc->info = NULL;
1255
1256 # if defined(OBJFORMAT_ELF)
1257 oc->formatName = "ELF";
1258 # elif defined(OBJFORMAT_PEi386)
1259 oc->formatName = "PEi386";
1260 # elif defined(OBJFORMAT_MACHO)
1261 oc->formatName = "Mach-O";
1262 # else
1263 stgFree(oc);
1264 barf("loadObj: not implemented on this platform");
1265 # endif
1266
1267 oc->image = image;
1268 oc->fileName = pathdup(path);
1269
1270 if (archiveMemberName) {
1271 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1,
1272 "loadObj" );
1273 strcpy(oc->archiveMemberName, archiveMemberName);
1274 } else {
1275 oc->archiveMemberName = NULL;
1276 }
1277
1278 setOcInitialStatus( oc );
1279
1280 oc->fileSize = imageSize;
1281 oc->symbols = NULL;
1282 oc->n_sections = 0;
1283 oc->sections = NULL;
1284 oc->proddables = NULL;
1285 oc->stable_ptrs = NULL;
1286 #if defined(NEED_SYMBOL_EXTRAS)
1287 oc->symbol_extras = NULL;
1288 #endif
1289 oc->bssBegin = NULL;
1290 oc->bssEnd = NULL;
1291 oc->imageMapped = mapped;
1292
1293 oc->misalignment = misalignment;
1294 oc->extraInfos = NULL;
1295
1296 /* chain it onto the list of objects */
1297 oc->next = NULL;
1298
1299 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1300 return oc;
1301 }
1302
1303 /* -----------------------------------------------------------------------------
1304 * Check if an object or archive is already loaded.
1305 *
1306 * Returns: 1 if the path is already loaded, 0 otherwise.
1307 */
1308 HsInt
1309 isAlreadyLoaded( pathchar *path )
1310 {
1311 ObjectCode *o;
1312 for (o = objects; o; o = o->next) {
1313 if (0 == pathcmp(o->fileName, path)) {
1314 return 1; /* already loaded */
1315 }
1316 }
1317 return 0; /* not loaded yet */
1318 }
1319
1320 //
1321 // Load the object file into memory. This will not be its final resting place,
1322 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
1323 // address space, properly aligned.
1324 //
1325 static ObjectCode *
1326 preloadObjectFile (pathchar *path)
1327 {
1328 int fileSize;
1329 struct_stat st;
1330 int r;
1331 void *image;
1332 ObjectCode *oc;
1333 int misalignment = 0;
1334
1335 r = pathstat(path, &st);
1336 if (r == -1) {
1337 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
1338 return NULL;
1339 }
1340
1341 fileSize = st.st_size;
1342
1343 #if RTS_LINKER_USE_MMAP
1344 int fd;
1345
1346 /* On many architectures malloc'd memory isn't executable, so we need to use
1347 * mmap. */
1348
1349 #if defined(openbsd_HOST_OS)
1350 fd = open(path, O_RDONLY, S_IRUSR);
1351 #else
1352 fd = open(path, O_RDONLY);
1353 #endif
1354 if (fd == -1) {
1355 errorBelch("loadObj: can't open %s", path);
1356 return NULL;
1357 }
1358
1359 /* iOS does not permit to mmap with r+w+x, however while the comment for
1360 * this function says this is not the final resting place, for some
1361 * architectures / hosts (at least mach-o non-iOS -- see ocGetNames_MachO)
1362 * the image mmaped here in fact ends up being the final resting place for
1363 * the sections. And hence we need to leave r+w+x here for other hosts
1364 * until all hosts have been made aware of the initial image being r+w only.
1365 *
1366 * See also the misalignment logic for darwin below.
1367 */
1368 #if defined(ios_HOST_OS)
1369 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
1370 #else
1371 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
1372 MAP_PRIVATE, fd, 0);
1373 #endif
1374
1375 if (image == MAP_FAILED) {
1376 errorBelch("mmap: failed. errno = %d", errno);
1377 }
1378 // not 32-bit yet, we'll remap later
1379 close(fd);
1380
1381 #else /* !RTS_LINKER_USE_MMAP */
1382 FILE *f;
1383
1384 /* load the image into memory */
1385 /* coverity[toctou] */
1386 f = pathopen(path, WSTR("rb"));
1387 if (!f) {
1388 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
1389 return NULL;
1390 }
1391
1392 # if defined(darwin_HOST_OS)
1393
1394 // In a Mach-O .o file, all sections can and will be misaligned
1395 // if the total size of the headers is not a multiple of the
1396 // desired alignment. This is fine for .o files that only serve
1397 // as input for the static linker, but it's not fine for us,
1398 // as SSE (used by gcc for floating point) and Altivec require
1399 // 16-byte alignment.
1400 // We calculate the correct alignment from the header before
1401 // reading the file, and then we misalign image on purpose so
1402 // that the actual sections end up aligned again.
1403 misalignment = machoGetMisalignment(f);
1404 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
1405 image += misalignment;
1406
1407 # else /* !defined(darwin_HOST_OS) */
1408
1409 image = stgMallocBytes(fileSize, "loadObj(image)");
1410
1411 #endif
1412
1413 int n;
1414 n = fread ( image, 1, fileSize, f );
1415 fclose(f);
1416 if (n != fileSize) {
1417 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
1418 stgFree(image);
1419 return NULL;
1420 }
1421
1422 #endif /* RTS_LINKER_USE_MMAP */
1423
1424 oc = mkOc(path, image, fileSize, true, NULL, misalignment);
1425
1426 #if defined(OBJFORMAT_MACHO)
1427 if (ocVerifyImage_MachO( oc ))
1428 ocInit_MachO( oc );
1429 #endif
1430 #if defined(OBJFORMAT_ELF)
1431 if(ocVerifyImage_ELF( oc ))
1432 ocInit_ELF( oc );
1433 #endif
1434 return oc;
1435 }
1436
1437 /* -----------------------------------------------------------------------------
1438 * Load an obj (populate the global symbol table, but don't resolve yet)
1439 *
1440 * Returns: 1 if ok, 0 on error.
1441 */
1442 static HsInt loadObj_ (pathchar *path)
1443 {
1444 ObjectCode* oc;
1445 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
1446
1447 /* debugBelch("loadObj %s\n", path ); */
1448
1449 /* Check that we haven't already loaded this object.
1450 Ignore requests to load multiple times */
1451
1452 if (isAlreadyLoaded(path)) {
1453 IF_DEBUG(linker,
1454 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1455 return 1; /* success */
1456 }
1457
1458 oc = preloadObjectFile(path);
1459 if (oc == NULL) return 0;
1460
1461 if (! loadOc(oc)) {
1462 // failed; free everything we've allocated
1463 removeOcSymbols(oc);
1464 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
1465 freeObjectCode(oc);
1466 return 0;
1467 }
1468
1469 oc->next = objects;
1470 objects = oc;
1471 return 1;
1472 }
1473
1474 HsInt loadObj (pathchar *path)
1475 {
1476 ACQUIRE_LOCK(&linker_mutex);
1477 HsInt r = loadObj_(path);
1478 RELEASE_LOCK(&linker_mutex);
1479 return r;
1480 }
1481
1482 HsInt loadOc (ObjectCode* oc)
1483 {
1484 int r;
1485
1486 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
1487
1488 /* verify the in-memory image */
1489 # if defined(OBJFORMAT_ELF)
1490 r = ocVerifyImage_ELF ( oc );
1491 # elif defined(OBJFORMAT_PEi386)
1492 r = ocVerifyImage_PEi386 ( oc );
1493 # elif defined(OBJFORMAT_MACHO)
1494 r = ocVerifyImage_MachO ( oc );
1495 # else
1496 barf("loadObj: no verify method");
1497 # endif
1498 if (!r) {
1499 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
1500 return r;
1501 }
1502
1503 /* Note [loadOc orderings]
1504 The order of `ocAllocateExtras` and `ocGetNames` matters. For MachO
1505 and ELF, `ocInit` and `ocGetNames` initialize a bunch of pointers based
1506 on the offset to `oc->image`, but `ocAllocateExtras` may relocate
1507 the address of `oc->image` and invalidate those pointers. So we must
1508 compute or recompute those pointers after `ocAllocateExtras`.
1509
1510 On Windows, when we have an import library we (for now, as we don't honor
1511 the lazy loading semantics of the library and instead GHCi is already
1512 lazy) don't use the library after ocGetNames as it just populates the
1513 symbol table. Allocating space for jump tables in ocAllocateExtras
1514 would just be a waste then as we'll be stopping further processing of the
1515 library in the next few steps. If necessary, the actual allocation
1516 happens in `ocGetNames_PEi386` and `ocAllocateExtras_PEi386` simply
1517 set the correct pointers.
1518 */
1519
1520 #if defined(NEED_SYMBOL_EXTRAS)
1521 # if defined(OBJFORMAT_MACHO)
1522 r = ocAllocateExtras_MachO ( oc );
1523 if (!r) {
1524 IF_DEBUG(linker,
1525 debugBelch("loadOc: ocAllocateExtras_MachO failed\n"));
1526 return r;
1527 }
1528 # elif defined(OBJFORMAT_ELF)
1529 r = ocAllocateExtras_ELF ( oc );
1530 if (!r) {
1531 IF_DEBUG(linker,
1532 debugBelch("loadOc: ocAllocateExtras_ELF failed\n"));
1533 return r;
1534 }
1535 # endif
1536 #endif
1537
1538 /* build the symbol list for this image */
1539 # if defined(OBJFORMAT_ELF)
1540 r = ocGetNames_ELF ( oc );
1541 # elif defined(OBJFORMAT_PEi386)
1542 r = ocGetNames_PEi386 ( oc );
1543 # elif defined(OBJFORMAT_MACHO)
1544 r = ocGetNames_MachO ( oc );
1545 # else
1546 barf("loadObj: no getNames method");
1547 # endif
1548 if (!r) {
1549 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
1550 return r;
1551 }
1552
1553 #if defined(NEED_SYMBOL_EXTRAS)
1554 # if defined(OBJFORMAT_PEi386)
1555 ocAllocateExtras_PEi386 ( oc );
1556 # endif
1557 #endif
1558
1559 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
1560 setOcInitialStatus( oc );
1561 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
1562
1563 return 1;
1564 }
1565
1566 /* -----------------------------------------------------------------------------
1567 * try to load and initialize an ObjectCode into memory
1568 *
1569 * Returns: 1 if ok, 0 on error.
1570 */
1571 int ocTryLoad (ObjectCode* oc) {
1572 int r;
1573
1574 if (oc->status != OBJECT_NEEDED) {
1575 return 1;
1576 }
1577
1578 /* Check for duplicate symbols by looking into `symhash`.
1579 Duplicate symbols are any symbols which exist
1580 in different ObjectCodes that have both been loaded, or
1581 are to be loaded by this call.
1582
1583 This call is intended to have no side-effects when a non-duplicate
1584 symbol is re-inserted. A symbol is only a duplicate if the object file
1585 it is defined in has had it's relocations resolved. A resolved object
1586 file means the symbols inside it are required.
1587
1588 The symbol address is not used to distinguish symbols. Duplicate symbols
1589 are distinguished by name, oc and attributes (weak symbols etc).
1590 */
1591 int x;
1592 Symbol_t symbol;
1593 for (x = 0; x < oc->n_symbols; x++) {
1594 symbol = oc->symbols[x];
1595 if ( symbol.name
1596 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol.name,
1597 symbol.addr,
1598 isSymbolWeak(oc, symbol.name), oc)) {
1599 return 0;
1600 }
1601 }
1602
1603 # if defined(OBJFORMAT_ELF)
1604 r = ocResolve_ELF ( oc );
1605 # elif defined(OBJFORMAT_PEi386)
1606 r = ocResolve_PEi386 ( oc );
1607 # elif defined(OBJFORMAT_MACHO)
1608 r = ocResolve_MachO ( oc );
1609 # else
1610 barf("ocTryLoad: not implemented on this platform");
1611 # endif
1612 if (!r) { return r; }
1613
1614 // run init/init_array/ctors/mod_init_func
1615
1616 loading_obj = oc; // tells foreignExportStablePtr what to do
1617 #if defined(OBJFORMAT_ELF)
1618 r = ocRunInit_ELF ( oc );
1619 #elif defined(OBJFORMAT_PEi386)
1620 r = ocRunInit_PEi386 ( oc );
1621 #elif defined(OBJFORMAT_MACHO)
1622 r = ocRunInit_MachO ( oc );
1623 #else
1624 barf("ocTryLoad: initializers not implemented on this platform");
1625 #endif
1626 loading_obj = NULL;
1627
1628 if (!r) { return r; }
1629
1630 oc->status = OBJECT_RESOLVED;
1631
1632 return 1;
1633 }
1634
1635 /* -----------------------------------------------------------------------------
1636 * resolve all the currently unlinked objects in memory
1637 *
1638 * Returns: 1 if ok, 0 on error.
1639 */
1640 static HsInt resolveObjs_ (void)
1641 {
1642 ObjectCode *oc;
1643 int r;
1644
1645 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
1646
1647 for (oc = objects; oc; oc = oc->next) {
1648 r = ocTryLoad(oc);
1649 if (!r)
1650 {
1651 return r;
1652 }
1653 }
1654
1655 #if defined(PROFILING)
1656 // collect any new cost centres & CCSs that were defined during runInit
1657 initProfiling2();
1658 #endif
1659
1660 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
1661 return 1;
1662 }
1663
1664 HsInt resolveObjs (void)
1665 {
1666 ACQUIRE_LOCK(&linker_mutex);
1667 HsInt r = resolveObjs_();
1668 RELEASE_LOCK(&linker_mutex);
1669 return r;
1670 }
1671
1672 /* -----------------------------------------------------------------------------
1673 * delete an object from the pool
1674 */
1675 static HsInt unloadObj_ (pathchar *path, bool just_purge)
1676 {
1677 ObjectCode *oc, *prev, *next;
1678 HsBool unloadedAnyObj = HS_BOOL_FALSE;
1679
1680 ASSERT(symhash != NULL);
1681 ASSERT(objects != NULL);
1682
1683 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
1684
1685 prev = NULL;
1686 for (oc = objects; oc; oc = next) {
1687 next = oc->next; // oc might be freed
1688
1689 if (!pathcmp(oc->fileName,path)) {
1690
1691 // these are both idempotent, so in just_purge mode we can
1692 // later call unloadObj() to really unload the object.
1693 removeOcSymbols(oc);
1694 freeOcStablePtrs(oc);
1695
1696 if (!just_purge) {
1697 if (prev == NULL) {
1698 objects = oc->next;
1699 } else {
1700 prev->next = oc->next;
1701 }
1702 ACQUIRE_LOCK(&linker_unloaded_mutex);
1703 oc->next = unloaded_objects;
1704 unloaded_objects = oc;
1705 oc->status = OBJECT_UNLOADED;
1706 RELEASE_LOCK(&linker_unloaded_mutex);
1707 // We do not own oc any more; it can be released at any time by
1708 // the GC in checkUnload().
1709 } else {
1710 prev = oc;
1711 }
1712
1713 /* This could be a member of an archive so continue
1714 * unloading other members. */
1715 unloadedAnyObj = HS_BOOL_TRUE;
1716 } else {
1717 prev = oc;
1718 }
1719 }
1720
1721 if (unloadedAnyObj) {
1722 return 1;
1723 }
1724 else {
1725 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
1726 return 0;
1727 }
1728 }
1729
1730 HsInt unloadObj (pathchar *path)
1731 {
1732 ACQUIRE_LOCK(&linker_mutex);
1733 HsInt r = unloadObj_(path, false);
1734 RELEASE_LOCK(&linker_mutex);
1735 return r;
1736 }
1737
1738 HsInt purgeObj (pathchar *path)
1739 {
1740 ACQUIRE_LOCK(&linker_mutex);
1741 HsInt r = unloadObj_(path, true);
1742 RELEASE_LOCK(&linker_mutex);
1743 return r;
1744 }
1745
1746 static OStatus getObjectLoadStatus_ (pathchar *path)
1747 {
1748 ObjectCode *o;
1749 for (o = objects; o; o = o->next) {
1750 if (0 == pathcmp(o->fileName, path)) {
1751 return o->status;
1752 }
1753 }
1754 for (o = unloaded_objects; o; o = o->next) {
1755 if (0 == pathcmp(o->fileName, path)) {
1756 return o->status;
1757 }
1758 }
1759 return OBJECT_NOT_LOADED;
1760 }
1761
1762 OStatus getObjectLoadStatus (pathchar *path)
1763 {
1764 ACQUIRE_LOCK(&linker_mutex);
1765 OStatus r = getObjectLoadStatus_(path);
1766 RELEASE_LOCK(&linker_mutex);
1767 return r;
1768 }
1769
1770 /* -----------------------------------------------------------------------------
1771 * Sanity checking. For each ObjectCode, maintain a list of address ranges
1772 * which may be prodded during relocation, and abort if we try and write
1773 * outside any of these.
1774 */
1775 void
1776 addProddableBlock ( ObjectCode* oc, void* start, int size )
1777 {
1778 ProddableBlock* pb
1779 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
1780
1781 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
1782 ASSERT(size > 0);
1783 pb->start = start;
1784 pb->size = size;
1785 pb->next = oc->proddables;
1786 oc->proddables = pb;
1787 }
1788
1789 void
1790 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
1791 {
1792 ProddableBlock* pb;
1793
1794 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
1795 char* s = (char*)(pb->start);
1796 char* e = s + pb->size;
1797 char* a = (char*)addr;
1798 if (a >= s && (a+size) <= e) return;
1799 }
1800 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
1801 }
1802
1803 void freeProddableBlocks (ObjectCode *oc)
1804 {
1805 ProddableBlock *pb, *next;
1806
1807 for (pb = oc->proddables; pb != NULL; pb = next) {
1808 next = pb->next;
1809 stgFree(pb);
1810 }
1811 oc->proddables = NULL;
1812 }
1813
1814 /* -----------------------------------------------------------------------------
1815 * Section management.
1816 */
1817 void
1818 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
1819 void* start, StgWord size, StgWord mapped_offset,
1820 void* mapped_start, StgWord mapped_size)
1821 {
1822 s->start = start; /* actual start of section in memory */
1823 s->size = size; /* actual size of section in memory */
1824 s->kind = kind;
1825 s->alloc = alloc;
1826 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
1827
1828 s->mapped_start = mapped_start; /* start of mmap() block */
1829 s->mapped_size = mapped_size; /* size of mmap() block */
1830
1831 if (!s->info)
1832 s->info
1833 = (struct SectionFormatInfo*)stgCallocBytes(1, sizeof *s->info,
1834 "addSection(SectionFormatInfo)");
1835
1836 IF_DEBUG(linker,
1837 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
1838 start, (void*)((StgWord)start + size),
1839 size, kind ));
1840 }