b42a0de9c38f81709765e93d63170f056c3638cb
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "StablePtr.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "linker/CacheFlush.h"
32 #include "linker/SymbolExtras.h"
33 #include "PathUtils.h"
34
35 #if !defined(mingw32_HOST_OS)
36 #include "posix/Signals.h"
37 #endif
38
39 // get protos for is*()
40 #include <ctype.h>
41
42 #if defined(HAVE_SYS_TYPES_H)
43 #include <sys/types.h>
44 #endif
45
46 #include <inttypes.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <assert.h>
51 #include <fs_rts.h>
52
53 #if defined(HAVE_SYS_STAT_H)
54 #include <sys/stat.h>
55 #endif
56
57 #if defined(HAVE_DLFCN_H)
58 #include <dlfcn.h>
59 #endif
60
61 #if defined(OBJFORMAT_ELF)
62 # include "linker/Elf.h"
63 # include <regex.h> // regex is already used by dlopen() so this is OK
64 // to use here without requiring an additional lib
65 #elif defined(OBJFORMAT_PEi386)
66 # include "linker/PEi386.h"
67 # include <windows.h>
68 #elif defined(OBJFORMAT_MACHO)
69 # include "linker/MachO.h"
70 # include <regex.h>
71 # include <mach/machine.h>
72 # include <mach-o/fat.h>
73 #endif
74
75 #if defined(dragonfly_HOST_OS)
76 #include <sys/tls.h>
77 #endif
78 /*
79 Note [runtime-linker-support]
80 -----------------------------
81 When adding support for a new platform to the runtime linker please
82 update `$TOP/configure.ac` under heading `Does target have runtime
83 linker support?`.
84 */
85 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
86 This hashtable will contain information on all symbols
87 that we know of, however the .o they are in may not be loaded.
88
89 Until the ObjectCode the symbol belongs to is actually
90 loaded this symbol may be replaced. So do not rely on
91 addresses of unloaded symbols.
92
93 Note [runtime-linker-phases]
94 --------------------------------------
95 Broadly the behavior of the runtime linker can be
96 split into the following four phases:
97
98 - Indexing (e.g. ocVerifyImage and ocGetNames)
99 - Initialization (e.g. ocResolve and ocRunInit)
100 - Resolve (e.g. resolveObjs())
101 - Lookup (e.g. lookupSymbol)
102
103 This is to enable lazy loading of symbols. Eager loading is problematic
104 as it means that all symbols must be available, even those which we will
105 never use. This is especially painful on Windows, where the number of
106 libraries required to link things like mingwex grows to be quite high.
107
108 We proceed through these stages as follows,
109
110 * During Indexing we verify and open the ObjectCode and
111 perform a quick scan/indexing of the ObjectCode. All the work
112 required to actually load the ObjectCode is done.
113
114 All symbols from the ObjectCode are also inserted into
115 `symhash`, where possible duplicates are handled via the semantics
116 described in `ghciInsertSymbolTable`.
117
118 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
119 depending on whether they are an archive member or not.
120
121 * During initialization we load ObjectCode, perform relocations, execute
122 static constructors etc. This phase may trigger other ObjectCodes to
123 be loaded because of the calls to lookupSymbol.
124
125 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
126 previous status was `OBJECT_LOADED`.
127
128 * During resolve we attempt to resolve all the symbols needed for the
129 initial link. This essentially means, that for any ObjectCode given
130 directly to the command-line we perform lookupSymbols on the required
131 symbols. lookupSymbols may trigger the loading of additional ObjectCode
132 if required.
133
134 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
135 the previous status was `OBJECT_NEEDED`.
136
137 * lookupSymbols is used to lookup any symbols required, both during initial
138 link and during statement and expression compilations in the REPL.
139 Declaration of e.g. a foreign import, will eventually call lookupSymbol
140 which will either fail (symbol unknown) or succeed (and possibly trigger a
141 load).
142
143 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
144
145 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
146 by calling unloadObj and starting over.
147 When a new declaration or statement is performed ultimately lookupSymbol is called
148 without doing a re-link.
149
150 The goal of these different phases is to allow the linker to be able to perform
151 "lazy loading" of ObjectCode. The reason for this is that we want to only link
152 in symbols that are actually required for the link. This reduces:
153
154 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
155 then we don't need to load libB. This means the dependency chain for libraries
156 such as mingw32 and mingwex can be broken down.
157
158 2) The number of duplicate symbols, since now only symbols that are
159 true duplicates will display the error.
160 */
161 /*Str*/HashTable *symhash;
162
163 /* List of currently loaded objects */
164 ObjectCode *objects = NULL; /* initially empty */
165
166 /* List of objects that have been unloaded via unloadObj(), but are waiting
167 to be actually freed via checkUnload() */
168 ObjectCode *unloaded_objects = NULL; /* initially empty */
169
170 #if defined(THREADED_RTS)
171 /* This protects all the Linker's global state except unloaded_objects */
172 Mutex linker_mutex;
173 /*
174 * This protects unloaded_objects. We have a separate mutex for this, because
175 * the GC needs to access unloaded_objects in checkUnload, while the linker only
176 * needs to access unloaded_objects in unloadObj(), so this allows most linker
177 * operations proceed concurrently with the GC.
178 */
179 Mutex linker_unloaded_mutex;
180 #endif
181
182 /* Generic wrapper function to try and Resolve and RunInit oc files */
183 int ocTryLoad( ObjectCode* oc );
184
185 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
186 * small memory model on this architecture (see gcc docs,
187 * -mcmodel=small).
188 *
189 * MAP_32BIT not available on OpenBSD/amd64
190 */
191 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
192 #define TRY_MAP_32BIT MAP_32BIT
193 #else
194 #define TRY_MAP_32BIT 0
195 #endif
196
197 /*
198 * Due to the small memory model (see above), on x86_64 we have to map
199 * all our non-PIC object files into the low 2Gb of the address space
200 * (why 2Gb and not 4Gb? Because all addresses must be reachable
201 * using a 32-bit signed PC-relative offset). On Linux we can do this
202 * using the MAP_32BIT flag to mmap(), however on other OSs
203 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
204 * can't do this. So on these systems, we have to pick a base address
205 * in the low 2Gb of the address space and try to allocate memory from
206 * there.
207 *
208 * We pick a default address based on the OS, but also make this
209 * configurable via an RTS flag (+RTS -xm)
210 */
211 #if defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
212 // Try to use MAP_32BIT
213 #define MMAP_32BIT_BASE_DEFAULT 0
214 #else
215 // A guess: 1Gb.
216 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
217 #endif
218
219 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
220
221 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
222 ObjectCode *owner)
223 {
224 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
225 if (!pinfo || owner != pinfo->owner) return;
226 removeStrHashTable(table, key, NULL);
227 if (isSymbolImport (owner, key))
228 stgFree(pinfo->value);
229
230 stgFree(pinfo);
231 }
232
233 /* -----------------------------------------------------------------------------
234 * Insert symbols into hash tables, checking for duplicates.
235 *
236 * Returns: 0 on failure, nonzero on success
237 */
238 /*
239 Note [weak-symbols-support]
240 -------------------------------------
241 While ghciInsertSymbolTable does implement extensive
242 logic for weak symbol support, weak symbols are not currently
243 fully supported by the RTS. This code is mostly here for COMDAT
244 support which uses the weak symbols support.
245
246 Linking weak symbols defined purely in C code with other C code
247 should also work, probably. Observing weak symbols in Haskell
248 won't.
249
250 Some test have been written for weak symbols but have been disabled
251 mostly because it's unsure how the weak symbols support should look.
252 See Trac #11223
253 */
254 int ghciInsertSymbolTable(
255 pathchar* obj_name,
256 HashTable *table,
257 const SymbolName* key,
258 SymbolAddr* data,
259 HsBool weak,
260 ObjectCode *owner)
261 {
262 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
263 if (!pinfo) /* new entry */
264 {
265 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
266 pinfo->value = data;
267 pinfo->owner = owner;
268 pinfo->weak = weak;
269 insertStrHashTable(table, key, pinfo);
270 return 1;
271 }
272 else if (weak && data && pinfo->weak && !pinfo->value)
273 {
274 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
275 pinfo->value = data;
276 pinfo->owner = owner;
277 return 1;
278 }
279 else if (weak)
280 {
281 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
282 already know of another copy throw this one away.
283
284 or both weak symbols have a nonzero value. Keep the existing one.
285
286 This also preserves the semantics of linking against
287 the first symbol we find. */
288 }
289 else if (pinfo->weak && !weak) /* weak symbol is in the table */
290 {
291 /* override the weak definition with the non-weak one */
292 pinfo->value = data;
293 pinfo->owner = owner;
294 pinfo->weak = HS_BOOL_FALSE;
295 return 1;
296 }
297 else if ( pinfo->owner
298 && pinfo->owner->status != OBJECT_RESOLVED
299 && pinfo->owner->status != OBJECT_NEEDED)
300 {
301 /* If the other symbol hasn't been loaded or will be loaded and we want to
302 explicitly load the new one, we can just swap it out and load the one
303 that has been requested. If not, just keep the first one encountered.
304
305 Because the `symHash' table consists symbols we've also not loaded but
306 found during the initial scan this is safe to do. If however the existing
307 symbol has been loaded then it means we have a duplicate.
308
309 This is essentially emulating the behavior of a linker wherein it will always
310 link in object files that are .o file arguments, but only take object files
311 from archives as needed. */
312 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
313 pinfo->value = data;
314 pinfo->owner = owner;
315 pinfo->weak = weak;
316 }
317
318 return 1;
319 }
320 else if (pinfo->owner == owner)
321 {
322 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
323 return 1;
324 }
325 else if (owner && owner->status == OBJECT_LOADED)
326 {
327 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
328 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
329 call this function again to trigger the duplicate error. */
330 return 1;
331 }
332
333 pathchar* archiveName = NULL;
334 debugBelch(
335 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
336 " %s\n"
337 "whilst processing object file\n"
338 " %" PATH_FMT "\n"
339 "The symbol was previously defined in\n"
340 " %" PATH_FMT "\n"
341 "This could be caused by:\n"
342 " * Loading two different object files which export the same symbol\n"
343 " * Specifying the same object file twice on the GHCi command line\n"
344 " * An incorrect `package.conf' entry, causing some object to be\n"
345 " loaded twice.\n",
346 (char*)key,
347 obj_name,
348 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
349 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
350 : pinfo->owner->fileName
351 );
352
353 if (archiveName)
354 {
355 stgFree(archiveName);
356 archiveName = NULL;
357 }
358 return 0;
359 }
360
361 /* -----------------------------------------------------------------------------
362 * Looks up symbols into hash tables.
363 *
364 * Returns: 0 on failure and result is not set,
365 * nonzero on success and result set to nonzero pointer
366 */
367 HsBool ghciLookupSymbolInfo(HashTable *table,
368 const SymbolName* key, RtsSymbolInfo **result)
369 {
370 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
371 if (!pinfo) {
372 *result = NULL;
373 return HS_BOOL_FALSE;
374 }
375 if (pinfo->weak)
376 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
377 /* Once it's looked up, it can no longer be overridden */
378 pinfo->weak = HS_BOOL_FALSE;
379
380 *result = pinfo;
381 return HS_BOOL_TRUE;
382 }
383
384 /* -----------------------------------------------------------------------------
385 * initialize the object linker
386 */
387
388
389 static int linker_init_done = 0 ;
390
391 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
392 static void *dl_prog_handle;
393 static regex_t re_invalid;
394 static regex_t re_realso;
395 #if defined(THREADED_RTS)
396 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
397 #endif
398 #endif
399
400 void initLinker (void)
401 {
402 // default to retaining CAFs for backwards compatibility. Most
403 // users will want initLinker_(0): otherwise unloadObj() will not
404 // be able to unload object files when they contain CAFs.
405 initLinker_(1);
406 }
407
408 void
409 initLinker_ (int retain_cafs)
410 {
411 RtsSymbolVal *sym;
412 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
413 int compileResult;
414 #endif
415
416 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
417
418 /* Make initLinker idempotent, so we can call it
419 before every relevant operation; that means we
420 don't need to initialise the linker separately */
421 if (linker_init_done == 1) {
422 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
423 return;
424 } else {
425 linker_init_done = 1;
426 }
427
428 objects = NULL;
429 unloaded_objects = NULL;
430
431 #if defined(THREADED_RTS)
432 initMutex(&linker_mutex);
433 initMutex(&linker_unloaded_mutex);
434 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
435 initMutex(&dl_mutex);
436 #endif
437 #endif
438
439 symhash = allocStrHashTable();
440
441 /* populate the symbol table with stuff from the RTS */
442 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
443 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
444 symhash, sym->lbl, sym->addr,
445 sym->weak, NULL)) {
446 barf("ghciInsertSymbolTable failed");
447 }
448 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
449 }
450 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
451 machoInitSymbolsWithoutUnderscore();
452 # endif
453 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
454 referenced from a statically linked module. We need to mimic this, but
455 we cannot use NULL because we use it to mean nonexistent symbols. So we
456 use an arbitrary (hopefully unique) address here.
457 */
458 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
459 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
460 barf("ghciInsertSymbolTable failed");
461 }
462
463 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
464 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
465 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
466 retain_cafs ? newRetainedCAF : newGCdCAF,
467 HS_BOOL_FALSE, NULL)) {
468 barf("ghciInsertSymbolTable failed");
469 }
470
471 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
472 # if defined(RTLD_DEFAULT)
473 dl_prog_handle = RTLD_DEFAULT;
474 # else
475 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
476 # endif /* RTLD_DEFAULT */
477
478 compileResult = regcomp(&re_invalid,
479 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short|invalid file format)",
480 REG_EXTENDED);
481 if (compileResult != 0) {
482 barf("Compiling re_invalid failed");
483 }
484 compileResult = regcomp(&re_realso,
485 "(GROUP|INPUT) *\\( *([^ )]+)",
486 REG_EXTENDED);
487 if (compileResult != 0) {
488 barf("Compiling re_realso failed");
489 }
490 # endif
491
492 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
493 // User-override for mmap_32bit_base
494 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
495 }
496
497 if (RTS_LINKER_USE_MMAP)
498 m32_allocator_init();
499
500 #if defined(OBJFORMAT_PEi386)
501 initLinker_PEi386();
502 #endif
503
504 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
505 return;
506 }
507
508 void
509 exitLinker( void ) {
510 #if defined(OBJFORMAT_PEi386)
511 exitLinker_PEi386();
512 #endif
513 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
514 if (linker_init_done == 1) {
515 regfree(&re_invalid);
516 regfree(&re_realso);
517 #if defined(THREADED_RTS)
518 closeMutex(&dl_mutex);
519 #endif
520 }
521 #endif
522 if (linker_init_done == 1) {
523 freeHashTable(symhash, free);
524 }
525 #if defined(THREADED_RTS)
526 closeMutex(&linker_mutex);
527 #endif
528 }
529
530 /* -----------------------------------------------------------------------------
531 * Loading DLL or .so dynamic libraries
532 * -----------------------------------------------------------------------------
533 *
534 * Add a DLL from which symbols may be found. In the ELF case, just
535 * do RTLD_GLOBAL-style add, so no further messing around needs to
536 * happen in order that symbols in the loaded .so are findable --
537 * lookupSymbol() will subsequently see them by dlsym on the program's
538 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
539 *
540 * In the PEi386 case, open the DLLs and put handles to them in a
541 * linked list. When looking for a symbol, try all handles in the
542 * list. This means that we need to load even DLLs that are guaranteed
543 * to be in the ghc.exe image already, just so we can get a handle
544 * to give to loadSymbol, so that we can find the symbols. For such
545 * libraries, the LoadLibrary call should be a no-op except for returning
546 * the handle.
547 *
548 */
549
550 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
551
552 /* Suppose in ghci we load a temporary SO for a module containing
553 f = 1
554 and then modify the module, recompile, and load another temporary
555 SO with
556 f = 2
557 Then as we don't unload the first SO, dlsym will find the
558 f = 1
559 symbol whereas we want the
560 f = 2
561 symbol. We therefore need to keep our own SO handle list, and
562 try SOs in the right order. */
563
564 typedef
565 struct _OpenedSO {
566 struct _OpenedSO* next;
567 void *handle;
568 }
569 OpenedSO;
570
571 /* A list thereof. */
572 static OpenedSO* openedSOs = NULL;
573
574 static const char *
575 internal_dlopen(const char *dll_name)
576 {
577 OpenedSO* o_so;
578 void *hdl;
579 const char *errmsg;
580 char *errmsg_copy;
581
582 // omitted: RTLD_NOW
583 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
584 IF_DEBUG(linker,
585 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
586
587 //-------------- Begin critical section ------------------
588 // This critical section is necessary because dlerror() is not
589 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
590 // Also, the error message returned must be copied to preserve it
591 // (see POSIX also)
592
593 ACQUIRE_LOCK(&dl_mutex);
594 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
595
596 errmsg = NULL;
597 if (hdl == NULL) {
598 /* dlopen failed; return a ptr to the error msg. */
599 errmsg = dlerror();
600 if (errmsg == NULL) errmsg = "addDLL: unknown error";
601 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
602 strcpy(errmsg_copy, errmsg);
603 errmsg = errmsg_copy;
604 } else {
605 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
606 o_so->handle = hdl;
607 o_so->next = openedSOs;
608 openedSOs = o_so;
609 }
610
611 RELEASE_LOCK(&dl_mutex);
612 //--------------- End critical section -------------------
613
614 return errmsg;
615 }
616
617 /*
618 Note [RTLD_LOCAL]
619
620 In GHCi we want to be able to override previous .so's with newly
621 loaded .so's when we recompile something. This further implies that
622 when we look up a symbol in internal_dlsym() we have to iterate
623 through the loaded libraries (in order from most recently loaded to
624 oldest) looking up the symbol in each one until we find it.
625
626 However, this can cause problems for some symbols that are copied
627 by the linker into the executable image at runtime - see #8935 for a
628 lengthy discussion. To solve that problem we need to look up
629 symbols in the main executable *first*, before attempting to look
630 them up in the loaded .so's. But in order to make that work, we
631 have to always call dlopen with RTLD_LOCAL, so that the loaded
632 libraries don't populate the global symbol table.
633 */
634
635 static void *
636 internal_dlsym(const char *symbol) {
637 OpenedSO* o_so;
638 void *v;
639
640 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
641 ACQUIRE_LOCK(&dl_mutex);
642 dlerror();
643 // look in program first
644 v = dlsym(dl_prog_handle, symbol);
645 if (dlerror() == NULL) {
646 RELEASE_LOCK(&dl_mutex);
647 return v;
648 }
649
650 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
651 v = dlsym(o_so->handle, symbol);
652 if (dlerror() == NULL) {
653 RELEASE_LOCK(&dl_mutex);
654 return v;
655 }
656 }
657 RELEASE_LOCK(&dl_mutex);
658 return v;
659 }
660 # endif
661
662 const char *
663 addDLL( pathchar *dll_name )
664 {
665 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
666 /* ------------------- ELF DLL loader ------------------- */
667
668 #define NMATCH 5
669 regmatch_t match[NMATCH];
670 const char *errmsg;
671 FILE* fp;
672 size_t match_length;
673 #define MAXLINE 1000
674 char line[MAXLINE];
675 int result;
676
677 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
678 errmsg = internal_dlopen(dll_name);
679
680 if (errmsg == NULL) {
681 return NULL;
682 }
683
684 // GHC Trac ticket #2615
685 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
686 // contain linker scripts rather than ELF-format object code. This
687 // code handles the situation by recognizing the real object code
688 // file name given in the linker script.
689 //
690 // If an "invalid ELF header" error occurs, it is assumed that the
691 // .so file contains a linker script instead of ELF object code.
692 // In this case, the code looks for the GROUP ( ... ) linker
693 // directive. If one is found, the first file name inside the
694 // parentheses is treated as the name of a dynamic library and the
695 // code attempts to dlopen that file. If this is also unsuccessful,
696 // an error message is returned.
697
698 // see if the error message is due to an invalid ELF header
699 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
700 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
701 IF_DEBUG(linker, debugBelch("result = %i\n", result));
702 if (result == 0) {
703 // success -- try to read the named file as a linker script
704 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
705 MAXLINE-1);
706 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
707 line[match_length] = '\0'; // make sure string is null-terminated
708 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
709 if ((fp = __rts_fopen(line, "r")) == NULL) {
710 return errmsg; // return original error if open fails
711 }
712 // try to find a GROUP or INPUT ( ... ) command
713 while (fgets(line, MAXLINE, fp) != NULL) {
714 IF_DEBUG(linker, debugBelch("input line = %s", line));
715 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
716 // success -- try to dlopen the first named file
717 IF_DEBUG(linker, debugBelch("match%s\n",""));
718 line[match[2].rm_eo] = '\0';
719 stgFree((void*)errmsg); // Free old message before creating new one
720 errmsg = internal_dlopen(line+match[2].rm_so);
721 break;
722 }
723 // if control reaches here, no GROUP or INPUT ( ... ) directive
724 // was found and the original error message is returned to the
725 // caller
726 }
727 fclose(fp);
728 }
729 return errmsg;
730
731 # elif defined(OBJFORMAT_PEi386)
732 return addDLL_PEi386(dll_name, NULL);
733
734 # else
735 barf("addDLL: not implemented on this platform");
736 # endif
737 }
738
739 /* -----------------------------------------------------------------------------
740 * Searches the system directories to determine if there is a system DLL that
741 * satisfies the given name. This prevent GHCi from linking against a static
742 * library if a DLL is available.
743 *
744 * Returns: NULL on failure or no DLL found, else the full path to the DLL
745 * that can be loaded.
746 */
747 pathchar* findSystemLibrary(pathchar* dll_name)
748 {
749 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%"
750 PATH_FMT "'\n", dll_name));
751
752 #if defined(OBJFORMAT_PEi386)
753 return findSystemLibrary_PEi386(dll_name);
754 #else
755 (void)(dll_name); // Function not implemented for other platforms.
756 return NULL;
757 #endif
758 }
759
760 /* -----------------------------------------------------------------------------
761 * Emits a warning determining that the system is missing a required security
762 * update that we need to get access to the proper APIs
763 */
764 void warnMissingKBLibraryPaths( void )
765 {
766 static HsBool missing_update_warn = HS_BOOL_FALSE;
767 if (!missing_update_warn) {
768 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
769 missing_update_warn = HS_BOOL_TRUE;
770 }
771 }
772
773 /* -----------------------------------------------------------------------------
774 * appends a directory to the process DLL Load path so LoadLibrary can find it
775 *
776 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
777 * restore the search path to what it was before this call.
778 */
779 HsPtr addLibrarySearchPath(pathchar* dll_path)
780 {
781 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%"
782 PATH_FMT "'\n", dll_path));
783
784 #if defined(OBJFORMAT_PEi386)
785 return addLibrarySearchPath_PEi386(dll_path);
786 #else
787 (void)(dll_path); // Function not implemented for other platforms.
788 return NULL;
789 #endif
790 }
791
792 /* -----------------------------------------------------------------------------
793 * removes a directory from the process DLL Load path
794 *
795 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
796 */
797 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
798 {
799 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n",
800 dll_path_index));
801
802 #if defined(OBJFORMAT_PEi386)
803 return removeLibrarySearchPath_PEi386(dll_path_index);
804 #else
805 (void)(dll_path_index); // Function not implemented for other platforms.
806 return HS_BOOL_FALSE;
807 #endif
808 }
809
810 /* -----------------------------------------------------------------------------
811 * insert a symbol in the hash table
812 *
813 * Returns: 0 on failure, nozero on success
814 */
815 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
816 {
817 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE,
818 NULL);
819 }
820
821 /* -----------------------------------------------------------------------------
822 * lookup a symbol in the hash table
823 */
824 #if defined(OBJFORMAT_PEi386)
825 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
826 {
827 return lookupSymbol_PEi386(lbl);
828 }
829
830 #else
831
832 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
833 {
834 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
835
836 ASSERT(symhash != NULL);
837 RtsSymbolInfo *pinfo;
838
839 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
840 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
841
842 # if defined(OBJFORMAT_ELF)
843 return internal_dlsym(lbl);
844 # elif defined(OBJFORMAT_MACHO)
845
846 /* HACK: On OS X, all symbols are prefixed with an underscore.
847 However, dlsym wants us to omit the leading underscore from the
848 symbol name -- the dlsym routine puts it back on before
849 searching for the symbol. For now, we simply strip it off here
850 (and ONLY here).
851 */
852 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n",
853 lbl));
854 ASSERT(lbl[0] == '_');
855 return internal_dlsym(lbl + 1);
856
857 # else
858 ASSERT(2+2 == 5);
859 return NULL;
860 # endif
861 } else {
862 return loadSymbol(lbl, pinfo);
863 }
864 }
865 #endif /* OBJFORMAT_PEi386 */
866
867 /*
868 * Load and relocate the object code for a symbol as necessary.
869 * Symbol name only used for diagnostics output.
870 */
871 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
872 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl,
873 pinfo->value));
874 ObjectCode* oc = pinfo->owner;
875
876 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
877 See Note [runtime-linker-phases] */
878 if (oc && lbl && oc->status == OBJECT_LOADED) {
879 oc->status = OBJECT_NEEDED;
880 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand "
881 "loading symbol '%s'\n", lbl));
882 int r = ocTryLoad(oc);
883 if (!r) {
884 return NULL;
885 }
886
887 #if defined(PROFILING)
888 // collect any new cost centres & CCSs
889 // that were defined during runInit
890 initProfiling2();
891 #endif
892 }
893
894 return pinfo->value;
895 }
896
897 SymbolAddr* lookupSymbol( SymbolName* lbl )
898 {
899 ACQUIRE_LOCK(&linker_mutex);
900 SymbolAddr* r = lookupSymbol_(lbl);
901 if (!r) {
902 errorBelch("^^ Could not load '%s', dependency unresolved. "
903 "See top entry above.\n", lbl);
904 fflush(stderr);
905 }
906 RELEASE_LOCK(&linker_mutex);
907 return r;
908 }
909
910 /* -----------------------------------------------------------------------------
911 Create a StablePtr for a foreign export. This is normally called by
912 a C function with __attribute__((constructor)), which is generated
913 by GHC and linked into the module.
914
915 If the object code is being loaded dynamically, then we remember
916 which StablePtrs were allocated by the constructors and free them
917 again in unloadObj().
918 -------------------------------------------------------------------------- */
919
920 static ObjectCode *loading_obj = NULL;
921
922 StgStablePtr foreignExportStablePtr (StgPtr p)
923 {
924 ForeignExportStablePtr *fe_sptr;
925 StgStablePtr *sptr;
926
927 sptr = getStablePtr(p);
928
929 if (loading_obj != NULL) {
930 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
931 "foreignExportStablePtr");
932 fe_sptr->stable_ptr = sptr;
933 fe_sptr->next = loading_obj->stable_ptrs;
934 loading_obj->stable_ptrs = fe_sptr;
935 }
936
937 return sptr;
938 }
939
940
941 /* -----------------------------------------------------------------------------
942 * Debugging aid: look in GHCi's object symbol tables for symbols
943 * within DELTA bytes of the specified address, and show their names.
944 */
945 #if defined(DEBUG)
946 void ghci_enquire ( SymbolAddr* addr );
947
948 void ghci_enquire(SymbolAddr* addr)
949 {
950 int i;
951 SymbolName* sym;
952 RtsSymbolInfo* a;
953 const int DELTA = 64;
954 ObjectCode* oc;
955
956 for (oc = objects; oc; oc = oc->next) {
957 for (i = 0; i < oc->n_symbols; i++) {
958 sym = oc->symbols[i];
959 if (sym == NULL) continue;
960 a = NULL;
961 if (a == NULL) {
962 ghciLookupSymbolInfo(symhash, sym, &a);
963 }
964 if (a == NULL) {
965 // debugBelch("ghci_enquire: can't find %s\n", sym);
966 }
967 else if ( a->value
968 && (char*)addr-DELTA <= (char*)a->value
969 && (char*)a->value <= (char*)addr+DELTA) {
970 debugBelch("%p + %3d == `%s'\n", addr,
971 (int)((char*)a->value - (char*)addr), sym);
972 }
973 }
974 }
975 }
976 #endif
977
978 pathchar*
979 resolveSymbolAddr (pathchar* buffer, int size,
980 SymbolAddr* symbol, uintptr_t* top)
981 {
982 #if defined(OBJFORMAT_PEi386)
983 return resolveSymbolAddr_PEi386 (buffer, size, symbol, top);
984 #else /* OBJFORMAT_PEi386 */
985 (void)buffer;
986 (void)size;
987 (void)symbol;
988 (void)top;
989 return NULL;
990 #endif /* OBJFORMAT_PEi386 */
991 }
992
993 #if RTS_LINKER_USE_MMAP
994 //
995 // Returns NULL on failure.
996 //
997 void *
998 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
999 {
1000 void *map_addr = NULL;
1001 void *result;
1002 size_t size;
1003 uint32_t tryMap32Bit = RtsFlags.MiscFlags.linkerAlwaysPic
1004 ? 0
1005 : TRY_MAP_32BIT;
1006 static uint32_t fixed = 0;
1007
1008 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1009 size = roundUpToPage(bytes);
1010
1011 mmap_again:
1012
1013 if (mmap_32bit_base != 0) {
1014 map_addr = mmap_32bit_base;
1015 }
1016
1017 IF_DEBUG(linker,
1018 debugBelch("mmapForLinker: \tprotection %#0x\n",
1019 PROT_EXEC | PROT_READ | PROT_WRITE));
1020 IF_DEBUG(linker,
1021 debugBelch("mmapForLinker: \tflags %#0x\n",
1022 MAP_PRIVATE | tryMap32Bit | fixed | flags));
1023
1024 result = mmap(map_addr, size,
1025 PROT_EXEC|PROT_READ|PROT_WRITE,
1026 MAP_PRIVATE|tryMap32Bit|fixed|flags, fd, offset);
1027
1028 if (result == MAP_FAILED) {
1029 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1030 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1031 return NULL;
1032 }
1033
1034 #if defined(x86_64_HOST_ARCH)
1035 if (RtsFlags.MiscFlags.linkerAlwaysPic) {
1036 } else if (mmap_32bit_base != 0) {
1037 if (result == map_addr) {
1038 mmap_32bit_base = (StgWord8*)map_addr + size;
1039 } else {
1040 if ((W_)result > 0x80000000) {
1041 // oops, we were given memory over 2Gb
1042 munmap(result,size);
1043 #if defined(freebsd_HOST_OS) || \
1044 defined(kfreebsdgnu_HOST_OS) || \
1045 defined(dragonfly_HOST_OS)
1046 // Some platforms require MAP_FIXED. This is normally
1047 // a bad idea, because MAP_FIXED will overwrite
1048 // existing mappings.
1049 fixed = MAP_FIXED;
1050 goto mmap_again;
1051 #else
1052 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1053 "asked for %lu bytes at %p. "
1054 "Try specifying an address with +RTS -xm<addr> -RTS",
1055 size, map_addr);
1056 return NULL;
1057 #endif
1058 } else {
1059 // hmm, we were given memory somewhere else, but it's
1060 // still under 2Gb so we can use it. Next time, ask
1061 // for memory right after the place we just got some
1062 mmap_32bit_base = (StgWord8*)result + size;
1063 }
1064 }
1065 } else {
1066 if ((W_)result > 0x80000000) {
1067 // oops, we were given memory over 2Gb
1068 // ... try allocating memory somewhere else?;
1069 debugTrace(DEBUG_linker,
1070 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1071 bytes, result);
1072 munmap(result, size);
1073
1074 // Set a base address and try again... (guess: 1Gb)
1075 mmap_32bit_base = (void*)0x40000000;
1076 goto mmap_again;
1077 }
1078 }
1079 #endif
1080
1081 IF_DEBUG(linker,
1082 debugBelch("mmapForLinker: mapped %" FMT_Word
1083 " bytes starting at %p\n", (W_)size, result));
1084 IF_DEBUG(linker,
1085 debugBelch("mmapForLinker: done\n"));
1086
1087 return result;
1088 }
1089 #endif
1090
1091 /*
1092 * Remove symbols from the symbol table, and free oc->symbols.
1093 * This operation is idempotent.
1094 */
1095 static void removeOcSymbols (ObjectCode *oc)
1096 {
1097 if (oc->symbols == NULL) return;
1098
1099 // Remove all the mappings for the symbols within this object..
1100 int i;
1101 for (i = 0; i < oc->n_symbols; i++) {
1102 if (oc->symbols[i] != NULL) {
1103 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1104 }
1105 }
1106
1107 stgFree(oc->symbols);
1108 oc->symbols = NULL;
1109 }
1110
1111 /*
1112 * Release StablePtrs and free oc->stable_ptrs.
1113 * This operation is idempotent.
1114 */
1115 static void freeOcStablePtrs (ObjectCode *oc)
1116 {
1117 // Release any StablePtrs that were created when this
1118 // object module was initialized.
1119 ForeignExportStablePtr *fe_ptr, *next;
1120
1121 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1122 next = fe_ptr->next;
1123 freeStablePtr(fe_ptr->stable_ptr);
1124 stgFree(fe_ptr);
1125 }
1126 oc->stable_ptrs = NULL;
1127 }
1128
1129 static void
1130 freePreloadObjectFile (ObjectCode *oc)
1131 {
1132 #if defined(mingw32_HOST_OS)
1133 freePreloadObjectFile_PEi386(oc);
1134 #else
1135
1136 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1137 munmap(oc->image, oc->fileSize);
1138 }
1139 else {
1140 stgFree(oc->image);
1141 }
1142
1143 #endif
1144
1145 oc->image = NULL;
1146 oc->fileSize = 0;
1147 }
1148
1149 /*
1150 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1151 * the GC when a previously unloaded ObjectCode has been determined to be
1152 * unused, and when an error occurs during loadObj().
1153 */
1154 void freeObjectCode (ObjectCode *oc)
1155 {
1156 freePreloadObjectFile(oc);
1157
1158 if (oc->symbols != NULL) {
1159 stgFree(oc->symbols);
1160 oc->symbols = NULL;
1161 }
1162
1163 if (oc->extraInfos != NULL) {
1164 freeHashTable(oc->extraInfos, NULL);
1165 oc->extraInfos = NULL;
1166 }
1167
1168 if (oc->sections != NULL) {
1169 int i;
1170 for (i=0; i < oc->n_sections; i++) {
1171 if (oc->sections[i].start != NULL) {
1172 switch(oc->sections[i].alloc){
1173 #if RTS_LINKER_USE_MMAP
1174 case SECTION_MMAP:
1175 munmap(oc->sections[i].mapped_start,
1176 oc->sections[i].mapped_size);
1177 break;
1178 case SECTION_M32:
1179 m32_free(oc->sections[i].start,
1180 oc->sections[i].size);
1181 break;
1182 #endif
1183 case SECTION_MALLOC:
1184 stgFree(oc->sections[i].start);
1185 break;
1186 default:
1187 break;
1188 }
1189 }
1190 if (oc->sections[i].info) {
1191 stgFree(oc->sections[i].info);
1192 }
1193 }
1194 stgFree(oc->sections);
1195 }
1196
1197 freeProddableBlocks(oc);
1198
1199 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1200 * alongside the image, so we don't need to free. */
1201 #if defined(NEED_SYMBOL_EXTRAS) && (!defined(x86_64_HOST_ARCH) \
1202 || !defined(mingw32_HOST_OS))
1203 if (RTS_LINKER_USE_MMAP) {
1204 if (!USE_CONTIGUOUS_MMAP && !RtsFlags.MiscFlags.linkerAlwaysPic &&
1205 oc->symbol_extras != NULL) {
1206 m32_free(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
1207 }
1208 }
1209 else {
1210 stgFree(oc->symbol_extras);
1211 }
1212 #endif
1213
1214 #if defined(OBJECTFORMAT_MACHO)
1215 ocDeinit_MachO(oc);
1216 #endif
1217 #if defined(OBJFORMAT_ELF)
1218 ocDeinit_ELF(oc);
1219 #endif
1220
1221 stgFree(oc->fileName);
1222 stgFree(oc->archiveMemberName);
1223
1224 stgFree(oc);
1225 }
1226
1227 /* -----------------------------------------------------------------------------
1228 * Sets the initial status of a fresh ObjectCode
1229 */
1230 static void setOcInitialStatus(ObjectCode* oc) {
1231 /* If a target has requested the ObjectCode not to be resolved then
1232 honor this requests. Usually this means the ObjectCode has not been
1233 initialized and can't be. */
1234 if (oc->status == OBJECT_DONT_RESOLVE)
1235 return;
1236
1237 if (oc->archiveMemberName == NULL) {
1238 oc->status = OBJECT_NEEDED;
1239 } else {
1240 oc->status = OBJECT_LOADED;
1241 }
1242 }
1243
1244 ObjectCode*
1245 mkOc( pathchar *path, char *image, int imageSize,
1246 bool mapped, char *archiveMemberName, int misalignment ) {
1247 ObjectCode* oc;
1248
1249 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1250 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1251
1252 oc->info = NULL;
1253
1254 # if defined(OBJFORMAT_ELF)
1255 oc->formatName = "ELF";
1256 # elif defined(OBJFORMAT_PEi386)
1257 oc->formatName = "PEi386";
1258 # elif defined(OBJFORMAT_MACHO)
1259 oc->formatName = "Mach-O";
1260 # else
1261 stgFree(oc);
1262 barf("loadObj: not implemented on this platform");
1263 # endif
1264
1265 oc->image = image;
1266 oc->fileName = pathdup(path);
1267
1268 if (archiveMemberName) {
1269 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1,
1270 "loadObj" );
1271 strcpy(oc->archiveMemberName, archiveMemberName);
1272 } else {
1273 oc->archiveMemberName = NULL;
1274 }
1275
1276 setOcInitialStatus( oc );
1277
1278 oc->fileSize = imageSize;
1279 oc->symbols = NULL;
1280 oc->n_sections = 0;
1281 oc->sections = NULL;
1282 oc->proddables = NULL;
1283 oc->stable_ptrs = NULL;
1284 #if defined(NEED_SYMBOL_EXTRAS)
1285 oc->symbol_extras = NULL;
1286 #endif
1287 oc->imageMapped = mapped;
1288
1289 oc->misalignment = misalignment;
1290 oc->extraInfos = NULL;
1291
1292 /* chain it onto the list of objects */
1293 oc->next = NULL;
1294
1295 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1296 return oc;
1297 }
1298
1299 /* -----------------------------------------------------------------------------
1300 * Check if an object or archive is already loaded.
1301 *
1302 * Returns: 1 if the path is already loaded, 0 otherwise.
1303 */
1304 HsInt
1305 isAlreadyLoaded( pathchar *path )
1306 {
1307 ObjectCode *o;
1308 for (o = objects; o; o = o->next) {
1309 if (0 == pathcmp(o->fileName, path)) {
1310 return 1; /* already loaded */
1311 }
1312 }
1313 return 0; /* not loaded yet */
1314 }
1315
1316 //
1317 // Load the object file into memory. This will not be its final resting place,
1318 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
1319 // address space, properly aligned.
1320 //
1321 static ObjectCode *
1322 preloadObjectFile (pathchar *path)
1323 {
1324 int fileSize;
1325 struct_stat st;
1326 int r;
1327 void *image;
1328 ObjectCode *oc;
1329 int misalignment = 0;
1330
1331 r = pathstat(path, &st);
1332 if (r == -1) {
1333 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
1334 return NULL;
1335 }
1336
1337 fileSize = st.st_size;
1338
1339 #if RTS_LINKER_USE_MMAP
1340 int fd;
1341
1342 /* On many architectures malloc'd memory isn't executable, so we need to use
1343 * mmap. */
1344
1345 #if defined(openbsd_HOST_OS)
1346 fd = open(path, O_RDONLY, S_IRUSR);
1347 #else
1348 fd = open(path, O_RDONLY);
1349 #endif
1350 if (fd == -1) {
1351 errorBelch("loadObj: can't open %s", path);
1352 return NULL;
1353 }
1354
1355 /* iOS does not permit to mmap with r+w+x, however while the comment for
1356 * this function says this is not the final resting place, for some
1357 * architectures / hosts (at least mach-o non-iOS -- see ocGetNames_MachO)
1358 * the image mmaped here in fact ends up being the final resting place for
1359 * the sections. And hence we need to leave r+w+x here for other hosts
1360 * until all hosts have been made aware of the initial image being r+w only.
1361 *
1362 * See also the misalignment logic for darwin below.
1363 */
1364 #if defined(ios_HOST_OS)
1365 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
1366 #else
1367 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
1368 MAP_PRIVATE, fd, 0);
1369 #endif
1370
1371 if (image == MAP_FAILED) {
1372 errorBelch("mmap: failed. errno = %d", errno);
1373 }
1374 // not 32-bit yet, we'll remap later
1375 close(fd);
1376
1377 #else /* !RTS_LINKER_USE_MMAP */
1378 FILE *f;
1379
1380 /* load the image into memory */
1381 /* coverity[toctou] */
1382 f = pathopen(path, WSTR("rb"));
1383 if (!f) {
1384 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
1385 return NULL;
1386 }
1387
1388 # if defined(darwin_HOST_OS)
1389
1390 // In a Mach-O .o file, all sections can and will be misaligned
1391 // if the total size of the headers is not a multiple of the
1392 // desired alignment. This is fine for .o files that only serve
1393 // as input for the static linker, but it's not fine for us,
1394 // as SSE (used by gcc for floating point) and Altivec require
1395 // 16-byte alignment.
1396 // We calculate the correct alignment from the header before
1397 // reading the file, and then we misalign image on purpose so
1398 // that the actual sections end up aligned again.
1399 misalignment = machoGetMisalignment(f);
1400 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
1401 image += misalignment;
1402
1403 # else /* !defined(darwin_HOST_OS) */
1404
1405 image = stgMallocBytes(fileSize, "loadObj(image)");
1406
1407 #endif
1408
1409 int n;
1410 n = fread ( image, 1, fileSize, f );
1411 fclose(f);
1412 if (n != fileSize) {
1413 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
1414 stgFree(image);
1415 return NULL;
1416 }
1417
1418 #endif /* RTS_LINKER_USE_MMAP */
1419
1420 oc = mkOc(path, image, fileSize, true, NULL, misalignment);
1421
1422 #if defined(OBJFORMAT_MACHO)
1423 if (ocVerifyImage_MachO( oc ))
1424 ocInit_MachO( oc );
1425 #endif
1426 #if defined(OBJFORMAT_ELF)
1427 if(ocVerifyImage_ELF( oc ))
1428 ocInit_ELF( oc );
1429 #endif
1430 return oc;
1431 }
1432
1433 /* -----------------------------------------------------------------------------
1434 * Load an obj (populate the global symbol table, but don't resolve yet)
1435 *
1436 * Returns: 1 if ok, 0 on error.
1437 */
1438 static HsInt loadObj_ (pathchar *path)
1439 {
1440 ObjectCode* oc;
1441 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
1442
1443 /* debugBelch("loadObj %s\n", path ); */
1444
1445 /* Check that we haven't already loaded this object.
1446 Ignore requests to load multiple times */
1447
1448 if (isAlreadyLoaded(path)) {
1449 IF_DEBUG(linker,
1450 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1451 return 1; /* success */
1452 }
1453
1454 oc = preloadObjectFile(path);
1455 if (oc == NULL) return 0;
1456
1457 if (! loadOc(oc)) {
1458 // failed; free everything we've allocated
1459 removeOcSymbols(oc);
1460 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
1461 freeObjectCode(oc);
1462 return 0;
1463 }
1464
1465 oc->next = objects;
1466 objects = oc;
1467 return 1;
1468 }
1469
1470 HsInt loadObj (pathchar *path)
1471 {
1472 ACQUIRE_LOCK(&linker_mutex);
1473 HsInt r = loadObj_(path);
1474 RELEASE_LOCK(&linker_mutex);
1475 return r;
1476 }
1477
1478 HsInt loadOc (ObjectCode* oc)
1479 {
1480 int r;
1481
1482 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
1483
1484 /* verify the in-memory image */
1485 # if defined(OBJFORMAT_ELF)
1486 r = ocVerifyImage_ELF ( oc );
1487 # elif defined(OBJFORMAT_PEi386)
1488 r = ocVerifyImage_PEi386 ( oc );
1489 # elif defined(OBJFORMAT_MACHO)
1490 r = ocVerifyImage_MachO ( oc );
1491 # else
1492 barf("loadObj: no verify method");
1493 # endif
1494 if (!r) {
1495 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
1496 return r;
1497 }
1498
1499 /* Note [loadOc orderings]
1500 The order of `ocAllocateSymbolExtras` and `ocGetNames` matters. For MachO
1501 and ELF, `ocInit` and `ocGetNames` initialize a bunch of pointers based
1502 on the offset to `oc->image`, but `ocAllocateSymbolExtras` may relocate
1503 the address of `oc->image` and invalidate those pointers. So we must
1504 compute or recompute those pointers after `ocAllocateSymbolExtras`.
1505
1506 On Windows, when we have an import library we (for now, as we don't honor
1507 the lazy loading semantics of the library and instead GHCi is already
1508 lazy) don't use the library after ocGetNames as it just populates the
1509 symbol table. Allocating space for jump tables in ocAllocateSymbolExtras
1510 would just be a waste then as we'll be stopping further processing of the
1511 library in the next few steps. If necessary, the actual allocation
1512 happens in `ocGetNames_PEi386` and `ocAllocateSymbolExtras_PEi386` simply
1513 set the correct pointers.
1514 */
1515
1516 #if defined(NEED_SYMBOL_EXTRAS)
1517 # if defined(OBJFORMAT_MACHO)
1518 r = ocAllocateSymbolExtras_MachO ( oc );
1519 if (!r) {
1520 IF_DEBUG(linker,
1521 debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
1522 return r;
1523 }
1524 # elif defined(OBJFORMAT_ELF)
1525 r = ocAllocateSymbolExtras_ELF ( oc );
1526 if (!r) {
1527 IF_DEBUG(linker,
1528 debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
1529 return r;
1530 }
1531 # endif
1532
1533 /* build the symbol list for this image */
1534 # if defined(OBJFORMAT_ELF)
1535 r = ocGetNames_ELF ( oc );
1536 # elif defined(OBJFORMAT_PEi386)
1537 r = ocGetNames_PEi386 ( oc );
1538 # elif defined(OBJFORMAT_MACHO)
1539 r = ocGetNames_MachO ( oc );
1540 # else
1541 barf("loadObj: no getNames method");
1542 # endif
1543 if (!r) {
1544 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
1545 return r;
1546 }
1547
1548 # if defined(OBJFORMAT_PEi386)
1549 ocAllocateSymbolExtras_PEi386 ( oc );
1550 # endif
1551 #endif
1552
1553 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
1554 setOcInitialStatus( oc );
1555 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
1556
1557 return 1;
1558 }
1559
1560 /* -----------------------------------------------------------------------------
1561 * try to load and initialize an ObjectCode into memory
1562 *
1563 * Returns: 1 if ok, 0 on error.
1564 */
1565 int ocTryLoad (ObjectCode* oc) {
1566 int r;
1567
1568 if (oc->status != OBJECT_NEEDED) {
1569 return 1;
1570 }
1571
1572 /* Check for duplicate symbols by looking into `symhash`.
1573 Duplicate symbols are any symbols which exist
1574 in different ObjectCodes that have both been loaded, or
1575 are to be loaded by this call.
1576
1577 This call is intended to have no side-effects when a non-duplicate
1578 symbol is re-inserted.
1579
1580 We set the Address to NULL since that is not used to distinguish
1581 symbols. Duplicate symbols are distinguished by name and oc.
1582 */
1583 int x;
1584 SymbolName* symbol;
1585 for (x = 0; x < oc->n_symbols; x++) {
1586 symbol = oc->symbols[x];
1587 if ( symbol
1588 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL,
1589 isSymbolWeak(oc, symbol), oc)) {
1590 return 0;
1591 }
1592 }
1593
1594 # if defined(OBJFORMAT_ELF)
1595 r = ocResolve_ELF ( oc );
1596 # elif defined(OBJFORMAT_PEi386)
1597 r = ocResolve_PEi386 ( oc );
1598 # elif defined(OBJFORMAT_MACHO)
1599 r = ocResolve_MachO ( oc );
1600 # else
1601 barf("ocTryLoad: not implemented on this platform");
1602 # endif
1603 if (!r) { return r; }
1604
1605 // run init/init_array/ctors/mod_init_func
1606
1607 loading_obj = oc; // tells foreignExportStablePtr what to do
1608 #if defined(OBJFORMAT_ELF)
1609 r = ocRunInit_ELF ( oc );
1610 #elif defined(OBJFORMAT_PEi386)
1611 r = ocRunInit_PEi386 ( oc );
1612 #elif defined(OBJFORMAT_MACHO)
1613 r = ocRunInit_MachO ( oc );
1614 #else
1615 barf("ocTryLoad: initializers not implemented on this platform");
1616 #endif
1617 loading_obj = NULL;
1618
1619 if (!r) { return r; }
1620
1621 oc->status = OBJECT_RESOLVED;
1622
1623 return 1;
1624 }
1625
1626 /* -----------------------------------------------------------------------------
1627 * resolve all the currently unlinked objects in memory
1628 *
1629 * Returns: 1 if ok, 0 on error.
1630 */
1631 static HsInt resolveObjs_ (void)
1632 {
1633 ObjectCode *oc;
1634 int r;
1635
1636 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
1637
1638 for (oc = objects; oc; oc = oc->next) {
1639 r = ocTryLoad(oc);
1640 if (!r)
1641 {
1642 return r;
1643 }
1644 }
1645
1646 #if defined(PROFILING)
1647 // collect any new cost centres & CCSs that were defined during runInit
1648 initProfiling2();
1649 #endif
1650
1651 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
1652 return 1;
1653 }
1654
1655 HsInt resolveObjs (void)
1656 {
1657 ACQUIRE_LOCK(&linker_mutex);
1658 HsInt r = resolveObjs_();
1659 RELEASE_LOCK(&linker_mutex);
1660 return r;
1661 }
1662
1663 /* -----------------------------------------------------------------------------
1664 * delete an object from the pool
1665 */
1666 static HsInt unloadObj_ (pathchar *path, bool just_purge)
1667 {
1668 ObjectCode *oc, *prev, *next;
1669 HsBool unloadedAnyObj = HS_BOOL_FALSE;
1670
1671 ASSERT(symhash != NULL);
1672 ASSERT(objects != NULL);
1673
1674 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
1675
1676 prev = NULL;
1677 for (oc = objects; oc; oc = next) {
1678 next = oc->next; // oc might be freed
1679
1680 if (!pathcmp(oc->fileName,path)) {
1681
1682 // these are both idempotent, so in just_purge mode we can
1683 // later call unloadObj() to really unload the object.
1684 removeOcSymbols(oc);
1685 freeOcStablePtrs(oc);
1686
1687 if (!just_purge) {
1688 if (prev == NULL) {
1689 objects = oc->next;
1690 } else {
1691 prev->next = oc->next;
1692 }
1693 ACQUIRE_LOCK(&linker_unloaded_mutex);
1694 oc->next = unloaded_objects;
1695 unloaded_objects = oc;
1696 oc->status = OBJECT_UNLOADED;
1697 RELEASE_LOCK(&linker_unloaded_mutex);
1698 // We do not own oc any more; it can be released at any time by
1699 // the GC in checkUnload().
1700 } else {
1701 prev = oc;
1702 }
1703
1704 /* This could be a member of an archive so continue
1705 * unloading other members. */
1706 unloadedAnyObj = HS_BOOL_TRUE;
1707 } else {
1708 prev = oc;
1709 }
1710 }
1711
1712 if (unloadedAnyObj) {
1713 return 1;
1714 }
1715 else {
1716 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
1717 return 0;
1718 }
1719 }
1720
1721 HsInt unloadObj (pathchar *path)
1722 {
1723 ACQUIRE_LOCK(&linker_mutex);
1724 HsInt r = unloadObj_(path, false);
1725 RELEASE_LOCK(&linker_mutex);
1726 return r;
1727 }
1728
1729 HsInt purgeObj (pathchar *path)
1730 {
1731 ACQUIRE_LOCK(&linker_mutex);
1732 HsInt r = unloadObj_(path, true);
1733 RELEASE_LOCK(&linker_mutex);
1734 return r;
1735 }
1736
1737 static OStatus getObjectLoadStatus_ (pathchar *path)
1738 {
1739 ObjectCode *o;
1740 for (o = objects; o; o = o->next) {
1741 if (0 == pathcmp(o->fileName, path)) {
1742 return o->status;
1743 }
1744 }
1745 for (o = unloaded_objects; o; o = o->next) {
1746 if (0 == pathcmp(o->fileName, path)) {
1747 return o->status;
1748 }
1749 }
1750 return OBJECT_NOT_LOADED;
1751 }
1752
1753 OStatus getObjectLoadStatus (pathchar *path)
1754 {
1755 ACQUIRE_LOCK(&linker_mutex);
1756 OStatus r = getObjectLoadStatus_(path);
1757 RELEASE_LOCK(&linker_mutex);
1758 return r;
1759 }
1760
1761 /* -----------------------------------------------------------------------------
1762 * Sanity checking. For each ObjectCode, maintain a list of address ranges
1763 * which may be prodded during relocation, and abort if we try and write
1764 * outside any of these.
1765 */
1766 void
1767 addProddableBlock ( ObjectCode* oc, void* start, int size )
1768 {
1769 ProddableBlock* pb
1770 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
1771
1772 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
1773 ASSERT(size > 0);
1774 pb->start = start;
1775 pb->size = size;
1776 pb->next = oc->proddables;
1777 oc->proddables = pb;
1778 }
1779
1780 void
1781 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
1782 {
1783 ProddableBlock* pb;
1784
1785 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
1786 char* s = (char*)(pb->start);
1787 char* e = s + pb->size;
1788 char* a = (char*)addr;
1789 if (a >= s && (a+size) <= e) return;
1790 }
1791 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
1792 }
1793
1794 void freeProddableBlocks (ObjectCode *oc)
1795 {
1796 ProddableBlock *pb, *next;
1797
1798 for (pb = oc->proddables; pb != NULL; pb = next) {
1799 next = pb->next;
1800 stgFree(pb);
1801 }
1802 oc->proddables = NULL;
1803 }
1804
1805 /* -----------------------------------------------------------------------------
1806 * Section management.
1807 */
1808 void
1809 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
1810 void* start, StgWord size, StgWord mapped_offset,
1811 void* mapped_start, StgWord mapped_size)
1812 {
1813 s->start = start; /* actual start of section in memory */
1814 s->size = size; /* actual size of section in memory */
1815 s->kind = kind;
1816 s->alloc = alloc;
1817 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
1818
1819 s->mapped_start = mapped_start; /* start of mmap() block */
1820 s->mapped_size = mapped_size; /* size of mmap() block */
1821
1822 if (!s->info)
1823 s->info
1824 = (struct SectionFormatInfo*)stgCallocBytes(1, sizeof *s->info,
1825 "addSection(SectionFormatInfo)");
1826
1827 IF_DEBUG(linker,
1828 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
1829 start, (void*)((StgWord)start + size),
1830 size, kind ));
1831 }