Remove redundant SOURCE import
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "StablePtr.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "linker/CacheFlush.h"
32 #include "linker/SymbolExtras.h"
33 #include "PathUtils.h"
34
35 #if !defined(mingw32_HOST_OS)
36 #include "posix/Signals.h"
37 #endif
38
39 // get protos for is*()
40 #include <ctype.h>
41
42 #if defined(HAVE_SYS_TYPES_H)
43 #include <sys/types.h>
44 #endif
45
46 #include <inttypes.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <assert.h>
51 #include <fs_rts.h>
52
53 #if defined(HAVE_SYS_STAT_H)
54 #include <sys/stat.h>
55 #endif
56
57 #if defined(HAVE_DLFCN_H)
58 #include <dlfcn.h>
59 #endif
60
61 #if defined(OBJFORMAT_ELF)
62 # include "linker/Elf.h"
63 # include <regex.h> // regex is already used by dlopen() so this is OK
64 // to use here without requiring an additional lib
65 #elif defined(OBJFORMAT_PEi386)
66 # include "linker/PEi386.h"
67 # include <windows.h>
68 #elif defined(OBJFORMAT_MACHO)
69 # include "linker/MachO.h"
70 # include <regex.h>
71 # include <mach/machine.h>
72 # include <mach-o/fat.h>
73 #endif
74
75 #if defined(dragonfly_HOST_OS)
76 #include <sys/tls.h>
77 #endif
78 /*
79 Note [runtime-linker-support]
80 -----------------------------
81 When adding support for a new platform to the runtime linker please
82 update `$TOP/configure.ac` under heading `Does target have runtime
83 linker support?`.
84 */
85 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
86 This hashtable will contain information on all symbols
87 that we know of, however the .o they are in may not be loaded.
88
89 Until the ObjectCode the symbol belongs to is actually
90 loaded this symbol may be replaced. So do not rely on
91 addresses of unloaded symbols.
92
93 Note [runtime-linker-phases]
94 --------------------------------------
95 Broadly the behavior of the runtime linker can be
96 split into the following four phases:
97
98 - Indexing (e.g. ocVerifyImage and ocGetNames)
99 - Initialization (e.g. ocResolve and ocRunInit)
100 - Resolve (e.g. resolveObjs())
101 - Lookup (e.g. lookupSymbol)
102
103 This is to enable lazy loading of symbols. Eager loading is problematic
104 as it means that all symbols must be available, even those which we will
105 never use. This is especially painful on Windows, where the number of
106 libraries required to link things like mingwex grows to be quite high.
107
108 We proceed through these stages as follows,
109
110 * During Indexing we verify and open the ObjectCode and
111 perform a quick scan/indexing of the ObjectCode. All the work
112 required to actually load the ObjectCode is done.
113
114 All symbols from the ObjectCode are also inserted into
115 `symhash`, where possible duplicates are handled via the semantics
116 described in `ghciInsertSymbolTable`.
117
118 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
119 depending on whether they are an archive member or not.
120
121 * During initialization we load ObjectCode, perform relocations, execute
122 static constructors etc. This phase may trigger other ObjectCodes to
123 be loaded because of the calls to lookupSymbol.
124
125 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
126 previous status was `OBJECT_LOADED`.
127
128 * During resolve we attempt to resolve all the symbols needed for the
129 initial link. This essentially means, that for any ObjectCode given
130 directly to the command-line we perform lookupSymbols on the required
131 symbols. lookupSymbols may trigger the loading of additional ObjectCode
132 if required.
133
134 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
135 the previous status was `OBJECT_NEEDED`.
136
137 * lookupSymbols is used to lookup any symbols required, both during initial
138 link and during statement and expression compilations in the REPL.
139 Declaration of e.g. a foreign import, will eventually call lookupSymbol
140 which will either fail (symbol unknown) or succeed (and possibly trigger a
141 load).
142
143 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
144
145 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
146 by calling unloadObj and starting over.
147 When a new declaration or statement is performed ultimately lookupSymbol is called
148 without doing a re-link.
149
150 The goal of these different phases is to allow the linker to be able to perform
151 "lazy loading" of ObjectCode. The reason for this is that we want to only link
152 in symbols that are actually required for the link. This reduces:
153
154 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
155 then we don't need to load libB. This means the dependency chain for libraries
156 such as mingw32 and mingwex can be broken down.
157
158 2) The number of duplicate symbols, since now only symbols that are
159 true duplicates will display the error.
160 */
161 /*Str*/HashTable *symhash;
162
163 /* List of currently loaded objects */
164 ObjectCode *objects = NULL; /* initially empty */
165
166 /* List of objects that have been unloaded via unloadObj(), but are waiting
167 to be actually freed via checkUnload() */
168 ObjectCode *unloaded_objects = NULL; /* initially empty */
169
170 #if defined(THREADED_RTS)
171 /* This protects all the Linker's global state except unloaded_objects */
172 Mutex linker_mutex;
173 /*
174 * This protects unloaded_objects. We have a separate mutex for this, because
175 * the GC needs to access unloaded_objects in checkUnload, while the linker only
176 * needs to access unloaded_objects in unloadObj(), so this allows most linker
177 * operations proceed concurrently with the GC.
178 */
179 Mutex linker_unloaded_mutex;
180 #endif
181
182 /* Generic wrapper function to try and Resolve and RunInit oc files */
183 int ocTryLoad( ObjectCode* oc );
184
185 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
186 * small memory model on this architecture (see gcc docs,
187 * -mcmodel=small).
188 *
189 * MAP_32BIT not available on OpenBSD/amd64
190 */
191 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
192 #define TRY_MAP_32BIT MAP_32BIT
193 #else
194 #define TRY_MAP_32BIT 0
195 #endif
196
197 /*
198 * Due to the small memory model (see above), on x86_64 we have to map
199 * all our non-PIC object files into the low 2Gb of the address space
200 * (why 2Gb and not 4Gb? Because all addresses must be reachable
201 * using a 32-bit signed PC-relative offset). On Linux we can do this
202 * using the MAP_32BIT flag to mmap(), however on other OSs
203 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
204 * can't do this. So on these systems, we have to pick a base address
205 * in the low 2Gb of the address space and try to allocate memory from
206 * there.
207 *
208 * We pick a default address based on the OS, but also make this
209 * configurable via an RTS flag (+RTS -xm)
210 */
211 #if defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
212 // Try to use MAP_32BIT
213 #define MMAP_32BIT_BASE_DEFAULT 0
214 #else
215 // A guess: 1Gb.
216 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
217 #endif
218
219 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
220
221 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
222 ObjectCode *owner)
223 {
224 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
225 if (!pinfo || owner != pinfo->owner) return;
226 removeStrHashTable(table, key, NULL);
227 if (isSymbolImport (owner, key))
228 stgFree(pinfo->value);
229
230 stgFree(pinfo);
231 }
232
233 /* -----------------------------------------------------------------------------
234 * Insert symbols into hash tables, checking for duplicates.
235 *
236 * Returns: 0 on failure, nonzero on success
237 */
238 /*
239 Note [weak-symbols-support]
240 -------------------------------------
241 While ghciInsertSymbolTable does implement extensive
242 logic for weak symbol support, weak symbols are not currently
243 fully supported by the RTS. This code is mostly here for COMDAT
244 support which uses the weak symbols support.
245
246 Linking weak symbols defined purely in C code with other C code
247 should also work, probably. Observing weak symbols in Haskell
248 won't.
249
250 Some test have been written for weak symbols but have been disabled
251 mostly because it's unsure how the weak symbols support should look.
252 See Trac #11223
253 */
254 int ghciInsertSymbolTable(
255 pathchar* obj_name,
256 HashTable *table,
257 const SymbolName* key,
258 SymbolAddr* data,
259 HsBool weak,
260 ObjectCode *owner)
261 {
262 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
263 if (!pinfo) /* new entry */
264 {
265 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
266 pinfo->value = data;
267 pinfo->owner = owner;
268 pinfo->weak = weak;
269 insertStrHashTable(table, key, pinfo);
270 return 1;
271 }
272 else if (weak && data && pinfo->weak && !pinfo->value)
273 {
274 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
275 pinfo->value = data;
276 pinfo->owner = owner;
277 return 1;
278 }
279 else if (weak)
280 {
281 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
282 already know of another copy throw this one away.
283
284 or both weak symbols have a nonzero value. Keep the existing one.
285
286 This also preserves the semantics of linking against
287 the first symbol we find. */
288 }
289 else if (pinfo->weak && !weak) /* weak symbol is in the table */
290 {
291 /* override the weak definition with the non-weak one */
292 pinfo->value = data;
293 pinfo->owner = owner;
294 pinfo->weak = HS_BOOL_FALSE;
295 return 1;
296 }
297 else if ( pinfo->owner
298 && pinfo->owner->status != OBJECT_RESOLVED
299 && pinfo->owner->status != OBJECT_NEEDED)
300 {
301 /* If the other symbol hasn't been loaded or will be loaded and we want to
302 explicitly load the new one, we can just swap it out and load the one
303 that has been requested. If not, just keep the first one encountered.
304
305 Because the `symHash' table consists symbols we've also not loaded but
306 found during the initial scan this is safe to do. If however the existing
307 symbol has been loaded then it means we have a duplicate.
308
309 This is essentially emulating the behavior of a linker wherein it will always
310 link in object files that are .o file arguments, but only take object files
311 from archives as needed. */
312 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
313 pinfo->value = data;
314 pinfo->owner = owner;
315 pinfo->weak = weak;
316 }
317
318 return 1;
319 }
320 else if (pinfo->owner == owner)
321 {
322 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
323 return 1;
324 }
325 else if (owner && owner->status == OBJECT_LOADED)
326 {
327 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
328 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
329 call this function again to trigger the duplicate error. */
330 return 1;
331 }
332
333 pathchar* archiveName = NULL;
334 debugBelch(
335 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
336 " %s\n"
337 "whilst processing object file\n"
338 " %" PATH_FMT "\n"
339 "The symbol was previously defined in\n"
340 " %" PATH_FMT "\n"
341 "This could be caused by:\n"
342 " * Loading two different object files which export the same symbol\n"
343 " * Specifying the same object file twice on the GHCi command line\n"
344 " * An incorrect `package.conf' entry, causing some object to be\n"
345 " loaded twice.\n",
346 (char*)key,
347 obj_name,
348 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
349 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
350 : pinfo->owner->fileName
351 );
352
353 if (archiveName)
354 {
355 stgFree(archiveName);
356 archiveName = NULL;
357 }
358 return 0;
359 }
360
361 /* -----------------------------------------------------------------------------
362 * Looks up symbols into hash tables.
363 *
364 * Returns: 0 on failure and result is not set,
365 * nonzero on success and result set to nonzero pointer
366 */
367 HsBool ghciLookupSymbolInfo(HashTable *table,
368 const SymbolName* key, RtsSymbolInfo **result)
369 {
370 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
371 if (!pinfo) {
372 *result = NULL;
373 return HS_BOOL_FALSE;
374 }
375 if (pinfo->weak)
376 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
377 /* Once it's looked up, it can no longer be overridden */
378 pinfo->weak = HS_BOOL_FALSE;
379
380 *result = pinfo;
381 return HS_BOOL_TRUE;
382 }
383
384 /* -----------------------------------------------------------------------------
385 * initialize the object linker
386 */
387
388
389 static int linker_init_done = 0 ;
390
391 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
392 static void *dl_prog_handle;
393 static regex_t re_invalid;
394 static regex_t re_realso;
395 #if defined(THREADED_RTS)
396 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
397 #endif
398 #endif
399
400 void initLinker (void)
401 {
402 // default to retaining CAFs for backwards compatibility. Most
403 // users will want initLinker_(0): otherwise unloadObj() will not
404 // be able to unload object files when they contain CAFs.
405 initLinker_(1);
406 }
407
408 void
409 initLinker_ (int retain_cafs)
410 {
411 RtsSymbolVal *sym;
412 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
413 int compileResult;
414 #endif
415
416 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
417
418 /* Make initLinker idempotent, so we can call it
419 before every relevant operation; that means we
420 don't need to initialise the linker separately */
421 if (linker_init_done == 1) {
422 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
423 return;
424 } else {
425 linker_init_done = 1;
426 }
427
428 objects = NULL;
429 unloaded_objects = NULL;
430
431 #if defined(THREADED_RTS)
432 initMutex(&linker_mutex);
433 initMutex(&linker_unloaded_mutex);
434 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
435 initMutex(&dl_mutex);
436 #endif
437 #endif
438
439 symhash = allocStrHashTable();
440
441 /* populate the symbol table with stuff from the RTS */
442 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
443 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
444 symhash, sym->lbl, sym->addr,
445 sym->weak, NULL)) {
446 barf("ghciInsertSymbolTable failed");
447 }
448 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
449 }
450 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
451 machoInitSymbolsWithoutUnderscore();
452 # endif
453 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
454 referenced from a statically linked module. We need to mimic this, but
455 we cannot use NULL because we use it to mean nonexistent symbols. So we
456 use an arbitrary (hopefully unique) address here.
457 */
458 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
459 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
460 barf("ghciInsertSymbolTable failed");
461 }
462
463 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
464 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
465 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
466 retain_cafs ? newRetainedCAF : newGCdCAF,
467 HS_BOOL_FALSE, NULL)) {
468 barf("ghciInsertSymbolTable failed");
469 }
470
471 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
472 # if defined(RTLD_DEFAULT)
473 dl_prog_handle = RTLD_DEFAULT;
474 # else
475 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
476 # endif /* RTLD_DEFAULT */
477
478 compileResult = regcomp(&re_invalid,
479 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short|invalid file format)",
480 REG_EXTENDED);
481 if (compileResult != 0) {
482 barf("Compiling re_invalid failed");
483 }
484 compileResult = regcomp(&re_realso,
485 "(GROUP|INPUT) *\\( *([^ )]+)",
486 REG_EXTENDED);
487 if (compileResult != 0) {
488 barf("Compiling re_realso failed");
489 }
490 # endif
491
492 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
493 // User-override for mmap_32bit_base
494 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
495 }
496
497 if (RTS_LINKER_USE_MMAP)
498 m32_allocator_init();
499
500 #if defined(OBJFORMAT_PEi386)
501 initLinker_PEi386();
502 #endif
503
504 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
505 return;
506 }
507
508 void
509 exitLinker( void ) {
510 #if defined(OBJFORMAT_PEi386)
511 exitLinker_PEi386();
512 #endif
513 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
514 if (linker_init_done == 1) {
515 regfree(&re_invalid);
516 regfree(&re_realso);
517 #if defined(THREADED_RTS)
518 closeMutex(&dl_mutex);
519 #endif
520 }
521 #endif
522 if (linker_init_done == 1) {
523 freeHashTable(symhash, free);
524 }
525 #if defined(THREADED_RTS)
526 closeMutex(&linker_mutex);
527 #endif
528 }
529
530 /* -----------------------------------------------------------------------------
531 * Loading DLL or .so dynamic libraries
532 * -----------------------------------------------------------------------------
533 *
534 * Add a DLL from which symbols may be found. In the ELF case, just
535 * do RTLD_GLOBAL-style add, so no further messing around needs to
536 * happen in order that symbols in the loaded .so are findable --
537 * lookupSymbol() will subsequently see them by dlsym on the program's
538 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
539 *
540 * In the PEi386 case, open the DLLs and put handles to them in a
541 * linked list. When looking for a symbol, try all handles in the
542 * list. This means that we need to load even DLLs that are guaranteed
543 * to be in the ghc.exe image already, just so we can get a handle
544 * to give to loadSymbol, so that we can find the symbols. For such
545 * libraries, the LoadLibrary call should be a no-op except for returning
546 * the handle.
547 *
548 */
549
550 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
551
552 /* Suppose in ghci we load a temporary SO for a module containing
553 f = 1
554 and then modify the module, recompile, and load another temporary
555 SO with
556 f = 2
557 Then as we don't unload the first SO, dlsym will find the
558 f = 1
559 symbol whereas we want the
560 f = 2
561 symbol. We therefore need to keep our own SO handle list, and
562 try SOs in the right order. */
563
564 typedef
565 struct _OpenedSO {
566 struct _OpenedSO* next;
567 void *handle;
568 }
569 OpenedSO;
570
571 /* A list thereof. */
572 static OpenedSO* openedSOs = NULL;
573
574 static const char *
575 internal_dlopen(const char *dll_name)
576 {
577 OpenedSO* o_so;
578 void *hdl;
579 const char *errmsg;
580 char *errmsg_copy;
581
582 // omitted: RTLD_NOW
583 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
584 IF_DEBUG(linker,
585 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
586
587 //-------------- Begin critical section ------------------
588 // This critical section is necessary because dlerror() is not
589 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
590 // Also, the error message returned must be copied to preserve it
591 // (see POSIX also)
592
593 ACQUIRE_LOCK(&dl_mutex);
594 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
595
596 errmsg = NULL;
597 if (hdl == NULL) {
598 /* dlopen failed; return a ptr to the error msg. */
599 errmsg = dlerror();
600 if (errmsg == NULL) errmsg = "addDLL: unknown error";
601 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
602 strcpy(errmsg_copy, errmsg);
603 errmsg = errmsg_copy;
604 } else {
605 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
606 o_so->handle = hdl;
607 o_so->next = openedSOs;
608 openedSOs = o_so;
609 }
610
611 RELEASE_LOCK(&dl_mutex);
612 //--------------- End critical section -------------------
613
614 return errmsg;
615 }
616
617 /*
618 Note [RTLD_LOCAL]
619
620 In GHCi we want to be able to override previous .so's with newly
621 loaded .so's when we recompile something. This further implies that
622 when we look up a symbol in internal_dlsym() we have to iterate
623 through the loaded libraries (in order from most recently loaded to
624 oldest) looking up the symbol in each one until we find it.
625
626 However, this can cause problems for some symbols that are copied
627 by the linker into the executable image at runtime - see #8935 for a
628 lengthy discussion. To solve that problem we need to look up
629 symbols in the main executable *first*, before attempting to look
630 them up in the loaded .so's. But in order to make that work, we
631 have to always call dlopen with RTLD_LOCAL, so that the loaded
632 libraries don't populate the global symbol table.
633 */
634
635 static void *
636 internal_dlsym(const char *symbol) {
637 OpenedSO* o_so;
638 void *v;
639
640 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
641 ACQUIRE_LOCK(&dl_mutex);
642 dlerror();
643 // look in program first
644 v = dlsym(dl_prog_handle, symbol);
645 if (dlerror() == NULL) {
646 RELEASE_LOCK(&dl_mutex);
647 return v;
648 }
649
650 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
651 v = dlsym(o_so->handle, symbol);
652 if (dlerror() == NULL) {
653 RELEASE_LOCK(&dl_mutex);
654 return v;
655 }
656 }
657 RELEASE_LOCK(&dl_mutex);
658 return v;
659 }
660 # endif
661
662 const char *
663 addDLL( pathchar *dll_name )
664 {
665 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
666 /* ------------------- ELF DLL loader ------------------- */
667
668 #define NMATCH 5
669 regmatch_t match[NMATCH];
670 const char *errmsg;
671 FILE* fp;
672 size_t match_length;
673 #define MAXLINE 1000
674 char line[MAXLINE];
675 int result;
676
677 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
678 errmsg = internal_dlopen(dll_name);
679
680 if (errmsg == NULL) {
681 return NULL;
682 }
683
684 // GHC Trac ticket #2615
685 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
686 // contain linker scripts rather than ELF-format object code. This
687 // code handles the situation by recognizing the real object code
688 // file name given in the linker script.
689 //
690 // If an "invalid ELF header" error occurs, it is assumed that the
691 // .so file contains a linker script instead of ELF object code.
692 // In this case, the code looks for the GROUP ( ... ) linker
693 // directive. If one is found, the first file name inside the
694 // parentheses is treated as the name of a dynamic library and the
695 // code attempts to dlopen that file. If this is also unsuccessful,
696 // an error message is returned.
697
698 // see if the error message is due to an invalid ELF header
699 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
700 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
701 IF_DEBUG(linker, debugBelch("result = %i\n", result));
702 if (result == 0) {
703 // success -- try to read the named file as a linker script
704 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
705 MAXLINE-1);
706 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
707 line[match_length] = '\0'; // make sure string is null-terminated
708 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
709 if ((fp = __rts_fopen(line, "r")) == NULL) {
710 return errmsg; // return original error if open fails
711 }
712 // try to find a GROUP or INPUT ( ... ) command
713 while (fgets(line, MAXLINE, fp) != NULL) {
714 IF_DEBUG(linker, debugBelch("input line = %s", line));
715 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
716 // success -- try to dlopen the first named file
717 IF_DEBUG(linker, debugBelch("match%s\n",""));
718 line[match[2].rm_eo] = '\0';
719 stgFree((void*)errmsg); // Free old message before creating new one
720 errmsg = internal_dlopen(line+match[2].rm_so);
721 break;
722 }
723 // if control reaches here, no GROUP or INPUT ( ... ) directive
724 // was found and the original error message is returned to the
725 // caller
726 }
727 fclose(fp);
728 }
729 return errmsg;
730
731 # elif defined(OBJFORMAT_PEi386)
732 return addDLL_PEi386(dll_name, NULL);
733
734 # else
735 barf("addDLL: not implemented on this platform");
736 # endif
737 }
738
739 /* -----------------------------------------------------------------------------
740 * Searches the system directories to determine if there is a system DLL that
741 * satisfies the given name. This prevent GHCi from linking against a static
742 * library if a DLL is available.
743 *
744 * Returns: NULL on failure or no DLL found, else the full path to the DLL
745 * that can be loaded.
746 */
747 pathchar* findSystemLibrary(pathchar* dll_name)
748 {
749 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%"
750 PATH_FMT "'\n", dll_name));
751
752 #if defined(OBJFORMAT_PEi386)
753 return findSystemLibrary_PEi386(dll_name);
754 #else
755 (void)(dll_name); // Function not implemented for other platforms.
756 return NULL;
757 #endif
758 }
759
760 /* -----------------------------------------------------------------------------
761 * Emits a warning determining that the system is missing a required security
762 * update that we need to get access to the proper APIs
763 */
764 void warnMissingKBLibraryPaths( void )
765 {
766 static HsBool missing_update_warn = HS_BOOL_FALSE;
767 if (!missing_update_warn) {
768 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
769 missing_update_warn = HS_BOOL_TRUE;
770 }
771 }
772
773 /* -----------------------------------------------------------------------------
774 * appends a directory to the process DLL Load path so LoadLibrary can find it
775 *
776 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
777 * restore the search path to what it was before this call.
778 */
779 HsPtr addLibrarySearchPath(pathchar* dll_path)
780 {
781 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%"
782 PATH_FMT "'\n", dll_path));
783
784 #if defined(OBJFORMAT_PEi386)
785 return addLibrarySearchPath_PEi386(dll_path);
786 #else
787 (void)(dll_path); // Function not implemented for other platforms.
788 return NULL;
789 #endif
790 }
791
792 /* -----------------------------------------------------------------------------
793 * removes a directory from the process DLL Load path
794 *
795 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
796 */
797 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
798 {
799 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n",
800 dll_path_index));
801
802 #if defined(OBJFORMAT_PEi386)
803 return removeLibrarySearchPath_PEi386(dll_path_index);
804 #else
805 (void)(dll_path_index); // Function not implemented for other platforms.
806 return HS_BOOL_FALSE;
807 #endif
808 }
809
810 /* -----------------------------------------------------------------------------
811 * insert a symbol in the hash table
812 *
813 * Returns: 0 on failure, nozero on success
814 */
815 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
816 {
817 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE,
818 NULL);
819 }
820
821 /* -----------------------------------------------------------------------------
822 * lookup a symbol in the hash table
823 */
824 #if defined(OBJFORMAT_PEi386)
825 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
826 {
827 return lookupSymbol_PEi386(lbl);
828 }
829
830 #else
831
832 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
833 {
834 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
835
836 ASSERT(symhash != NULL);
837 RtsSymbolInfo *pinfo;
838
839 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
840 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
841
842 # if defined(OBJFORMAT_ELF)
843 return internal_dlsym(lbl);
844 # elif defined(OBJFORMAT_MACHO)
845
846 /* HACK: On OS X, all symbols are prefixed with an underscore.
847 However, dlsym wants us to omit the leading underscore from the
848 symbol name -- the dlsym routine puts it back on before
849 searching for the symbol. For now, we simply strip it off here
850 (and ONLY here).
851 */
852 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n",
853 lbl));
854 ASSERT(lbl[0] == '_');
855 return internal_dlsym(lbl + 1);
856
857 # else
858 ASSERT(2+2 == 5);
859 return NULL;
860 # endif
861 } else {
862 return loadSymbol(lbl, pinfo);
863 }
864 }
865 #endif /* OBJFORMAT_PEi386 */
866
867 /*
868 * Load and relocate the object code for a symbol as necessary.
869 * Symbol name only used for diagnostics output.
870 */
871 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
872 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl,
873 pinfo->value));
874 ObjectCode* oc = pinfo->owner;
875
876 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
877 See Note [runtime-linker-phases] */
878 if (oc && lbl && oc->status == OBJECT_LOADED) {
879 oc->status = OBJECT_NEEDED;
880 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand "
881 "loading symbol '%s'\n", lbl));
882 int r = ocTryLoad(oc);
883 if (!r) {
884 return NULL;
885 }
886
887 #if defined(PROFILING)
888 // collect any new cost centres & CCSs
889 // that were defined during runInit
890 initProfiling2();
891 #endif
892 }
893
894 return pinfo->value;
895 }
896
897 SymbolAddr* lookupSymbol( SymbolName* lbl )
898 {
899 ACQUIRE_LOCK(&linker_mutex);
900 SymbolAddr* r = lookupSymbol_(lbl);
901 if (!r) {
902 errorBelch("^^ Could not load '%s', dependency unresolved. "
903 "See top entry above.\n", lbl);
904 fflush(stderr);
905 }
906 RELEASE_LOCK(&linker_mutex);
907 return r;
908 }
909
910 /* -----------------------------------------------------------------------------
911 Create a StablePtr for a foreign export. This is normally called by
912 a C function with __attribute__((constructor)), which is generated
913 by GHC and linked into the module.
914
915 If the object code is being loaded dynamically, then we remember
916 which StablePtrs were allocated by the constructors and free them
917 again in unloadObj().
918 -------------------------------------------------------------------------- */
919
920 static ObjectCode *loading_obj = NULL;
921
922 StgStablePtr foreignExportStablePtr (StgPtr p)
923 {
924 ForeignExportStablePtr *fe_sptr;
925 StgStablePtr *sptr;
926
927 sptr = getStablePtr(p);
928
929 if (loading_obj != NULL) {
930 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
931 "foreignExportStablePtr");
932 fe_sptr->stable_ptr = sptr;
933 fe_sptr->next = loading_obj->stable_ptrs;
934 loading_obj->stable_ptrs = fe_sptr;
935 }
936
937 return sptr;
938 }
939
940
941 /* -----------------------------------------------------------------------------
942 * Debugging aid: look in GHCi's object symbol tables for symbols
943 * within DELTA bytes of the specified address, and show their names.
944 */
945 #if defined(DEBUG)
946 void ghci_enquire ( SymbolAddr* addr );
947
948 void ghci_enquire(SymbolAddr* addr)
949 {
950 int i;
951 SymbolName* sym;
952 RtsSymbolInfo* a;
953 const int DELTA = 64;
954 ObjectCode* oc;
955
956 for (oc = objects; oc; oc = oc->next) {
957 for (i = 0; i < oc->n_symbols; i++) {
958 sym = oc->symbols[i];
959 if (sym == NULL) continue;
960 a = NULL;
961 if (a == NULL) {
962 ghciLookupSymbolInfo(symhash, sym, &a);
963 }
964 if (a == NULL) {
965 // debugBelch("ghci_enquire: can't find %s\n", sym);
966 }
967 else if ( a->value
968 && (char*)addr-DELTA <= (char*)a->value
969 && (char*)a->value <= (char*)addr+DELTA) {
970 debugBelch("%p + %3d == `%s'\n", addr,
971 (int)((char*)a->value - (char*)addr), sym);
972 }
973 }
974 }
975 }
976 #endif
977
978 pathchar*
979 resolveSymbolAddr (pathchar* buffer, int size,
980 SymbolAddr* symbol, uintptr_t* top)
981 {
982 #if defined(OBJFORMAT_PEi386)
983 return resolveSymbolAddr_PEi386 (buffer, size, symbol, top);
984 #else /* OBJFORMAT_PEi386 */
985 (void)buffer;
986 (void)size;
987 (void)symbol;
988 (void)top;
989 return NULL;
990 #endif /* OBJFORMAT_PEi386 */
991 }
992
993 #if RTS_LINKER_USE_MMAP
994 //
995 // Returns NULL on failure.
996 //
997 void *
998 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
999 {
1000 void *map_addr = NULL;
1001 void *result;
1002 size_t size;
1003 uint32_t tryMap32Bit = RtsFlags.MiscFlags.linkerAlwaysPic
1004 ? 0
1005 : TRY_MAP_32BIT;
1006 static uint32_t fixed = 0;
1007
1008 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1009 size = roundUpToPage(bytes);
1010
1011 mmap_again:
1012
1013 if (mmap_32bit_base != 0) {
1014 map_addr = mmap_32bit_base;
1015 }
1016
1017 IF_DEBUG(linker,
1018 debugBelch("mmapForLinker: \tprotection %#0x\n",
1019 PROT_EXEC | PROT_READ | PROT_WRITE));
1020 IF_DEBUG(linker,
1021 debugBelch("mmapForLinker: \tflags %#0x\n",
1022 MAP_PRIVATE | tryMap32Bit | fixed | flags));
1023
1024 result = mmap(map_addr, size,
1025 PROT_EXEC|PROT_READ|PROT_WRITE,
1026 MAP_PRIVATE|tryMap32Bit|fixed|flags, fd, offset);
1027
1028 if (result == MAP_FAILED) {
1029 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1030 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1031 return NULL;
1032 }
1033
1034 #if defined(x86_64_HOST_ARCH)
1035 if (RtsFlags.MiscFlags.linkerAlwaysPic) {
1036 } else if (mmap_32bit_base != 0) {
1037 if (result == map_addr) {
1038 mmap_32bit_base = (StgWord8*)map_addr + size;
1039 } else {
1040 if ((W_)result > 0x80000000) {
1041 // oops, we were given memory over 2Gb
1042 munmap(result,size);
1043 #if defined(freebsd_HOST_OS) || \
1044 defined(kfreebsdgnu_HOST_OS) || \
1045 defined(dragonfly_HOST_OS)
1046 // Some platforms require MAP_FIXED. This is normally
1047 // a bad idea, because MAP_FIXED will overwrite
1048 // existing mappings.
1049 fixed = MAP_FIXED;
1050 goto mmap_again;
1051 #else
1052 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1053 "asked for %lu bytes at %p. "
1054 "Try specifying an address with +RTS -xm<addr> -RTS",
1055 size, map_addr);
1056 return NULL;
1057 #endif
1058 } else {
1059 // hmm, we were given memory somewhere else, but it's
1060 // still under 2Gb so we can use it. Next time, ask
1061 // for memory right after the place we just got some
1062 mmap_32bit_base = (StgWord8*)result + size;
1063 }
1064 }
1065 } else {
1066 if ((W_)result > 0x80000000) {
1067 // oops, we were given memory over 2Gb
1068 // ... try allocating memory somewhere else?;
1069 debugTrace(DEBUG_linker,
1070 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1071 bytes, result);
1072 munmap(result, size);
1073
1074 // Set a base address and try again... (guess: 1Gb)
1075 mmap_32bit_base = (void*)0x40000000;
1076 goto mmap_again;
1077 }
1078 }
1079 #endif
1080
1081 IF_DEBUG(linker,
1082 debugBelch("mmapForLinker: mapped %" FMT_Word
1083 " bytes starting at %p\n", (W_)size, result));
1084 IF_DEBUG(linker,
1085 debugBelch("mmapForLinker: done\n"));
1086
1087 return result;
1088 }
1089 #endif
1090
1091 /*
1092 * Remove symbols from the symbol table, and free oc->symbols.
1093 * This operation is idempotent.
1094 */
1095 static void removeOcSymbols (ObjectCode *oc)
1096 {
1097 if (oc->symbols == NULL) return;
1098
1099 // Remove all the mappings for the symbols within this object..
1100 int i;
1101 for (i = 0; i < oc->n_symbols; i++) {
1102 if (oc->symbols[i] != NULL) {
1103 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1104 }
1105 }
1106
1107 stgFree(oc->symbols);
1108 oc->symbols = NULL;
1109 }
1110
1111 /*
1112 * Release StablePtrs and free oc->stable_ptrs.
1113 * This operation is idempotent.
1114 */
1115 static void freeOcStablePtrs (ObjectCode *oc)
1116 {
1117 // Release any StablePtrs that were created when this
1118 // object module was initialized.
1119 ForeignExportStablePtr *fe_ptr, *next;
1120
1121 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1122 next = fe_ptr->next;
1123 freeStablePtr(fe_ptr->stable_ptr);
1124 stgFree(fe_ptr);
1125 }
1126 oc->stable_ptrs = NULL;
1127 }
1128
1129 static void
1130 freePreloadObjectFile (ObjectCode *oc)
1131 {
1132 #if defined(mingw32_HOST_OS)
1133 freePreloadObjectFile_PEi386(oc);
1134 #else
1135
1136 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1137 munmap(oc->image, oc->fileSize);
1138 }
1139 else {
1140 stgFree(oc->image);
1141 }
1142
1143 #endif
1144
1145 oc->image = NULL;
1146 oc->fileSize = 0;
1147 }
1148
1149 /*
1150 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1151 * the GC when a previously unloaded ObjectCode has been determined to be
1152 * unused, and when an error occurs during loadObj().
1153 */
1154 void freeObjectCode (ObjectCode *oc)
1155 {
1156 freePreloadObjectFile(oc);
1157
1158 if (oc->symbols != NULL) {
1159 stgFree(oc->symbols);
1160 oc->symbols = NULL;
1161 }
1162
1163 if (oc->extraInfos != NULL) {
1164 freeHashTable(oc->extraInfos, NULL);
1165 oc->extraInfos = NULL;
1166 }
1167
1168 if (oc->sections != NULL) {
1169 int i;
1170 for (i=0; i < oc->n_sections; i++) {
1171 if (oc->sections[i].start != NULL) {
1172 switch(oc->sections[i].alloc){
1173 #if RTS_LINKER_USE_MMAP
1174 case SECTION_MMAP:
1175 munmap(oc->sections[i].mapped_start,
1176 oc->sections[i].mapped_size);
1177 break;
1178 case SECTION_M32:
1179 m32_free(oc->sections[i].start,
1180 oc->sections[i].size);
1181 break;
1182 #endif
1183 case SECTION_MALLOC:
1184 stgFree(oc->sections[i].start);
1185 break;
1186 default:
1187 break;
1188 }
1189 }
1190 if (oc->sections[i].info) {
1191 stgFree(oc->sections[i].info);
1192 }
1193 }
1194 stgFree(oc->sections);
1195 }
1196
1197 freeProddableBlocks(oc);
1198
1199 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1200 * alongside the image, so we don't need to free. */
1201 #if defined(NEED_SYMBOL_EXTRAS) && (!defined(x86_64_HOST_ARCH) \
1202 || !defined(mingw32_HOST_OS))
1203 if (RTS_LINKER_USE_MMAP) {
1204 if (!USE_CONTIGUOUS_MMAP && !RtsFlags.MiscFlags.linkerAlwaysPic &&
1205 oc->symbol_extras != NULL) {
1206 m32_free(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
1207 }
1208 }
1209 else {
1210 stgFree(oc->symbol_extras);
1211 }
1212 #endif
1213
1214 #if defined(OBJECTFORMAT_MACHO)
1215 ocDeinit_MachO(oc);
1216 #endif
1217 #if defined(OBJFORMAT_ELF)
1218 ocDeinit_ELF(oc);
1219 #endif
1220
1221 stgFree(oc->fileName);
1222 stgFree(oc->archiveMemberName);
1223
1224 stgFree(oc);
1225 }
1226
1227 /* -----------------------------------------------------------------------------
1228 * Sets the initial status of a fresh ObjectCode
1229 */
1230 static void setOcInitialStatus(ObjectCode* oc) {
1231 /* If a target has requested the ObjectCode not to be resolved then
1232 honor this requests. Usually this means the ObjectCode has not been
1233 initialized and can't be. */
1234 if (oc->status == OBJECT_DONT_RESOLVE)
1235 return;
1236
1237 if (oc->archiveMemberName == NULL) {
1238 oc->status = OBJECT_NEEDED;
1239 } else {
1240 oc->status = OBJECT_LOADED;
1241 }
1242 }
1243
1244 ObjectCode*
1245 mkOc( pathchar *path, char *image, int imageSize,
1246 bool mapped, char *archiveMemberName, int misalignment ) {
1247 ObjectCode* oc;
1248
1249 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1250 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1251
1252 oc->info = NULL;
1253
1254 # if defined(OBJFORMAT_ELF)
1255 oc->formatName = "ELF";
1256 # elif defined(OBJFORMAT_PEi386)
1257 oc->formatName = "PEi386";
1258 # elif defined(OBJFORMAT_MACHO)
1259 oc->formatName = "Mach-O";
1260 # else
1261 stgFree(oc);
1262 barf("loadObj: not implemented on this platform");
1263 # endif
1264
1265 oc->image = image;
1266 oc->fileName = pathdup(path);
1267
1268 if (archiveMemberName) {
1269 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1,
1270 "loadObj" );
1271 strcpy(oc->archiveMemberName, archiveMemberName);
1272 } else {
1273 oc->archiveMemberName = NULL;
1274 }
1275
1276 setOcInitialStatus( oc );
1277
1278 oc->fileSize = imageSize;
1279 oc->symbols = NULL;
1280 oc->n_sections = 0;
1281 oc->sections = NULL;
1282 oc->proddables = NULL;
1283 oc->stable_ptrs = NULL;
1284 #if defined(NEED_SYMBOL_EXTRAS)
1285 oc->symbol_extras = NULL;
1286 #endif
1287 oc->bssBegin = NULL;
1288 oc->bssEnd = NULL;
1289 oc->imageMapped = mapped;
1290
1291 oc->misalignment = misalignment;
1292 oc->extraInfos = NULL;
1293
1294 /* chain it onto the list of objects */
1295 oc->next = NULL;
1296
1297 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1298 return oc;
1299 }
1300
1301 /* -----------------------------------------------------------------------------
1302 * Check if an object or archive is already loaded.
1303 *
1304 * Returns: 1 if the path is already loaded, 0 otherwise.
1305 */
1306 HsInt
1307 isAlreadyLoaded( pathchar *path )
1308 {
1309 ObjectCode *o;
1310 for (o = objects; o; o = o->next) {
1311 if (0 == pathcmp(o->fileName, path)) {
1312 return 1; /* already loaded */
1313 }
1314 }
1315 return 0; /* not loaded yet */
1316 }
1317
1318 //
1319 // Load the object file into memory. This will not be its final resting place,
1320 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
1321 // address space, properly aligned.
1322 //
1323 static ObjectCode *
1324 preloadObjectFile (pathchar *path)
1325 {
1326 int fileSize;
1327 struct_stat st;
1328 int r;
1329 void *image;
1330 ObjectCode *oc;
1331 int misalignment = 0;
1332
1333 r = pathstat(path, &st);
1334 if (r == -1) {
1335 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
1336 return NULL;
1337 }
1338
1339 fileSize = st.st_size;
1340
1341 #if RTS_LINKER_USE_MMAP
1342 int fd;
1343
1344 /* On many architectures malloc'd memory isn't executable, so we need to use
1345 * mmap. */
1346
1347 #if defined(openbsd_HOST_OS)
1348 fd = open(path, O_RDONLY, S_IRUSR);
1349 #else
1350 fd = open(path, O_RDONLY);
1351 #endif
1352 if (fd == -1) {
1353 errorBelch("loadObj: can't open %s", path);
1354 return NULL;
1355 }
1356
1357 /* iOS does not permit to mmap with r+w+x, however while the comment for
1358 * this function says this is not the final resting place, for some
1359 * architectures / hosts (at least mach-o non-iOS -- see ocGetNames_MachO)
1360 * the image mmaped here in fact ends up being the final resting place for
1361 * the sections. And hence we need to leave r+w+x here for other hosts
1362 * until all hosts have been made aware of the initial image being r+w only.
1363 *
1364 * See also the misalignment logic for darwin below.
1365 */
1366 #if defined(ios_HOST_OS)
1367 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
1368 #else
1369 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
1370 MAP_PRIVATE, fd, 0);
1371 #endif
1372
1373 if (image == MAP_FAILED) {
1374 errorBelch("mmap: failed. errno = %d", errno);
1375 }
1376 // not 32-bit yet, we'll remap later
1377 close(fd);
1378
1379 #else /* !RTS_LINKER_USE_MMAP */
1380 FILE *f;
1381
1382 /* load the image into memory */
1383 /* coverity[toctou] */
1384 f = pathopen(path, WSTR("rb"));
1385 if (!f) {
1386 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
1387 return NULL;
1388 }
1389
1390 # if defined(darwin_HOST_OS)
1391
1392 // In a Mach-O .o file, all sections can and will be misaligned
1393 // if the total size of the headers is not a multiple of the
1394 // desired alignment. This is fine for .o files that only serve
1395 // as input for the static linker, but it's not fine for us,
1396 // as SSE (used by gcc for floating point) and Altivec require
1397 // 16-byte alignment.
1398 // We calculate the correct alignment from the header before
1399 // reading the file, and then we misalign image on purpose so
1400 // that the actual sections end up aligned again.
1401 misalignment = machoGetMisalignment(f);
1402 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
1403 image += misalignment;
1404
1405 # else /* !defined(darwin_HOST_OS) */
1406
1407 image = stgMallocBytes(fileSize, "loadObj(image)");
1408
1409 #endif
1410
1411 int n;
1412 n = fread ( image, 1, fileSize, f );
1413 fclose(f);
1414 if (n != fileSize) {
1415 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
1416 stgFree(image);
1417 return NULL;
1418 }
1419
1420 #endif /* RTS_LINKER_USE_MMAP */
1421
1422 oc = mkOc(path, image, fileSize, true, NULL, misalignment);
1423
1424 #if defined(OBJFORMAT_MACHO)
1425 if (ocVerifyImage_MachO( oc ))
1426 ocInit_MachO( oc );
1427 #endif
1428 #if defined(OBJFORMAT_ELF)
1429 if(ocVerifyImage_ELF( oc ))
1430 ocInit_ELF( oc );
1431 #endif
1432 return oc;
1433 }
1434
1435 /* -----------------------------------------------------------------------------
1436 * Load an obj (populate the global symbol table, but don't resolve yet)
1437 *
1438 * Returns: 1 if ok, 0 on error.
1439 */
1440 static HsInt loadObj_ (pathchar *path)
1441 {
1442 ObjectCode* oc;
1443 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
1444
1445 /* debugBelch("loadObj %s\n", path ); */
1446
1447 /* Check that we haven't already loaded this object.
1448 Ignore requests to load multiple times */
1449
1450 if (isAlreadyLoaded(path)) {
1451 IF_DEBUG(linker,
1452 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1453 return 1; /* success */
1454 }
1455
1456 oc = preloadObjectFile(path);
1457 if (oc == NULL) return 0;
1458
1459 if (! loadOc(oc)) {
1460 // failed; free everything we've allocated
1461 removeOcSymbols(oc);
1462 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
1463 freeObjectCode(oc);
1464 return 0;
1465 }
1466
1467 oc->next = objects;
1468 objects = oc;
1469 return 1;
1470 }
1471
1472 HsInt loadObj (pathchar *path)
1473 {
1474 ACQUIRE_LOCK(&linker_mutex);
1475 HsInt r = loadObj_(path);
1476 RELEASE_LOCK(&linker_mutex);
1477 return r;
1478 }
1479
1480 HsInt loadOc (ObjectCode* oc)
1481 {
1482 int r;
1483
1484 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
1485
1486 /* verify the in-memory image */
1487 # if defined(OBJFORMAT_ELF)
1488 r = ocVerifyImage_ELF ( oc );
1489 # elif defined(OBJFORMAT_PEi386)
1490 r = ocVerifyImage_PEi386 ( oc );
1491 # elif defined(OBJFORMAT_MACHO)
1492 r = ocVerifyImage_MachO ( oc );
1493 # else
1494 barf("loadObj: no verify method");
1495 # endif
1496 if (!r) {
1497 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
1498 return r;
1499 }
1500
1501 /* Note [loadOc orderings]
1502 The order of `ocAllocateExtras` and `ocGetNames` matters. For MachO
1503 and ELF, `ocInit` and `ocGetNames` initialize a bunch of pointers based
1504 on the offset to `oc->image`, but `ocAllocateExtras` may relocate
1505 the address of `oc->image` and invalidate those pointers. So we must
1506 compute or recompute those pointers after `ocAllocateExtras`.
1507
1508 On Windows, when we have an import library we (for now, as we don't honor
1509 the lazy loading semantics of the library and instead GHCi is already
1510 lazy) don't use the library after ocGetNames as it just populates the
1511 symbol table. Allocating space for jump tables in ocAllocateExtras
1512 would just be a waste then as we'll be stopping further processing of the
1513 library in the next few steps. If necessary, the actual allocation
1514 happens in `ocGetNames_PEi386` and `ocAllocateExtras_PEi386` simply
1515 set the correct pointers.
1516 */
1517
1518 #if defined(NEED_SYMBOL_EXTRAS)
1519 # if defined(OBJFORMAT_MACHO)
1520 r = ocAllocateExtras_MachO ( oc );
1521 if (!r) {
1522 IF_DEBUG(linker,
1523 debugBelch("loadOc: ocAllocateExtras_MachO failed\n"));
1524 return r;
1525 }
1526 # elif defined(OBJFORMAT_ELF)
1527 r = ocAllocateExtras_ELF ( oc );
1528 if (!r) {
1529 IF_DEBUG(linker,
1530 debugBelch("loadOc: ocAllocateExtras_ELF failed\n"));
1531 return r;
1532 }
1533 # endif
1534
1535 /* build the symbol list for this image */
1536 # if defined(OBJFORMAT_ELF)
1537 r = ocGetNames_ELF ( oc );
1538 # elif defined(OBJFORMAT_PEi386)
1539 r = ocGetNames_PEi386 ( oc );
1540 # elif defined(OBJFORMAT_MACHO)
1541 r = ocGetNames_MachO ( oc );
1542 # else
1543 barf("loadObj: no getNames method");
1544 # endif
1545 if (!r) {
1546 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
1547 return r;
1548 }
1549
1550 # if defined(OBJFORMAT_PEi386)
1551 ocAllocateExtras_PEi386 ( oc );
1552 # endif
1553 #endif
1554
1555 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
1556 setOcInitialStatus( oc );
1557 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
1558
1559 return 1;
1560 }
1561
1562 /* -----------------------------------------------------------------------------
1563 * try to load and initialize an ObjectCode into memory
1564 *
1565 * Returns: 1 if ok, 0 on error.
1566 */
1567 int ocTryLoad (ObjectCode* oc) {
1568 int r;
1569
1570 if (oc->status != OBJECT_NEEDED) {
1571 return 1;
1572 }
1573
1574 /* Check for duplicate symbols by looking into `symhash`.
1575 Duplicate symbols are any symbols which exist
1576 in different ObjectCodes that have both been loaded, or
1577 are to be loaded by this call.
1578
1579 This call is intended to have no side-effects when a non-duplicate
1580 symbol is re-inserted.
1581
1582 We set the Address to NULL since that is not used to distinguish
1583 symbols. Duplicate symbols are distinguished by name and oc.
1584 */
1585 int x;
1586 SymbolName* symbol;
1587 for (x = 0; x < oc->n_symbols; x++) {
1588 symbol = oc->symbols[x];
1589 if ( symbol
1590 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL,
1591 isSymbolWeak(oc, symbol), oc)) {
1592 return 0;
1593 }
1594 }
1595
1596 # if defined(OBJFORMAT_ELF)
1597 r = ocResolve_ELF ( oc );
1598 # elif defined(OBJFORMAT_PEi386)
1599 r = ocResolve_PEi386 ( oc );
1600 # elif defined(OBJFORMAT_MACHO)
1601 r = ocResolve_MachO ( oc );
1602 # else
1603 barf("ocTryLoad: not implemented on this platform");
1604 # endif
1605 if (!r) { return r; }
1606
1607 // run init/init_array/ctors/mod_init_func
1608
1609 loading_obj = oc; // tells foreignExportStablePtr what to do
1610 #if defined(OBJFORMAT_ELF)
1611 r = ocRunInit_ELF ( oc );
1612 #elif defined(OBJFORMAT_PEi386)
1613 r = ocRunInit_PEi386 ( oc );
1614 #elif defined(OBJFORMAT_MACHO)
1615 r = ocRunInit_MachO ( oc );
1616 #else
1617 barf("ocTryLoad: initializers not implemented on this platform");
1618 #endif
1619 loading_obj = NULL;
1620
1621 if (!r) { return r; }
1622
1623 oc->status = OBJECT_RESOLVED;
1624
1625 return 1;
1626 }
1627
1628 /* -----------------------------------------------------------------------------
1629 * resolve all the currently unlinked objects in memory
1630 *
1631 * Returns: 1 if ok, 0 on error.
1632 */
1633 static HsInt resolveObjs_ (void)
1634 {
1635 ObjectCode *oc;
1636 int r;
1637
1638 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
1639
1640 for (oc = objects; oc; oc = oc->next) {
1641 r = ocTryLoad(oc);
1642 if (!r)
1643 {
1644 return r;
1645 }
1646 }
1647
1648 #if defined(PROFILING)
1649 // collect any new cost centres & CCSs that were defined during runInit
1650 initProfiling2();
1651 #endif
1652
1653 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
1654 return 1;
1655 }
1656
1657 HsInt resolveObjs (void)
1658 {
1659 ACQUIRE_LOCK(&linker_mutex);
1660 HsInt r = resolveObjs_();
1661 RELEASE_LOCK(&linker_mutex);
1662 return r;
1663 }
1664
1665 /* -----------------------------------------------------------------------------
1666 * delete an object from the pool
1667 */
1668 static HsInt unloadObj_ (pathchar *path, bool just_purge)
1669 {
1670 ObjectCode *oc, *prev, *next;
1671 HsBool unloadedAnyObj = HS_BOOL_FALSE;
1672
1673 ASSERT(symhash != NULL);
1674 ASSERT(objects != NULL);
1675
1676 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
1677
1678 prev = NULL;
1679 for (oc = objects; oc; oc = next) {
1680 next = oc->next; // oc might be freed
1681
1682 if (!pathcmp(oc->fileName,path)) {
1683
1684 // these are both idempotent, so in just_purge mode we can
1685 // later call unloadObj() to really unload the object.
1686 removeOcSymbols(oc);
1687 freeOcStablePtrs(oc);
1688
1689 if (!just_purge) {
1690 if (prev == NULL) {
1691 objects = oc->next;
1692 } else {
1693 prev->next = oc->next;
1694 }
1695 ACQUIRE_LOCK(&linker_unloaded_mutex);
1696 oc->next = unloaded_objects;
1697 unloaded_objects = oc;
1698 oc->status = OBJECT_UNLOADED;
1699 RELEASE_LOCK(&linker_unloaded_mutex);
1700 // We do not own oc any more; it can be released at any time by
1701 // the GC in checkUnload().
1702 } else {
1703 prev = oc;
1704 }
1705
1706 /* This could be a member of an archive so continue
1707 * unloading other members. */
1708 unloadedAnyObj = HS_BOOL_TRUE;
1709 } else {
1710 prev = oc;
1711 }
1712 }
1713
1714 if (unloadedAnyObj) {
1715 return 1;
1716 }
1717 else {
1718 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
1719 return 0;
1720 }
1721 }
1722
1723 HsInt unloadObj (pathchar *path)
1724 {
1725 ACQUIRE_LOCK(&linker_mutex);
1726 HsInt r = unloadObj_(path, false);
1727 RELEASE_LOCK(&linker_mutex);
1728 return r;
1729 }
1730
1731 HsInt purgeObj (pathchar *path)
1732 {
1733 ACQUIRE_LOCK(&linker_mutex);
1734 HsInt r = unloadObj_(path, true);
1735 RELEASE_LOCK(&linker_mutex);
1736 return r;
1737 }
1738
1739 static OStatus getObjectLoadStatus_ (pathchar *path)
1740 {
1741 ObjectCode *o;
1742 for (o = objects; o; o = o->next) {
1743 if (0 == pathcmp(o->fileName, path)) {
1744 return o->status;
1745 }
1746 }
1747 for (o = unloaded_objects; o; o = o->next) {
1748 if (0 == pathcmp(o->fileName, path)) {
1749 return o->status;
1750 }
1751 }
1752 return OBJECT_NOT_LOADED;
1753 }
1754
1755 OStatus getObjectLoadStatus (pathchar *path)
1756 {
1757 ACQUIRE_LOCK(&linker_mutex);
1758 OStatus r = getObjectLoadStatus_(path);
1759 RELEASE_LOCK(&linker_mutex);
1760 return r;
1761 }
1762
1763 /* -----------------------------------------------------------------------------
1764 * Sanity checking. For each ObjectCode, maintain a list of address ranges
1765 * which may be prodded during relocation, and abort if we try and write
1766 * outside any of these.
1767 */
1768 void
1769 addProddableBlock ( ObjectCode* oc, void* start, int size )
1770 {
1771 ProddableBlock* pb
1772 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
1773
1774 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
1775 ASSERT(size > 0);
1776 pb->start = start;
1777 pb->size = size;
1778 pb->next = oc->proddables;
1779 oc->proddables = pb;
1780 }
1781
1782 void
1783 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
1784 {
1785 ProddableBlock* pb;
1786
1787 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
1788 char* s = (char*)(pb->start);
1789 char* e = s + pb->size;
1790 char* a = (char*)addr;
1791 if (a >= s && (a+size) <= e) return;
1792 }
1793 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
1794 }
1795
1796 void freeProddableBlocks (ObjectCode *oc)
1797 {
1798 ProddableBlock *pb, *next;
1799
1800 for (pb = oc->proddables; pb != NULL; pb = next) {
1801 next = pb->next;
1802 stgFree(pb);
1803 }
1804 oc->proddables = NULL;
1805 }
1806
1807 /* -----------------------------------------------------------------------------
1808 * Section management.
1809 */
1810 void
1811 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
1812 void* start, StgWord size, StgWord mapped_offset,
1813 void* mapped_start, StgWord mapped_size)
1814 {
1815 s->start = start; /* actual start of section in memory */
1816 s->size = size; /* actual size of section in memory */
1817 s->kind = kind;
1818 s->alloc = alloc;
1819 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
1820
1821 s->mapped_start = mapped_start; /* start of mmap() block */
1822 s->mapped_size = mapped_size; /* size of mmap() block */
1823
1824 if (!s->info)
1825 s->info
1826 = (struct SectionFormatInfo*)stgCallocBytes(1, sizeof *s->info,
1827 "addSection(SectionFormatInfo)");
1828
1829 IF_DEBUG(linker,
1830 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
1831 start, (void*)((StgWord)start + size),
1832 size, kind ));
1833 }