users-guide: Drop 8.6.1 release notes
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "StablePtr.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "linker/CacheFlush.h"
32 #include "linker/SymbolExtras.h"
33 #include "PathUtils.h"
34
35 #if !defined(mingw32_HOST_OS)
36 #include "posix/Signals.h"
37 #endif
38
39 // get protos for is*()
40 #include <ctype.h>
41
42 #if defined(HAVE_SYS_TYPES_H)
43 #include <sys/types.h>
44 #endif
45
46 #include <inttypes.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <assert.h>
51 #include <fs_rts.h>
52
53 #if defined(HAVE_SYS_STAT_H)
54 #include <sys/stat.h>
55 #endif
56
57 #if defined(HAVE_DLFCN_H)
58 #include <dlfcn.h>
59 #endif
60
61 #if defined(OBJFORMAT_ELF)
62 # include "linker/Elf.h"
63 # include <regex.h> // regex is already used by dlopen() so this is OK
64 // to use here without requiring an additional lib
65 #elif defined(OBJFORMAT_PEi386)
66 # include "linker/PEi386.h"
67 # include <windows.h>
68 #elif defined(OBJFORMAT_MACHO)
69 # include "linker/MachO.h"
70 # include <regex.h>
71 # include <mach/machine.h>
72 # include <mach-o/fat.h>
73 #endif
74
75 #if defined(dragonfly_HOST_OS)
76 #include <sys/tls.h>
77 #endif
78 /*
79 Note [runtime-linker-support]
80 -----------------------------
81 When adding support for a new platform to the runtime linker please
82 update `$TOP/configure.ac` under heading `Does target have runtime
83 linker support?`.
84 */
85 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
86 This hashtable will contain information on all symbols
87 that we know of, however the .o they are in may not be loaded.
88
89 Until the ObjectCode the symbol belongs to is actually
90 loaded this symbol may be replaced. So do not rely on
91 addresses of unloaded symbols.
92
93 Note [runtime-linker-phases]
94 --------------------------------------
95 Broadly the behavior of the runtime linker can be
96 split into the following four phases:
97
98 - Indexing (e.g. ocVerifyImage and ocGetNames)
99 - Initialization (e.g. ocResolve and ocRunInit)
100 - Resolve (e.g. resolveObjs())
101 - Lookup (e.g. lookupSymbol)
102
103 This is to enable lazy loading of symbols. Eager loading is problematic
104 as it means that all symbols must be available, even those which we will
105 never use. This is especially painful on Windows, where the number of
106 libraries required to link things like mingwex grows to be quite high.
107
108 We proceed through these stages as follows,
109
110 * During Indexing we verify and open the ObjectCode and
111 perform a quick scan/indexing of the ObjectCode. All the work
112 required to actually load the ObjectCode is done.
113
114 All symbols from the ObjectCode are also inserted into
115 `symhash`, where possible duplicates are handled via the semantics
116 described in `ghciInsertSymbolTable`.
117
118 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
119 depending on whether they are an archive member or not.
120
121 * During initialization we load ObjectCode, perform relocations, execute
122 static constructors etc. This phase may trigger other ObjectCodes to
123 be loaded because of the calls to lookupSymbol.
124
125 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
126 previous status was `OBJECT_LOADED`.
127
128 * During resolve we attempt to resolve all the symbols needed for the
129 initial link. This essentially means, that for any ObjectCode given
130 directly to the command-line we perform lookupSymbols on the required
131 symbols. lookupSymbols may trigger the loading of additional ObjectCode
132 if required.
133
134 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
135 the previous status was `OBJECT_NEEDED`.
136
137 * lookupSymbols is used to lookup any symbols required, both during initial
138 link and during statement and expression compilations in the REPL.
139 Declaration of e.g. a foreign import, will eventually call lookupSymbol
140 which will either fail (symbol unknown) or succeed (and possibly trigger a
141 load).
142
143 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
144
145 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
146 by calling unloadObj and starting over.
147 When a new declaration or statement is performed ultimately lookupSymbol is called
148 without doing a re-link.
149
150 The goal of these different phases is to allow the linker to be able to perform
151 "lazy loading" of ObjectCode. The reason for this is that we want to only link
152 in symbols that are actually required for the link. This reduces:
153
154 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
155 then we don't need to load libB. This means the dependency chain for libraries
156 such as mingw32 and mingwex can be broken down.
157
158 2) The number of duplicate symbols, since now only symbols that are
159 true duplicates will display the error.
160 */
161 /*Str*/HashTable *symhash;
162
163 /* List of currently loaded objects */
164 ObjectCode *objects = NULL; /* initially empty */
165
166 /* List of objects that have been unloaded via unloadObj(), but are waiting
167 to be actually freed via checkUnload() */
168 ObjectCode *unloaded_objects = NULL; /* initially empty */
169
170 #if defined(THREADED_RTS)
171 /* This protects all the Linker's global state except unloaded_objects */
172 Mutex linker_mutex;
173 /*
174 * This protects unloaded_objects. We have a separate mutex for this, because
175 * the GC needs to access unloaded_objects in checkUnload, while the linker only
176 * needs to access unloaded_objects in unloadObj(), so this allows most linker
177 * operations proceed concurrently with the GC.
178 */
179 Mutex linker_unloaded_mutex;
180 #endif
181
182 /* Generic wrapper function to try and Resolve and RunInit oc files */
183 int ocTryLoad( ObjectCode* oc );
184
185 /* Link objects into the lower 2Gb on x86_64 and AArch64. GHC assumes the
186 * small memory model on this architecture (see gcc docs,
187 * -mcmodel=small).
188 *
189 * MAP_32BIT not available on OpenBSD/amd64
190 */
191 #if defined(MAP_32BIT) && defined(x86_64_HOST_ARCH)
192 #define MAP_LOW_MEM
193 #define TRY_MAP_32BIT MAP_32BIT
194 #else
195 #define TRY_MAP_32BIT 0
196 #endif
197
198 #if defined(aarch64_HOST_ARCH)
199 // On AArch64 MAP_32BIT is not available but we are still bound by the small
200 // memory model. Consequently we still try using the MAP_LOW_MEM allocation
201 // strategy.
202 #define MAP_LOW_MEM
203 #endif
204
205 /*
206 * Note [MAP_LOW_MEM]
207 * ~~~~~~~~~~~~~~~~~~
208 * Due to the small memory model (see above), on x86_64 and AArch64 we have to
209 * map all our non-PIC object files into the low 2Gb of the address space (why
210 * 2Gb and not 4Gb? Because all addresses must be reachable using a 32-bit
211 * signed PC-relative offset). On x86_64 Linux we can do this using the
212 * MAP_32BIT flag to mmap(), however on other OSs (e.g. *BSD, see #2063, and
213 * also on Linux inside Xen, see #2512), we can't do this. So on these
214 * systems, we have to pick a base address in the low 2Gb of the address space
215 * and try to allocate memory from there.
216 *
217 * We pick a default address based on the OS, but also make this
218 * configurable via an RTS flag (+RTS -xm)
219 */
220 #if defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
221 // Try to use MAP_32BIT
222 #define MMAP_32BIT_BASE_DEFAULT 0
223 #else
224 // A guess: 1Gb.
225 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
226 #endif
227
228 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
229
230 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
231 ObjectCode *owner)
232 {
233 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
234 if (!pinfo || owner != pinfo->owner) return;
235 removeStrHashTable(table, key, NULL);
236 if (isSymbolImport (owner, key))
237 stgFree(pinfo->value);
238
239 stgFree(pinfo);
240 }
241
242 /* -----------------------------------------------------------------------------
243 * Insert symbols into hash tables, checking for duplicates.
244 *
245 * Returns: 0 on failure, nonzero on success
246 */
247 /*
248 Note [weak-symbols-support]
249 -------------------------------------
250 While ghciInsertSymbolTable does implement extensive
251 logic for weak symbol support, weak symbols are not currently
252 fully supported by the RTS. This code is mostly here for COMDAT
253 support which uses the weak symbols support.
254
255 Linking weak symbols defined purely in C code with other C code
256 should also work, probably. Observing weak symbols in Haskell
257 won't.
258
259 Some test have been written for weak symbols but have been disabled
260 mostly because it's unsure how the weak symbols support should look.
261 See #11223
262 */
263 int ghciInsertSymbolTable(
264 pathchar* obj_name,
265 HashTable *table,
266 const SymbolName* key,
267 SymbolAddr* data,
268 HsBool weak,
269 ObjectCode *owner)
270 {
271 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
272 if (!pinfo) /* new entry */
273 {
274 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
275 pinfo->value = data;
276 pinfo->owner = owner;
277 pinfo->weak = weak;
278 insertStrHashTable(table, key, pinfo);
279 return 1;
280 }
281 else if (weak && data && pinfo->weak && !pinfo->value)
282 {
283 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
284 pinfo->value = data;
285 pinfo->owner = owner;
286 return 1;
287 }
288 else if (weak)
289 {
290 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
291 already know of another copy throw this one away.
292
293 or both weak symbols have a nonzero value. Keep the existing one.
294
295 This also preserves the semantics of linking against
296 the first symbol we find. */
297 }
298 else if (pinfo->weak && !weak) /* weak symbol is in the table */
299 {
300 /* override the weak definition with the non-weak one */
301 pinfo->value = data;
302 pinfo->owner = owner;
303 pinfo->weak = HS_BOOL_FALSE;
304 return 1;
305 }
306 else if ( pinfo->owner
307 && pinfo->owner->status != OBJECT_RESOLVED
308 && pinfo->owner->status != OBJECT_NEEDED)
309 {
310 /* If the other symbol hasn't been loaded or will be loaded and we want to
311 explicitly load the new one, we can just swap it out and load the one
312 that has been requested. If not, just keep the first one encountered.
313
314 Because the `symHash' table consists symbols we've also not loaded but
315 found during the initial scan this is safe to do. If however the existing
316 symbol has been loaded then it means we have a duplicate.
317
318 This is essentially emulating the behavior of a linker wherein it will always
319 link in object files that are .o file arguments, but only take object files
320 from archives as needed. */
321 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
322 pinfo->value = data;
323 pinfo->owner = owner;
324 pinfo->weak = weak;
325 }
326
327 return 1;
328 }
329 else if (pinfo->owner == owner)
330 {
331 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
332 return 1;
333 }
334 else if (owner && owner->status == OBJECT_LOADED)
335 {
336 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
337 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
338 call this function again to trigger the duplicate error. */
339 return 1;
340 }
341
342 pathchar* archiveName = NULL;
343 debugBelch(
344 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
345 " %s\n"
346 "whilst processing object file\n"
347 " %" PATH_FMT "\n"
348 "The symbol was previously defined in\n"
349 " %" PATH_FMT "\n"
350 "This could be caused by:\n"
351 " * Loading two different object files which export the same symbol\n"
352 " * Specifying the same object file twice on the GHCi command line\n"
353 " * An incorrect `package.conf' entry, causing some object to be\n"
354 " loaded twice.\n",
355 (char*)key,
356 obj_name,
357 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
358 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
359 : pinfo->owner->fileName
360 );
361
362 if (archiveName)
363 {
364 stgFree(archiveName);
365 archiveName = NULL;
366 }
367 return 0;
368 }
369
370 /* -----------------------------------------------------------------------------
371 * Looks up symbols into hash tables.
372 *
373 * Returns: 0 on failure and result is not set,
374 * nonzero on success and result set to nonzero pointer
375 */
376 HsBool ghciLookupSymbolInfo(HashTable *table,
377 const SymbolName* key, RtsSymbolInfo **result)
378 {
379 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
380 if (!pinfo) {
381 *result = NULL;
382 return HS_BOOL_FALSE;
383 }
384 if (pinfo->weak)
385 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
386 /* Once it's looked up, it can no longer be overridden */
387 pinfo->weak = HS_BOOL_FALSE;
388
389 *result = pinfo;
390 return HS_BOOL_TRUE;
391 }
392
393 /* -----------------------------------------------------------------------------
394 * initialize the object linker
395 */
396
397
398 static int linker_init_done = 0 ;
399
400 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
401 static void *dl_prog_handle;
402 static regex_t re_invalid;
403 static regex_t re_realso;
404 #if defined(THREADED_RTS)
405 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
406 #endif
407 #endif
408
409 void initLinker (void)
410 {
411 // default to retaining CAFs for backwards compatibility. Most
412 // users will want initLinker_(0): otherwise unloadObj() will not
413 // be able to unload object files when they contain CAFs.
414 initLinker_(1);
415 }
416
417 void
418 initLinker_ (int retain_cafs)
419 {
420 RtsSymbolVal *sym;
421 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
422 int compileResult;
423 #endif
424
425 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
426
427 /* Make initLinker idempotent, so we can call it
428 before every relevant operation; that means we
429 don't need to initialise the linker separately */
430 if (linker_init_done == 1) {
431 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
432 return;
433 } else {
434 linker_init_done = 1;
435 }
436
437 objects = NULL;
438 unloaded_objects = NULL;
439
440 #if defined(THREADED_RTS)
441 initMutex(&linker_mutex);
442 initMutex(&linker_unloaded_mutex);
443 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
444 initMutex(&dl_mutex);
445 #endif
446 #endif
447
448 symhash = allocStrHashTable();
449
450 /* populate the symbol table with stuff from the RTS */
451 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
452 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
453 symhash, sym->lbl, sym->addr,
454 sym->weak, NULL)) {
455 barf("ghciInsertSymbolTable failed");
456 }
457 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
458 }
459
460 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
461 referenced from a statically linked module. We need to mimic this, but
462 we cannot use NULL because we use it to mean nonexistent symbols. So we
463 use an arbitrary (hopefully unique) address here.
464 */
465 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
466 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
467 barf("ghciInsertSymbolTable failed");
468 }
469
470 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
471 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
472 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
473 retain_cafs ? newRetainedCAF : newGCdCAF,
474 HS_BOOL_FALSE, NULL)) {
475 barf("ghciInsertSymbolTable failed");
476 }
477
478 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
479 # if defined(RTLD_DEFAULT)
480 dl_prog_handle = RTLD_DEFAULT;
481 # else
482 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
483 # endif /* RTLD_DEFAULT */
484
485 compileResult = regcomp(&re_invalid,
486 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short|invalid file format|Exec format error)",
487 REG_EXTENDED);
488 if (compileResult != 0) {
489 barf("Compiling re_invalid failed");
490 }
491 compileResult = regcomp(&re_realso,
492 "(GROUP|INPUT) *\\( *([^ )]+)",
493 REG_EXTENDED);
494 if (compileResult != 0) {
495 barf("Compiling re_realso failed");
496 }
497 # endif
498
499 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
500 // User-override for mmap_32bit_base
501 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
502 }
503
504 #if defined(OBJFORMAT_PEi386)
505 initLinker_PEi386();
506 #endif
507
508 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
509 return;
510 }
511
512 void
513 exitLinker( void ) {
514 #if defined(OBJFORMAT_PEi386)
515 exitLinker_PEi386();
516 #endif
517 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
518 if (linker_init_done == 1) {
519 regfree(&re_invalid);
520 regfree(&re_realso);
521 #if defined(THREADED_RTS)
522 closeMutex(&dl_mutex);
523 #endif
524 }
525 #endif
526 if (linker_init_done == 1) {
527 freeHashTable(symhash, free);
528 }
529 #if defined(THREADED_RTS)
530 closeMutex(&linker_mutex);
531 #endif
532 }
533
534 /* -----------------------------------------------------------------------------
535 * Loading DLL or .so dynamic libraries
536 * -----------------------------------------------------------------------------
537 *
538 * Add a DLL from which symbols may be found. In the ELF case, just
539 * do RTLD_GLOBAL-style add, so no further messing around needs to
540 * happen in order that symbols in the loaded .so are findable --
541 * lookupSymbol() will subsequently see them by dlsym on the program's
542 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
543 *
544 * In the PEi386 case, open the DLLs and put handles to them in a
545 * linked list. When looking for a symbol, try all handles in the
546 * list. This means that we need to load even DLLs that are guaranteed
547 * to be in the ghc.exe image already, just so we can get a handle
548 * to give to loadSymbol, so that we can find the symbols. For such
549 * libraries, the LoadLibrary call should be a no-op except for returning
550 * the handle.
551 *
552 */
553
554 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
555
556 /* Suppose in ghci we load a temporary SO for a module containing
557 f = 1
558 and then modify the module, recompile, and load another temporary
559 SO with
560 f = 2
561 Then as we don't unload the first SO, dlsym will find the
562 f = 1
563 symbol whereas we want the
564 f = 2
565 symbol. We therefore need to keep our own SO handle list, and
566 try SOs in the right order. */
567
568 typedef
569 struct _OpenedSO {
570 struct _OpenedSO* next;
571 void *handle;
572 }
573 OpenedSO;
574
575 /* A list thereof. */
576 static OpenedSO* openedSOs = NULL;
577
578 static const char *
579 internal_dlopen(const char *dll_name)
580 {
581 OpenedSO* o_so;
582 void *hdl;
583 const char *errmsg;
584 char *errmsg_copy;
585
586 // omitted: RTLD_NOW
587 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
588 IF_DEBUG(linker,
589 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
590
591 //-------------- Begin critical section ------------------
592 // This critical section is necessary because dlerror() is not
593 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
594 // Also, the error message returned must be copied to preserve it
595 // (see POSIX also)
596
597 ACQUIRE_LOCK(&dl_mutex);
598 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
599
600 errmsg = NULL;
601 if (hdl == NULL) {
602 /* dlopen failed; return a ptr to the error msg. */
603 errmsg = dlerror();
604 if (errmsg == NULL) errmsg = "addDLL: unknown error";
605 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
606 strcpy(errmsg_copy, errmsg);
607 errmsg = errmsg_copy;
608 } else {
609 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
610 o_so->handle = hdl;
611 o_so->next = openedSOs;
612 openedSOs = o_so;
613 }
614
615 RELEASE_LOCK(&dl_mutex);
616 //--------------- End critical section -------------------
617
618 return errmsg;
619 }
620
621 /*
622 Note [RTLD_LOCAL]
623
624 In GHCi we want to be able to override previous .so's with newly
625 loaded .so's when we recompile something. This further implies that
626 when we look up a symbol in internal_dlsym() we have to iterate
627 through the loaded libraries (in order from most recently loaded to
628 oldest) looking up the symbol in each one until we find it.
629
630 However, this can cause problems for some symbols that are copied
631 by the linker into the executable image at runtime - see #8935 for a
632 lengthy discussion. To solve that problem we need to look up
633 symbols in the main executable *first*, before attempting to look
634 them up in the loaded .so's. But in order to make that work, we
635 have to always call dlopen with RTLD_LOCAL, so that the loaded
636 libraries don't populate the global symbol table.
637 */
638
639 static void *
640 internal_dlsym(const char *symbol) {
641 OpenedSO* o_so;
642 void *v;
643
644 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
645 ACQUIRE_LOCK(&dl_mutex);
646 dlerror();
647 // look in program first
648 v = dlsym(dl_prog_handle, symbol);
649 if (dlerror() == NULL) {
650 RELEASE_LOCK(&dl_mutex);
651 return v;
652 }
653
654 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
655 v = dlsym(o_so->handle, symbol);
656 if (dlerror() == NULL) {
657 RELEASE_LOCK(&dl_mutex);
658 return v;
659 }
660 }
661 RELEASE_LOCK(&dl_mutex);
662 return v;
663 }
664 # endif
665
666 const char *
667 addDLL( pathchar *dll_name )
668 {
669 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
670 /* ------------------- ELF DLL loader ------------------- */
671
672 #define NMATCH 5
673 regmatch_t match[NMATCH];
674 const char *errmsg;
675 FILE* fp;
676 size_t match_length;
677 #define MAXLINE 1000
678 char line[MAXLINE];
679 int result;
680
681 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
682 errmsg = internal_dlopen(dll_name);
683
684 if (errmsg == NULL) {
685 return NULL;
686 }
687
688 // GHC #2615
689 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
690 // contain linker scripts rather than ELF-format object code. This
691 // code handles the situation by recognizing the real object code
692 // file name given in the linker script.
693 //
694 // If an "invalid ELF header" error occurs, it is assumed that the
695 // .so file contains a linker script instead of ELF object code.
696 // In this case, the code looks for the GROUP ( ... ) linker
697 // directive. If one is found, the first file name inside the
698 // parentheses is treated as the name of a dynamic library and the
699 // code attempts to dlopen that file. If this is also unsuccessful,
700 // an error message is returned.
701
702 // see if the error message is due to an invalid ELF header
703 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
704 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
705 IF_DEBUG(linker, debugBelch("result = %i\n", result));
706 if (result == 0) {
707 // success -- try to read the named file as a linker script
708 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
709 MAXLINE-1);
710 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
711 line[match_length] = '\0'; // make sure string is null-terminated
712 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
713 if ((fp = __rts_fopen(line, "r")) == NULL) {
714 return errmsg; // return original error if open fails
715 }
716 // try to find a GROUP or INPUT ( ... ) command
717 while (fgets(line, MAXLINE, fp) != NULL) {
718 IF_DEBUG(linker, debugBelch("input line = %s", line));
719 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
720 // success -- try to dlopen the first named file
721 IF_DEBUG(linker, debugBelch("match%s\n",""));
722 line[match[2].rm_eo] = '\0';
723 stgFree((void*)errmsg); // Free old message before creating new one
724 errmsg = internal_dlopen(line+match[2].rm_so);
725 break;
726 }
727 // if control reaches here, no GROUP or INPUT ( ... ) directive
728 // was found and the original error message is returned to the
729 // caller
730 }
731 fclose(fp);
732 }
733 return errmsg;
734
735 # elif defined(OBJFORMAT_PEi386)
736 return addDLL_PEi386(dll_name, NULL);
737
738 # else
739 barf("addDLL: not implemented on this platform");
740 # endif
741 }
742
743 /* -----------------------------------------------------------------------------
744 * Searches the system directories to determine if there is a system DLL that
745 * satisfies the given name. This prevent GHCi from linking against a static
746 * library if a DLL is available.
747 *
748 * Returns: NULL on failure or no DLL found, else the full path to the DLL
749 * that can be loaded.
750 */
751 pathchar* findSystemLibrary(pathchar* dll_name)
752 {
753 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%"
754 PATH_FMT "'\n", dll_name));
755
756 #if defined(OBJFORMAT_PEi386)
757 return findSystemLibrary_PEi386(dll_name);
758 #else
759 (void)(dll_name); // Function not implemented for other platforms.
760 return NULL;
761 #endif
762 }
763
764 /* -----------------------------------------------------------------------------
765 * Emits a warning determining that the system is missing a required security
766 * update that we need to get access to the proper APIs
767 */
768 void warnMissingKBLibraryPaths( void )
769 {
770 static HsBool missing_update_warn = HS_BOOL_FALSE;
771 if (!missing_update_warn) {
772 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
773 missing_update_warn = HS_BOOL_TRUE;
774 }
775 }
776
777 /* -----------------------------------------------------------------------------
778 * appends a directory to the process DLL Load path so LoadLibrary can find it
779 *
780 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
781 * restore the search path to what it was before this call.
782 */
783 HsPtr addLibrarySearchPath(pathchar* dll_path)
784 {
785 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%"
786 PATH_FMT "'\n", dll_path));
787
788 #if defined(OBJFORMAT_PEi386)
789 return addLibrarySearchPath_PEi386(dll_path);
790 #else
791 (void)(dll_path); // Function not implemented for other platforms.
792 return NULL;
793 #endif
794 }
795
796 /* -----------------------------------------------------------------------------
797 * removes a directory from the process DLL Load path
798 *
799 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
800 */
801 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
802 {
803 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n",
804 dll_path_index));
805
806 #if defined(OBJFORMAT_PEi386)
807 return removeLibrarySearchPath_PEi386(dll_path_index);
808 #else
809 (void)(dll_path_index); // Function not implemented for other platforms.
810 return HS_BOOL_FALSE;
811 #endif
812 }
813
814 /* -----------------------------------------------------------------------------
815 * insert a symbol in the hash table
816 *
817 * Returns: 0 on failure, nozero on success
818 */
819 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
820 {
821 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE,
822 NULL);
823 }
824
825 /* -----------------------------------------------------------------------------
826 * lookup a symbol in the hash table
827 */
828 #if defined(OBJFORMAT_PEi386)
829 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
830 {
831 return lookupSymbol_PEi386(lbl);
832 }
833
834 #else
835
836 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
837 {
838 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
839
840 ASSERT(symhash != NULL);
841 RtsSymbolInfo *pinfo;
842
843 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
844 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
845
846 # if defined(OBJFORMAT_ELF)
847 return internal_dlsym(lbl);
848 # elif defined(OBJFORMAT_MACHO)
849
850 /* HACK: On OS X, all symbols are prefixed with an underscore.
851 However, dlsym wants us to omit the leading underscore from the
852 symbol name -- the dlsym routine puts it back on before
853 searching for the symbol. For now, we simply strip it off here
854 (and ONLY here).
855 */
856 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n",
857 lbl));
858 ASSERT(lbl[0] == '_');
859 return internal_dlsym(lbl + 1);
860
861 # else
862 ASSERT(2+2 == 5);
863 return NULL;
864 # endif
865 } else {
866 return loadSymbol(lbl, pinfo);
867 }
868 }
869 #endif /* OBJFORMAT_PEi386 */
870
871 /*
872 * Load and relocate the object code for a symbol as necessary.
873 * Symbol name only used for diagnostics output.
874 */
875 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
876 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl,
877 pinfo->value));
878 ObjectCode* oc = pinfo->owner;
879
880 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
881 See Note [runtime-linker-phases] */
882 if (oc && lbl && oc->status == OBJECT_LOADED) {
883 oc->status = OBJECT_NEEDED;
884 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand "
885 "loading symbol '%s'\n", lbl));
886 int r = ocTryLoad(oc);
887 if (!r) {
888 return NULL;
889 }
890
891 #if defined(PROFILING)
892 // collect any new cost centres & CCSs
893 // that were defined during runInit
894 refreshProfilingCCSs();
895 #endif
896 }
897
898 return pinfo->value;
899 }
900
901 SymbolAddr* lookupSymbol( SymbolName* lbl )
902 {
903 ACQUIRE_LOCK(&linker_mutex);
904 SymbolAddr* r = lookupSymbol_(lbl);
905 if (!r) {
906 errorBelch("^^ Could not load '%s', dependency unresolved. "
907 "See top entry above.\n", lbl);
908 fflush(stderr);
909 }
910 RELEASE_LOCK(&linker_mutex);
911 return r;
912 }
913
914 /* -----------------------------------------------------------------------------
915 Create a StablePtr for a foreign export. This is normally called by
916 a C function with __attribute__((constructor)), which is generated
917 by GHC and linked into the module.
918
919 If the object code is being loaded dynamically, then we remember
920 which StablePtrs were allocated by the constructors and free them
921 again in unloadObj().
922 -------------------------------------------------------------------------- */
923
924 static ObjectCode *loading_obj = NULL;
925
926 StgStablePtr foreignExportStablePtr (StgPtr p)
927 {
928 ForeignExportStablePtr *fe_sptr;
929 StgStablePtr *sptr;
930
931 sptr = getStablePtr(p);
932
933 if (loading_obj != NULL) {
934 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
935 "foreignExportStablePtr");
936 fe_sptr->stable_ptr = sptr;
937 fe_sptr->next = loading_obj->stable_ptrs;
938 loading_obj->stable_ptrs = fe_sptr;
939 }
940
941 return sptr;
942 }
943
944
945 /* -----------------------------------------------------------------------------
946 * Debugging aid: look in GHCi's object symbol tables for symbols
947 * within DELTA bytes of the specified address, and show their names.
948 */
949 #if defined(DEBUG)
950 void ghci_enquire ( SymbolAddr* addr );
951
952 void ghci_enquire(SymbolAddr* addr)
953 {
954 int i;
955 SymbolName* sym;
956 RtsSymbolInfo* a;
957 const int DELTA = 64;
958 ObjectCode* oc;
959
960 for (oc = objects; oc; oc = oc->next) {
961 for (i = 0; i < oc->n_symbols; i++) {
962 sym = oc->symbols[i].name;
963 if (sym == NULL) continue;
964 a = NULL;
965 if (a == NULL) {
966 ghciLookupSymbolInfo(symhash, sym, &a);
967 }
968 if (a == NULL) {
969 // debugBelch("ghci_enquire: can't find %s\n", sym);
970 }
971 else if ( a->value
972 && (char*)addr-DELTA <= (char*)a->value
973 && (char*)a->value <= (char*)addr+DELTA) {
974 debugBelch("%p + %3d == `%s'\n", addr,
975 (int)((char*)a->value - (char*)addr), sym);
976 }
977 }
978 }
979 }
980 #endif
981
982 pathchar*
983 resolveSymbolAddr (pathchar* buffer, int size,
984 SymbolAddr* symbol, uintptr_t* top)
985 {
986 #if defined(OBJFORMAT_PEi386)
987 return resolveSymbolAddr_PEi386 (buffer, size, symbol, top);
988 #else /* OBJFORMAT_PEi386 */
989 (void)buffer;
990 (void)size;
991 (void)symbol;
992 (void)top;
993 return NULL;
994 #endif /* OBJFORMAT_PEi386 */
995 }
996
997 #if RTS_LINKER_USE_MMAP
998 //
999 // Returns NULL on failure.
1000 //
1001 void *
1002 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1003 {
1004 void *map_addr = NULL;
1005 void *result;
1006 size_t size;
1007 uint32_t tryMap32Bit = RtsFlags.MiscFlags.linkerAlwaysPic
1008 ? 0
1009 : TRY_MAP_32BIT;
1010 static uint32_t fixed = 0;
1011
1012 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1013 size = roundUpToPage(bytes);
1014
1015 #if defined(MAP_LOW_MEM)
1016 mmap_again:
1017 #endif
1018
1019 if (mmap_32bit_base != 0) {
1020 map_addr = mmap_32bit_base;
1021 }
1022
1023 const int prot = PROT_READ | PROT_WRITE;
1024 IF_DEBUG(linker,
1025 debugBelch("mmapForLinker: \tprotection %#0x\n", prot));
1026 IF_DEBUG(linker,
1027 debugBelch("mmapForLinker: \tflags %#0x\n",
1028 MAP_PRIVATE | tryMap32Bit | fixed | flags));
1029
1030 result = mmap(map_addr, size, prot,
1031 MAP_PRIVATE|tryMap32Bit|fixed|flags, fd, offset);
1032
1033 if (result == MAP_FAILED) {
1034 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1035 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1036 return NULL;
1037 }
1038
1039 #if defined(MAP_LOW_MEM)
1040 if (RtsFlags.MiscFlags.linkerAlwaysPic) {
1041 } else if (mmap_32bit_base != 0) {
1042 if (result == map_addr) {
1043 mmap_32bit_base = (StgWord8*)map_addr + size;
1044 } else {
1045 if ((W_)result > 0x80000000) {
1046 // oops, we were given memory over 2Gb
1047 munmap(result,size);
1048 #if defined(freebsd_HOST_OS) || \
1049 defined(kfreebsdgnu_HOST_OS) || \
1050 defined(dragonfly_HOST_OS)
1051 // Some platforms require MAP_FIXED. This is normally
1052 // a bad idea, because MAP_FIXED will overwrite
1053 // existing mappings.
1054 fixed = MAP_FIXED;
1055 goto mmap_again;
1056 #else
1057 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1058 "asked for %lu bytes at %p. "
1059 "Try specifying an address with +RTS -xm<addr> -RTS",
1060 size, map_addr);
1061 return NULL;
1062 #endif
1063 } else {
1064 // hmm, we were given memory somewhere else, but it's
1065 // still under 2Gb so we can use it. Next time, ask
1066 // for memory right after the place we just got some
1067 mmap_32bit_base = (StgWord8*)result + size;
1068 }
1069 }
1070 } else {
1071 if ((W_)result > 0x80000000) {
1072 // oops, we were given memory over 2Gb
1073 // ... try allocating memory somewhere else?;
1074 debugTrace(DEBUG_linker,
1075 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1076 bytes, result);
1077 munmap(result, size);
1078
1079 // Set a base address and try again... (guess: 1Gb)
1080 mmap_32bit_base = (void*)0x40000000;
1081 goto mmap_again;
1082 }
1083 }
1084 #endif
1085
1086 IF_DEBUG(linker,
1087 debugBelch("mmapForLinker: mapped %" FMT_Word
1088 " bytes starting at %p\n", (W_)size, result));
1089 IF_DEBUG(linker,
1090 debugBelch("mmapForLinker: done\n"));
1091
1092 return result;
1093 }
1094
1095 /* Note [Memory protection in the linker]
1096 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1097 * For many years the linker would simply map all of its memory
1098 * with PROT_READ|PROT_WRITE|PROT_EXEC. However operating systems have been
1099 * becoming increasingly reluctant to accept this practice (e.g. #17353,
1100 * #12657) and for good reason: writable code is ripe for exploitation.
1101 *
1102 * Consequently mmapForLinker now maps its memory with PROT_READ|PROT_WRITE.
1103 * After the linker has finished filling/relocating the mapping it must then
1104 * call mmapForLinkerMarkExecutable on the sections of the mapping which
1105 * contain executable code.
1106 *
1107 * Note that the m32 allocator handles protection of its allocations. For this
1108 * reason the caller to m32_alloc() must tell the allocator whether the
1109 * allocation needs to be executable. The caller must then ensure that they
1110 * call m32_flush() after they are finished filling the region, which will
1111 * cause the allocator to change the protection bits to PROT_READ|PROT_EXEC.
1112 *
1113 */
1114
1115 /*
1116 * Mark an portion of a mapping previously reserved by mmapForLinker
1117 * as executable (but not writable).
1118 */
1119 void mmapForLinkerMarkExecutable(void *start, size_t len)
1120 {
1121 IF_DEBUG(linker,
1122 debugBelch("mmapForLinkerMarkExecutable: protecting %" FMT_Word
1123 " bytes starting at %p\n", (W_)len, start));
1124 if (mprotect(start, len, PROT_READ|PROT_EXEC) == -1) {
1125 barf("mmapForLinkerMarkExecutable: mprotect: %s\n", strerror(errno));
1126 }
1127 }
1128 #endif
1129
1130 /*
1131 * Remove symbols from the symbol table, and free oc->symbols.
1132 * This operation is idempotent.
1133 */
1134 static void removeOcSymbols (ObjectCode *oc)
1135 {
1136 if (oc->symbols == NULL) return;
1137
1138 // Remove all the mappings for the symbols within this object..
1139 int i;
1140 for (i = 0; i < oc->n_symbols; i++) {
1141 if (oc->symbols[i].name != NULL) {
1142 ghciRemoveSymbolTable(symhash, oc->symbols[i].name, oc);
1143 }
1144 }
1145
1146 stgFree(oc->symbols);
1147 oc->symbols = NULL;
1148 }
1149
1150 /*
1151 * Release StablePtrs and free oc->stable_ptrs.
1152 * This operation is idempotent.
1153 */
1154 static void freeOcStablePtrs (ObjectCode *oc)
1155 {
1156 // Release any StablePtrs that were created when this
1157 // object module was initialized.
1158 ForeignExportStablePtr *fe_ptr, *next;
1159
1160 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1161 next = fe_ptr->next;
1162 freeStablePtr(fe_ptr->stable_ptr);
1163 stgFree(fe_ptr);
1164 }
1165 oc->stable_ptrs = NULL;
1166 }
1167
1168 static void
1169 freePreloadObjectFile (ObjectCode *oc)
1170 {
1171 #if defined(mingw32_HOST_OS)
1172 freePreloadObjectFile_PEi386(oc);
1173 #else
1174
1175 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1176 munmap(oc->image, oc->fileSize);
1177 }
1178 else {
1179 stgFree(oc->image);
1180 }
1181
1182 #endif
1183
1184 oc->image = NULL;
1185 oc->fileSize = 0;
1186 }
1187
1188 /*
1189 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1190 * the GC when a previously unloaded ObjectCode has been determined to be
1191 * unused, and when an error occurs during loadObj().
1192 */
1193 void freeObjectCode (ObjectCode *oc)
1194 {
1195 freePreloadObjectFile(oc);
1196
1197 if (oc->symbols != NULL) {
1198 stgFree(oc->symbols);
1199 oc->symbols = NULL;
1200 }
1201
1202 if (oc->extraInfos != NULL) {
1203 freeHashTable(oc->extraInfos, NULL);
1204 oc->extraInfos = NULL;
1205 }
1206
1207 if (oc->sections != NULL) {
1208 int i;
1209 for (i=0; i < oc->n_sections; i++) {
1210 if (oc->sections[i].start != NULL) {
1211 switch(oc->sections[i].alloc){
1212 #if RTS_LINKER_USE_MMAP
1213 case SECTION_MMAP:
1214 munmap(oc->sections[i].mapped_start,
1215 oc->sections[i].mapped_size);
1216 break;
1217 case SECTION_M32:
1218 IF_DEBUG(zero_on_gc,
1219 memset(oc->sections[i].start,
1220 0x00, oc->sections[i].size));
1221 // Freed by m32_allocator_free
1222 break;
1223 #endif
1224 case SECTION_MALLOC:
1225 IF_DEBUG(zero_on_gc,
1226 memset(oc->sections[i].start,
1227 0x00, oc->sections[i].size));
1228 stgFree(oc->sections[i].start);
1229 break;
1230 default:
1231 break;
1232 }
1233 }
1234 if (oc->sections[i].info) {
1235 stgFree(oc->sections[i].info);
1236 }
1237 }
1238 stgFree(oc->sections);
1239 }
1240
1241 freeProddableBlocks(oc);
1242 freeSegments(oc);
1243
1244 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1245 * alongside the image, so we don't need to free. */
1246 #if defined(NEED_SYMBOL_EXTRAS) && (!defined(x86_64_HOST_ARCH) \
1247 || !defined(mingw32_HOST_OS))
1248 if (RTS_LINKER_USE_MMAP) {
1249 if (!USE_CONTIGUOUS_MMAP && !RtsFlags.MiscFlags.linkerAlwaysPic &&
1250 oc->symbol_extras != NULL) {
1251 // Freed by m32_allocator_free
1252 }
1253 }
1254 else {
1255 stgFree(oc->symbol_extras);
1256 }
1257 #endif
1258
1259 #if defined(OBJECTFORMAT_MACHO)
1260 ocDeinit_MachO(oc);
1261 #endif
1262 #if defined(OBJFORMAT_ELF)
1263 ocDeinit_ELF(oc);
1264 #endif
1265
1266 #if RTS_LINKER_USE_MMAP == 1
1267 m32_allocator_free(oc->rx_m32);
1268 m32_allocator_free(oc->rw_m32);
1269 #endif
1270
1271 stgFree(oc->fileName);
1272 stgFree(oc->archiveMemberName);
1273
1274 stgFree(oc);
1275 }
1276
1277 /* -----------------------------------------------------------------------------
1278 * Sets the initial status of a fresh ObjectCode
1279 */
1280 static void setOcInitialStatus(ObjectCode* oc) {
1281 /* If a target has requested the ObjectCode not to be resolved then
1282 honor this requests. Usually this means the ObjectCode has not been
1283 initialized and can't be. */
1284 if (oc->status == OBJECT_DONT_RESOLVE)
1285 return;
1286
1287 if (oc->archiveMemberName == NULL) {
1288 oc->status = OBJECT_NEEDED;
1289 } else {
1290 oc->status = OBJECT_LOADED;
1291 }
1292 }
1293
1294 ObjectCode*
1295 mkOc( pathchar *path, char *image, int imageSize,
1296 bool mapped, char *archiveMemberName, int misalignment ) {
1297 ObjectCode* oc;
1298
1299 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1300 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1301
1302 oc->info = NULL;
1303
1304 # if defined(OBJFORMAT_ELF)
1305 oc->formatName = "ELF";
1306 # elif defined(OBJFORMAT_PEi386)
1307 oc->formatName = "PEi386";
1308 # elif defined(OBJFORMAT_MACHO)
1309 oc->formatName = "Mach-O";
1310 # else
1311 stgFree(oc);
1312 barf("loadObj: not implemented on this platform");
1313 # endif
1314
1315 oc->image = image;
1316 oc->fileName = pathdup(path);
1317
1318 if (archiveMemberName) {
1319 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1,
1320 "loadObj" );
1321 strcpy(oc->archiveMemberName, archiveMemberName);
1322 } else {
1323 oc->archiveMemberName = NULL;
1324 }
1325
1326 setOcInitialStatus( oc );
1327
1328 oc->fileSize = imageSize;
1329 oc->symbols = NULL;
1330 oc->n_sections = 0;
1331 oc->sections = NULL;
1332 oc->n_segments = 0;
1333 oc->segments = NULL;
1334 oc->proddables = NULL;
1335 oc->stable_ptrs = NULL;
1336 #if defined(NEED_SYMBOL_EXTRAS)
1337 oc->symbol_extras = NULL;
1338 #endif
1339 oc->bssBegin = NULL;
1340 oc->bssEnd = NULL;
1341 oc->imageMapped = mapped;
1342
1343 oc->misalignment = misalignment;
1344 oc->extraInfos = NULL;
1345
1346 /* chain it onto the list of objects */
1347 oc->next = NULL;
1348
1349 #if RTS_LINKER_USE_MMAP
1350 oc->rw_m32 = m32_allocator_new(false);
1351 oc->rx_m32 = m32_allocator_new(true);
1352 #endif
1353
1354 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1355 return oc;
1356 }
1357
1358 /* -----------------------------------------------------------------------------
1359 * Check if an object or archive is already loaded.
1360 *
1361 * Returns: 1 if the path is already loaded, 0 otherwise.
1362 */
1363 HsInt
1364 isAlreadyLoaded( pathchar *path )
1365 {
1366 ObjectCode *o;
1367 for (o = objects; o; o = o->next) {
1368 if (0 == pathcmp(o->fileName, path)) {
1369 return 1; /* already loaded */
1370 }
1371 }
1372 return 0; /* not loaded yet */
1373 }
1374
1375 //
1376 // Load the object file into memory. This will not be its final resting place,
1377 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
1378 // address space, properly aligned.
1379 //
1380 static ObjectCode *
1381 preloadObjectFile (pathchar *path)
1382 {
1383 int fileSize;
1384 struct_stat st;
1385 int r;
1386 void *image;
1387 ObjectCode *oc;
1388 int misalignment = 0;
1389
1390 r = pathstat(path, &st);
1391 if (r == -1) {
1392 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
1393 return NULL;
1394 }
1395
1396 fileSize = st.st_size;
1397
1398 #if RTS_LINKER_USE_MMAP
1399 int fd;
1400
1401 /* On many architectures malloc'd memory isn't executable, so we need to use
1402 * mmap. */
1403
1404 #if defined(openbsd_HOST_OS)
1405 fd = open(path, O_RDONLY, S_IRUSR);
1406 #else
1407 fd = open(path, O_RDONLY);
1408 #endif
1409 if (fd == -1) {
1410 errorBelch("loadObj: can't open %s", path);
1411 return NULL;
1412 }
1413
1414 /* iOS does not permit to mmap with r+w+x, however while the comment for
1415 * this function says this is not the final resting place, for some
1416 * architectures / hosts (at least mach-o non-iOS -- see ocGetNames_MachO)
1417 * the image mmaped here in fact ends up being the final resting place for
1418 * the sections. And hence we need to leave r+w+x here for other hosts
1419 * until all hosts have been made aware of the initial image being r+w only.
1420 *
1421 * See also the misalignment logic for darwin below.
1422 */
1423 #if defined(ios_HOST_OS)
1424 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
1425 #else
1426 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
1427 MAP_PRIVATE, fd, 0);
1428 #endif
1429
1430 if (image == MAP_FAILED) {
1431 errorBelch("mmap: failed. errno = %d", errno);
1432 }
1433 // not 32-bit yet, we'll remap later
1434 close(fd);
1435
1436 #else /* !RTS_LINKER_USE_MMAP */
1437 FILE *f;
1438
1439 /* load the image into memory */
1440 /* coverity[toctou] */
1441 f = pathopen(path, WSTR("rb"));
1442 if (!f) {
1443 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
1444 return NULL;
1445 }
1446
1447 # if defined(darwin_HOST_OS)
1448
1449 // In a Mach-O .o file, all sections can and will be misaligned
1450 // if the total size of the headers is not a multiple of the
1451 // desired alignment. This is fine for .o files that only serve
1452 // as input for the static linker, but it's not fine for us,
1453 // as SSE (used by gcc for floating point) and Altivec require
1454 // 16-byte alignment.
1455 // We calculate the correct alignment from the header before
1456 // reading the file, and then we misalign image on purpose so
1457 // that the actual sections end up aligned again.
1458 misalignment = machoGetMisalignment(f);
1459 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
1460 image += misalignment;
1461
1462 # else /* !defined(darwin_HOST_OS) */
1463
1464 image = stgMallocBytes(fileSize, "loadObj(image)");
1465
1466 #endif
1467
1468 int n;
1469 n = fread ( image, 1, fileSize, f );
1470 fclose(f);
1471 if (n != fileSize) {
1472 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
1473 stgFree(image);
1474 return NULL;
1475 }
1476
1477 #endif /* RTS_LINKER_USE_MMAP */
1478
1479 IF_DEBUG(linker, debugBelch("loadObj: preloaded image at %p\n", (void *) image));
1480
1481 /* FIXME (AP): =mapped= parameter unconditionally set to true */
1482 oc = mkOc(path, image, fileSize, true, NULL, misalignment);
1483
1484 #if defined(OBJFORMAT_MACHO)
1485 if (ocVerifyImage_MachO( oc ))
1486 ocInit_MachO( oc );
1487 #endif
1488 #if defined(OBJFORMAT_ELF)
1489 if(ocVerifyImage_ELF( oc ))
1490 ocInit_ELF( oc );
1491 #endif
1492 return oc;
1493 }
1494
1495 /* -----------------------------------------------------------------------------
1496 * Load an obj (populate the global symbol table, but don't resolve yet)
1497 *
1498 * Returns: 1 if ok, 0 on error.
1499 */
1500 static HsInt loadObj_ (pathchar *path)
1501 {
1502 ObjectCode* oc;
1503 IF_DEBUG(linker, debugBelch("loadObj: %" PATH_FMT "\n", path));
1504
1505 /* debugBelch("loadObj %s\n", path ); */
1506
1507 /* Check that we haven't already loaded this object.
1508 Ignore requests to load multiple times */
1509
1510 if (isAlreadyLoaded(path)) {
1511 IF_DEBUG(linker,
1512 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1513 return 1; /* success */
1514 }
1515
1516 oc = preloadObjectFile(path);
1517 if (oc == NULL) return 0;
1518
1519 if (! loadOc(oc)) {
1520 // failed; free everything we've allocated
1521 removeOcSymbols(oc);
1522 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
1523 freeObjectCode(oc);
1524 return 0;
1525 }
1526
1527 oc->next = objects;
1528 objects = oc;
1529 return 1;
1530 }
1531
1532 HsInt loadObj (pathchar *path)
1533 {
1534 ACQUIRE_LOCK(&linker_mutex);
1535 HsInt r = loadObj_(path);
1536 RELEASE_LOCK(&linker_mutex);
1537 return r;
1538 }
1539
1540 HsInt loadOc (ObjectCode* oc)
1541 {
1542 int r;
1543
1544 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
1545
1546 /* verify the in-memory image */
1547 # if defined(OBJFORMAT_ELF)
1548 r = ocVerifyImage_ELF ( oc );
1549 # elif defined(OBJFORMAT_PEi386)
1550 r = ocVerifyImage_PEi386 ( oc );
1551 # elif defined(OBJFORMAT_MACHO)
1552 r = ocVerifyImage_MachO ( oc );
1553 # else
1554 barf("loadObj: no verify method");
1555 # endif
1556 if (!r) {
1557 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
1558 return r;
1559 }
1560
1561 /* Note [loadOc orderings]
1562 The order of `ocAllocateExtras` and `ocGetNames` matters. For MachO
1563 and ELF, `ocInit` and `ocGetNames` initialize a bunch of pointers based
1564 on the offset to `oc->image`, but `ocAllocateExtras` may relocate
1565 the address of `oc->image` and invalidate those pointers. So we must
1566 compute or recompute those pointers after `ocAllocateExtras`.
1567
1568 On Windows, when we have an import library we (for now, as we don't honor
1569 the lazy loading semantics of the library and instead GHCi is already
1570 lazy) don't use the library after ocGetNames as it just populates the
1571 symbol table. Allocating space for jump tables in ocAllocateExtras
1572 would just be a waste then as we'll be stopping further processing of the
1573 library in the next few steps. If necessary, the actual allocation
1574 happens in `ocGetNames_PEi386` and `ocAllocateExtras_PEi386` simply
1575 set the correct pointers.
1576 */
1577
1578 #if defined(NEED_SYMBOL_EXTRAS)
1579 # if defined(OBJFORMAT_MACHO)
1580 r = ocAllocateExtras_MachO ( oc );
1581 if (!r) {
1582 IF_DEBUG(linker,
1583 debugBelch("loadOc: ocAllocateExtras_MachO failed\n"));
1584 return r;
1585 }
1586 # elif defined(OBJFORMAT_ELF)
1587 r = ocAllocateExtras_ELF ( oc );
1588 if (!r) {
1589 IF_DEBUG(linker,
1590 debugBelch("loadOc: ocAllocateExtras_ELF failed\n"));
1591 return r;
1592 }
1593 # endif
1594 #endif
1595
1596 /* build the symbol list for this image */
1597 # if defined(OBJFORMAT_ELF)
1598 r = ocGetNames_ELF ( oc );
1599 # elif defined(OBJFORMAT_PEi386)
1600 r = ocGetNames_PEi386 ( oc );
1601 # elif defined(OBJFORMAT_MACHO)
1602 r = ocGetNames_MachO ( oc );
1603 # else
1604 barf("loadObj: no getNames method");
1605 # endif
1606 if (!r) {
1607 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
1608 return r;
1609 }
1610
1611 #if defined(NEED_SYMBOL_EXTRAS)
1612 # if defined(OBJFORMAT_PEi386)
1613 ocAllocateExtras_PEi386 ( oc );
1614 # endif
1615 #endif
1616
1617 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
1618 setOcInitialStatus( oc );
1619 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
1620
1621 return 1;
1622 }
1623
1624 /* -----------------------------------------------------------------------------
1625 * try to load and initialize an ObjectCode into memory
1626 *
1627 * Returns: 1 if ok, 0 on error.
1628 */
1629 int ocTryLoad (ObjectCode* oc) {
1630 int r;
1631
1632 if (oc->status != OBJECT_NEEDED) {
1633 return 1;
1634 }
1635
1636 /* Check for duplicate symbols by looking into `symhash`.
1637 Duplicate symbols are any symbols which exist
1638 in different ObjectCodes that have both been loaded, or
1639 are to be loaded by this call.
1640
1641 This call is intended to have no side-effects when a non-duplicate
1642 symbol is re-inserted. A symbol is only a duplicate if the object file
1643 it is defined in has had it's relocations resolved. A resolved object
1644 file means the symbols inside it are required.
1645
1646 The symbol address is not used to distinguish symbols. Duplicate symbols
1647 are distinguished by name, oc and attributes (weak symbols etc).
1648 */
1649 int x;
1650 Symbol_t symbol;
1651 for (x = 0; x < oc->n_symbols; x++) {
1652 symbol = oc->symbols[x];
1653 if ( symbol.name
1654 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol.name,
1655 symbol.addr,
1656 isSymbolWeak(oc, symbol.name), oc)) {
1657 return 0;
1658 }
1659 }
1660
1661 # if defined(OBJFORMAT_ELF)
1662 r = ocResolve_ELF ( oc );
1663 # elif defined(OBJFORMAT_PEi386)
1664 r = ocResolve_PEi386 ( oc );
1665 # elif defined(OBJFORMAT_MACHO)
1666 r = ocResolve_MachO ( oc );
1667 # else
1668 barf("ocTryLoad: not implemented on this platform");
1669 # endif
1670 if (!r) { return r; }
1671
1672 #if defined(NEED_SYMBOL_EXTRAS)
1673 ocProtectExtras(oc);
1674 #endif
1675
1676 // We have finished loading and relocating; flush the m32 allocators to
1677 // setup page protections.
1678 #if RTS_LINKER_USE_MMAP
1679 m32_allocator_flush(oc->rx_m32);
1680 m32_allocator_flush(oc->rw_m32);
1681 #endif
1682
1683 // run init/init_array/ctors/mod_init_func
1684
1685 IF_DEBUG(linker, debugBelch("ocTryLoad: ocRunInit start\n"));
1686
1687 loading_obj = oc; // tells foreignExportStablePtr what to do
1688 #if defined(OBJFORMAT_ELF)
1689 r = ocRunInit_ELF ( oc );
1690 #elif defined(OBJFORMAT_PEi386)
1691 r = ocRunInit_PEi386 ( oc );
1692 #elif defined(OBJFORMAT_MACHO)
1693 r = ocRunInit_MachO ( oc );
1694 #else
1695 barf("ocTryLoad: initializers not implemented on this platform");
1696 #endif
1697 loading_obj = NULL;
1698
1699 if (!r) { return r; }
1700
1701 oc->status = OBJECT_RESOLVED;
1702
1703 return 1;
1704 }
1705
1706 /* -----------------------------------------------------------------------------
1707 * resolve all the currently unlinked objects in memory
1708 *
1709 * Returns: 1 if ok, 0 on error.
1710 */
1711 static HsInt resolveObjs_ (void)
1712 {
1713 ObjectCode *oc;
1714 int r;
1715
1716 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
1717
1718 for (oc = objects; oc; oc = oc->next) {
1719 r = ocTryLoad(oc);
1720 if (!r)
1721 {
1722 return r;
1723 }
1724 }
1725
1726 #if defined(PROFILING)
1727 // collect any new cost centres & CCSs that were defined during runInit
1728 refreshProfilingCCSs();
1729 #endif
1730
1731 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
1732 return 1;
1733 }
1734
1735 HsInt resolveObjs (void)
1736 {
1737 ACQUIRE_LOCK(&linker_mutex);
1738 HsInt r = resolveObjs_();
1739 RELEASE_LOCK(&linker_mutex);
1740 return r;
1741 }
1742
1743 /* -----------------------------------------------------------------------------
1744 * delete an object from the pool
1745 */
1746 static HsInt unloadObj_ (pathchar *path, bool just_purge)
1747 {
1748 ObjectCode *oc, *prev, *next;
1749 HsBool unloadedAnyObj = HS_BOOL_FALSE;
1750
1751 ASSERT(symhash != NULL);
1752 ASSERT(objects != NULL);
1753
1754 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
1755
1756 prev = NULL;
1757 for (oc = objects; oc; oc = next) {
1758 next = oc->next; // oc might be freed
1759
1760 if (!pathcmp(oc->fileName,path)) {
1761
1762 // these are both idempotent, so in just_purge mode we can
1763 // later call unloadObj() to really unload the object.
1764 removeOcSymbols(oc);
1765 freeOcStablePtrs(oc);
1766
1767 if (!just_purge) {
1768 if (prev == NULL) {
1769 objects = oc->next;
1770 } else {
1771 prev->next = oc->next;
1772 }
1773 ACQUIRE_LOCK(&linker_unloaded_mutex);
1774 oc->next = unloaded_objects;
1775 unloaded_objects = oc;
1776 oc->status = OBJECT_UNLOADED;
1777 RELEASE_LOCK(&linker_unloaded_mutex);
1778 // We do not own oc any more; it can be released at any time by
1779 // the GC in checkUnload().
1780 } else {
1781 prev = oc;
1782 }
1783
1784 /* This could be a member of an archive so continue
1785 * unloading other members. */
1786 unloadedAnyObj = HS_BOOL_TRUE;
1787 } else {
1788 prev = oc;
1789 }
1790 }
1791
1792 if (unloadedAnyObj) {
1793 return 1;
1794 }
1795 else {
1796 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
1797 return 0;
1798 }
1799 }
1800
1801 HsInt unloadObj (pathchar *path)
1802 {
1803 ACQUIRE_LOCK(&linker_mutex);
1804 HsInt r = unloadObj_(path, false);
1805 RELEASE_LOCK(&linker_mutex);
1806 return r;
1807 }
1808
1809 HsInt purgeObj (pathchar *path)
1810 {
1811 ACQUIRE_LOCK(&linker_mutex);
1812 HsInt r = unloadObj_(path, true);
1813 RELEASE_LOCK(&linker_mutex);
1814 return r;
1815 }
1816
1817 static OStatus getObjectLoadStatus_ (pathchar *path)
1818 {
1819 ObjectCode *o;
1820 for (o = objects; o; o = o->next) {
1821 if (0 == pathcmp(o->fileName, path)) {
1822 return o->status;
1823 }
1824 }
1825 for (o = unloaded_objects; o; o = o->next) {
1826 if (0 == pathcmp(o->fileName, path)) {
1827 return o->status;
1828 }
1829 }
1830 return OBJECT_NOT_LOADED;
1831 }
1832
1833 OStatus getObjectLoadStatus (pathchar *path)
1834 {
1835 ACQUIRE_LOCK(&linker_mutex);
1836 OStatus r = getObjectLoadStatus_(path);
1837 RELEASE_LOCK(&linker_mutex);
1838 return r;
1839 }
1840
1841 /* -----------------------------------------------------------------------------
1842 * Sanity checking. For each ObjectCode, maintain a list of address ranges
1843 * which may be prodded during relocation, and abort if we try and write
1844 * outside any of these.
1845 */
1846 void
1847 addProddableBlock ( ObjectCode* oc, void* start, int size )
1848 {
1849 ProddableBlock* pb
1850 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
1851
1852 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
1853 ASSERT(size > 0);
1854 pb->start = start;
1855 pb->size = size;
1856 pb->next = oc->proddables;
1857 oc->proddables = pb;
1858 }
1859
1860 void
1861 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
1862 {
1863 ProddableBlock* pb;
1864
1865 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
1866 char* s = (char*)(pb->start);
1867 char* e = s + pb->size;
1868 char* a = (char*)addr;
1869 if (a >= s && (a+size) <= e) return;
1870 }
1871 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
1872 }
1873
1874 void freeProddableBlocks (ObjectCode *oc)
1875 {
1876 ProddableBlock *pb, *next;
1877
1878 for (pb = oc->proddables; pb != NULL; pb = next) {
1879 next = pb->next;
1880 stgFree(pb);
1881 }
1882 oc->proddables = NULL;
1883 }
1884
1885 /* -----------------------------------------------------------------------------
1886 * Section management.
1887 */
1888 void
1889 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
1890 void* start, StgWord size,
1891 StgWord mapped_offset, void* mapped_start, StgWord mapped_size)
1892 {
1893 s->start = start; /* actual start of section in memory */
1894 s->size = size; /* actual size of section in memory */
1895 s->kind = kind;
1896 s->alloc = alloc;
1897 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
1898
1899 s->mapped_start = mapped_start; /* start of mmap() block */
1900 s->mapped_size = mapped_size; /* size of mmap() block */
1901
1902 if (!s->info)
1903 s->info
1904 = (struct SectionFormatInfo*)stgCallocBytes(1, sizeof *s->info,
1905 "addSection(SectionFormatInfo)");
1906
1907 IF_DEBUG(linker,
1908 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
1909 start, (void*)((StgWord)start + size),
1910 size, kind ));
1911 }
1912
1913 /* -----------------------------------------------------------------------------
1914 * Segment management
1915 */
1916 void
1917 initSegment (Segment *s, void *start, size_t size, SegmentProt prot, int n_sections)
1918 {
1919 s->start = start;
1920 s->size = size;
1921 s->prot = prot;
1922 s->sections_idx = (int *)stgCallocBytes(n_sections, sizeof(int),
1923 "initSegment(segment)");
1924 s->n_sections = n_sections;
1925 }
1926
1927 void freeSegments (ObjectCode *oc)
1928 {
1929 if (oc->segments != NULL) {
1930 IF_DEBUG(linker, debugBelch("freeSegments: freeing %d segments\n", oc->n_segments));
1931
1932 for (int i = 0; i < oc->n_segments; i++) {
1933 Segment *s = &oc->segments[i];
1934
1935 IF_DEBUG(linker, debugBelch("freeSegments: freeing segment %d at %p size %zu\n",
1936 i, s->start, s->size));
1937
1938 stgFree(s->sections_idx);
1939 s->sections_idx = NULL;
1940
1941 if (0 == s->size) {
1942 IF_DEBUG(linker, debugBelch("freeSegment: skipping segment of 0 size\n"));
1943 continue;
1944 } else {
1945 #if RTS_LINKER_USE_MMAP
1946 CHECKM(0 == munmap(s->start, s->size), "freeSegments: failed to unmap memory");
1947 #else
1948 stgFree(s->start);
1949 #endif
1950 }
1951 s->start = NULL;
1952 }
1953
1954 stgFree(oc->segments);
1955 oc->segments = NULL;
1956 }
1957 }