Enable two-step allocator on FreeBSD
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "linker/CacheFlush.h"
32 #include "linker/SymbolExtras.h"
33 #include "PathUtils.h"
34
35 #if !defined(mingw32_HOST_OS)
36 #include "posix/Signals.h"
37 #endif
38
39 // get protos for is*()
40 #include <ctype.h>
41
42 #if defined(HAVE_SYS_TYPES_H)
43 #include <sys/types.h>
44 #endif
45
46 #include <inttypes.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <assert.h>
51 #include <fs_rts.h>
52
53 #if defined(HAVE_SYS_STAT_H)
54 #include <sys/stat.h>
55 #endif
56
57 #if defined(HAVE_DLFCN_H)
58 #include <dlfcn.h>
59 #endif
60
61 #if defined(OBJFORMAT_ELF)
62 # include "linker/Elf.h"
63 # include <regex.h> // regex is already used by dlopen() so this is OK
64 // to use here without requiring an additional lib
65 #elif defined(OBJFORMAT_PEi386)
66 # include "linker/PEi386.h"
67 # include <windows.h>
68 #elif defined(OBJFORMAT_MACHO)
69 # include "linker/MachO.h"
70 # include <regex.h>
71 # include <mach/machine.h>
72 # include <mach-o/fat.h>
73 #endif
74
75 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
76 #define ALWAYS_PIC
77 #endif
78
79 #if defined(dragonfly_HOST_OS)
80 #include <sys/tls.h>
81 #endif
82 /*
83 Note [runtime-linker-support]
84 -----------------------------
85 When adding support for a new platform to the runtime linker please
86 update `$TOP/configure.ac` under heading `Does target have runtime
87 linker support?`.
88 */
89 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
90 This hashtable will contain information on all symbols
91 that we know of, however the .o they are in may not be loaded.
92
93 Until the ObjectCode the symbol belongs to is actually
94 loaded this symbol may be replaced. So do not rely on
95 addresses of unloaded symbols.
96
97 Note [runtime-linker-phases]
98 --------------------------------------
99 Broadly the behavior of the runtime linker can be
100 split into the following four phases:
101
102 - Indexing (e.g. ocVerifyImage and ocGetNames)
103 - Initialization (e.g. ocResolve and ocRunInit)
104 - Resolve (e.g. resolveObjs())
105 - Lookup (e.g. lookupSymbol)
106
107 This is to enable lazy loading of symbols. Eager loading is problematic
108 as it means that all symbols must be available, even those which we will
109 never use. This is especially painful on Windows, where the number of
110 libraries required to link things like mingwex grows to be quite high.
111
112 We proceed through these stages as follows,
113
114 * During Indexing we verify and open the ObjectCode and
115 perform a quick scan/indexing of the ObjectCode. All the work
116 required to actually load the ObjectCode is done.
117
118 All symbols from the ObjectCode are also inserted into
119 `symhash`, where possible duplicates are handled via the semantics
120 described in `ghciInsertSymbolTable`.
121
122 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
123 depending on whether they are an archive member or not.
124
125 * During initialization we load ObjectCode, perform relocations, execute
126 static constructors etc. This phase may trigger other ObjectCodes to
127 be loaded because of the calls to lookupSymbol.
128
129 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
130 previous status was `OBJECT_LOADED`.
131
132 * During resolve we attempt to resolve all the symbols needed for the
133 initial link. This essentially means, that for any ObjectCode given
134 directly to the command-line we perform lookupSymbols on the required
135 symbols. lookupSymbols may trigger the loading of additional ObjectCode
136 if required.
137
138 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
139 the previous status was `OBJECT_NEEDED`.
140
141 * lookupSymbols is used to lookup any symbols required, both during initial
142 link and during statement and expression compilations in the REPL.
143 Declaration of e.g. a foreign import, will eventually call lookupSymbol
144 which will either fail (symbol unknown) or succeed (and possibly trigger a
145 load).
146
147 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
148
149 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
150 by calling unloadObj and starting over.
151 When a new declaration or statement is performed ultimately lookupSymbol is called
152 without doing a re-link.
153
154 The goal of these different phases is to allow the linker to be able to perform
155 "lazy loading" of ObjectCode. The reason for this is that we want to only link
156 in symbols that are actually required for the link. This reduces:
157
158 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
159 then we don't need to load libB. This means the dependency chain for libraries
160 such as mingw32 and mingwex can be broken down.
161
162 2) The number of duplicate symbols, since now only symbols that are
163 true duplicates will display the error.
164 */
165 /*Str*/HashTable *symhash;
166
167 /* List of currently loaded objects */
168 ObjectCode *objects = NULL; /* initially empty */
169
170 /* List of objects that have been unloaded via unloadObj(), but are waiting
171 to be actually freed via checkUnload() */
172 ObjectCode *unloaded_objects = NULL; /* initially empty */
173
174 #if defined(THREADED_RTS)
175 /* This protects all the Linker's global state except unloaded_objects */
176 Mutex linker_mutex;
177 /*
178 * This protects unloaded_objects. We have a separate mutex for this, because
179 * the GC needs to access unloaded_objects in checkUnload, while the linker only
180 * needs to access unloaded_objects in unloadObj(), so this allows most linker
181 * operations proceed concurrently with the GC.
182 */
183 Mutex linker_unloaded_mutex;
184 #endif
185
186 /* Generic wrapper function to try and Resolve and RunInit oc files */
187 int ocTryLoad( ObjectCode* oc );
188
189 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
190 * small memory model on this architecture (see gcc docs,
191 * -mcmodel=small).
192 *
193 * MAP_32BIT not available on OpenBSD/amd64
194 */
195 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
196 #define TRY_MAP_32BIT MAP_32BIT
197 #else
198 #define TRY_MAP_32BIT 0
199 #endif
200
201 /*
202 * Due to the small memory model (see above), on x86_64 we have to map
203 * all our non-PIC object files into the low 2Gb of the address space
204 * (why 2Gb and not 4Gb? Because all addresses must be reachable
205 * using a 32-bit signed PC-relative offset). On Linux we can do this
206 * using the MAP_32BIT flag to mmap(), however on other OSs
207 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
208 * can't do this. So on these systems, we have to pick a base address
209 * in the low 2Gb of the address space and try to allocate memory from
210 * there.
211 *
212 * We pick a default address based on the OS, but also make this
213 * configurable via an RTS flag (+RTS -xm)
214 */
215 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
216
217 #if defined(MAP_32BIT)
218 // Try to use MAP_32BIT
219 #define MMAP_32BIT_BASE_DEFAULT 0
220 #else
221 // A guess: 1Gb.
222 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
223 #endif
224
225 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
226 #endif
227
228 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
229 ObjectCode *owner)
230 {
231 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
232 if (!pinfo || owner != pinfo->owner) return;
233 removeStrHashTable(table, key, NULL);
234 if (isSymbolImport (owner, key))
235 stgFree(pinfo->value);
236
237 stgFree(pinfo);
238 }
239
240 /* -----------------------------------------------------------------------------
241 * Insert symbols into hash tables, checking for duplicates.
242 *
243 * Returns: 0 on failure, nonzero on success
244 */
245 /*
246 Note [weak-symbols-support]
247 -------------------------------------
248 While ghciInsertSymbolTable does implement extensive
249 logic for weak symbol support, weak symbols are not currently
250 fully supported by the RTS. This code is mostly here for COMDAT
251 support which uses the weak symbols support.
252
253 Linking weak symbols defined purely in C code with other C code
254 should also work, probably. Observing weak symbols in Haskell
255 won't.
256
257 Some test have been written for weak symbols but have been disabled
258 mostly because it's unsure how the weak symbols support should look.
259 See Trac #11223
260 */
261 int ghciInsertSymbolTable(
262 pathchar* obj_name,
263 HashTable *table,
264 const SymbolName* key,
265 SymbolAddr* data,
266 HsBool weak,
267 ObjectCode *owner)
268 {
269 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
270 if (!pinfo) /* new entry */
271 {
272 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
273 pinfo->value = data;
274 pinfo->owner = owner;
275 pinfo->weak = weak;
276 insertStrHashTable(table, key, pinfo);
277 return 1;
278 }
279 else if (weak && data && pinfo->weak && !pinfo->value)
280 {
281 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
282 pinfo->value = data;
283 pinfo->owner = owner;
284 return 1;
285 }
286 else if (weak)
287 {
288 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
289 already know of another copy throw this one away.
290
291 or both weak symbols have a nonzero value. Keep the existing one.
292
293 This also preserves the semantics of linking against
294 the first symbol we find. */
295 }
296 else if (pinfo->weak && !weak) /* weak symbol is in the table */
297 {
298 /* override the weak definition with the non-weak one */
299 pinfo->value = data;
300 pinfo->owner = owner;
301 pinfo->weak = HS_BOOL_FALSE;
302 return 1;
303 }
304 else if ( pinfo->owner
305 && pinfo->owner->status != OBJECT_RESOLVED
306 && pinfo->owner->status != OBJECT_NEEDED)
307 {
308 /* If the other symbol hasn't been loaded or will be loaded and we want to
309 explicitly load the new one, we can just swap it out and load the one
310 that has been requested. If not, just keep the first one encountered.
311
312 Because the `symHash' table consists symbols we've also not loaded but
313 found during the initial scan this is safe to do. If however the existing
314 symbol has been loaded then it means we have a duplicate.
315
316 This is essentially emulating the behavior of a linker wherein it will always
317 link in object files that are .o file arguments, but only take object files
318 from archives as needed. */
319 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
320 pinfo->value = data;
321 pinfo->owner = owner;
322 pinfo->weak = weak;
323 }
324
325 return 1;
326 }
327 else if (pinfo->owner == owner)
328 {
329 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
330 return 1;
331 }
332 else if (owner && owner->status == OBJECT_LOADED)
333 {
334 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
335 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
336 call this function again to trigger the duplicate error. */
337 return 1;
338 }
339
340 pathchar* archiveName = NULL;
341 debugBelch(
342 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
343 " %s\n"
344 "whilst processing object file\n"
345 " %" PATH_FMT "\n"
346 "The symbol was previously defined in\n"
347 " %" PATH_FMT "\n"
348 "This could be caused by:\n"
349 " * Loading two different object files which export the same symbol\n"
350 " * Specifying the same object file twice on the GHCi command line\n"
351 " * An incorrect `package.conf' entry, causing some object to be\n"
352 " loaded twice.\n",
353 (char*)key,
354 obj_name,
355 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
356 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
357 : pinfo->owner->fileName
358 );
359
360 if (archiveName)
361 {
362 stgFree(archiveName);
363 archiveName = NULL;
364 }
365 return 0;
366 }
367
368 /* -----------------------------------------------------------------------------
369 * Looks up symbols into hash tables.
370 *
371 * Returns: 0 on failure and result is not set,
372 * nonzero on success and result set to nonzero pointer
373 */
374 HsBool ghciLookupSymbolInfo(HashTable *table,
375 const SymbolName* key, RtsSymbolInfo **result)
376 {
377 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
378 if (!pinfo) {
379 *result = NULL;
380 return HS_BOOL_FALSE;
381 }
382 if (pinfo->weak)
383 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
384 /* Once it's looked up, it can no longer be overridden */
385 pinfo->weak = HS_BOOL_FALSE;
386
387 *result = pinfo;
388 return HS_BOOL_TRUE;
389 }
390
391 /* -----------------------------------------------------------------------------
392 * initialize the object linker
393 */
394
395
396 static int linker_init_done = 0 ;
397
398 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
399 static void *dl_prog_handle;
400 static regex_t re_invalid;
401 static regex_t re_realso;
402 #if defined(THREADED_RTS)
403 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
404 #endif
405 #endif
406
407 void initLinker (void)
408 {
409 // default to retaining CAFs for backwards compatibility. Most
410 // users will want initLinker_(0): otherwise unloadObj() will not
411 // be able to unload object files when they contain CAFs.
412 initLinker_(1);
413 }
414
415 void
416 initLinker_ (int retain_cafs)
417 {
418 RtsSymbolVal *sym;
419 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
420 int compileResult;
421 #endif
422
423 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
424
425 /* Make initLinker idempotent, so we can call it
426 before every relevant operation; that means we
427 don't need to initialise the linker separately */
428 if (linker_init_done == 1) {
429 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
430 return;
431 } else {
432 linker_init_done = 1;
433 }
434
435 objects = NULL;
436 unloaded_objects = NULL;
437
438 #if defined(THREADED_RTS)
439 initMutex(&linker_mutex);
440 initMutex(&linker_unloaded_mutex);
441 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
442 initMutex(&dl_mutex);
443 #endif
444 #endif
445
446 symhash = allocStrHashTable();
447
448 /* populate the symbol table with stuff from the RTS */
449 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
450 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
451 symhash, sym->lbl, sym->addr,
452 sym->weak, NULL)) {
453 barf("ghciInsertSymbolTable failed");
454 }
455 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
456 }
457 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
458 machoInitSymbolsWithoutUnderscore();
459 # endif
460 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
461 referenced from a statically linked module. We need to mimic this, but
462 we cannot use NULL because we use it to mean nonexistent symbols. So we
463 use an arbitrary (hopefully unique) address here.
464 */
465 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
466 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
467 barf("ghciInsertSymbolTable failed");
468 }
469
470 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
471 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
472 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
473 retain_cafs ? newRetainedCAF : newGCdCAF,
474 HS_BOOL_FALSE, NULL)) {
475 barf("ghciInsertSymbolTable failed");
476 }
477
478 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
479 # if defined(RTLD_DEFAULT)
480 dl_prog_handle = RTLD_DEFAULT;
481 # else
482 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
483 # endif /* RTLD_DEFAULT */
484
485 compileResult = regcomp(&re_invalid,
486 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short|invalid file format)",
487 REG_EXTENDED);
488 if (compileResult != 0) {
489 barf("Compiling re_invalid failed");
490 }
491 compileResult = regcomp(&re_realso,
492 "(GROUP|INPUT) *\\( *([^ )]+)",
493 REG_EXTENDED);
494 if (compileResult != 0) {
495 barf("Compiling re_realso failed");
496 }
497 # endif
498
499 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
500 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
501 // User-override for mmap_32bit_base
502 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
503 }
504 #endif
505
506 if (RTS_LINKER_USE_MMAP)
507 m32_allocator_init();
508
509 #if defined(OBJFORMAT_PEi386)
510 initLinker_PEi386();
511 #endif
512
513 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
514 return;
515 }
516
517 void
518 exitLinker( void ) {
519 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
520 if (linker_init_done == 1) {
521 regfree(&re_invalid);
522 regfree(&re_realso);
523 #if defined(THREADED_RTS)
524 closeMutex(&dl_mutex);
525 #endif
526 }
527 #endif
528 if (linker_init_done == 1) {
529 freeHashTable(symhash, free);
530 }
531 #if defined(THREADED_RTS)
532 closeMutex(&linker_mutex);
533 #endif
534 }
535
536 /* -----------------------------------------------------------------------------
537 * Loading DLL or .so dynamic libraries
538 * -----------------------------------------------------------------------------
539 *
540 * Add a DLL from which symbols may be found. In the ELF case, just
541 * do RTLD_GLOBAL-style add, so no further messing around needs to
542 * happen in order that symbols in the loaded .so are findable --
543 * lookupSymbol() will subsequently see them by dlsym on the program's
544 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
545 *
546 * In the PEi386 case, open the DLLs and put handles to them in a
547 * linked list. When looking for a symbol, try all handles in the
548 * list. This means that we need to load even DLLs that are guaranteed
549 * to be in the ghc.exe image already, just so we can get a handle
550 * to give to loadSymbol, so that we can find the symbols. For such
551 * libraries, the LoadLibrary call should be a no-op except for returning
552 * the handle.
553 *
554 */
555
556 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
557
558 /* Suppose in ghci we load a temporary SO for a module containing
559 f = 1
560 and then modify the module, recompile, and load another temporary
561 SO with
562 f = 2
563 Then as we don't unload the first SO, dlsym will find the
564 f = 1
565 symbol whereas we want the
566 f = 2
567 symbol. We therefore need to keep our own SO handle list, and
568 try SOs in the right order. */
569
570 typedef
571 struct _OpenedSO {
572 struct _OpenedSO* next;
573 void *handle;
574 }
575 OpenedSO;
576
577 /* A list thereof. */
578 static OpenedSO* openedSOs = NULL;
579
580 static const char *
581 internal_dlopen(const char *dll_name)
582 {
583 OpenedSO* o_so;
584 void *hdl;
585 const char *errmsg;
586 char *errmsg_copy;
587
588 // omitted: RTLD_NOW
589 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
590 IF_DEBUG(linker,
591 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
592
593 //-------------- Begin critical section ------------------
594 // This critical section is necessary because dlerror() is not
595 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
596 // Also, the error message returned must be copied to preserve it
597 // (see POSIX also)
598
599 ACQUIRE_LOCK(&dl_mutex);
600 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
601
602 errmsg = NULL;
603 if (hdl == NULL) {
604 /* dlopen failed; return a ptr to the error msg. */
605 errmsg = dlerror();
606 if (errmsg == NULL) errmsg = "addDLL: unknown error";
607 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
608 strcpy(errmsg_copy, errmsg);
609 errmsg = errmsg_copy;
610 } else {
611 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
612 o_so->handle = hdl;
613 o_so->next = openedSOs;
614 openedSOs = o_so;
615 }
616
617 RELEASE_LOCK(&dl_mutex);
618 //--------------- End critical section -------------------
619
620 return errmsg;
621 }
622
623 /*
624 Note [RTLD_LOCAL]
625
626 In GHCi we want to be able to override previous .so's with newly
627 loaded .so's when we recompile something. This further implies that
628 when we look up a symbol in internal_dlsym() we have to iterate
629 through the loaded libraries (in order from most recently loaded to
630 oldest) looking up the symbol in each one until we find it.
631
632 However, this can cause problems for some symbols that are copied
633 by the linker into the executable image at runtime - see #8935 for a
634 lengthy discussion. To solve that problem we need to look up
635 symbols in the main executable *first*, before attempting to look
636 them up in the loaded .so's. But in order to make that work, we
637 have to always call dlopen with RTLD_LOCAL, so that the loaded
638 libraries don't populate the global symbol table.
639 */
640
641 static void *
642 internal_dlsym(const char *symbol) {
643 OpenedSO* o_so;
644 void *v;
645
646 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
647 ACQUIRE_LOCK(&dl_mutex);
648 dlerror();
649 // look in program first
650 v = dlsym(dl_prog_handle, symbol);
651 if (dlerror() == NULL) {
652 RELEASE_LOCK(&dl_mutex);
653 return v;
654 }
655
656 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
657 v = dlsym(o_so->handle, symbol);
658 if (dlerror() == NULL) {
659 RELEASE_LOCK(&dl_mutex);
660 return v;
661 }
662 }
663 RELEASE_LOCK(&dl_mutex);
664 return v;
665 }
666 # endif
667
668 const char *
669 addDLL( pathchar *dll_name )
670 {
671 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
672 /* ------------------- ELF DLL loader ------------------- */
673
674 #define NMATCH 5
675 regmatch_t match[NMATCH];
676 const char *errmsg;
677 FILE* fp;
678 size_t match_length;
679 #define MAXLINE 1000
680 char line[MAXLINE];
681 int result;
682
683 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
684 errmsg = internal_dlopen(dll_name);
685
686 if (errmsg == NULL) {
687 return NULL;
688 }
689
690 // GHC Trac ticket #2615
691 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
692 // contain linker scripts rather than ELF-format object code. This
693 // code handles the situation by recognizing the real object code
694 // file name given in the linker script.
695 //
696 // If an "invalid ELF header" error occurs, it is assumed that the
697 // .so file contains a linker script instead of ELF object code.
698 // In this case, the code looks for the GROUP ( ... ) linker
699 // directive. If one is found, the first file name inside the
700 // parentheses is treated as the name of a dynamic library and the
701 // code attempts to dlopen that file. If this is also unsuccessful,
702 // an error message is returned.
703
704 // see if the error message is due to an invalid ELF header
705 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
706 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
707 IF_DEBUG(linker, debugBelch("result = %i\n", result));
708 if (result == 0) {
709 // success -- try to read the named file as a linker script
710 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
711 MAXLINE-1);
712 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
713 line[match_length] = '\0'; // make sure string is null-terminated
714 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
715 if ((fp = __rts_fopen(line, "r")) == NULL) {
716 return errmsg; // return original error if open fails
717 }
718 // try to find a GROUP or INPUT ( ... ) command
719 while (fgets(line, MAXLINE, fp) != NULL) {
720 IF_DEBUG(linker, debugBelch("input line = %s", line));
721 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
722 // success -- try to dlopen the first named file
723 IF_DEBUG(linker, debugBelch("match%s\n",""));
724 line[match[2].rm_eo] = '\0';
725 stgFree((void*)errmsg); // Free old message before creating new one
726 errmsg = internal_dlopen(line+match[2].rm_so);
727 break;
728 }
729 // if control reaches here, no GROUP or INPUT ( ... ) directive
730 // was found and the original error message is returned to the
731 // caller
732 }
733 fclose(fp);
734 }
735 return errmsg;
736
737 # elif defined(OBJFORMAT_PEi386)
738 return addDLL_PEi386(dll_name, NULL);
739
740 # else
741 barf("addDLL: not implemented on this platform");
742 # endif
743 }
744
745 /* -----------------------------------------------------------------------------
746 * Searches the system directories to determine if there is a system DLL that
747 * satisfies the given name. This prevent GHCi from linking against a static
748 * library if a DLL is available.
749 *
750 * Returns: NULL on failure or no DLL found, else the full path to the DLL
751 * that can be loaded.
752 */
753 pathchar* findSystemLibrary(pathchar* dll_name)
754 {
755 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%"
756 PATH_FMT "'\n", dll_name));
757
758 #if defined(OBJFORMAT_PEi386)
759 return findSystemLibrary_PEi386(dll_name);
760 #else
761 (void)(dll_name); // Function not implemented for other platforms.
762 return NULL;
763 #endif
764 }
765
766 /* -----------------------------------------------------------------------------
767 * Emits a warning determining that the system is missing a required security
768 * update that we need to get access to the proper APIs
769 */
770 void warnMissingKBLibraryPaths( void )
771 {
772 static HsBool missing_update_warn = HS_BOOL_FALSE;
773 if (!missing_update_warn) {
774 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
775 missing_update_warn = HS_BOOL_TRUE;
776 }
777 }
778
779 /* -----------------------------------------------------------------------------
780 * appends a directory to the process DLL Load path so LoadLibrary can find it
781 *
782 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
783 * restore the search path to what it was before this call.
784 */
785 HsPtr addLibrarySearchPath(pathchar* dll_path)
786 {
787 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%"
788 PATH_FMT "'\n", dll_path));
789
790 #if defined(OBJFORMAT_PEi386)
791 return addLibrarySearchPath_PEi386(dll_path);
792 #else
793 (void)(dll_path); // Function not implemented for other platforms.
794 return NULL;
795 #endif
796 }
797
798 /* -----------------------------------------------------------------------------
799 * removes a directory from the process DLL Load path
800 *
801 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
802 */
803 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
804 {
805 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n",
806 dll_path_index));
807
808 #if defined(OBJFORMAT_PEi386)
809 return removeLibrarySearchPath_PEi386(dll_path_index);
810 #else
811 (void)(dll_path_index); // Function not implemented for other platforms.
812 return HS_BOOL_FALSE;
813 #endif
814 }
815
816 /* -----------------------------------------------------------------------------
817 * insert a symbol in the hash table
818 *
819 * Returns: 0 on failure, nozero on success
820 */
821 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
822 {
823 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE,
824 NULL);
825 }
826
827 /* -----------------------------------------------------------------------------
828 * lookup a symbol in the hash table
829 */
830 #if defined(OBJFORMAT_PEi386)
831 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
832 {
833 return lookupSymbol_PEi386(lbl);
834 }
835
836 #else
837
838 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
839 {
840 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
841
842 ASSERT(symhash != NULL);
843 RtsSymbolInfo *pinfo;
844
845 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
846 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
847
848 # if defined(OBJFORMAT_ELF)
849 return internal_dlsym(lbl);
850 # elif defined(OBJFORMAT_MACHO)
851
852 /* HACK: On OS X, all symbols are prefixed with an underscore.
853 However, dlsym wants us to omit the leading underscore from the
854 symbol name -- the dlsym routine puts it back on before
855 searching for the symbol. For now, we simply strip it off here
856 (and ONLY here).
857 */
858 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n",
859 lbl));
860 ASSERT(lbl[0] == '_');
861 return internal_dlsym(lbl + 1);
862
863 # else
864 ASSERT(2+2 == 5);
865 return NULL;
866 # endif
867 } else {
868 return loadSymbol(lbl, pinfo);
869 }
870 }
871 #endif /* OBJFORMAT_PEi386 */
872
873 /*
874 * Load and relocate the object code for a symbol as necessary.
875 * Symbol name only used for diagnostics output.
876 */
877 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
878 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl,
879 pinfo->value));
880 ObjectCode* oc = pinfo->owner;
881
882 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
883 See Note [runtime-linker-phases] */
884 if (oc && lbl && oc->status == OBJECT_LOADED) {
885 oc->status = OBJECT_NEEDED;
886 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand "
887 "loading symbol '%s'\n", lbl));
888 int r = ocTryLoad(oc);
889 if (!r) {
890 return NULL;
891 }
892
893 #if defined(PROFILING)
894 // collect any new cost centres & CCSs
895 // that were defined during runInit
896 initProfiling2();
897 #endif
898 }
899
900 return pinfo->value;
901 }
902
903 SymbolAddr* lookupSymbol( SymbolName* lbl )
904 {
905 ACQUIRE_LOCK(&linker_mutex);
906 SymbolAddr* r = lookupSymbol_(lbl);
907 if (!r) {
908 errorBelch("^^ Could not load '%s', dependency unresolved. "
909 "See top entry above.\n", lbl);
910 fflush(stderr);
911 }
912 RELEASE_LOCK(&linker_mutex);
913 return r;
914 }
915
916 /* -----------------------------------------------------------------------------
917 Create a StablePtr for a foreign export. This is normally called by
918 a C function with __attribute__((constructor)), which is generated
919 by GHC and linked into the module.
920
921 If the object code is being loaded dynamically, then we remember
922 which StablePtrs were allocated by the constructors and free them
923 again in unloadObj().
924 -------------------------------------------------------------------------- */
925
926 static ObjectCode *loading_obj = NULL;
927
928 StgStablePtr foreignExportStablePtr (StgPtr p)
929 {
930 ForeignExportStablePtr *fe_sptr;
931 StgStablePtr *sptr;
932
933 sptr = getStablePtr(p);
934
935 if (loading_obj != NULL) {
936 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
937 "foreignExportStablePtr");
938 fe_sptr->stable_ptr = sptr;
939 fe_sptr->next = loading_obj->stable_ptrs;
940 loading_obj->stable_ptrs = fe_sptr;
941 }
942
943 return sptr;
944 }
945
946
947 /* -----------------------------------------------------------------------------
948 * Debugging aid: look in GHCi's object symbol tables for symbols
949 * within DELTA bytes of the specified address, and show their names.
950 */
951 #if defined(DEBUG)
952 void ghci_enquire ( SymbolAddr* addr );
953
954 void ghci_enquire(SymbolAddr* addr)
955 {
956 int i;
957 SymbolName* sym;
958 RtsSymbolInfo* a;
959 const int DELTA = 64;
960 ObjectCode* oc;
961
962 for (oc = objects; oc; oc = oc->next) {
963 for (i = 0; i < oc->n_symbols; i++) {
964 sym = oc->symbols[i];
965 if (sym == NULL) continue;
966 a = NULL;
967 if (a == NULL) {
968 ghciLookupSymbolInfo(symhash, sym, &a);
969 }
970 if (a == NULL) {
971 // debugBelch("ghci_enquire: can't find %s\n", sym);
972 }
973 else if ( a->value
974 && (char*)addr-DELTA <= (char*)a->value
975 && (char*)a->value <= (char*)addr+DELTA) {
976 debugBelch("%p + %3d == `%s'\n", addr,
977 (int)((char*)a->value - (char*)addr), sym);
978 }
979 }
980 }
981 }
982 #endif
983
984 pathchar*
985 resolveSymbolAddr (pathchar* buffer, int size,
986 SymbolAddr* symbol, uintptr_t* top)
987 {
988 #if defined(OBJFORMAT_PEi386)
989 return resolveSymbolAddr_PEi386 (buffer, size, symbol, top);
990 #else /* OBJFORMAT_PEi386 */
991 (void)buffer;
992 (void)size;
993 (void)symbol;
994 (void)top;
995 return NULL;
996 #endif /* OBJFORMAT_PEi386 */
997 }
998
999 #if RTS_LINKER_USE_MMAP
1000 //
1001 // Returns NULL on failure.
1002 //
1003 void *
1004 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1005 {
1006 void *map_addr = NULL;
1007 void *result;
1008 size_t size;
1009 static uint32_t fixed = 0;
1010
1011 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1012 size = roundUpToPage(bytes);
1013
1014 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1015 mmap_again:
1016
1017 if (mmap_32bit_base != 0) {
1018 map_addr = mmap_32bit_base;
1019 }
1020 #endif
1021
1022 IF_DEBUG(linker,
1023 debugBelch("mmapForLinker: \tprotection %#0x\n",
1024 PROT_EXEC | PROT_READ | PROT_WRITE));
1025 IF_DEBUG(linker,
1026 debugBelch("mmapForLinker: \tflags %#0x\n",
1027 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1028
1029 result = mmap(map_addr, size,
1030 PROT_EXEC|PROT_READ|PROT_WRITE,
1031 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1032
1033 if (result == MAP_FAILED) {
1034 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1035 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1036 return NULL;
1037 }
1038
1039 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1040 if (mmap_32bit_base != 0) {
1041 if (result == map_addr) {
1042 mmap_32bit_base = (StgWord8*)map_addr + size;
1043 } else {
1044 if ((W_)result > 0x80000000) {
1045 // oops, we were given memory over 2Gb
1046 munmap(result,size);
1047 #if defined(freebsd_HOST_OS) || \
1048 defined(kfreebsdgnu_HOST_OS) || \
1049 defined(dragonfly_HOST_OS)
1050 // Some platforms require MAP_FIXED. This is normally
1051 // a bad idea, because MAP_FIXED will overwrite
1052 // existing mappings.
1053 fixed = MAP_FIXED;
1054 goto mmap_again;
1055 #else
1056 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1057 "asked for %lu bytes at %p. "
1058 "Try specifying an address with +RTS -xm<addr> -RTS",
1059 size, map_addr);
1060 return NULL;
1061 #endif
1062 } else {
1063 // hmm, we were given memory somewhere else, but it's
1064 // still under 2Gb so we can use it. Next time, ask
1065 // for memory right after the place we just got some
1066 mmap_32bit_base = (StgWord8*)result + size;
1067 }
1068 }
1069 } else {
1070 if ((W_)result > 0x80000000) {
1071 // oops, we were given memory over 2Gb
1072 // ... try allocating memory somewhere else?;
1073 debugTrace(DEBUG_linker,
1074 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1075 bytes, result);
1076 munmap(result, size);
1077
1078 // Set a base address and try again... (guess: 1Gb)
1079 mmap_32bit_base = (void*)0x40000000;
1080 goto mmap_again;
1081 }
1082 }
1083 #endif
1084
1085 IF_DEBUG(linker,
1086 debugBelch("mmapForLinker: mapped %" FMT_Word
1087 " bytes starting at %p\n", (W_)size, result));
1088 IF_DEBUG(linker,
1089 debugBelch("mmapForLinker: done\n"));
1090
1091 return result;
1092 }
1093 #endif
1094
1095 /*
1096 * Remove symbols from the symbol table, and free oc->symbols.
1097 * This operation is idempotent.
1098 */
1099 static void removeOcSymbols (ObjectCode *oc)
1100 {
1101 if (oc->symbols == NULL) return;
1102
1103 // Remove all the mappings for the symbols within this object..
1104 int i;
1105 for (i = 0; i < oc->n_symbols; i++) {
1106 if (oc->symbols[i] != NULL) {
1107 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1108 }
1109 }
1110
1111 stgFree(oc->symbols);
1112 oc->symbols = NULL;
1113 }
1114
1115 /*
1116 * Release StablePtrs and free oc->stable_ptrs.
1117 * This operation is idempotent.
1118 */
1119 static void freeOcStablePtrs (ObjectCode *oc)
1120 {
1121 // Release any StablePtrs that were created when this
1122 // object module was initialized.
1123 ForeignExportStablePtr *fe_ptr, *next;
1124
1125 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1126 next = fe_ptr->next;
1127 freeStablePtr(fe_ptr->stable_ptr);
1128 stgFree(fe_ptr);
1129 }
1130 oc->stable_ptrs = NULL;
1131 }
1132
1133 static void
1134 freePreloadObjectFile (ObjectCode *oc)
1135 {
1136 #if defined(mingw32_HOST_OS)
1137 freePreloadObjectFile_PEi386(oc);
1138 #else
1139
1140 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1141 munmap(oc->image, oc->fileSize);
1142 }
1143 else {
1144 stgFree(oc->image);
1145 }
1146
1147 #endif
1148
1149 oc->image = NULL;
1150 oc->fileSize = 0;
1151 }
1152
1153 /*
1154 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1155 * the GC when a previously unloaded ObjectCode has been determined to be
1156 * unused, and when an error occurs during loadObj().
1157 */
1158 void freeObjectCode (ObjectCode *oc)
1159 {
1160 freePreloadObjectFile(oc);
1161
1162 if (oc->symbols != NULL) {
1163 stgFree(oc->symbols);
1164 oc->symbols = NULL;
1165 }
1166
1167 if (oc->extraInfos != NULL) {
1168 freeHashTable(oc->extraInfos, NULL);
1169 oc->extraInfos = NULL;
1170 }
1171
1172 if (oc->sections != NULL) {
1173 int i;
1174 for (i=0; i < oc->n_sections; i++) {
1175 if (oc->sections[i].start != NULL) {
1176 switch(oc->sections[i].alloc){
1177 #if RTS_LINKER_USE_MMAP
1178 case SECTION_MMAP:
1179 munmap(oc->sections[i].mapped_start,
1180 oc->sections[i].mapped_size);
1181 break;
1182 case SECTION_M32:
1183 m32_free(oc->sections[i].start,
1184 oc->sections[i].size);
1185 break;
1186 #endif
1187 case SECTION_MALLOC:
1188 stgFree(oc->sections[i].start);
1189 break;
1190 default:
1191 break;
1192 }
1193 }
1194 if (oc->sections[i].info) {
1195 stgFree(oc->sections[i].info);
1196 }
1197 }
1198 stgFree(oc->sections);
1199 }
1200
1201 freeProddableBlocks(oc);
1202
1203 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1204 * alongside the image, so we don't need to free. */
1205 #if defined(NEED_SYMBOL_EXTRAS) && (!defined(x86_64_HOST_ARCH) \
1206 || !defined(mingw32_HOST_OS))
1207 if (RTS_LINKER_USE_MMAP) {
1208 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1209 m32_free(oc->symbol_extras,
1210 sizeof(SymbolExtra) * oc->n_symbol_extras);
1211 }
1212 }
1213 else {
1214 stgFree(oc->symbol_extras);
1215 }
1216 #endif
1217
1218 #if defined(OBJECTFORMAT_MACHO)
1219 ocDeinit_MachO(oc);
1220 #endif
1221 #if defined(OBJFORMAT_ELF)
1222 ocDeinit_ELF(oc);
1223 #endif
1224
1225 stgFree(oc->fileName);
1226 stgFree(oc->archiveMemberName);
1227
1228 stgFree(oc);
1229 }
1230
1231 /* -----------------------------------------------------------------------------
1232 * Sets the initial status of a fresh ObjectCode
1233 */
1234 static void setOcInitialStatus(ObjectCode* oc) {
1235 if (oc->archiveMemberName == NULL) {
1236 oc->status = OBJECT_NEEDED;
1237 } else {
1238 oc->status = OBJECT_LOADED;
1239 }
1240 }
1241
1242 ObjectCode*
1243 mkOc( pathchar *path, char *image, int imageSize,
1244 bool mapped, char *archiveMemberName, int misalignment ) {
1245 ObjectCode* oc;
1246
1247 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1248 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1249
1250 oc->info = NULL;
1251
1252 # if defined(OBJFORMAT_ELF)
1253 oc->formatName = "ELF";
1254 # elif defined(OBJFORMAT_PEi386)
1255 oc->formatName = "PEi386";
1256 # elif defined(OBJFORMAT_MACHO)
1257 oc->formatName = "Mach-O";
1258 # else
1259 stgFree(oc);
1260 barf("loadObj: not implemented on this platform");
1261 # endif
1262
1263 oc->image = image;
1264 oc->fileName = pathdup(path);
1265
1266 if (archiveMemberName) {
1267 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1,
1268 "loadObj" );
1269 strcpy(oc->archiveMemberName, archiveMemberName);
1270 } else {
1271 oc->archiveMemberName = NULL;
1272 }
1273
1274 setOcInitialStatus( oc );
1275
1276 oc->fileSize = imageSize;
1277 oc->symbols = NULL;
1278 oc->n_sections = 0;
1279 oc->sections = NULL;
1280 oc->proddables = NULL;
1281 oc->stable_ptrs = NULL;
1282 #if defined(NEED_SYMBOL_EXTRAS)
1283 oc->symbol_extras = NULL;
1284 #endif
1285 oc->imageMapped = mapped;
1286
1287 oc->misalignment = misalignment;
1288 oc->extraInfos = NULL;
1289
1290 /* chain it onto the list of objects */
1291 oc->next = NULL;
1292
1293 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1294 return oc;
1295 }
1296
1297 /* -----------------------------------------------------------------------------
1298 * Check if an object or archive is already loaded.
1299 *
1300 * Returns: 1 if the path is already loaded, 0 otherwise.
1301 */
1302 HsInt
1303 isAlreadyLoaded( pathchar *path )
1304 {
1305 ObjectCode *o;
1306 for (o = objects; o; o = o->next) {
1307 if (0 == pathcmp(o->fileName, path)) {
1308 return 1; /* already loaded */
1309 }
1310 }
1311 return 0; /* not loaded yet */
1312 }
1313
1314 //
1315 // Load the object file into memory. This will not be its final resting place,
1316 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
1317 // address space, properly aligned.
1318 //
1319 static ObjectCode *
1320 preloadObjectFile (pathchar *path)
1321 {
1322 int fileSize;
1323 struct_stat st;
1324 int r;
1325 void *image;
1326 ObjectCode *oc;
1327 int misalignment = 0;
1328
1329 r = pathstat(path, &st);
1330 if (r == -1) {
1331 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
1332 return NULL;
1333 }
1334
1335 fileSize = st.st_size;
1336
1337 #if RTS_LINKER_USE_MMAP
1338 int fd;
1339
1340 /* On many architectures malloc'd memory isn't executable, so we need to use
1341 * mmap. */
1342
1343 #if defined(openbsd_HOST_OS)
1344 fd = open(path, O_RDONLY, S_IRUSR);
1345 #else
1346 fd = open(path, O_RDONLY);
1347 #endif
1348 if (fd == -1) {
1349 errorBelch("loadObj: can't open %s", path);
1350 return NULL;
1351 }
1352
1353 /* iOS does not permit to mmap with r+w+x, however while the comment for
1354 * this function says this is not the final resting place, for some
1355 * architectures / hosts (at least mach-o non-iOS -- see ocGetNames_MachO)
1356 * the image mmaped here in fact ends up being the final resting place for
1357 * the sections. And hence we need to leave r+w+x here for other hosts
1358 * until all hosts have been made aware of the initial image being r+w only.
1359 *
1360 * See also the misalignment logic for darwin below.
1361 */
1362 #if defined(ios_HOST_OS)
1363 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
1364 #else
1365 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
1366 MAP_PRIVATE, fd, 0);
1367 #endif
1368
1369 if (image == MAP_FAILED) {
1370 errorBelch("mmap: failed. errno = %d", errno);
1371 }
1372 // not 32-bit yet, we'll remap later
1373 close(fd);
1374
1375 #else /* !RTS_LINKER_USE_MMAP */
1376 FILE *f;
1377
1378 /* load the image into memory */
1379 /* coverity[toctou] */
1380 f = pathopen(path, WSTR("rb"));
1381 if (!f) {
1382 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
1383 return NULL;
1384 }
1385
1386 # if defined(mingw32_HOST_OS)
1387
1388 // TODO: We would like to use allocateExec here, but allocateExec
1389 // cannot currently allocate blocks large enough.
1390 image = allocateImageAndTrampolines(path, "itself", f, fileSize,
1391 HS_BOOL_FALSE);
1392 if (image == NULL) {
1393 fclose(f);
1394 return NULL;
1395 }
1396
1397 # elif defined(darwin_HOST_OS)
1398
1399 // In a Mach-O .o file, all sections can and will be misaligned
1400 // if the total size of the headers is not a multiple of the
1401 // desired alignment. This is fine for .o files that only serve
1402 // as input for the static linker, but it's not fine for us,
1403 // as SSE (used by gcc for floating point) and Altivec require
1404 // 16-byte alignment.
1405 // We calculate the correct alignment from the header before
1406 // reading the file, and then we misalign image on purpose so
1407 // that the actual sections end up aligned again.
1408 misalignment = machoGetMisalignment(f);
1409 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
1410 image += misalignment;
1411
1412 # else /* !defined(mingw32_HOST_OS) */
1413
1414 image = stgMallocBytes(fileSize, "loadObj(image)");
1415
1416 #endif
1417
1418 int n;
1419 n = fread ( image, 1, fileSize, f );
1420 fclose(f);
1421 if (n != fileSize) {
1422 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
1423 stgFree(image);
1424 return NULL;
1425 }
1426
1427 #endif /* RTS_LINKER_USE_MMAP */
1428
1429 oc = mkOc(path, image, fileSize, true, NULL, misalignment);
1430
1431 #if defined(OBJFORMAT_MACHO)
1432 if (ocVerifyImage_MachO( oc ))
1433 ocInit_MachO( oc );
1434 #endif
1435 #if defined(OBJFORMAT_ELF)
1436 if(ocVerifyImage_ELF( oc ))
1437 ocInit_ELF( oc );
1438 #endif
1439 return oc;
1440 }
1441
1442 /* -----------------------------------------------------------------------------
1443 * Load an obj (populate the global symbol table, but don't resolve yet)
1444 *
1445 * Returns: 1 if ok, 0 on error.
1446 */
1447 static HsInt loadObj_ (pathchar *path)
1448 {
1449 ObjectCode* oc;
1450 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
1451
1452 /* debugBelch("loadObj %s\n", path ); */
1453
1454 /* Check that we haven't already loaded this object.
1455 Ignore requests to load multiple times */
1456
1457 if (isAlreadyLoaded(path)) {
1458 IF_DEBUG(linker,
1459 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1460 return 1; /* success */
1461 }
1462
1463 oc = preloadObjectFile(path);
1464 if (oc == NULL) return 0;
1465
1466 if (! loadOc(oc)) {
1467 // failed; free everything we've allocated
1468 removeOcSymbols(oc);
1469 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
1470 freeObjectCode(oc);
1471 return 0;
1472 }
1473
1474 oc->next = objects;
1475 objects = oc;
1476 return 1;
1477 }
1478
1479 HsInt loadObj (pathchar *path)
1480 {
1481 ACQUIRE_LOCK(&linker_mutex);
1482 HsInt r = loadObj_(path);
1483 RELEASE_LOCK(&linker_mutex);
1484 return r;
1485 }
1486
1487 HsInt loadOc (ObjectCode* oc)
1488 {
1489 int r;
1490
1491 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
1492
1493 /* verify the in-memory image */
1494 # if defined(OBJFORMAT_ELF)
1495 r = ocVerifyImage_ELF ( oc );
1496 # elif defined(OBJFORMAT_PEi386)
1497 r = ocVerifyImage_PEi386 ( oc );
1498 # elif defined(OBJFORMAT_MACHO)
1499 r = ocVerifyImage_MachO ( oc );
1500 # else
1501 barf("loadObj: no verify method");
1502 # endif
1503 if (!r) {
1504 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
1505 return r;
1506 }
1507
1508 #if defined(NEED_SYMBOL_EXTRAS)
1509 # if defined(OBJFORMAT_MACHO)
1510 r = ocAllocateSymbolExtras_MachO ( oc );
1511 if (!r) {
1512 IF_DEBUG(linker,
1513 debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
1514 return r;
1515 }
1516 # elif defined(OBJFORMAT_ELF)
1517 r = ocAllocateSymbolExtras_ELF ( oc );
1518 if (!r) {
1519 IF_DEBUG(linker,
1520 debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
1521 return r;
1522 }
1523 # elif defined(OBJFORMAT_PEi386)
1524 ocAllocateSymbolExtras_PEi386 ( oc );
1525 # endif
1526 #endif
1527
1528 /* build the symbol list for this image */
1529 # if defined(OBJFORMAT_ELF)
1530 r = ocGetNames_ELF ( oc );
1531 # elif defined(OBJFORMAT_PEi386)
1532 r = ocGetNames_PEi386 ( oc );
1533 # elif defined(OBJFORMAT_MACHO)
1534 r = ocGetNames_MachO ( oc );
1535 # else
1536 barf("loadObj: no getNames method");
1537 # endif
1538 if (!r) {
1539 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
1540 return r;
1541 }
1542
1543 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
1544 setOcInitialStatus( oc );
1545 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
1546
1547 return 1;
1548 }
1549
1550 /* -----------------------------------------------------------------------------
1551 * try to load and initialize an ObjectCode into memory
1552 *
1553 * Returns: 1 if ok, 0 on error.
1554 */
1555 int ocTryLoad (ObjectCode* oc) {
1556 int r;
1557
1558 if (oc->status != OBJECT_NEEDED) {
1559 return 1;
1560 }
1561
1562 /* Check for duplicate symbols by looking into `symhash`.
1563 Duplicate symbols are any symbols which exist
1564 in different ObjectCodes that have both been loaded, or
1565 are to be loaded by this call.
1566
1567 This call is intended to have no side-effects when a non-duplicate
1568 symbol is re-inserted.
1569
1570 We set the Address to NULL since that is not used to distinguish
1571 symbols. Duplicate symbols are distinguished by name and oc.
1572 */
1573 int x;
1574 SymbolName* symbol;
1575 for (x = 0; x < oc->n_symbols; x++) {
1576 symbol = oc->symbols[x];
1577 if ( symbol
1578 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL,
1579 isSymbolWeak(oc, symbol), oc)) {
1580 return 0;
1581 }
1582 }
1583
1584 # if defined(OBJFORMAT_ELF)
1585 r = ocResolve_ELF ( oc );
1586 # elif defined(OBJFORMAT_PEi386)
1587 r = ocResolve_PEi386 ( oc );
1588 # elif defined(OBJFORMAT_MACHO)
1589 r = ocResolve_MachO ( oc );
1590 # else
1591 barf("ocTryLoad: not implemented on this platform");
1592 # endif
1593 if (!r) { return r; }
1594
1595 // run init/init_array/ctors/mod_init_func
1596
1597 loading_obj = oc; // tells foreignExportStablePtr what to do
1598 #if defined(OBJFORMAT_ELF)
1599 r = ocRunInit_ELF ( oc );
1600 #elif defined(OBJFORMAT_PEi386)
1601 r = ocRunInit_PEi386 ( oc );
1602 #elif defined(OBJFORMAT_MACHO)
1603 r = ocRunInit_MachO ( oc );
1604 #else
1605 barf("ocTryLoad: initializers not implemented on this platform");
1606 #endif
1607 loading_obj = NULL;
1608
1609 if (!r) { return r; }
1610
1611 oc->status = OBJECT_RESOLVED;
1612
1613 return 1;
1614 }
1615
1616 /* -----------------------------------------------------------------------------
1617 * resolve all the currently unlinked objects in memory
1618 *
1619 * Returns: 1 if ok, 0 on error.
1620 */
1621 static HsInt resolveObjs_ (void)
1622 {
1623 ObjectCode *oc;
1624 int r;
1625
1626 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
1627
1628 for (oc = objects; oc; oc = oc->next) {
1629 r = ocTryLoad(oc);
1630 if (!r)
1631 {
1632 return r;
1633 }
1634 }
1635
1636 #if defined(PROFILING)
1637 // collect any new cost centres & CCSs that were defined during runInit
1638 initProfiling2();
1639 #endif
1640
1641 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
1642 return 1;
1643 }
1644
1645 HsInt resolveObjs (void)
1646 {
1647 ACQUIRE_LOCK(&linker_mutex);
1648 HsInt r = resolveObjs_();
1649 RELEASE_LOCK(&linker_mutex);
1650 return r;
1651 }
1652
1653 /* -----------------------------------------------------------------------------
1654 * delete an object from the pool
1655 */
1656 static HsInt unloadObj_ (pathchar *path, bool just_purge)
1657 {
1658 ObjectCode *oc, *prev, *next;
1659 HsBool unloadedAnyObj = HS_BOOL_FALSE;
1660
1661 ASSERT(symhash != NULL);
1662 ASSERT(objects != NULL);
1663
1664 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
1665
1666 prev = NULL;
1667 for (oc = objects; oc; oc = next) {
1668 next = oc->next; // oc might be freed
1669
1670 if (!pathcmp(oc->fileName,path)) {
1671
1672 // these are both idempotent, so in just_purge mode we can
1673 // later call unloadObj() to really unload the object.
1674 removeOcSymbols(oc);
1675 freeOcStablePtrs(oc);
1676
1677 if (!just_purge) {
1678 if (prev == NULL) {
1679 objects = oc->next;
1680 } else {
1681 prev->next = oc->next;
1682 }
1683 ACQUIRE_LOCK(&linker_unloaded_mutex);
1684 oc->next = unloaded_objects;
1685 unloaded_objects = oc;
1686 oc->status = OBJECT_UNLOADED;
1687 RELEASE_LOCK(&linker_unloaded_mutex);
1688 // We do not own oc any more; it can be released at any time by
1689 // the GC in checkUnload().
1690 } else {
1691 prev = oc;
1692 }
1693
1694 /* This could be a member of an archive so continue
1695 * unloading other members. */
1696 unloadedAnyObj = HS_BOOL_TRUE;
1697 } else {
1698 prev = oc;
1699 }
1700 }
1701
1702 if (unloadedAnyObj) {
1703 return 1;
1704 }
1705 else {
1706 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
1707 return 0;
1708 }
1709 }
1710
1711 HsInt unloadObj (pathchar *path)
1712 {
1713 ACQUIRE_LOCK(&linker_mutex);
1714 HsInt r = unloadObj_(path, false);
1715 RELEASE_LOCK(&linker_mutex);
1716 return r;
1717 }
1718
1719 HsInt purgeObj (pathchar *path)
1720 {
1721 ACQUIRE_LOCK(&linker_mutex);
1722 HsInt r = unloadObj_(path, true);
1723 RELEASE_LOCK(&linker_mutex);
1724 return r;
1725 }
1726
1727 static OStatus getObjectLoadStatus_ (pathchar *path)
1728 {
1729 ObjectCode *o;
1730 for (o = objects; o; o = o->next) {
1731 if (0 == pathcmp(o->fileName, path)) {
1732 return o->status;
1733 }
1734 }
1735 for (o = unloaded_objects; o; o = o->next) {
1736 if (0 == pathcmp(o->fileName, path)) {
1737 return o->status;
1738 }
1739 }
1740 return OBJECT_NOT_LOADED;
1741 }
1742
1743 OStatus getObjectLoadStatus (pathchar *path)
1744 {
1745 ACQUIRE_LOCK(&linker_mutex);
1746 OStatus r = getObjectLoadStatus_(path);
1747 RELEASE_LOCK(&linker_mutex);
1748 return r;
1749 }
1750
1751 /* -----------------------------------------------------------------------------
1752 * Sanity checking. For each ObjectCode, maintain a list of address ranges
1753 * which may be prodded during relocation, and abort if we try and write
1754 * outside any of these.
1755 */
1756 void
1757 addProddableBlock ( ObjectCode* oc, void* start, int size )
1758 {
1759 ProddableBlock* pb
1760 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
1761
1762 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
1763 ASSERT(size > 0);
1764 pb->start = start;
1765 pb->size = size;
1766 pb->next = oc->proddables;
1767 oc->proddables = pb;
1768 }
1769
1770 void
1771 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
1772 {
1773 ProddableBlock* pb;
1774
1775 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
1776 char* s = (char*)(pb->start);
1777 char* e = s + pb->size;
1778 char* a = (char*)addr;
1779 if (a >= s && (a+size) <= e) return;
1780 }
1781 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
1782 }
1783
1784 void freeProddableBlocks (ObjectCode *oc)
1785 {
1786 ProddableBlock *pb, *next;
1787
1788 for (pb = oc->proddables; pb != NULL; pb = next) {
1789 next = pb->next;
1790 stgFree(pb);
1791 }
1792 oc->proddables = NULL;
1793 }
1794
1795 /* -----------------------------------------------------------------------------
1796 * Section management.
1797 */
1798 void
1799 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
1800 void* start, StgWord size, StgWord mapped_offset,
1801 void* mapped_start, StgWord mapped_size)
1802 {
1803 s->start = start; /* actual start of section in memory */
1804 s->size = size; /* actual size of section in memory */
1805 s->kind = kind;
1806 s->alloc = alloc;
1807 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
1808
1809 s->mapped_start = mapped_start; /* start of mmap() block */
1810 s->mapped_size = mapped_size; /* size of mmap() block */
1811
1812 s->info = (struct SectionFormatInfo*)stgCallocBytes(1, sizeof *s->info,
1813 "addSection(SectionFormatInfo)");
1814
1815 IF_DEBUG(linker,
1816 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
1817 start, (void*)((StgWord)start + size),
1818 size, kind ));
1819 }
1820