ghci: Ensure that system libffi include path is searched
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "StablePtr.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "linker/CacheFlush.h"
32 #include "linker/SymbolExtras.h"
33 #include "PathUtils.h"
34
35 #if !defined(mingw32_HOST_OS)
36 #include "posix/Signals.h"
37 #endif
38
39 // get protos for is*()
40 #include <ctype.h>
41
42 #if defined(HAVE_SYS_TYPES_H)
43 #include <sys/types.h>
44 #endif
45
46 #include <inttypes.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <assert.h>
51 #include <fs_rts.h>
52
53 #if defined(HAVE_SYS_STAT_H)
54 #include <sys/stat.h>
55 #endif
56
57 #if defined(HAVE_DLFCN_H)
58 #include <dlfcn.h>
59 #endif
60
61 #if defined(OBJFORMAT_ELF)
62 # include "linker/Elf.h"
63 # include <regex.h> // regex is already used by dlopen() so this is OK
64 // to use here without requiring an additional lib
65 #elif defined(OBJFORMAT_PEi386)
66 # include "linker/PEi386.h"
67 # include <windows.h>
68 #elif defined(OBJFORMAT_MACHO)
69 # include "linker/MachO.h"
70 # include <regex.h>
71 # include <mach/machine.h>
72 # include <mach-o/fat.h>
73 #endif
74
75 #if defined(dragonfly_HOST_OS)
76 #include <sys/tls.h>
77 #endif
78 /*
79 Note [runtime-linker-support]
80 -----------------------------
81 When adding support for a new platform to the runtime linker please
82 update `$TOP/configure.ac` under heading `Does target have runtime
83 linker support?`.
84 */
85 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
86 This hashtable will contain information on all symbols
87 that we know of, however the .o they are in may not be loaded.
88
89 Until the ObjectCode the symbol belongs to is actually
90 loaded this symbol may be replaced. So do not rely on
91 addresses of unloaded symbols.
92
93 Note [runtime-linker-phases]
94 --------------------------------------
95 Broadly the behavior of the runtime linker can be
96 split into the following four phases:
97
98 - Indexing (e.g. ocVerifyImage and ocGetNames)
99 - Initialization (e.g. ocResolve and ocRunInit)
100 - Resolve (e.g. resolveObjs())
101 - Lookup (e.g. lookupSymbol)
102
103 This is to enable lazy loading of symbols. Eager loading is problematic
104 as it means that all symbols must be available, even those which we will
105 never use. This is especially painful on Windows, where the number of
106 libraries required to link things like mingwex grows to be quite high.
107
108 We proceed through these stages as follows,
109
110 * During Indexing we verify and open the ObjectCode and
111 perform a quick scan/indexing of the ObjectCode. All the work
112 required to actually load the ObjectCode is done.
113
114 All symbols from the ObjectCode are also inserted into
115 `symhash`, where possible duplicates are handled via the semantics
116 described in `ghciInsertSymbolTable`.
117
118 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
119 depending on whether they are an archive member or not.
120
121 * During initialization we load ObjectCode, perform relocations, execute
122 static constructors etc. This phase may trigger other ObjectCodes to
123 be loaded because of the calls to lookupSymbol.
124
125 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
126 previous status was `OBJECT_LOADED`.
127
128 * During resolve we attempt to resolve all the symbols needed for the
129 initial link. This essentially means, that for any ObjectCode given
130 directly to the command-line we perform lookupSymbols on the required
131 symbols. lookupSymbols may trigger the loading of additional ObjectCode
132 if required.
133
134 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
135 the previous status was `OBJECT_NEEDED`.
136
137 * lookupSymbols is used to lookup any symbols required, both during initial
138 link and during statement and expression compilations in the REPL.
139 Declaration of e.g. a foreign import, will eventually call lookupSymbol
140 which will either fail (symbol unknown) or succeed (and possibly trigger a
141 load).
142
143 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
144
145 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
146 by calling unloadObj and starting over.
147 When a new declaration or statement is performed ultimately lookupSymbol is called
148 without doing a re-link.
149
150 The goal of these different phases is to allow the linker to be able to perform
151 "lazy loading" of ObjectCode. The reason for this is that we want to only link
152 in symbols that are actually required for the link. This reduces:
153
154 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
155 then we don't need to load libB. This means the dependency chain for libraries
156 such as mingw32 and mingwex can be broken down.
157
158 2) The number of duplicate symbols, since now only symbols that are
159 true duplicates will display the error.
160 */
161 /*Str*/HashTable *symhash;
162
163 /* List of currently loaded objects */
164 ObjectCode *objects = NULL; /* initially empty */
165
166 /* List of objects that have been unloaded via unloadObj(), but are waiting
167 to be actually freed via checkUnload() */
168 ObjectCode *unloaded_objects = NULL; /* initially empty */
169
170 #if defined(THREADED_RTS)
171 /* This protects all the Linker's global state except unloaded_objects */
172 Mutex linker_mutex;
173 /*
174 * This protects unloaded_objects. We have a separate mutex for this, because
175 * the GC needs to access unloaded_objects in checkUnload, while the linker only
176 * needs to access unloaded_objects in unloadObj(), so this allows most linker
177 * operations proceed concurrently with the GC.
178 */
179 Mutex linker_unloaded_mutex;
180 #endif
181
182 /* Generic wrapper function to try and Resolve and RunInit oc files */
183 int ocTryLoad( ObjectCode* oc );
184
185 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
186 * small memory model on this architecture (see gcc docs,
187 * -mcmodel=small).
188 *
189 * MAP_32BIT not available on OpenBSD/amd64
190 */
191 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
192 #define TRY_MAP_32BIT MAP_32BIT
193 #else
194 #define TRY_MAP_32BIT 0
195 #endif
196
197 /*
198 * Due to the small memory model (see above), on x86_64 we have to map
199 * all our non-PIC object files into the low 2Gb of the address space
200 * (why 2Gb and not 4Gb? Because all addresses must be reachable
201 * using a 32-bit signed PC-relative offset). On Linux we can do this
202 * using the MAP_32BIT flag to mmap(), however on other OSs
203 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
204 * can't do this. So on these systems, we have to pick a base address
205 * in the low 2Gb of the address space and try to allocate memory from
206 * there.
207 *
208 * We pick a default address based on the OS, but also make this
209 * configurable via an RTS flag (+RTS -xm)
210 */
211 #if defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
212 // Try to use MAP_32BIT
213 #define MMAP_32BIT_BASE_DEFAULT 0
214 #else
215 // A guess: 1Gb.
216 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
217 #endif
218
219 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
220
221 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
222 ObjectCode *owner)
223 {
224 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
225 if (!pinfo || owner != pinfo->owner) return;
226 removeStrHashTable(table, key, NULL);
227 if (isSymbolImport (owner, key))
228 stgFree(pinfo->value);
229
230 stgFree(pinfo);
231 }
232
233 /* -----------------------------------------------------------------------------
234 * Insert symbols into hash tables, checking for duplicates.
235 *
236 * Returns: 0 on failure, nonzero on success
237 */
238 /*
239 Note [weak-symbols-support]
240 -------------------------------------
241 While ghciInsertSymbolTable does implement extensive
242 logic for weak symbol support, weak symbols are not currently
243 fully supported by the RTS. This code is mostly here for COMDAT
244 support which uses the weak symbols support.
245
246 Linking weak symbols defined purely in C code with other C code
247 should also work, probably. Observing weak symbols in Haskell
248 won't.
249
250 Some test have been written for weak symbols but have been disabled
251 mostly because it's unsure how the weak symbols support should look.
252 See #11223
253 */
254 int ghciInsertSymbolTable(
255 pathchar* obj_name,
256 HashTable *table,
257 const SymbolName* key,
258 SymbolAddr* data,
259 HsBool weak,
260 ObjectCode *owner)
261 {
262 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
263 if (!pinfo) /* new entry */
264 {
265 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
266 pinfo->value = data;
267 pinfo->owner = owner;
268 pinfo->weak = weak;
269 insertStrHashTable(table, key, pinfo);
270 return 1;
271 }
272 else if (weak && data && pinfo->weak && !pinfo->value)
273 {
274 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
275 pinfo->value = data;
276 pinfo->owner = owner;
277 return 1;
278 }
279 else if (weak)
280 {
281 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
282 already know of another copy throw this one away.
283
284 or both weak symbols have a nonzero value. Keep the existing one.
285
286 This also preserves the semantics of linking against
287 the first symbol we find. */
288 }
289 else if (pinfo->weak && !weak) /* weak symbol is in the table */
290 {
291 /* override the weak definition with the non-weak one */
292 pinfo->value = data;
293 pinfo->owner = owner;
294 pinfo->weak = HS_BOOL_FALSE;
295 return 1;
296 }
297 else if ( pinfo->owner
298 && pinfo->owner->status != OBJECT_RESOLVED
299 && pinfo->owner->status != OBJECT_NEEDED)
300 {
301 /* If the other symbol hasn't been loaded or will be loaded and we want to
302 explicitly load the new one, we can just swap it out and load the one
303 that has been requested. If not, just keep the first one encountered.
304
305 Because the `symHash' table consists symbols we've also not loaded but
306 found during the initial scan this is safe to do. If however the existing
307 symbol has been loaded then it means we have a duplicate.
308
309 This is essentially emulating the behavior of a linker wherein it will always
310 link in object files that are .o file arguments, but only take object files
311 from archives as needed. */
312 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
313 pinfo->value = data;
314 pinfo->owner = owner;
315 pinfo->weak = weak;
316 }
317
318 return 1;
319 }
320 else if (pinfo->owner == owner)
321 {
322 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
323 return 1;
324 }
325 else if (owner && owner->status == OBJECT_LOADED)
326 {
327 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
328 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
329 call this function again to trigger the duplicate error. */
330 return 1;
331 }
332
333 pathchar* archiveName = NULL;
334 debugBelch(
335 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
336 " %s\n"
337 "whilst processing object file\n"
338 " %" PATH_FMT "\n"
339 "The symbol was previously defined in\n"
340 " %" PATH_FMT "\n"
341 "This could be caused by:\n"
342 " * Loading two different object files which export the same symbol\n"
343 " * Specifying the same object file twice on the GHCi command line\n"
344 " * An incorrect `package.conf' entry, causing some object to be\n"
345 " loaded twice.\n",
346 (char*)key,
347 obj_name,
348 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
349 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
350 : pinfo->owner->fileName
351 );
352
353 if (archiveName)
354 {
355 stgFree(archiveName);
356 archiveName = NULL;
357 }
358 return 0;
359 }
360
361 /* -----------------------------------------------------------------------------
362 * Looks up symbols into hash tables.
363 *
364 * Returns: 0 on failure and result is not set,
365 * nonzero on success and result set to nonzero pointer
366 */
367 HsBool ghciLookupSymbolInfo(HashTable *table,
368 const SymbolName* key, RtsSymbolInfo **result)
369 {
370 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
371 if (!pinfo) {
372 *result = NULL;
373 return HS_BOOL_FALSE;
374 }
375 if (pinfo->weak)
376 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
377 /* Once it's looked up, it can no longer be overridden */
378 pinfo->weak = HS_BOOL_FALSE;
379
380 *result = pinfo;
381 return HS_BOOL_TRUE;
382 }
383
384 /* -----------------------------------------------------------------------------
385 * initialize the object linker
386 */
387
388
389 static int linker_init_done = 0 ;
390
391 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
392 static void *dl_prog_handle;
393 static regex_t re_invalid;
394 static regex_t re_realso;
395 #if defined(THREADED_RTS)
396 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
397 #endif
398 #endif
399
400 void initLinker (void)
401 {
402 // default to retaining CAFs for backwards compatibility. Most
403 // users will want initLinker_(0): otherwise unloadObj() will not
404 // be able to unload object files when they contain CAFs.
405 initLinker_(1);
406 }
407
408 void
409 initLinker_ (int retain_cafs)
410 {
411 RtsSymbolVal *sym;
412 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
413 int compileResult;
414 #endif
415
416 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
417
418 /* Make initLinker idempotent, so we can call it
419 before every relevant operation; that means we
420 don't need to initialise the linker separately */
421 if (linker_init_done == 1) {
422 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
423 return;
424 } else {
425 linker_init_done = 1;
426 }
427
428 objects = NULL;
429 unloaded_objects = NULL;
430
431 #if defined(THREADED_RTS)
432 initMutex(&linker_mutex);
433 initMutex(&linker_unloaded_mutex);
434 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
435 initMutex(&dl_mutex);
436 #endif
437 #endif
438
439 symhash = allocStrHashTable();
440
441 /* populate the symbol table with stuff from the RTS */
442 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
443 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
444 symhash, sym->lbl, sym->addr,
445 sym->weak, NULL)) {
446 barf("ghciInsertSymbolTable failed");
447 }
448 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
449 }
450
451 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
452 referenced from a statically linked module. We need to mimic this, but
453 we cannot use NULL because we use it to mean nonexistent symbols. So we
454 use an arbitrary (hopefully unique) address here.
455 */
456 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
457 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
458 barf("ghciInsertSymbolTable failed");
459 }
460
461 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
462 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
463 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
464 retain_cafs ? newRetainedCAF : newGCdCAF,
465 HS_BOOL_FALSE, NULL)) {
466 barf("ghciInsertSymbolTable failed");
467 }
468
469 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
470 # if defined(RTLD_DEFAULT)
471 dl_prog_handle = RTLD_DEFAULT;
472 # else
473 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
474 # endif /* RTLD_DEFAULT */
475
476 compileResult = regcomp(&re_invalid,
477 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short|invalid file format|Exec format error)",
478 REG_EXTENDED);
479 if (compileResult != 0) {
480 barf("Compiling re_invalid failed");
481 }
482 compileResult = regcomp(&re_realso,
483 "(GROUP|INPUT) *\\( *([^ )]+)",
484 REG_EXTENDED);
485 if (compileResult != 0) {
486 barf("Compiling re_realso failed");
487 }
488 # endif
489
490 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
491 // User-override for mmap_32bit_base
492 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
493 }
494
495 if (RTS_LINKER_USE_MMAP)
496 m32_allocator_init();
497
498 #if defined(OBJFORMAT_PEi386)
499 initLinker_PEi386();
500 #endif
501
502 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
503 return;
504 }
505
506 void
507 exitLinker( void ) {
508 #if defined(OBJFORMAT_PEi386)
509 exitLinker_PEi386();
510 #endif
511 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
512 if (linker_init_done == 1) {
513 regfree(&re_invalid);
514 regfree(&re_realso);
515 #if defined(THREADED_RTS)
516 closeMutex(&dl_mutex);
517 #endif
518 }
519 #endif
520 if (linker_init_done == 1) {
521 freeHashTable(symhash, free);
522 }
523 #if defined(THREADED_RTS)
524 closeMutex(&linker_mutex);
525 #endif
526 }
527
528 /* -----------------------------------------------------------------------------
529 * Loading DLL or .so dynamic libraries
530 * -----------------------------------------------------------------------------
531 *
532 * Add a DLL from which symbols may be found. In the ELF case, just
533 * do RTLD_GLOBAL-style add, so no further messing around needs to
534 * happen in order that symbols in the loaded .so are findable --
535 * lookupSymbol() will subsequently see them by dlsym on the program's
536 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
537 *
538 * In the PEi386 case, open the DLLs and put handles to them in a
539 * linked list. When looking for a symbol, try all handles in the
540 * list. This means that we need to load even DLLs that are guaranteed
541 * to be in the ghc.exe image already, just so we can get a handle
542 * to give to loadSymbol, so that we can find the symbols. For such
543 * libraries, the LoadLibrary call should be a no-op except for returning
544 * the handle.
545 *
546 */
547
548 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
549
550 /* Suppose in ghci we load a temporary SO for a module containing
551 f = 1
552 and then modify the module, recompile, and load another temporary
553 SO with
554 f = 2
555 Then as we don't unload the first SO, dlsym will find the
556 f = 1
557 symbol whereas we want the
558 f = 2
559 symbol. We therefore need to keep our own SO handle list, and
560 try SOs in the right order. */
561
562 typedef
563 struct _OpenedSO {
564 struct _OpenedSO* next;
565 void *handle;
566 }
567 OpenedSO;
568
569 /* A list thereof. */
570 static OpenedSO* openedSOs = NULL;
571
572 static const char *
573 internal_dlopen(const char *dll_name)
574 {
575 OpenedSO* o_so;
576 void *hdl;
577 const char *errmsg;
578 char *errmsg_copy;
579
580 // omitted: RTLD_NOW
581 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
582 IF_DEBUG(linker,
583 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
584
585 //-------------- Begin critical section ------------------
586 // This critical section is necessary because dlerror() is not
587 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
588 // Also, the error message returned must be copied to preserve it
589 // (see POSIX also)
590
591 ACQUIRE_LOCK(&dl_mutex);
592 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
593
594 errmsg = NULL;
595 if (hdl == NULL) {
596 /* dlopen failed; return a ptr to the error msg. */
597 errmsg = dlerror();
598 if (errmsg == NULL) errmsg = "addDLL: unknown error";
599 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
600 strcpy(errmsg_copy, errmsg);
601 errmsg = errmsg_copy;
602 } else {
603 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
604 o_so->handle = hdl;
605 o_so->next = openedSOs;
606 openedSOs = o_so;
607 }
608
609 RELEASE_LOCK(&dl_mutex);
610 //--------------- End critical section -------------------
611
612 return errmsg;
613 }
614
615 /*
616 Note [RTLD_LOCAL]
617
618 In GHCi we want to be able to override previous .so's with newly
619 loaded .so's when we recompile something. This further implies that
620 when we look up a symbol in internal_dlsym() we have to iterate
621 through the loaded libraries (in order from most recently loaded to
622 oldest) looking up the symbol in each one until we find it.
623
624 However, this can cause problems for some symbols that are copied
625 by the linker into the executable image at runtime - see #8935 for a
626 lengthy discussion. To solve that problem we need to look up
627 symbols in the main executable *first*, before attempting to look
628 them up in the loaded .so's. But in order to make that work, we
629 have to always call dlopen with RTLD_LOCAL, so that the loaded
630 libraries don't populate the global symbol table.
631 */
632
633 static void *
634 internal_dlsym(const char *symbol) {
635 OpenedSO* o_so;
636 void *v;
637
638 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
639 ACQUIRE_LOCK(&dl_mutex);
640 dlerror();
641 // look in program first
642 v = dlsym(dl_prog_handle, symbol);
643 if (dlerror() == NULL) {
644 RELEASE_LOCK(&dl_mutex);
645 return v;
646 }
647
648 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
649 v = dlsym(o_so->handle, symbol);
650 if (dlerror() == NULL) {
651 RELEASE_LOCK(&dl_mutex);
652 return v;
653 }
654 }
655 RELEASE_LOCK(&dl_mutex);
656 return v;
657 }
658 # endif
659
660 const char *
661 addDLL( pathchar *dll_name )
662 {
663 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
664 /* ------------------- ELF DLL loader ------------------- */
665
666 #define NMATCH 5
667 regmatch_t match[NMATCH];
668 const char *errmsg;
669 FILE* fp;
670 size_t match_length;
671 #define MAXLINE 1000
672 char line[MAXLINE];
673 int result;
674
675 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
676 errmsg = internal_dlopen(dll_name);
677
678 if (errmsg == NULL) {
679 return NULL;
680 }
681
682 // GHC #2615
683 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
684 // contain linker scripts rather than ELF-format object code. This
685 // code handles the situation by recognizing the real object code
686 // file name given in the linker script.
687 //
688 // If an "invalid ELF header" error occurs, it is assumed that the
689 // .so file contains a linker script instead of ELF object code.
690 // In this case, the code looks for the GROUP ( ... ) linker
691 // directive. If one is found, the first file name inside the
692 // parentheses is treated as the name of a dynamic library and the
693 // code attempts to dlopen that file. If this is also unsuccessful,
694 // an error message is returned.
695
696 // see if the error message is due to an invalid ELF header
697 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
698 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
699 IF_DEBUG(linker, debugBelch("result = %i\n", result));
700 if (result == 0) {
701 // success -- try to read the named file as a linker script
702 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
703 MAXLINE-1);
704 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
705 line[match_length] = '\0'; // make sure string is null-terminated
706 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
707 if ((fp = __rts_fopen(line, "r")) == NULL) {
708 return errmsg; // return original error if open fails
709 }
710 // try to find a GROUP or INPUT ( ... ) command
711 while (fgets(line, MAXLINE, fp) != NULL) {
712 IF_DEBUG(linker, debugBelch("input line = %s", line));
713 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
714 // success -- try to dlopen the first named file
715 IF_DEBUG(linker, debugBelch("match%s\n",""));
716 line[match[2].rm_eo] = '\0';
717 stgFree((void*)errmsg); // Free old message before creating new one
718 errmsg = internal_dlopen(line+match[2].rm_so);
719 break;
720 }
721 // if control reaches here, no GROUP or INPUT ( ... ) directive
722 // was found and the original error message is returned to the
723 // caller
724 }
725 fclose(fp);
726 }
727 return errmsg;
728
729 # elif defined(OBJFORMAT_PEi386)
730 return addDLL_PEi386(dll_name, NULL);
731
732 # else
733 barf("addDLL: not implemented on this platform");
734 # endif
735 }
736
737 /* -----------------------------------------------------------------------------
738 * Searches the system directories to determine if there is a system DLL that
739 * satisfies the given name. This prevent GHCi from linking against a static
740 * library if a DLL is available.
741 *
742 * Returns: NULL on failure or no DLL found, else the full path to the DLL
743 * that can be loaded.
744 */
745 pathchar* findSystemLibrary(pathchar* dll_name)
746 {
747 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%"
748 PATH_FMT "'\n", dll_name));
749
750 #if defined(OBJFORMAT_PEi386)
751 return findSystemLibrary_PEi386(dll_name);
752 #else
753 (void)(dll_name); // Function not implemented for other platforms.
754 return NULL;
755 #endif
756 }
757
758 /* -----------------------------------------------------------------------------
759 * Emits a warning determining that the system is missing a required security
760 * update that we need to get access to the proper APIs
761 */
762 void warnMissingKBLibraryPaths( void )
763 {
764 static HsBool missing_update_warn = HS_BOOL_FALSE;
765 if (!missing_update_warn) {
766 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
767 missing_update_warn = HS_BOOL_TRUE;
768 }
769 }
770
771 /* -----------------------------------------------------------------------------
772 * appends a directory to the process DLL Load path so LoadLibrary can find it
773 *
774 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
775 * restore the search path to what it was before this call.
776 */
777 HsPtr addLibrarySearchPath(pathchar* dll_path)
778 {
779 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%"
780 PATH_FMT "'\n", dll_path));
781
782 #if defined(OBJFORMAT_PEi386)
783 return addLibrarySearchPath_PEi386(dll_path);
784 #else
785 (void)(dll_path); // Function not implemented for other platforms.
786 return NULL;
787 #endif
788 }
789
790 /* -----------------------------------------------------------------------------
791 * removes a directory from the process DLL Load path
792 *
793 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
794 */
795 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
796 {
797 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n",
798 dll_path_index));
799
800 #if defined(OBJFORMAT_PEi386)
801 return removeLibrarySearchPath_PEi386(dll_path_index);
802 #else
803 (void)(dll_path_index); // Function not implemented for other platforms.
804 return HS_BOOL_FALSE;
805 #endif
806 }
807
808 /* -----------------------------------------------------------------------------
809 * insert a symbol in the hash table
810 *
811 * Returns: 0 on failure, nozero on success
812 */
813 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
814 {
815 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE,
816 NULL);
817 }
818
819 /* -----------------------------------------------------------------------------
820 * lookup a symbol in the hash table
821 */
822 #if defined(OBJFORMAT_PEi386)
823 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
824 {
825 return lookupSymbol_PEi386(lbl);
826 }
827
828 #else
829
830 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
831 {
832 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
833
834 ASSERT(symhash != NULL);
835 RtsSymbolInfo *pinfo;
836
837 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
838 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
839
840 # if defined(OBJFORMAT_ELF)
841 return internal_dlsym(lbl);
842 # elif defined(OBJFORMAT_MACHO)
843
844 /* HACK: On OS X, all symbols are prefixed with an underscore.
845 However, dlsym wants us to omit the leading underscore from the
846 symbol name -- the dlsym routine puts it back on before
847 searching for the symbol. For now, we simply strip it off here
848 (and ONLY here).
849 */
850 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n",
851 lbl));
852 ASSERT(lbl[0] == '_');
853 return internal_dlsym(lbl + 1);
854
855 # else
856 ASSERT(2+2 == 5);
857 return NULL;
858 # endif
859 } else {
860 return loadSymbol(lbl, pinfo);
861 }
862 }
863 #endif /* OBJFORMAT_PEi386 */
864
865 /*
866 * Load and relocate the object code for a symbol as necessary.
867 * Symbol name only used for diagnostics output.
868 */
869 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
870 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl,
871 pinfo->value));
872 ObjectCode* oc = pinfo->owner;
873
874 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
875 See Note [runtime-linker-phases] */
876 if (oc && lbl && oc->status == OBJECT_LOADED) {
877 oc->status = OBJECT_NEEDED;
878 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand "
879 "loading symbol '%s'\n", lbl));
880 int r = ocTryLoad(oc);
881 if (!r) {
882 return NULL;
883 }
884
885 #if defined(PROFILING)
886 // collect any new cost centres & CCSs
887 // that were defined during runInit
888 initProfiling2();
889 #endif
890 }
891
892 return pinfo->value;
893 }
894
895 SymbolAddr* lookupSymbol( SymbolName* lbl )
896 {
897 ACQUIRE_LOCK(&linker_mutex);
898 SymbolAddr* r = lookupSymbol_(lbl);
899 if (!r) {
900 errorBelch("^^ Could not load '%s', dependency unresolved. "
901 "See top entry above.\n", lbl);
902 fflush(stderr);
903 }
904 RELEASE_LOCK(&linker_mutex);
905 return r;
906 }
907
908 /* -----------------------------------------------------------------------------
909 Create a StablePtr for a foreign export. This is normally called by
910 a C function with __attribute__((constructor)), which is generated
911 by GHC and linked into the module.
912
913 If the object code is being loaded dynamically, then we remember
914 which StablePtrs were allocated by the constructors and free them
915 again in unloadObj().
916 -------------------------------------------------------------------------- */
917
918 static ObjectCode *loading_obj = NULL;
919
920 StgStablePtr foreignExportStablePtr (StgPtr p)
921 {
922 ForeignExportStablePtr *fe_sptr;
923 StgStablePtr *sptr;
924
925 sptr = getStablePtr(p);
926
927 if (loading_obj != NULL) {
928 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
929 "foreignExportStablePtr");
930 fe_sptr->stable_ptr = sptr;
931 fe_sptr->next = loading_obj->stable_ptrs;
932 loading_obj->stable_ptrs = fe_sptr;
933 }
934
935 return sptr;
936 }
937
938
939 /* -----------------------------------------------------------------------------
940 * Debugging aid: look in GHCi's object symbol tables for symbols
941 * within DELTA bytes of the specified address, and show their names.
942 */
943 #if defined(DEBUG)
944 void ghci_enquire ( SymbolAddr* addr );
945
946 void ghci_enquire(SymbolAddr* addr)
947 {
948 int i;
949 SymbolName* sym;
950 RtsSymbolInfo* a;
951 const int DELTA = 64;
952 ObjectCode* oc;
953
954 for (oc = objects; oc; oc = oc->next) {
955 for (i = 0; i < oc->n_symbols; i++) {
956 sym = oc->symbols[i].name;
957 if (sym == NULL) continue;
958 a = NULL;
959 if (a == NULL) {
960 ghciLookupSymbolInfo(symhash, sym, &a);
961 }
962 if (a == NULL) {
963 // debugBelch("ghci_enquire: can't find %s\n", sym);
964 }
965 else if ( a->value
966 && (char*)addr-DELTA <= (char*)a->value
967 && (char*)a->value <= (char*)addr+DELTA) {
968 debugBelch("%p + %3d == `%s'\n", addr,
969 (int)((char*)a->value - (char*)addr), sym);
970 }
971 }
972 }
973 }
974 #endif
975
976 pathchar*
977 resolveSymbolAddr (pathchar* buffer, int size,
978 SymbolAddr* symbol, uintptr_t* top)
979 {
980 #if defined(OBJFORMAT_PEi386)
981 return resolveSymbolAddr_PEi386 (buffer, size, symbol, top);
982 #else /* OBJFORMAT_PEi386 */
983 (void)buffer;
984 (void)size;
985 (void)symbol;
986 (void)top;
987 return NULL;
988 #endif /* OBJFORMAT_PEi386 */
989 }
990
991 #if RTS_LINKER_USE_MMAP
992 //
993 // Returns NULL on failure.
994 //
995 void *
996 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
997 {
998 void *map_addr = NULL;
999 void *result;
1000 size_t size;
1001 uint32_t tryMap32Bit = RtsFlags.MiscFlags.linkerAlwaysPic
1002 ? 0
1003 : TRY_MAP_32BIT;
1004 static uint32_t fixed = 0;
1005
1006 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1007 size = roundUpToPage(bytes);
1008
1009 #if defined(x86_64_HOST_ARCH)
1010 mmap_again:
1011 #endif
1012
1013 if (mmap_32bit_base != 0) {
1014 map_addr = mmap_32bit_base;
1015 }
1016
1017 IF_DEBUG(linker,
1018 debugBelch("mmapForLinker: \tprotection %#0x\n",
1019 PROT_EXEC | PROT_READ | PROT_WRITE));
1020 IF_DEBUG(linker,
1021 debugBelch("mmapForLinker: \tflags %#0x\n",
1022 MAP_PRIVATE | tryMap32Bit | fixed | flags));
1023
1024 result = mmap(map_addr, size,
1025 PROT_EXEC|PROT_READ|PROT_WRITE,
1026 MAP_PRIVATE|tryMap32Bit|fixed|flags, fd, offset);
1027
1028 if (result == MAP_FAILED) {
1029 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1030 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1031 return NULL;
1032 }
1033
1034 #if defined(x86_64_HOST_ARCH)
1035 if (RtsFlags.MiscFlags.linkerAlwaysPic) {
1036 } else if (mmap_32bit_base != 0) {
1037 if (result == map_addr) {
1038 mmap_32bit_base = (StgWord8*)map_addr + size;
1039 } else {
1040 if ((W_)result > 0x80000000) {
1041 // oops, we were given memory over 2Gb
1042 munmap(result,size);
1043 #if defined(freebsd_HOST_OS) || \
1044 defined(kfreebsdgnu_HOST_OS) || \
1045 defined(dragonfly_HOST_OS)
1046 // Some platforms require MAP_FIXED. This is normally
1047 // a bad idea, because MAP_FIXED will overwrite
1048 // existing mappings.
1049 fixed = MAP_FIXED;
1050 goto mmap_again;
1051 #else
1052 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1053 "asked for %lu bytes at %p. "
1054 "Try specifying an address with +RTS -xm<addr> -RTS",
1055 size, map_addr);
1056 return NULL;
1057 #endif
1058 } else {
1059 // hmm, we were given memory somewhere else, but it's
1060 // still under 2Gb so we can use it. Next time, ask
1061 // for memory right after the place we just got some
1062 mmap_32bit_base = (StgWord8*)result + size;
1063 }
1064 }
1065 } else {
1066 if ((W_)result > 0x80000000) {
1067 // oops, we were given memory over 2Gb
1068 // ... try allocating memory somewhere else?;
1069 debugTrace(DEBUG_linker,
1070 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1071 bytes, result);
1072 munmap(result, size);
1073
1074 // Set a base address and try again... (guess: 1Gb)
1075 mmap_32bit_base = (void*)0x40000000;
1076 goto mmap_again;
1077 }
1078 }
1079 #endif
1080
1081 IF_DEBUG(linker,
1082 debugBelch("mmapForLinker: mapped %" FMT_Word
1083 " bytes starting at %p\n", (W_)size, result));
1084 IF_DEBUG(linker,
1085 debugBelch("mmapForLinker: done\n"));
1086
1087 return result;
1088 }
1089 #endif
1090
1091 /*
1092 * Remove symbols from the symbol table, and free oc->symbols.
1093 * This operation is idempotent.
1094 */
1095 static void removeOcSymbols (ObjectCode *oc)
1096 {
1097 if (oc->symbols == NULL) return;
1098
1099 // Remove all the mappings for the symbols within this object..
1100 int i;
1101 for (i = 0; i < oc->n_symbols; i++) {
1102 if (oc->symbols[i].name != NULL) {
1103 ghciRemoveSymbolTable(symhash, oc->symbols[i].name, oc);
1104 }
1105 }
1106
1107 stgFree(oc->symbols);
1108 oc->symbols = NULL;
1109 }
1110
1111 /*
1112 * Release StablePtrs and free oc->stable_ptrs.
1113 * This operation is idempotent.
1114 */
1115 static void freeOcStablePtrs (ObjectCode *oc)
1116 {
1117 // Release any StablePtrs that were created when this
1118 // object module was initialized.
1119 ForeignExportStablePtr *fe_ptr, *next;
1120
1121 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1122 next = fe_ptr->next;
1123 freeStablePtr(fe_ptr->stable_ptr);
1124 stgFree(fe_ptr);
1125 }
1126 oc->stable_ptrs = NULL;
1127 }
1128
1129 static void
1130 freePreloadObjectFile (ObjectCode *oc)
1131 {
1132 #if defined(mingw32_HOST_OS)
1133 freePreloadObjectFile_PEi386(oc);
1134 #else
1135
1136 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1137 munmap(oc->image, oc->fileSize);
1138 }
1139 else {
1140 stgFree(oc->image);
1141 }
1142
1143 #endif
1144
1145 oc->image = NULL;
1146 oc->fileSize = 0;
1147 }
1148
1149 /*
1150 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1151 * the GC when a previously unloaded ObjectCode has been determined to be
1152 * unused, and when an error occurs during loadObj().
1153 */
1154 void freeObjectCode (ObjectCode *oc)
1155 {
1156 freePreloadObjectFile(oc);
1157
1158 if (oc->symbols != NULL) {
1159 stgFree(oc->symbols);
1160 oc->symbols = NULL;
1161 }
1162
1163 if (oc->extraInfos != NULL) {
1164 freeHashTable(oc->extraInfos, NULL);
1165 oc->extraInfos = NULL;
1166 }
1167
1168 if (oc->sections != NULL) {
1169 int i;
1170 for (i=0; i < oc->n_sections; i++) {
1171 if (oc->sections[i].start != NULL) {
1172 switch(oc->sections[i].alloc){
1173 #if RTS_LINKER_USE_MMAP
1174 case SECTION_MMAP:
1175 munmap(oc->sections[i].mapped_start,
1176 oc->sections[i].mapped_size);
1177 break;
1178 case SECTION_M32:
1179 m32_free(oc->sections[i].start,
1180 oc->sections[i].size);
1181 break;
1182 #endif
1183 case SECTION_MALLOC:
1184 stgFree(oc->sections[i].start);
1185 break;
1186 default:
1187 break;
1188 }
1189 }
1190 if (oc->sections[i].info) {
1191 stgFree(oc->sections[i].info);
1192 }
1193 }
1194 stgFree(oc->sections);
1195 }
1196
1197 freeProddableBlocks(oc);
1198 freeSegments(oc);
1199
1200 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1201 * alongside the image, so we don't need to free. */
1202 #if defined(NEED_SYMBOL_EXTRAS) && (!defined(x86_64_HOST_ARCH) \
1203 || !defined(mingw32_HOST_OS))
1204 if (RTS_LINKER_USE_MMAP) {
1205 if (!USE_CONTIGUOUS_MMAP && !RtsFlags.MiscFlags.linkerAlwaysPic &&
1206 oc->symbol_extras != NULL) {
1207 m32_free(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
1208 }
1209 }
1210 else {
1211 stgFree(oc->symbol_extras);
1212 }
1213 #endif
1214
1215 #if defined(OBJECTFORMAT_MACHO)
1216 ocDeinit_MachO(oc);
1217 #endif
1218 #if defined(OBJFORMAT_ELF)
1219 ocDeinit_ELF(oc);
1220 #endif
1221
1222 stgFree(oc->fileName);
1223 stgFree(oc->archiveMemberName);
1224
1225 stgFree(oc);
1226 }
1227
1228 /* -----------------------------------------------------------------------------
1229 * Sets the initial status of a fresh ObjectCode
1230 */
1231 static void setOcInitialStatus(ObjectCode* oc) {
1232 /* If a target has requested the ObjectCode not to be resolved then
1233 honor this requests. Usually this means the ObjectCode has not been
1234 initialized and can't be. */
1235 if (oc->status == OBJECT_DONT_RESOLVE)
1236 return;
1237
1238 if (oc->archiveMemberName == NULL) {
1239 oc->status = OBJECT_NEEDED;
1240 } else {
1241 oc->status = OBJECT_LOADED;
1242 }
1243 }
1244
1245 ObjectCode*
1246 mkOc( pathchar *path, char *image, int imageSize,
1247 bool mapped, char *archiveMemberName, int misalignment ) {
1248 ObjectCode* oc;
1249
1250 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1251 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1252
1253 oc->info = NULL;
1254
1255 # if defined(OBJFORMAT_ELF)
1256 oc->formatName = "ELF";
1257 # elif defined(OBJFORMAT_PEi386)
1258 oc->formatName = "PEi386";
1259 # elif defined(OBJFORMAT_MACHO)
1260 oc->formatName = "Mach-O";
1261 # else
1262 stgFree(oc);
1263 barf("loadObj: not implemented on this platform");
1264 # endif
1265
1266 oc->image = image;
1267 oc->fileName = pathdup(path);
1268
1269 if (archiveMemberName) {
1270 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1,
1271 "loadObj" );
1272 strcpy(oc->archiveMemberName, archiveMemberName);
1273 } else {
1274 oc->archiveMemberName = NULL;
1275 }
1276
1277 setOcInitialStatus( oc );
1278
1279 oc->fileSize = imageSize;
1280 oc->symbols = NULL;
1281 oc->n_sections = 0;
1282 oc->sections = NULL;
1283 oc->n_segments = 0;
1284 oc->segments = NULL;
1285 oc->proddables = NULL;
1286 oc->stable_ptrs = NULL;
1287 #if defined(NEED_SYMBOL_EXTRAS)
1288 oc->symbol_extras = NULL;
1289 #endif
1290 oc->bssBegin = NULL;
1291 oc->bssEnd = NULL;
1292 oc->imageMapped = mapped;
1293
1294 oc->misalignment = misalignment;
1295 oc->extraInfos = NULL;
1296
1297 /* chain it onto the list of objects */
1298 oc->next = NULL;
1299
1300 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1301 return oc;
1302 }
1303
1304 /* -----------------------------------------------------------------------------
1305 * Check if an object or archive is already loaded.
1306 *
1307 * Returns: 1 if the path is already loaded, 0 otherwise.
1308 */
1309 HsInt
1310 isAlreadyLoaded( pathchar *path )
1311 {
1312 ObjectCode *o;
1313 for (o = objects; o; o = o->next) {
1314 if (0 == pathcmp(o->fileName, path)) {
1315 return 1; /* already loaded */
1316 }
1317 }
1318 return 0; /* not loaded yet */
1319 }
1320
1321 //
1322 // Load the object file into memory. This will not be its final resting place,
1323 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
1324 // address space, properly aligned.
1325 //
1326 static ObjectCode *
1327 preloadObjectFile (pathchar *path)
1328 {
1329 int fileSize;
1330 struct_stat st;
1331 int r;
1332 void *image;
1333 ObjectCode *oc;
1334 int misalignment = 0;
1335
1336 r = pathstat(path, &st);
1337 if (r == -1) {
1338 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
1339 return NULL;
1340 }
1341
1342 fileSize = st.st_size;
1343
1344 #if RTS_LINKER_USE_MMAP
1345 int fd;
1346
1347 /* On many architectures malloc'd memory isn't executable, so we need to use
1348 * mmap. */
1349
1350 #if defined(openbsd_HOST_OS)
1351 fd = open(path, O_RDONLY, S_IRUSR);
1352 #else
1353 fd = open(path, O_RDONLY);
1354 #endif
1355 if (fd == -1) {
1356 errorBelch("loadObj: can't open %s", path);
1357 return NULL;
1358 }
1359
1360 /* iOS does not permit to mmap with r+w+x, however while the comment for
1361 * this function says this is not the final resting place, for some
1362 * architectures / hosts (at least mach-o non-iOS -- see ocGetNames_MachO)
1363 * the image mmaped here in fact ends up being the final resting place for
1364 * the sections. And hence we need to leave r+w+x here for other hosts
1365 * until all hosts have been made aware of the initial image being r+w only.
1366 *
1367 * See also the misalignment logic for darwin below.
1368 */
1369 #if defined(ios_HOST_OS)
1370 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
1371 #else
1372 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
1373 MAP_PRIVATE, fd, 0);
1374 #endif
1375
1376 if (image == MAP_FAILED) {
1377 errorBelch("mmap: failed. errno = %d", errno);
1378 }
1379 // not 32-bit yet, we'll remap later
1380 close(fd);
1381
1382 #else /* !RTS_LINKER_USE_MMAP */
1383 FILE *f;
1384
1385 /* load the image into memory */
1386 /* coverity[toctou] */
1387 f = pathopen(path, WSTR("rb"));
1388 if (!f) {
1389 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
1390 return NULL;
1391 }
1392
1393 # if defined(darwin_HOST_OS)
1394
1395 // In a Mach-O .o file, all sections can and will be misaligned
1396 // if the total size of the headers is not a multiple of the
1397 // desired alignment. This is fine for .o files that only serve
1398 // as input for the static linker, but it's not fine for us,
1399 // as SSE (used by gcc for floating point) and Altivec require
1400 // 16-byte alignment.
1401 // We calculate the correct alignment from the header before
1402 // reading the file, and then we misalign image on purpose so
1403 // that the actual sections end up aligned again.
1404 misalignment = machoGetMisalignment(f);
1405 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
1406 image += misalignment;
1407
1408 # else /* !defined(darwin_HOST_OS) */
1409
1410 image = stgMallocBytes(fileSize, "loadObj(image)");
1411
1412 #endif
1413
1414 int n;
1415 n = fread ( image, 1, fileSize, f );
1416 fclose(f);
1417 if (n != fileSize) {
1418 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
1419 stgFree(image);
1420 return NULL;
1421 }
1422
1423 #endif /* RTS_LINKER_USE_MMAP */
1424
1425 IF_DEBUG(linker, debugBelch("loadObj: preloaded image at %p\n", (void *) image));
1426
1427 /* FIXME (AP): =mapped= parameter unconditionally set to true */
1428 oc = mkOc(path, image, fileSize, true, NULL, misalignment);
1429
1430 #if defined(OBJFORMAT_MACHO)
1431 if (ocVerifyImage_MachO( oc ))
1432 ocInit_MachO( oc );
1433 #endif
1434 #if defined(OBJFORMAT_ELF)
1435 if(ocVerifyImage_ELF( oc ))
1436 ocInit_ELF( oc );
1437 #endif
1438 return oc;
1439 }
1440
1441 /* -----------------------------------------------------------------------------
1442 * Load an obj (populate the global symbol table, but don't resolve yet)
1443 *
1444 * Returns: 1 if ok, 0 on error.
1445 */
1446 static HsInt loadObj_ (pathchar *path)
1447 {
1448 ObjectCode* oc;
1449 IF_DEBUG(linker, debugBelch("loadObj: %" PATH_FMT "\n", path));
1450
1451 /* debugBelch("loadObj %s\n", path ); */
1452
1453 /* Check that we haven't already loaded this object.
1454 Ignore requests to load multiple times */
1455
1456 if (isAlreadyLoaded(path)) {
1457 IF_DEBUG(linker,
1458 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1459 return 1; /* success */
1460 }
1461
1462 oc = preloadObjectFile(path);
1463 if (oc == NULL) return 0;
1464
1465 if (! loadOc(oc)) {
1466 // failed; free everything we've allocated
1467 removeOcSymbols(oc);
1468 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
1469 freeObjectCode(oc);
1470 return 0;
1471 }
1472
1473 oc->next = objects;
1474 objects = oc;
1475 return 1;
1476 }
1477
1478 HsInt loadObj (pathchar *path)
1479 {
1480 ACQUIRE_LOCK(&linker_mutex);
1481 HsInt r = loadObj_(path);
1482 RELEASE_LOCK(&linker_mutex);
1483 return r;
1484 }
1485
1486 HsInt loadOc (ObjectCode* oc)
1487 {
1488 int r;
1489
1490 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
1491
1492 /* verify the in-memory image */
1493 # if defined(OBJFORMAT_ELF)
1494 r = ocVerifyImage_ELF ( oc );
1495 # elif defined(OBJFORMAT_PEi386)
1496 r = ocVerifyImage_PEi386 ( oc );
1497 # elif defined(OBJFORMAT_MACHO)
1498 r = ocVerifyImage_MachO ( oc );
1499 # else
1500 barf("loadObj: no verify method");
1501 # endif
1502 if (!r) {
1503 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
1504 return r;
1505 }
1506
1507 /* Note [loadOc orderings]
1508 The order of `ocAllocateExtras` and `ocGetNames` matters. For MachO
1509 and ELF, `ocInit` and `ocGetNames` initialize a bunch of pointers based
1510 on the offset to `oc->image`, but `ocAllocateExtras` may relocate
1511 the address of `oc->image` and invalidate those pointers. So we must
1512 compute or recompute those pointers after `ocAllocateExtras`.
1513
1514 On Windows, when we have an import library we (for now, as we don't honor
1515 the lazy loading semantics of the library and instead GHCi is already
1516 lazy) don't use the library after ocGetNames as it just populates the
1517 symbol table. Allocating space for jump tables in ocAllocateExtras
1518 would just be a waste then as we'll be stopping further processing of the
1519 library in the next few steps. If necessary, the actual allocation
1520 happens in `ocGetNames_PEi386` and `ocAllocateExtras_PEi386` simply
1521 set the correct pointers.
1522 */
1523
1524 #if defined(NEED_SYMBOL_EXTRAS)
1525 # if defined(OBJFORMAT_MACHO)
1526 r = ocAllocateExtras_MachO ( oc );
1527 if (!r) {
1528 IF_DEBUG(linker,
1529 debugBelch("loadOc: ocAllocateExtras_MachO failed\n"));
1530 return r;
1531 }
1532 # elif defined(OBJFORMAT_ELF)
1533 r = ocAllocateExtras_ELF ( oc );
1534 if (!r) {
1535 IF_DEBUG(linker,
1536 debugBelch("loadOc: ocAllocateExtras_ELF failed\n"));
1537 return r;
1538 }
1539 # endif
1540 #endif
1541
1542 /* build the symbol list for this image */
1543 # if defined(OBJFORMAT_ELF)
1544 r = ocGetNames_ELF ( oc );
1545 # elif defined(OBJFORMAT_PEi386)
1546 r = ocGetNames_PEi386 ( oc );
1547 # elif defined(OBJFORMAT_MACHO)
1548 r = ocGetNames_MachO ( oc );
1549 # else
1550 barf("loadObj: no getNames method");
1551 # endif
1552 if (!r) {
1553 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
1554 return r;
1555 }
1556
1557 #if defined(NEED_SYMBOL_EXTRAS)
1558 # if defined(OBJFORMAT_PEi386)
1559 ocAllocateExtras_PEi386 ( oc );
1560 # endif
1561 #endif
1562
1563 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
1564 setOcInitialStatus( oc );
1565 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
1566
1567 return 1;
1568 }
1569
1570 /* -----------------------------------------------------------------------------
1571 * try to load and initialize an ObjectCode into memory
1572 *
1573 * Returns: 1 if ok, 0 on error.
1574 */
1575 int ocTryLoad (ObjectCode* oc) {
1576 int r;
1577
1578 if (oc->status != OBJECT_NEEDED) {
1579 return 1;
1580 }
1581
1582 /* Check for duplicate symbols by looking into `symhash`.
1583 Duplicate symbols are any symbols which exist
1584 in different ObjectCodes that have both been loaded, or
1585 are to be loaded by this call.
1586
1587 This call is intended to have no side-effects when a non-duplicate
1588 symbol is re-inserted. A symbol is only a duplicate if the object file
1589 it is defined in has had it's relocations resolved. A resolved object
1590 file means the symbols inside it are required.
1591
1592 The symbol address is not used to distinguish symbols. Duplicate symbols
1593 are distinguished by name, oc and attributes (weak symbols etc).
1594 */
1595 int x;
1596 Symbol_t symbol;
1597 for (x = 0; x < oc->n_symbols; x++) {
1598 symbol = oc->symbols[x];
1599 if ( symbol.name
1600 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol.name,
1601 symbol.addr,
1602 isSymbolWeak(oc, symbol.name), oc)) {
1603 return 0;
1604 }
1605 }
1606
1607 # if defined(OBJFORMAT_ELF)
1608 r = ocResolve_ELF ( oc );
1609 # elif defined(OBJFORMAT_PEi386)
1610 r = ocResolve_PEi386 ( oc );
1611 # elif defined(OBJFORMAT_MACHO)
1612 r = ocResolve_MachO ( oc );
1613 # else
1614 barf("ocTryLoad: not implemented on this platform");
1615 # endif
1616 if (!r) { return r; }
1617
1618 // run init/init_array/ctors/mod_init_func
1619
1620 IF_DEBUG(linker, debugBelch("ocTryLoad: ocRunInit start\n"));
1621
1622 loading_obj = oc; // tells foreignExportStablePtr what to do
1623 #if defined(OBJFORMAT_ELF)
1624 r = ocRunInit_ELF ( oc );
1625 #elif defined(OBJFORMAT_PEi386)
1626 r = ocRunInit_PEi386 ( oc );
1627 #elif defined(OBJFORMAT_MACHO)
1628 r = ocRunInit_MachO ( oc );
1629 #else
1630 barf("ocTryLoad: initializers not implemented on this platform");
1631 #endif
1632 loading_obj = NULL;
1633
1634 if (!r) { return r; }
1635
1636 oc->status = OBJECT_RESOLVED;
1637
1638 return 1;
1639 }
1640
1641 /* -----------------------------------------------------------------------------
1642 * resolve all the currently unlinked objects in memory
1643 *
1644 * Returns: 1 if ok, 0 on error.
1645 */
1646 static HsInt resolveObjs_ (void)
1647 {
1648 ObjectCode *oc;
1649 int r;
1650
1651 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
1652
1653 for (oc = objects; oc; oc = oc->next) {
1654 r = ocTryLoad(oc);
1655 if (!r)
1656 {
1657 return r;
1658 }
1659 }
1660
1661 #if defined(PROFILING)
1662 // collect any new cost centres & CCSs that were defined during runInit
1663 initProfiling2();
1664 #endif
1665
1666 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
1667 return 1;
1668 }
1669
1670 HsInt resolveObjs (void)
1671 {
1672 ACQUIRE_LOCK(&linker_mutex);
1673 HsInt r = resolveObjs_();
1674 RELEASE_LOCK(&linker_mutex);
1675 return r;
1676 }
1677
1678 /* -----------------------------------------------------------------------------
1679 * delete an object from the pool
1680 */
1681 static HsInt unloadObj_ (pathchar *path, bool just_purge)
1682 {
1683 ObjectCode *oc, *prev, *next;
1684 HsBool unloadedAnyObj = HS_BOOL_FALSE;
1685
1686 ASSERT(symhash != NULL);
1687 ASSERT(objects != NULL);
1688
1689 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
1690
1691 prev = NULL;
1692 for (oc = objects; oc; oc = next) {
1693 next = oc->next; // oc might be freed
1694
1695 if (!pathcmp(oc->fileName,path)) {
1696
1697 // these are both idempotent, so in just_purge mode we can
1698 // later call unloadObj() to really unload the object.
1699 removeOcSymbols(oc);
1700 freeOcStablePtrs(oc);
1701
1702 if (!just_purge) {
1703 if (prev == NULL) {
1704 objects = oc->next;
1705 } else {
1706 prev->next = oc->next;
1707 }
1708 ACQUIRE_LOCK(&linker_unloaded_mutex);
1709 oc->next = unloaded_objects;
1710 unloaded_objects = oc;
1711 oc->status = OBJECT_UNLOADED;
1712 RELEASE_LOCK(&linker_unloaded_mutex);
1713 // We do not own oc any more; it can be released at any time by
1714 // the GC in checkUnload().
1715 } else {
1716 prev = oc;
1717 }
1718
1719 /* This could be a member of an archive so continue
1720 * unloading other members. */
1721 unloadedAnyObj = HS_BOOL_TRUE;
1722 } else {
1723 prev = oc;
1724 }
1725 }
1726
1727 if (unloadedAnyObj) {
1728 return 1;
1729 }
1730 else {
1731 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
1732 return 0;
1733 }
1734 }
1735
1736 HsInt unloadObj (pathchar *path)
1737 {
1738 ACQUIRE_LOCK(&linker_mutex);
1739 HsInt r = unloadObj_(path, false);
1740 RELEASE_LOCK(&linker_mutex);
1741 return r;
1742 }
1743
1744 HsInt purgeObj (pathchar *path)
1745 {
1746 ACQUIRE_LOCK(&linker_mutex);
1747 HsInt r = unloadObj_(path, true);
1748 RELEASE_LOCK(&linker_mutex);
1749 return r;
1750 }
1751
1752 static OStatus getObjectLoadStatus_ (pathchar *path)
1753 {
1754 ObjectCode *o;
1755 for (o = objects; o; o = o->next) {
1756 if (0 == pathcmp(o->fileName, path)) {
1757 return o->status;
1758 }
1759 }
1760 for (o = unloaded_objects; o; o = o->next) {
1761 if (0 == pathcmp(o->fileName, path)) {
1762 return o->status;
1763 }
1764 }
1765 return OBJECT_NOT_LOADED;
1766 }
1767
1768 OStatus getObjectLoadStatus (pathchar *path)
1769 {
1770 ACQUIRE_LOCK(&linker_mutex);
1771 OStatus r = getObjectLoadStatus_(path);
1772 RELEASE_LOCK(&linker_mutex);
1773 return r;
1774 }
1775
1776 /* -----------------------------------------------------------------------------
1777 * Sanity checking. For each ObjectCode, maintain a list of address ranges
1778 * which may be prodded during relocation, and abort if we try and write
1779 * outside any of these.
1780 */
1781 void
1782 addProddableBlock ( ObjectCode* oc, void* start, int size )
1783 {
1784 ProddableBlock* pb
1785 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
1786
1787 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
1788 ASSERT(size > 0);
1789 pb->start = start;
1790 pb->size = size;
1791 pb->next = oc->proddables;
1792 oc->proddables = pb;
1793 }
1794
1795 void
1796 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
1797 {
1798 ProddableBlock* pb;
1799
1800 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
1801 char* s = (char*)(pb->start);
1802 char* e = s + pb->size;
1803 char* a = (char*)addr;
1804 if (a >= s && (a+size) <= e) return;
1805 }
1806 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
1807 }
1808
1809 void freeProddableBlocks (ObjectCode *oc)
1810 {
1811 ProddableBlock *pb, *next;
1812
1813 for (pb = oc->proddables; pb != NULL; pb = next) {
1814 next = pb->next;
1815 stgFree(pb);
1816 }
1817 oc->proddables = NULL;
1818 }
1819
1820 /* -----------------------------------------------------------------------------
1821 * Section management.
1822 */
1823 void
1824 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
1825 void* start, StgWord size,
1826 StgWord mapped_offset, void* mapped_start, StgWord mapped_size)
1827 {
1828 s->start = start; /* actual start of section in memory */
1829 s->size = size; /* actual size of section in memory */
1830 s->kind = kind;
1831 s->alloc = alloc;
1832 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
1833
1834 s->mapped_start = mapped_start; /* start of mmap() block */
1835 s->mapped_size = mapped_size; /* size of mmap() block */
1836
1837 if (!s->info)
1838 s->info
1839 = (struct SectionFormatInfo*)stgCallocBytes(1, sizeof *s->info,
1840 "addSection(SectionFormatInfo)");
1841
1842 IF_DEBUG(linker,
1843 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
1844 start, (void*)((StgWord)start + size),
1845 size, kind ));
1846 }
1847
1848 /* -----------------------------------------------------------------------------
1849 * Segment management
1850 */
1851 void
1852 initSegment (Segment *s, void *start, size_t size, SegmentProt prot, int n_sections)
1853 {
1854 s->start = start;
1855 s->size = size;
1856 s->prot = prot;
1857 s->sections_idx = (int *)stgCallocBytes(n_sections, sizeof(int),
1858 "initSegment(segment)");
1859 s->n_sections = n_sections;
1860 }
1861
1862 void freeSegments (ObjectCode *oc)
1863 {
1864 if (oc->segments != NULL) {
1865 IF_DEBUG(linker, debugBelch("freeSegments: freeing %d segments\n", oc->n_segments));
1866
1867 for (int i = 0; i < oc->n_segments; i++) {
1868 Segment *s = &oc->segments[i];
1869
1870 IF_DEBUG(linker, debugBelch("freeSegments: freeing segment %d at %p size %zu\n",
1871 i, s->start, s->size));
1872
1873 stgFree(s->sections_idx);
1874 s->sections_idx = NULL;
1875
1876 if (0 == s->size) {
1877 IF_DEBUG(linker, debugBelch("freeSegment: skipping segment of 0 size\n"));
1878 continue;
1879 } else {
1880 #if RTS_LINKER_USE_MMAP
1881 CHECKM(0 == munmap(s->start, s->size), "freeSegments: failed to unmap memory");
1882 #else
1883 stgFree(s->start);
1884 #endif
1885 }
1886 s->start = NULL;
1887 }
1888
1889 stgFree(oc->segments);
1890 oc->segments = NULL;
1891 }
1892 }
1893