linker: Split ELF implementation into separate source file
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "linker/CacheFlush.h"
32 #include "linker/SymbolExtras.h"
33 #include "PathUtils.h"
34
35 #if !defined(mingw32_HOST_OS)
36 #include "posix/Signals.h"
37 #endif
38
39 // get protos for is*()
40 #include <ctype.h>
41
42 #ifdef HAVE_SYS_TYPES_H
43 #include <sys/types.h>
44 #endif
45
46 #include <inttypes.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <assert.h>
51
52 #ifdef HAVE_SYS_STAT_H
53 #include <sys/stat.h>
54 #endif
55
56 #if defined(HAVE_DLFCN_H)
57 #include <dlfcn.h>
58 #endif
59
60 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
61 # define OBJFORMAT_ELF
62 # include "linker/Elf.h"
63 # include <regex.h> // regex is already used by dlopen() so this is OK
64 // to use here without requiring an additional lib
65 #elif defined (mingw32_HOST_OS)
66 # define OBJFORMAT_PEi386
67 # include "linker/PEi386.h"
68 # include <windows.h>
69 #elif defined(darwin_HOST_OS)
70 # define OBJFORMAT_MACHO
71 # include "linker/MachO.h"
72 # include <regex.h>
73 # include <mach/machine.h>
74 # include <mach-o/fat.h>
75 #endif
76
77 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
78 #define ALWAYS_PIC
79 #endif
80
81 #if defined(dragonfly_HOST_OS)
82 #include <sys/tls.h>
83 #endif
84
85 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
86 This hashtable will contain information on all symbols
87 that we know of, however the .o they are in may not be loaded.
88
89 Until the ObjectCode the symbol belongs to is actually
90 loaded this symbol may be replaced. So do not rely on
91 addresses of unloaded symbols.
92
93 Note [runtime-linker-phases]
94 --------------------------------------
95 Broadly the behavior of the runtime linker can be
96 split into the following four phases:
97
98 - Indexing (e.g. ocVerifyImage and ocGetNames)
99 - Initialization (e.g. ocResolve and ocRunInit)
100 - Resolve (e.g. resolveObjs())
101 - Lookup (e.g. lookupSymbol)
102
103 This is to enable lazy loading of symbols. Eager loading is problematic
104 as it means that all symbols must be available, even those which we will
105 never use. This is especially painful of Windows, where the number of
106 libraries required to link things like mingwex grows to be quite high.
107
108 We proceed through these stages as follows,
109
110 * During Indexing we verify and open the ObjectCode and
111 perform a quick scan/indexing of the ObjectCode. All the work
112 required to actually load the ObjectCode is done.
113
114 All symbols from the ObjectCode is also inserted into
115 `symhash`, where possible duplicates are handled via the semantics
116 described in `ghciInsertSymbolTable`.
117
118 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
119 depending on whether they are an archive members or not.
120
121 * During initialization we load ObjectCode, perform relocations, execute
122 static constructors etc. This phase may trigger other ObjectCodes to
123 be loaded because of the calls to lookupSymbol.
124
125 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
126 previous status was `OBJECT_LOADED`.
127
128 * During resolve we attempt to resolve all the symbols needed for the
129 initial link. This essentially means, that for any ObjectCode given
130 directly to the command-line we perform lookupSymbols on the required
131 symbols. lookupSymbols may trigger the loading of additional ObjectCode
132 if required.
133
134 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
135 the previous status was `OBJECT_NEEDED`.
136
137 * Lookup symbols is used to lookup any symbols required, both during initial
138 link and during statement and expression compilations in the REPL.
139 Declaration of e.g. an foreign import, will eventually call lookupSymbol
140 which will either fail (symbol unknown) or succeed (and possibly triggered a
141 load).
142
143 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
144
145 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
146 by calling unloadObj and starting over.
147 When a new declaration or statement is performed ultimately lookupSymbol is called
148 without doing a re-link.
149
150 The goal of these different phases is to allow the linker to be able to perform
151 "lazy loading" of ObjectCode. The reason for this is that we want to only link
152 in symbols that are actually required for the link. This reduces:
153
154 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
155 then we don't need to load libB. This means the dependency chain for libraries
156 such as mingw32 and mingwex can be broken down.
157
158 2) The number of duplicate symbols, since now only symbols that are
159 true duplicates will display the error.
160 */
161 /*Str*/HashTable *symhash;
162
163 /* List of currently loaded objects */
164 ObjectCode *objects = NULL; /* initially empty */
165
166 /* List of objects that have been unloaded via unloadObj(), but are waiting
167 to be actually freed via checkUnload() */
168 ObjectCode *unloaded_objects = NULL; /* initially empty */
169
170 #ifdef THREADED_RTS
171 /* This protects all the Linker's global state except unloaded_objects */
172 Mutex linker_mutex;
173 /*
174 * This protects unloaded_objects. We have a separate mutex for this, because
175 * the GC needs to access unloaded_objects in checkUnload, while the linker only
176 * needs to access unloaded_objects in unloadObj(), so this allows most linker
177 * operations proceed concurrently with the GC.
178 */
179 Mutex linker_unloaded_mutex;
180 #endif
181
182 static HsInt isAlreadyLoaded( pathchar *path );
183 static HsInt loadOc( ObjectCode* oc );
184 static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
185 rtsBool mapped, char *archiveMemberName,
186 int misalignment
187 );
188
189 /* Generic wrapper function to try and Resolve and RunInit oc files */
190 int ocTryLoad( ObjectCode* oc );
191
192 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
193 * small memory model on this architecture (see gcc docs,
194 * -mcmodel=small).
195 *
196 * MAP_32BIT not available on OpenBSD/amd64
197 */
198 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
199 #define TRY_MAP_32BIT MAP_32BIT
200 #else
201 #define TRY_MAP_32BIT 0
202 #endif
203
204 /*
205 Note [The ARM/Thumb Story]
206 ~~~~~~~~~~~~~~~~~~~~~~~~~~
207
208 Support for the ARM architecture is complicated by the fact that ARM has not
209 one but several instruction encodings. The two relevant ones here are the original
210 ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
211 of the instruction set.
212
213 How the CPU decodes a particular instruction is determined by a mode bit. This
214 mode bit is set on jump instructions, the value being determined by the low
215 bit of the target address: An odd address means the target is a procedure
216 encoded in the Thumb encoding whereas an even address means it's a traditional
217 ARM procedure (the actual address jumped to is even regardless of the encoding bit).
218
219 Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
220 is tricky. If the linker needs to link a call by an ARM object into Thumb code
221 (or vice-versa) it will produce a jump island. This, however, is incompatible with
222 GHC's tables-next-to-code. For this reason, it is critical that GHC emit
223 exclusively ARM or Thumb objects for all Haskell code.
224
225 We still do, however, need to worry about foreign code.
226 */
227
228 /*
229 * Due to the small memory model (see above), on x86_64 we have to map
230 * all our non-PIC object files into the low 2Gb of the address space
231 * (why 2Gb and not 4Gb? Because all addresses must be reachable
232 * using a 32-bit signed PC-relative offset). On Linux we can do this
233 * using the MAP_32BIT flag to mmap(), however on other OSs
234 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
235 * can't do this. So on these systems, we have to pick a base address
236 * in the low 2Gb of the address space and try to allocate memory from
237 * there.
238 *
239 * We pick a default address based on the OS, but also make this
240 * configurable via an RTS flag (+RTS -xm)
241 */
242 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
243
244 #if defined(MAP_32BIT)
245 // Try to use MAP_32BIT
246 #define MMAP_32BIT_BASE_DEFAULT 0
247 #else
248 // A guess: 1Gb.
249 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
250 #endif
251
252 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
253 #endif
254
255 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
256 ObjectCode *owner)
257 {
258 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
259 if (!pinfo || owner != pinfo->owner) return;
260 removeStrHashTable(table, key, NULL);
261 stgFree(pinfo);
262 }
263
264 /* -----------------------------------------------------------------------------
265 * Insert symbols into hash tables, checking for duplicates.
266 *
267 * Returns: 0 on failure, nonzero on success
268 */
269 /*
270 Note [weak-symbols-support]
271 -------------------------------------
272 While ghciInsertSymbolTable does implement extensive
273 logic for weak symbol support, weak symbols are not currently
274 fully supported by the RTS. This code is mostly here for COMDAT
275 support which uses the weak symbols support.
276
277 Linking weak symbols defined purely in C code with other C code
278 should also work, probably. Observing weak symbols in Haskell
279 won't.
280
281 Some test have been written for weak symbols but have been disabled
282 mostly because it's unsure how the weak symbols support should look.
283 See Trac #11223
284 */
285 int ghciInsertSymbolTable(
286 pathchar* obj_name,
287 HashTable *table,
288 const SymbolName* key,
289 SymbolAddr* data,
290 HsBool weak,
291 ObjectCode *owner)
292 {
293 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
294 if (!pinfo) /* new entry */
295 {
296 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
297 pinfo->value = data;
298 pinfo->owner = owner;
299 pinfo->weak = weak;
300 insertStrHashTable(table, key, pinfo);
301 return 1;
302 }
303 else if (weak && data && pinfo->weak && !pinfo->value)
304 {
305 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
306 pinfo->value = data;
307 pinfo->owner = owner;
308 return 1;
309 }
310 else if (weak)
311 {
312 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
313 already know of another copy throw this one away.
314
315 or both weak symbols have a nonzero value. Keep the existing one.
316
317 This also preserves the semantics of linking against
318 the first symbol we find. */
319 }
320 else if (pinfo->weak && !weak) /* weak symbol is in the table */
321 {
322 /* override the weak definition with the non-weak one */
323 pinfo->value = data;
324 pinfo->owner = owner;
325 pinfo->weak = HS_BOOL_FALSE;
326 return 1;
327 }
328 else if ( pinfo->owner
329 && pinfo->owner->status != OBJECT_RESOLVED
330 && pinfo->owner->status != OBJECT_NEEDED)
331 {
332 /* If the other symbol hasn't been loaded or will be loaded and we want to
333 explicitly load the new one, we can just swap it out and load the one
334 that has been requested. If not, just keep the first one encountered.
335
336 Because the `symHash' table consists symbols we've also not loaded but
337 found during the initial scan this is safe to do. If however the existing
338 symbol has been loaded then it means we have a duplicate.
339
340 This is essentially emulating the behavior of a linker wherein it will always
341 link in object files that are .o file arguments, but only take object files
342 from archives as needed. */
343 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
344 pinfo->value = data;
345 pinfo->owner = owner;
346 pinfo->weak = weak;
347 }
348
349 return 1;
350 }
351 else if (pinfo->owner == owner)
352 {
353 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
354 return 1;
355 }
356 else if (owner && owner->status == OBJECT_LOADED)
357 {
358 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
359 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
360 call this function again to trigger the duplicate error. */
361 return 1;
362 }
363
364 pathchar* archiveName = NULL;
365 debugBelch(
366 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
367 " %s\n"
368 "whilst processing object file\n"
369 " %" PATH_FMT "\n"
370 "The symbol was previously defined in\n"
371 " %" PATH_FMT "\n"
372 "This could be caused by:\n"
373 " * Loading two different object files which export the same symbol\n"
374 " * Specifying the same object file twice on the GHCi command line\n"
375 " * An incorrect `package.conf' entry, causing some object to be\n"
376 " loaded twice.\n",
377 (char*)key,
378 obj_name,
379 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
380 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
381 : pinfo->owner->fileName
382 );
383
384 if (archiveName)
385 {
386 stgFree(archiveName);
387 archiveName = NULL;
388 }
389 return 0;
390 }
391
392 /* -----------------------------------------------------------------------------
393 * Looks up symbols into hash tables.
394 *
395 * Returns: 0 on failure and result is not set,
396 * nonzero on success and result set to nonzero pointer
397 */
398 HsBool ghciLookupSymbolInfo(HashTable *table,
399 const SymbolName* key, RtsSymbolInfo **result)
400 {
401 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
402 if (!pinfo) {
403 *result = NULL;
404 return HS_BOOL_FALSE;
405 }
406 if (pinfo->weak)
407 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
408 /* Once it's looked up, it can no longer be overridden */
409 pinfo->weak = HS_BOOL_FALSE;
410
411 *result = pinfo;
412 return HS_BOOL_TRUE;
413 }
414
415 /* -----------------------------------------------------------------------------
416 * initialize the object linker
417 */
418
419
420 static int linker_init_done = 0 ;
421
422 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
423 static void *dl_prog_handle;
424 static regex_t re_invalid;
425 static regex_t re_realso;
426 #ifdef THREADED_RTS
427 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
428 #endif
429 #endif
430
431 void initLinker (void)
432 {
433 // default to retaining CAFs for backwards compatibility. Most
434 // users will want initLinker_(0): otherwise unloadObj() will not
435 // be able to unload object files when they contain CAFs.
436 initLinker_(1);
437 }
438
439 void
440 initLinker_ (int retain_cafs)
441 {
442 RtsSymbolVal *sym;
443 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
444 int compileResult;
445 #endif
446
447 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
448
449 /* Make initLinker idempotent, so we can call it
450 before every relevant operation; that means we
451 don't need to initialise the linker separately */
452 if (linker_init_done == 1) {
453 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
454 return;
455 } else {
456 linker_init_done = 1;
457 }
458
459 objects = NULL;
460 unloaded_objects = NULL;
461
462 #if defined(THREADED_RTS)
463 initMutex(&linker_mutex);
464 initMutex(&linker_unloaded_mutex);
465 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
466 initMutex(&dl_mutex);
467 #endif
468 #endif
469
470 symhash = allocStrHashTable();
471
472 /* populate the symbol table with stuff from the RTS */
473 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
474 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
475 symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
476 barf("ghciInsertSymbolTable failed");
477 }
478 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
479 }
480 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
481 machoInitSymbolsWithoutUnderscore();
482 # endif
483 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
484 referenced from a statically linked module. We need to mimic this, but
485 we cannot use NULL because we use it to mean nonexistent symbols. So we
486 use an arbitrary (hopefully unique) address here.
487 */
488 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
489 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
490 barf("ghciInsertSymbolTable failed");
491 }
492
493 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
494 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
495 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
496 retain_cafs ? newRetainedCAF : newGCdCAF,
497 HS_BOOL_FALSE, NULL)) {
498 barf("ghciInsertSymbolTable failed");
499 }
500
501 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
502 # if defined(RTLD_DEFAULT)
503 dl_prog_handle = RTLD_DEFAULT;
504 # else
505 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
506 # endif /* RTLD_DEFAULT */
507
508 compileResult = regcomp(&re_invalid,
509 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
510 REG_EXTENDED);
511 if (compileResult != 0) {
512 barf("Compiling re_invalid failed");
513 }
514 compileResult = regcomp(&re_realso,
515 "(GROUP|INPUT) *\\( *([^ )]+)",
516 REG_EXTENDED);
517 if (compileResult != 0) {
518 barf("Compiling re_realso failed");
519 }
520 # endif
521
522 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
523 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
524 // User-override for mmap_32bit_base
525 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
526 }
527 #endif
528
529 if (RTS_LINKER_USE_MMAP)
530 m32_allocator_init();
531
532 #if defined(OBJFORMAT_PEi386)
533 initLinker_PEi386();
534 #endif
535
536 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
537 return;
538 }
539
540 void
541 exitLinker( void ) {
542 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
543 if (linker_init_done == 1) {
544 regfree(&re_invalid);
545 regfree(&re_realso);
546 #ifdef THREADED_RTS
547 closeMutex(&dl_mutex);
548 #endif
549 }
550 #endif
551 if (linker_init_done == 1) {
552 freeHashTable(symhash, free);
553 }
554 #ifdef THREADED_RTS
555 closeMutex(&linker_mutex);
556 #endif
557 }
558
559 /* -----------------------------------------------------------------------------
560 * Loading DLL or .so dynamic libraries
561 * -----------------------------------------------------------------------------
562 *
563 * Add a DLL from which symbols may be found. In the ELF case, just
564 * do RTLD_GLOBAL-style add, so no further messing around needs to
565 * happen in order that symbols in the loaded .so are findable --
566 * lookupSymbol() will subsequently see them by dlsym on the program's
567 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
568 *
569 * In the PEi386 case, open the DLLs and put handles to them in a
570 * linked list. When looking for a symbol, try all handles in the
571 * list. This means that we need to load even DLLs that are guaranteed
572 * to be in the ghc.exe image already, just so we can get a handle
573 * to give to loadSymbol, so that we can find the symbols. For such
574 * libraries, the LoadLibrary call should be a no-op except for returning
575 * the handle.
576 *
577 */
578
579 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
580
581 /* Suppose in ghci we load a temporary SO for a module containing
582 f = 1
583 and then modify the module, recompile, and load another temporary
584 SO with
585 f = 2
586 Then as we don't unload the first SO, dlsym will find the
587 f = 1
588 symbol whereas we want the
589 f = 2
590 symbol. We therefore need to keep our own SO handle list, and
591 try SOs in the right order. */
592
593 typedef
594 struct _OpenedSO {
595 struct _OpenedSO* next;
596 void *handle;
597 }
598 OpenedSO;
599
600 /* A list thereof. */
601 static OpenedSO* openedSOs = NULL;
602
603 static const char *
604 internal_dlopen(const char *dll_name)
605 {
606 OpenedSO* o_so;
607 void *hdl;
608 const char *errmsg;
609 char *errmsg_copy;
610
611 // omitted: RTLD_NOW
612 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
613 IF_DEBUG(linker,
614 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
615
616 //-------------- Begin critical section ------------------
617 // This critical section is necessary because dlerror() is not
618 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
619 // Also, the error message returned must be copied to preserve it
620 // (see POSIX also)
621
622 ACQUIRE_LOCK(&dl_mutex);
623 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
624
625 errmsg = NULL;
626 if (hdl == NULL) {
627 /* dlopen failed; return a ptr to the error msg. */
628 errmsg = dlerror();
629 if (errmsg == NULL) errmsg = "addDLL: unknown error";
630 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
631 strcpy(errmsg_copy, errmsg);
632 errmsg = errmsg_copy;
633 } else {
634 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
635 o_so->handle = hdl;
636 o_so->next = openedSOs;
637 openedSOs = o_so;
638 }
639
640 RELEASE_LOCK(&dl_mutex);
641 //--------------- End critical section -------------------
642
643 return errmsg;
644 }
645
646 /*
647 Note [RTLD_LOCAL]
648
649 In GHCi we want to be able to override previous .so's with newly
650 loaded .so's when we recompile something. This further implies that
651 when we look up a symbol in internal_dlsym() we have to iterate
652 through the loaded libraries (in order from most recently loaded to
653 oldest) looking up the symbol in each one until we find it.
654
655 However, this can cause problems for some symbols that are copied
656 by the linker into the executable image at runtime - see #8935 for a
657 lengthy discussion. To solve that problem we need to look up
658 symbols in the main executable *first*, before attempting to look
659 them up in the loaded .so's. But in order to make that work, we
660 have to always call dlopen with RTLD_LOCAL, so that the loaded
661 libraries don't populate the global symbol table.
662 */
663
664 static void *
665 internal_dlsym(const char *symbol) {
666 OpenedSO* o_so;
667 void *v;
668
669 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
670 ACQUIRE_LOCK(&dl_mutex);
671 dlerror();
672 // look in program first
673 v = dlsym(dl_prog_handle, symbol);
674 if (dlerror() == NULL) {
675 RELEASE_LOCK(&dl_mutex);
676 return v;
677 }
678
679 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
680 v = dlsym(o_so->handle, symbol);
681 if (dlerror() == NULL) {
682 RELEASE_LOCK(&dl_mutex);
683 return v;
684 }
685 }
686 RELEASE_LOCK(&dl_mutex);
687 return v;
688 }
689 # endif
690
691 const char *
692 addDLL( pathchar *dll_name )
693 {
694 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
695 /* ------------------- ELF DLL loader ------------------- */
696
697 #define NMATCH 5
698 regmatch_t match[NMATCH];
699 const char *errmsg;
700 FILE* fp;
701 size_t match_length;
702 #define MAXLINE 1000
703 char line[MAXLINE];
704 int result;
705
706 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
707 errmsg = internal_dlopen(dll_name);
708
709 if (errmsg == NULL) {
710 return NULL;
711 }
712
713 // GHC Trac ticket #2615
714 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
715 // contain linker scripts rather than ELF-format object code. This
716 // code handles the situation by recognizing the real object code
717 // file name given in the linker script.
718 //
719 // If an "invalid ELF header" error occurs, it is assumed that the
720 // .so file contains a linker script instead of ELF object code.
721 // In this case, the code looks for the GROUP ( ... ) linker
722 // directive. If one is found, the first file name inside the
723 // parentheses is treated as the name of a dynamic library and the
724 // code attempts to dlopen that file. If this is also unsuccessful,
725 // an error message is returned.
726
727 // see if the error message is due to an invalid ELF header
728 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
729 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
730 IF_DEBUG(linker, debugBelch("result = %i\n", result));
731 if (result == 0) {
732 // success -- try to read the named file as a linker script
733 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
734 MAXLINE-1);
735 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
736 line[match_length] = '\0'; // make sure string is null-terminated
737 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
738 if ((fp = fopen(line, "r")) == NULL) {
739 return errmsg; // return original error if open fails
740 }
741 // try to find a GROUP or INPUT ( ... ) command
742 while (fgets(line, MAXLINE, fp) != NULL) {
743 IF_DEBUG(linker, debugBelch("input line = %s", line));
744 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
745 // success -- try to dlopen the first named file
746 IF_DEBUG(linker, debugBelch("match%s\n",""));
747 line[match[2].rm_eo] = '\0';
748 stgFree((void*)errmsg); // Free old message before creating new one
749 errmsg = internal_dlopen(line+match[2].rm_so);
750 break;
751 }
752 // if control reaches here, no GROUP or INPUT ( ... ) directive
753 // was found and the original error message is returned to the
754 // caller
755 }
756 fclose(fp);
757 }
758 return errmsg;
759
760 # elif defined(OBJFORMAT_PEi386)
761 return addDLL_PEi386(dll_name);
762
763 # else
764 barf("addDLL: not implemented on this platform");
765 # endif
766 }
767
768 /* -----------------------------------------------------------------------------
769 * Searches the system directories to determine if there is a system DLL that
770 * satisfies the given name. This prevent GHCi from linking against a static
771 * library if a DLL is available.
772 *
773 * Returns: NULL on failure or no DLL found, else the full path to the DLL
774 * that can be loaded.
775 */
776 pathchar* findSystemLibrary(pathchar* dll_name)
777 {
778 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));
779
780 #if defined(OBJFORMAT_PEi386)
781 return findSystemLibrary_PEi386(dll_name);
782 #else
783 (void)(dll_name); // Function not implemented for other platforms.
784 return NULL;
785 #endif
786 }
787
788 /* -----------------------------------------------------------------------------
789 * Emits a warning determining that the system is missing a required security
790 * update that we need to get access to the proper APIs
791 */
792 void warnMissingKBLibraryPaths( void )
793 {
794 static HsBool missing_update_warn = HS_BOOL_FALSE;
795 if (!missing_update_warn) {
796 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
797 missing_update_warn = HS_BOOL_TRUE;
798 }
799 }
800
801 /* -----------------------------------------------------------------------------
802 * appends a directory to the process DLL Load path so LoadLibrary can find it
803 *
804 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
805 * restore the search path to what it was before this call.
806 */
807 HsPtr addLibrarySearchPath(pathchar* dll_path)
808 {
809 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));
810
811 #if defined(OBJFORMAT_PEi386)
812 return addLibrarySearchPath_PEi386(dll_path);
813 #else
814 (void)(dll_path); // Function not implemented for other platforms.
815 return NULL;
816 #endif
817 }
818
819 /* -----------------------------------------------------------------------------
820 * removes a directory from the process DLL Load path
821 *
822 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
823 */
824 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
825 {
826 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n", dll_path_index));
827
828 #if defined(OBJFORMAT_PEi386)
829 return removeLibrarySearchPath_PEi386(dll_path_index);
830 #else
831 (void)(dll_path_index); // Function not implemented for other platforms.
832 return HS_BOOL_FALSE;
833 #endif
834 }
835
836 /* -----------------------------------------------------------------------------
837 * insert a symbol in the hash table
838 *
839 * Returns: 0 on failure, nozero on success
840 */
841 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
842 {
843 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
844 }
845
846 /* -----------------------------------------------------------------------------
847 * lookup a symbol in the hash table
848 */
849 #if defined(OBJFORMAT_PEi386)
850 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
851 {
852 return lookupSymbol_PEi386(lbl);
853 }
854
855 #else
856
857 SymbolAddr* lookupSymbol_ (SymbolName* lbl)
858 {
859 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
860
861 ASSERT(symhash != NULL);
862 RtsSymbolInfo *pinfo;
863
864 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
865 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
866
867 # if defined(OBJFORMAT_ELF)
868 return internal_dlsym(lbl);
869 # elif defined(OBJFORMAT_MACHO)
870
871 /* HACK: On OS X, all symbols are prefixed with an underscore.
872 However, dlsym wants us to omit the leading underscore from the
873 symbol name -- the dlsym routine puts it back on before searching
874 for the symbol. For now, we simply strip it off here (and ONLY
875 here).
876 */
877 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
878 ASSERT(lbl[0] == '_');
879 return internal_dlsym(lbl + 1);
880
881 # else
882 ASSERT(2+2 == 5);
883 return NULL;
884 # endif
885 } else {
886 return loadSymbol(lbl, pinfo);
887 }
888 }
889 #endif /* OBJFORMAT_PEi386 */
890
891 /*
892 * Load and relocate the object code for a symbol as necessary.
893 * Symbol name only used for diagnostics output.
894 */
895 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
896 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, pinfo->value));
897 ObjectCode* oc = pinfo->owner;
898
899 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
900 See Note [runtime-linker-phases] */
901 if (oc && oc->status == OBJECT_LOADED) {
902 oc->status = OBJECT_NEEDED;
903 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand loading symbol '%s'\n", lbl));
904 int r = ocTryLoad(oc);
905 if (!r) {
906 errorBelch("Could not on-demand load symbol '%s'\n", lbl);
907 return NULL;
908 }
909
910 #ifdef PROFILING
911 // collect any new cost centres & CCSs
912 // that were defined during runInit
913 initProfiling2();
914 #endif
915 }
916
917 return pinfo->value;
918 }
919
920 SymbolAddr* lookupSymbol( SymbolName* lbl )
921 {
922 ACQUIRE_LOCK(&linker_mutex);
923 SymbolAddr* r = lookupSymbol_(lbl);
924 RELEASE_LOCK(&linker_mutex);
925 return r;
926 }
927
928 /* -----------------------------------------------------------------------------
929 Create a StablePtr for a foreign export. This is normally called by
930 a C function with __attribute__((constructor)), which is generated
931 by GHC and linked into the module.
932
933 If the object code is being loaded dynamically, then we remember
934 which StablePtrs were allocated by the constructors and free them
935 again in unloadObj().
936 -------------------------------------------------------------------------- */
937
938 static ObjectCode *loading_obj = NULL;
939
940 StgStablePtr foreignExportStablePtr (StgPtr p)
941 {
942 ForeignExportStablePtr *fe_sptr;
943 StgStablePtr *sptr;
944
945 sptr = getStablePtr(p);
946
947 if (loading_obj != NULL) {
948 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
949 "foreignExportStablePtr");
950 fe_sptr->stable_ptr = sptr;
951 fe_sptr->next = loading_obj->stable_ptrs;
952 loading_obj->stable_ptrs = fe_sptr;
953 }
954
955 return sptr;
956 }
957
958
959 /* -----------------------------------------------------------------------------
960 * Debugging aid: look in GHCi's object symbol tables for symbols
961 * within DELTA bytes of the specified address, and show their names.
962 */
963 #ifdef DEBUG
964 void ghci_enquire ( SymbolAddr* addr );
965
966 void ghci_enquire(SymbolAddr* addr)
967 {
968 int i;
969 SymbolName* sym;
970 RtsSymbolInfo* a;
971 const int DELTA = 64;
972 ObjectCode* oc;
973
974 for (oc = objects; oc; oc = oc->next) {
975 for (i = 0; i < oc->n_symbols; i++) {
976 sym = oc->symbols[i];
977 if (sym == NULL) continue;
978 a = NULL;
979 if (a == NULL) {
980 ghciLookupSymbolInfo(symhash, sym, &a);
981 }
982 if (a == NULL) {
983 // debugBelch("ghci_enquire: can't find %s\n", sym);
984 }
985 else if ( a->value
986 && (char*)addr-DELTA <= (char*)a->value
987 && (char*)a->value <= (char*)addr+DELTA) {
988 debugBelch("%p + %3d == `%s'\n", addr, (int)((char*)a->value - (char*)addr), sym);
989 }
990 }
991 }
992 }
993 #endif
994
995 #if RTS_LINKER_USE_MMAP
996 //
997 // Returns NULL on failure.
998 //
999 void *
1000 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1001 {
1002 void *map_addr = NULL;
1003 void *result;
1004 size_t size;
1005 static uint32_t fixed = 0;
1006
1007 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1008 size = roundUpToPage(bytes);
1009
1010 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1011 mmap_again:
1012
1013 if (mmap_32bit_base != 0) {
1014 map_addr = mmap_32bit_base;
1015 }
1016 #endif
1017
1018 IF_DEBUG(linker,
1019 debugBelch("mmapForLinker: \tprotection %#0x\n",
1020 PROT_EXEC | PROT_READ | PROT_WRITE));
1021 IF_DEBUG(linker,
1022 debugBelch("mmapForLinker: \tflags %#0x\n",
1023 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1024
1025 result = mmap(map_addr, size,
1026 PROT_EXEC|PROT_READ|PROT_WRITE,
1027 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1028
1029 if (result == MAP_FAILED) {
1030 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1031 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1032 return NULL;
1033 }
1034
1035 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1036 if (mmap_32bit_base != 0) {
1037 if (result == map_addr) {
1038 mmap_32bit_base = (StgWord8*)map_addr + size;
1039 } else {
1040 if ((W_)result > 0x80000000) {
1041 // oops, we were given memory over 2Gb
1042 munmap(result,size);
1043 #if defined(freebsd_HOST_OS) || \
1044 defined(kfreebsdgnu_HOST_OS) || \
1045 defined(dragonfly_HOST_OS)
1046 // Some platforms require MAP_FIXED. This is normally
1047 // a bad idea, because MAP_FIXED will overwrite
1048 // existing mappings.
1049 fixed = MAP_FIXED;
1050 goto mmap_again;
1051 #else
1052 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1053 "asked for %lu bytes at %p. "
1054 "Try specifying an address with +RTS -xm<addr> -RTS",
1055 size, map_addr);
1056 return NULL;
1057 #endif
1058 } else {
1059 // hmm, we were given memory somewhere else, but it's
1060 // still under 2Gb so we can use it. Next time, ask
1061 // for memory right after the place we just got some
1062 mmap_32bit_base = (StgWord8*)result + size;
1063 }
1064 }
1065 } else {
1066 if ((W_)result > 0x80000000) {
1067 // oops, we were given memory over 2Gb
1068 // ... try allocating memory somewhere else?;
1069 debugTrace(DEBUG_linker,
1070 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1071 bytes, result);
1072 munmap(result, size);
1073
1074 // Set a base address and try again... (guess: 1Gb)
1075 mmap_32bit_base = (void*)0x40000000;
1076 goto mmap_again;
1077 }
1078 }
1079 #endif
1080
1081 IF_DEBUG(linker,
1082 debugBelch("mmapForLinker: mapped %" FMT_Word
1083 " bytes starting at %p\n", (W_)size, result));
1084 IF_DEBUG(linker,
1085 debugBelch("mmapForLinker: done\n"));
1086
1087 return result;
1088 }
1089 #endif
1090
1091 /*
1092 * Remove symbols from the symbol table, and free oc->symbols.
1093 * This operation is idempotent.
1094 */
1095 static void removeOcSymbols (ObjectCode *oc)
1096 {
1097 if (oc->symbols == NULL) return;
1098
1099 // Remove all the mappings for the symbols within this object..
1100 int i;
1101 for (i = 0; i < oc->n_symbols; i++) {
1102 if (oc->symbols[i] != NULL) {
1103 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1104 }
1105 }
1106
1107 stgFree(oc->symbols);
1108 oc->symbols = NULL;
1109 }
1110
1111 /*
1112 * Release StablePtrs and free oc->stable_ptrs.
1113 * This operation is idempotent.
1114 */
1115 static void freeOcStablePtrs (ObjectCode *oc)
1116 {
1117 // Release any StablePtrs that were created when this
1118 // object module was initialized.
1119 ForeignExportStablePtr *fe_ptr, *next;
1120
1121 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1122 next = fe_ptr->next;
1123 freeStablePtr(fe_ptr->stable_ptr);
1124 stgFree(fe_ptr);
1125 }
1126 oc->stable_ptrs = NULL;
1127 }
1128
1129 static void
1130 freePreloadObjectFile (ObjectCode *oc)
1131 {
1132 #if defined(mingw32_HOST_OS)
1133 freePreloadObjectFile_PEi386(oc);
1134 #else
1135
1136 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1137 munmap(oc->image, oc->fileSize);
1138 }
1139 else {
1140 stgFree(oc->image);
1141 }
1142
1143 #endif
1144
1145 oc->image = NULL;
1146 oc->fileSize = 0;
1147 }
1148
1149 /*
1150 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1151 * the GC when a previously unloaded ObjectCode has been determined to be
1152 * unused, and when an error occurs during loadObj().
1153 */
1154 void freeObjectCode (ObjectCode *oc)
1155 {
1156 freePreloadObjectFile(oc);
1157
1158 if (oc->symbols != NULL) {
1159 stgFree(oc->symbols);
1160 oc->symbols = NULL;
1161 }
1162
1163 if (oc->extraInfos != NULL) {
1164 freeHashTable(oc->extraInfos, NULL);
1165 oc->extraInfos = NULL;
1166 }
1167
1168 if (oc->sections != NULL) {
1169 int i;
1170 for (i=0; i < oc->n_sections; i++) {
1171 if (oc->sections[i].start != NULL) {
1172 switch(oc->sections[i].alloc){
1173 #if RTS_LINKER_USE_MMAP
1174 case SECTION_MMAP:
1175 munmap(oc->sections[i].mapped_start,
1176 oc->sections[i].mapped_size);
1177 break;
1178 case SECTION_M32:
1179 m32_free(oc->sections[i].start,
1180 oc->sections[i].size);
1181 break;
1182 #endif
1183 case SECTION_MALLOC:
1184 stgFree(oc->sections[i].start);
1185 break;
1186 default:
1187 break;
1188 }
1189 }
1190 }
1191 stgFree(oc->sections);
1192 }
1193
1194 freeProddableBlocks(oc);
1195
1196 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1197 * alongside the image, so we don't need to free. */
1198 #if NEED_SYMBOL_EXTRAS && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
1199 if (RTS_LINKER_USE_MMAP) {
1200 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1201 m32_free(oc->symbol_extras,
1202 sizeof(SymbolExtra) * oc->n_symbol_extras);
1203 }
1204 }
1205 else {
1206 stgFree(oc->symbol_extras);
1207 }
1208 #endif
1209
1210 stgFree(oc->fileName);
1211 stgFree(oc->archiveMemberName);
1212
1213 stgFree(oc);
1214 }
1215
1216 /* -----------------------------------------------------------------------------
1217 * Sets the initial status of a fresh ObjectCode
1218 */
1219 static void setOcInitialStatus(ObjectCode* oc) {
1220 if (oc->archiveMemberName == NULL) {
1221 oc->status = OBJECT_NEEDED;
1222 } else {
1223 oc->status = OBJECT_LOADED;
1224 }
1225 }
1226
1227 static ObjectCode*
1228 mkOc( pathchar *path, char *image, int imageSize,
1229 rtsBool mapped, char *archiveMemberName, int misalignment ) {
1230 ObjectCode* oc;
1231
1232 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1233 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1234
1235 # if defined(OBJFORMAT_ELF)
1236 oc->formatName = "ELF";
1237 # elif defined(OBJFORMAT_PEi386)
1238 oc->formatName = "PEi386";
1239 # elif defined(OBJFORMAT_MACHO)
1240 oc->formatName = "Mach-O";
1241 # else
1242 stgFree(oc);
1243 barf("loadObj: not implemented on this platform");
1244 # endif
1245
1246 oc->image = image;
1247 oc->fileName = pathdup(path);
1248
1249 if (archiveMemberName) {
1250 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1251 strcpy(oc->archiveMemberName, archiveMemberName);
1252 } else {
1253 oc->archiveMemberName = NULL;
1254 }
1255
1256 setOcInitialStatus( oc );
1257
1258 oc->fileSize = imageSize;
1259 oc->symbols = NULL;
1260 oc->n_sections = 0;
1261 oc->sections = NULL;
1262 oc->proddables = NULL;
1263 oc->stable_ptrs = NULL;
1264 #if NEED_SYMBOL_EXTRAS
1265 oc->symbol_extras = NULL;
1266 #endif
1267 oc->imageMapped = mapped;
1268
1269 oc->misalignment = misalignment;
1270 oc->extraInfos = NULL;
1271
1272 /* chain it onto the list of objects */
1273 oc->next = NULL;
1274
1275 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1276 return oc;
1277 }
1278
1279 /* -----------------------------------------------------------------------------
1280 * Check if an object or archive is already loaded.
1281 *
1282 * Returns: 1 if the path is already loaded, 0 otherwise.
1283 */
1284 static HsInt
1285 isAlreadyLoaded( pathchar *path )
1286 {
1287 ObjectCode *o;
1288 for (o = objects; o; o = o->next) {
1289 if (0 == pathcmp(o->fileName, path)) {
1290 return 1; /* already loaded */
1291 }
1292 }
1293 return 0; /* not loaded yet */
1294 }
1295
1296 static HsInt loadArchive_ (pathchar *path)
1297 {
1298 ObjectCode* oc;
1299 char *image;
1300 int memberSize;
1301 FILE *f;
1302 int n;
1303 size_t thisFileNameSize;
1304 char *fileName;
1305 size_t fileNameSize;
1306 int isObject, isGnuIndex, isThin, isImportLib;
1307 char tmp[20];
1308 char *gnuFileIndex;
1309 int gnuFileIndexSize;
1310 #if defined(darwin_HOST_OS)
1311 int i;
1312 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
1313 #if defined(i386_HOST_ARCH)
1314 const uint32_t mycputype = CPU_TYPE_X86;
1315 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
1316 #elif defined(x86_64_HOST_ARCH)
1317 const uint32_t mycputype = CPU_TYPE_X86_64;
1318 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
1319 #elif defined(powerpc_HOST_ARCH)
1320 const uint32_t mycputype = CPU_TYPE_POWERPC;
1321 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1322 #elif defined(powerpc64_HOST_ARCH)
1323 const uint32_t mycputype = CPU_TYPE_POWERPC64;
1324 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1325 #else
1326 #error Unknown Darwin architecture
1327 #endif
1328 #endif
1329 int misalignment = 0;
1330
1331 /* TODO: don't call barf() on error, instead return an error code, freeing
1332 * all resources correctly. This function is pretty complex, so it needs
1333 * to be refactored to make this practical. */
1334
1335 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
1336 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
1337
1338 /* Check that we haven't already loaded this archive.
1339 Ignore requests to load multiple times */
1340 if (isAlreadyLoaded(path)) {
1341 IF_DEBUG(linker,
1342 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1343 return 1; /* success */
1344 }
1345
1346 gnuFileIndex = NULL;
1347 gnuFileIndexSize = 0;
1348
1349 fileNameSize = 32;
1350 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
1351
1352 isThin = 0;
1353 isImportLib = 0;
1354
1355 f = pathopen(path, WSTR("rb"));
1356 if (!f)
1357 barf("loadObj: can't read `%" PATH_FMT "'", path);
1358
1359 /* Check if this is an archive by looking for the magic "!<arch>\n"
1360 * string. Usually, if this fails, we barf and quit. On Darwin however,
1361 * we may have a fat archive, which contains archives for more than
1362 * one architecture. Fat archives start with the magic number 0xcafebabe,
1363 * always stored big endian. If we find a fat_header, we scan through
1364 * the fat_arch structs, searching through for one for our host
1365 * architecture. If a matching struct is found, we read the offset
1366 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
1367 * from the start of the file.
1368 *
1369 * A subtlety is that all of the members of the fat_header and fat_arch
1370 * structs are stored big endian, so we need to call byte order
1371 * conversion functions.
1372 *
1373 * If we find the appropriate architecture in a fat archive, we gobble
1374 * its magic "!<arch>\n" string and continue processing just as if
1375 * we had a single architecture archive.
1376 */
1377
1378 n = fread ( tmp, 1, 8, f );
1379 if (n != 8)
1380 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
1381 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
1382 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
1383 *
1384 * ar thin libraries have the exact same format as normal archives except they
1385 * have a different magic string and they don't copy the object files into the
1386 * archive.
1387 *
1388 * Instead each header entry points to the location of the object file on disk.
1389 * This is useful when a library is only created to satisfy a compile time dependency
1390 * instead of to be distributed. This saves the time required for copying.
1391 *
1392 * Thin archives are always flattened. They always only contain simple headers
1393 * pointing to the object file and so we need not allocate more memory than needed
1394 * to find the object file.
1395 *
1396 */
1397 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
1398 isThin = 1;
1399 }
1400 #if defined(darwin_HOST_OS)
1401 /* Not a standard archive, look for a fat archive magic number: */
1402 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
1403 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
1404 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
1405 nfat_offset = 0;
1406
1407 for (i = 0; i < (int)nfat_arch; i++) {
1408 /* search for the right arch */
1409 n = fread( tmp, 1, 20, f );
1410 if (n != 8)
1411 barf("loadArchive: Failed reading arch from `%s'", path);
1412 cputype = ntohl(*(uint32_t *)tmp);
1413 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
1414
1415 if (cputype == mycputype && cpusubtype == mycpusubtype) {
1416 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
1417 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
1418 break;
1419 }
1420 }
1421
1422 if (nfat_offset == 0) {
1423 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
1424 }
1425 else {
1426 n = fseek( f, nfat_offset, SEEK_SET );
1427 if (n != 0)
1428 barf("loadArchive: Failed to seek to arch in `%s'", path);
1429 n = fread ( tmp, 1, 8, f );
1430 if (n != 8)
1431 barf("loadArchive: Failed reading header from `%s'", path);
1432 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1433 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
1434 }
1435 }
1436 }
1437 else {
1438 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
1439 }
1440 #else
1441 else {
1442 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
1443 }
1444 #endif
1445
1446 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
1447
1448 while (1) {
1449 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
1450 n = fread ( fileName, 1, 16, f );
1451 if (n != 16) {
1452 if (feof(f)) {
1453 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
1454 break;
1455 }
1456 else {
1457 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
1458 }
1459 }
1460
1461 #if defined(darwin_HOST_OS)
1462 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
1463 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
1464 break;
1465 }
1466 #endif
1467
1468 n = fread ( tmp, 1, 12, f );
1469 if (n != 12)
1470 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
1471 n = fread ( tmp, 1, 6, f );
1472 if (n != 6)
1473 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
1474 n = fread ( tmp, 1, 6, f );
1475 if (n != 6)
1476 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
1477 n = fread ( tmp, 1, 8, f );
1478 if (n != 8)
1479 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
1480 n = fread ( tmp, 1, 10, f );
1481 if (n != 10)
1482 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
1483 tmp[10] = '\0';
1484 for (n = 0; isdigit(tmp[n]); n++);
1485 tmp[n] = '\0';
1486 memberSize = atoi(tmp);
1487
1488 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
1489 n = fread ( tmp, 1, 2, f );
1490 if (n != 2)
1491 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
1492 if (strncmp(tmp, "\x60\x0A", 2) != 0)
1493 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
1494 path, ftell(f), tmp[0], tmp[1]);
1495
1496 isGnuIndex = 0;
1497 /* Check for BSD-variant large filenames */
1498 if (0 == strncmp(fileName, "#1/", 3)) {
1499 fileName[16] = '\0';
1500 if (isdigit(fileName[3])) {
1501 for (n = 4; isdigit(fileName[n]); n++);
1502 fileName[n] = '\0';
1503 thisFileNameSize = atoi(fileName + 3);
1504 memberSize -= thisFileNameSize;
1505 if (thisFileNameSize >= fileNameSize) {
1506 /* Double it to avoid potentially continually
1507 increasing it by 1 */
1508 fileNameSize = thisFileNameSize * 2;
1509 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
1510 }
1511 n = fread ( fileName, 1, thisFileNameSize, f );
1512 if (n != (int)thisFileNameSize) {
1513 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
1514 path);
1515 }
1516 fileName[thisFileNameSize] = 0;
1517
1518 /* On OS X at least, thisFileNameSize is the size of the
1519 fileName field, not the length of the fileName
1520 itself. */
1521 thisFileNameSize = strlen(fileName);
1522 }
1523 else {
1524 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
1525 }
1526 }
1527 /* Check for GNU file index file */
1528 else if (0 == strncmp(fileName, "//", 2)) {
1529 fileName[0] = '\0';
1530 thisFileNameSize = 0;
1531 isGnuIndex = 1;
1532 }
1533 /* Check for a file in the GNU file index */
1534 else if (fileName[0] == '/') {
1535 if (isdigit(fileName[1])) {
1536 int i;
1537
1538 for (n = 2; isdigit(fileName[n]); n++);
1539 fileName[n] = '\0';
1540 n = atoi(fileName + 1);
1541
1542 if (gnuFileIndex == NULL) {
1543 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
1544 }
1545 if (n < 0 || n > gnuFileIndexSize) {
1546 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
1547 }
1548 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
1549 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
1550 }
1551 for (i = n; gnuFileIndex[i] != '\n'; i++);
1552 thisFileNameSize = i - n - 1;
1553 if (thisFileNameSize >= fileNameSize) {
1554 /* Double it to avoid potentially continually
1555 increasing it by 1 */
1556 fileNameSize = thisFileNameSize * 2;
1557 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
1558 }
1559 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
1560 fileName[thisFileNameSize] = '\0';
1561 }
1562 else if (fileName[1] == ' ') {
1563 fileName[0] = '\0';
1564 thisFileNameSize = 0;
1565 }
1566 else {
1567 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
1568 }
1569 }
1570 /* Finally, the case where the filename field actually contains
1571 the filename */
1572 else {
1573 /* GNU ar terminates filenames with a '/', this allowing
1574 spaces in filenames. So first look to see if there is a
1575 terminating '/'. */
1576 for (thisFileNameSize = 0;
1577 thisFileNameSize < 16;
1578 thisFileNameSize++) {
1579 if (fileName[thisFileNameSize] == '/') {
1580 fileName[thisFileNameSize] = '\0';
1581 break;
1582 }
1583 }
1584 /* If we didn't find a '/', then a space teminates the
1585 filename. Note that if we don't find one, then
1586 thisFileNameSize ends up as 16, and we already have the
1587 '\0' at the end. */
1588 if (thisFileNameSize == 16) {
1589 for (thisFileNameSize = 0;
1590 thisFileNameSize < 16;
1591 thisFileNameSize++) {
1592 if (fileName[thisFileNameSize] == ' ') {
1593 fileName[thisFileNameSize] = '\0';
1594 break;
1595 }
1596 }
1597 }
1598 }
1599
1600 IF_DEBUG(linker,
1601 debugBelch("loadArchive: Found member file `%s'\n", fileName));
1602
1603 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
1604 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
1605
1606 #if defined(OBJFORMAT_PEi386)
1607 /*
1608 * Note [MSVC import files (ext .lib)]
1609 * MSVC compilers store the object files in
1610 * the import libraries with extension .dll
1611 * so on Windows we should look for those too.
1612 * The PE COFF format doesn't specify any specific file name
1613 * for sections. So on windows, just try to load it all.
1614 *
1615 * Linker members (e.g. filename / are skipped since they are not needed)
1616 */
1617 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
1618
1619 /*
1620 * Note [GCC import files (ext .dll.a)]
1621 * GCC stores import information in the same binary format
1622 * as the object file normally has. The only difference is that
1623 * all the information are put in .idata sections. The only real
1624 * way to tell if we're dealing with an import lib is by looking
1625 * at the file extension.
1626 */
1627 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
1628 #endif // windows
1629
1630 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
1631 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
1632
1633 if (isObject) {
1634 char *archiveMemberName;
1635
1636 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
1637
1638 #if defined(mingw32_HOST_OS)
1639 // TODO: We would like to use allocateExec here, but allocateExec
1640 // cannot currently allocate blocks large enough.
1641 image = allocateImageAndTrampolines(path, fileName, f, memberSize,
1642 isThin);
1643 #elif defined(darwin_HOST_OS)
1644 if (RTS_LINKER_USE_MMAP)
1645 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
1646 else {
1647 /* See loadObj() */
1648 misalignment = machoGetMisalignment(f);
1649 image = stgMallocBytes(memberSize + misalignment,
1650 "loadArchive(image)");
1651 image += misalignment;
1652 }
1653
1654 #else // not windows or darwin
1655 image = stgMallocBytes(memberSize, "loadArchive(image)");
1656 #endif
1657 if (isThin) {
1658 FILE *member;
1659 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
1660
1661 /* Allocate and setup the dirname of the archive. We'll need
1662 this to locate the thin member */
1663 pathCopy = pathdup(path); // Convert the char* to a pathchar*
1664 dirName = pathdir(pathCopy);
1665
1666 /* Append the relative member name to the dirname. This should be
1667 be the full path to the actual thin member. */
1668 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
1669 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
1670 objFileName = mkPath(fileName);
1671 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
1672 stgFree(objFileName);
1673 stgFree(dirName);
1674
1675 member = pathopen(memberPath, WSTR("rb"));
1676 if (!member)
1677 barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
1678
1679 n = fread ( image, 1, memberSize, member );
1680 if (n != memberSize) {
1681 barf("loadArchive: error whilst reading `%s'", fileName);
1682 }
1683
1684 fclose(member);
1685 stgFree(memberPath);
1686 stgFree(pathCopy);
1687 }
1688 else
1689 {
1690 n = fread ( image, 1, memberSize, f );
1691 if (n != memberSize) {
1692 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
1693 }
1694 }
1695
1696 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
1697 "loadArchive(file)");
1698 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
1699 path, (int)thisFileNameSize, fileName);
1700
1701 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
1702 , misalignment);
1703
1704 stgFree(archiveMemberName);
1705
1706 if (0 == loadOc(oc)) {
1707 stgFree(fileName);
1708 fclose(f);
1709 return 0;
1710 } else {
1711 #if defined(OBJFORMAT_PEi386)
1712 if (isImportLib)
1713 {
1714 findAndLoadImportLibrary(oc);
1715 stgFree(oc);
1716 oc = NULL;
1717 break;
1718 } else {
1719 #endif
1720 oc->next = objects;
1721 objects = oc;
1722 #if defined(OBJFORMAT_PEi386)
1723 }
1724 #endif
1725 }
1726 }
1727 else if (isGnuIndex) {
1728 if (gnuFileIndex != NULL) {
1729 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
1730 }
1731 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
1732 #if RTS_LINKER_USE_MMAP
1733 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
1734 #else
1735 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
1736 #endif
1737 n = fread ( gnuFileIndex, 1, memberSize, f );
1738 if (n != memberSize) {
1739 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
1740 }
1741 gnuFileIndex[memberSize] = '/';
1742 gnuFileIndexSize = memberSize;
1743 }
1744 else if (isImportLib) {
1745 #if defined(OBJFORMAT_PEi386)
1746 if (checkAndLoadImportLibrary(path, fileName, f)) {
1747 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
1748 }
1749 else {
1750 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
1751 n = fseek(f, memberSize, SEEK_CUR);
1752 if (n != 0)
1753 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
1754 memberSize, path);
1755 }
1756 #endif
1757 }
1758 else {
1759 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
1760 if (!isThin || thisFileNameSize == 0) {
1761 n = fseek(f, memberSize, SEEK_CUR);
1762 if (n != 0)
1763 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
1764 memberSize, path);
1765 }
1766 }
1767
1768 /* .ar files are 2-byte aligned */
1769 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
1770 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
1771 n = fread ( tmp, 1, 1, f );
1772 if (n != 1) {
1773 if (feof(f)) {
1774 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
1775 break;
1776 }
1777 else {
1778 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
1779 }
1780 }
1781 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
1782 }
1783 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
1784 }
1785
1786 fclose(f);
1787
1788 stgFree(fileName);
1789 if (gnuFileIndex != NULL) {
1790 #if RTS_LINKER_USE_MMAP
1791 munmap(gnuFileIndex, gnuFileIndexSize + 1);
1792 #else
1793 stgFree(gnuFileIndex);
1794 #endif
1795 }
1796
1797 if (RTS_LINKER_USE_MMAP)
1798 m32_allocator_flush();
1799
1800 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
1801 return 1;
1802 }
1803
1804 HsInt loadArchive (pathchar *path)
1805 {
1806 ACQUIRE_LOCK(&linker_mutex);
1807 HsInt r = loadArchive_(path);
1808 RELEASE_LOCK(&linker_mutex);
1809 return r;
1810 }
1811
1812 //
1813 // Load the object file into memory. This will not be its final resting place,
1814 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
1815 // address space, properly aligned.
1816 //
1817 static ObjectCode *
1818 preloadObjectFile (pathchar *path)
1819 {
1820 int fileSize;
1821 struct_stat st;
1822 int r;
1823 void *image;
1824 ObjectCode *oc;
1825 int misalignment = 0;
1826
1827 r = pathstat(path, &st);
1828 if (r == -1) {
1829 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
1830 return NULL;
1831 }
1832
1833 fileSize = st.st_size;
1834
1835 #if RTS_LINKER_USE_MMAP
1836 int fd;
1837
1838 /* On many architectures malloc'd memory isn't executable, so we need to use
1839 * mmap. */
1840
1841 #if defined(openbsd_HOST_OS)
1842 fd = open(path, O_RDONLY, S_IRUSR);
1843 #else
1844 fd = open(path, O_RDONLY);
1845 #endif
1846 if (fd == -1) {
1847 errorBelch("loadObj: can't open %s", path);
1848 return NULL;
1849 }
1850
1851 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
1852 MAP_PRIVATE, fd, 0);
1853 // not 32-bit yet, we'll remap later
1854 close(fd);
1855
1856 #else /* !RTS_LINKER_USE_MMAP */
1857 FILE *f;
1858
1859 /* load the image into memory */
1860 /* coverity[toctou] */
1861 f = pathopen(path, WSTR("rb"));
1862 if (!f) {
1863 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
1864 return NULL;
1865 }
1866
1867 # if defined(mingw32_HOST_OS)
1868
1869 // TODO: We would like to use allocateExec here, but allocateExec
1870 // cannot currently allocate blocks large enough.
1871 image = allocateImageAndTrampolines(path, "itself", f, fileSize,
1872 HS_BOOL_FALSE);
1873 if (image == NULL) {
1874 fclose(f);
1875 return NULL;
1876 }
1877
1878 # elif defined(darwin_HOST_OS)
1879
1880 // In a Mach-O .o file, all sections can and will be misaligned
1881 // if the total size of the headers is not a multiple of the
1882 // desired alignment. This is fine for .o files that only serve
1883 // as input for the static linker, but it's not fine for us,
1884 // as SSE (used by gcc for floating point) and Altivec require
1885 // 16-byte alignment.
1886 // We calculate the correct alignment from the header before
1887 // reading the file, and then we misalign image on purpose so
1888 // that the actual sections end up aligned again.
1889 misalignment = machoGetMisalignment(f);
1890 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
1891 image += misalignment;
1892
1893 # else /* !defined(mingw32_HOST_OS) */
1894
1895 image = stgMallocBytes(fileSize, "loadObj(image)");
1896
1897 #endif
1898
1899 int n;
1900 n = fread ( image, 1, fileSize, f );
1901 fclose(f);
1902 if (n != fileSize) {
1903 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
1904 stgFree(image);
1905 return NULL;
1906 }
1907
1908 #endif /* RTS_LINKER_USE_MMAP */
1909
1910 oc = mkOc(path, image, fileSize, rtsTrue, NULL, misalignment);
1911
1912 return oc;
1913 }
1914
1915 /* -----------------------------------------------------------------------------
1916 * Load an obj (populate the global symbol table, but don't resolve yet)
1917 *
1918 * Returns: 1 if ok, 0 on error.
1919 */
1920 static HsInt loadObj_ (pathchar *path)
1921 {
1922 ObjectCode* oc;
1923 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
1924
1925 /* debugBelch("loadObj %s\n", path ); */
1926
1927 /* Check that we haven't already loaded this object.
1928 Ignore requests to load multiple times */
1929
1930 if (isAlreadyLoaded(path)) {
1931 IF_DEBUG(linker,
1932 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1933 return 1; /* success */
1934 }
1935
1936 oc = preloadObjectFile(path);
1937 if (oc == NULL) return 0;
1938
1939 if (! loadOc(oc)) {
1940 // failed; free everything we've allocated
1941 removeOcSymbols(oc);
1942 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
1943 freeObjectCode(oc);
1944 return 0;
1945 }
1946
1947 oc->next = objects;
1948 objects = oc;
1949 return 1;
1950 }
1951
1952 HsInt loadObj (pathchar *path)
1953 {
1954 ACQUIRE_LOCK(&linker_mutex);
1955 HsInt r = loadObj_(path);
1956 RELEASE_LOCK(&linker_mutex);
1957 return r;
1958 }
1959
1960 static HsInt loadOc (ObjectCode* oc)
1961 {
1962 int r;
1963
1964 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
1965
1966 /* verify the in-memory image */
1967 # if defined(OBJFORMAT_ELF)
1968 r = ocVerifyImage_ELF ( oc );
1969 # elif defined(OBJFORMAT_PEi386)
1970 r = ocVerifyImage_PEi386 ( oc );
1971 # elif defined(OBJFORMAT_MACHO)
1972 r = ocVerifyImage_MachO ( oc );
1973 # else
1974 barf("loadObj: no verify method");
1975 # endif
1976 if (!r) {
1977 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
1978 return r;
1979 }
1980
1981 #if NEED_SYMBOL_EXTRAS
1982 # if defined(OBJFORMAT_MACHO)
1983 r = ocAllocateSymbolExtras_MachO ( oc );
1984 if (!r) {
1985 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
1986 return r;
1987 }
1988 # elif defined(OBJFORMAT_ELF)
1989 r = ocAllocateSymbolExtras_ELF ( oc );
1990 if (!r) {
1991 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
1992 return r;
1993 }
1994 # elif defined(OBJFORMAT_PEi386)
1995 ocAllocateSymbolExtras_PEi386 ( oc );
1996 # endif
1997 #endif
1998
1999 /* build the symbol list for this image */
2000 # if defined(OBJFORMAT_ELF)
2001 r = ocGetNames_ELF ( oc );
2002 # elif defined(OBJFORMAT_PEi386)
2003 r = ocGetNames_PEi386 ( oc );
2004 # elif defined(OBJFORMAT_MACHO)
2005 r = ocGetNames_MachO ( oc );
2006 # else
2007 barf("loadObj: no getNames method");
2008 # endif
2009 if (!r) {
2010 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
2011 return r;
2012 }
2013
2014 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
2015 setOcInitialStatus( oc );
2016 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
2017
2018 return 1;
2019 }
2020
2021 /* -----------------------------------------------------------------------------
2022 * try to load and initialize an ObjectCode into memory
2023 *
2024 * Returns: 1 if ok, 0 on error.
2025 */
2026 int ocTryLoad (ObjectCode* oc) {
2027 int r;
2028
2029 if (oc->status != OBJECT_NEEDED) {
2030 return 1;
2031 }
2032
2033 /* Check for duplicate symbols by looking into `symhash`.
2034 Duplicate symbols are any symbols which exist
2035 in different ObjectCodes that have both been loaded, or
2036 are to be loaded by this call.
2037
2038 This call is intended to have no side-effects when a non-duplicate
2039 symbol is re-inserted.
2040
2041 We set the Address to NULL since that is not used to distinguish
2042 symbols. Duplicate symbols are distinguished by name and oc.
2043 */
2044 int x;
2045 SymbolName* symbol;
2046 for (x = 0; x < oc->n_symbols; x++) {
2047 symbol = oc->symbols[x];
2048 if ( symbol
2049 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL, isSymbolWeak(oc, symbol), oc)) {
2050 return 0;
2051 }
2052 }
2053
2054 # if defined(OBJFORMAT_ELF)
2055 r = ocResolve_ELF ( oc );
2056 # elif defined(OBJFORMAT_PEi386)
2057 r = ocResolve_PEi386 ( oc );
2058 # elif defined(OBJFORMAT_MACHO)
2059 r = ocResolve_MachO ( oc );
2060 # else
2061 barf("ocTryLoad: not implemented on this platform");
2062 # endif
2063 if (!r) { return r; }
2064
2065 // run init/init_array/ctors/mod_init_func
2066
2067 loading_obj = oc; // tells foreignExportStablePtr what to do
2068 #if defined(OBJFORMAT_ELF)
2069 r = ocRunInit_ELF ( oc );
2070 #elif defined(OBJFORMAT_PEi386)
2071 r = ocRunInit_PEi386 ( oc );
2072 #elif defined(OBJFORMAT_MACHO)
2073 r = ocRunInit_MachO ( oc );
2074 #else
2075 barf("ocTryLoad: initializers not implemented on this platform");
2076 #endif
2077 loading_obj = NULL;
2078
2079 if (!r) { return r; }
2080
2081 oc->status = OBJECT_RESOLVED;
2082
2083 return 1;
2084 }
2085
2086 /* -----------------------------------------------------------------------------
2087 * resolve all the currently unlinked objects in memory
2088 *
2089 * Returns: 1 if ok, 0 on error.
2090 */
2091 static HsInt resolveObjs_ (void)
2092 {
2093 ObjectCode *oc;
2094 int r;
2095
2096 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
2097
2098 for (oc = objects; oc; oc = oc->next) {
2099 r = ocTryLoad(oc);
2100 if (!r)
2101 {
2102 return r;
2103 }
2104 }
2105
2106 #ifdef PROFILING
2107 // collect any new cost centres & CCSs that were defined during runInit
2108 initProfiling2();
2109 #endif
2110
2111 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
2112 return 1;
2113 }
2114
2115 HsInt resolveObjs (void)
2116 {
2117 ACQUIRE_LOCK(&linker_mutex);
2118 HsInt r = resolveObjs_();
2119 RELEASE_LOCK(&linker_mutex);
2120 return r;
2121 }
2122
2123 /* -----------------------------------------------------------------------------
2124 * delete an object from the pool
2125 */
2126 static HsInt unloadObj_ (pathchar *path, rtsBool just_purge)
2127 {
2128 ObjectCode *oc, *prev, *next;
2129 HsBool unloadedAnyObj = HS_BOOL_FALSE;
2130
2131 ASSERT(symhash != NULL);
2132 ASSERT(objects != NULL);
2133
2134 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
2135
2136 prev = NULL;
2137 for (oc = objects; oc; oc = next) {
2138 next = oc->next; // oc might be freed
2139
2140 if (!pathcmp(oc->fileName,path)) {
2141
2142 // these are both idempotent, so in just_purge mode we can
2143 // later call unloadObj() to really unload the object.
2144 removeOcSymbols(oc);
2145 freeOcStablePtrs(oc);
2146
2147 if (!just_purge) {
2148 if (prev == NULL) {
2149 objects = oc->next;
2150 } else {
2151 prev->next = oc->next;
2152 }
2153 ACQUIRE_LOCK(&linker_unloaded_mutex);
2154 oc->next = unloaded_objects;
2155 unloaded_objects = oc;
2156 oc->status = OBJECT_UNLOADED;
2157 RELEASE_LOCK(&linker_unloaded_mutex);
2158 // We do not own oc any more; it can be released at any time by
2159 // the GC in checkUnload().
2160 } else {
2161 prev = oc;
2162 }
2163
2164 /* This could be a member of an archive so continue
2165 * unloading other members. */
2166 unloadedAnyObj = HS_BOOL_TRUE;
2167 } else {
2168 prev = oc;
2169 }
2170 }
2171
2172 if (unloadedAnyObj) {
2173 return 1;
2174 }
2175 else {
2176 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
2177 return 0;
2178 }
2179 }
2180
2181 HsInt unloadObj (pathchar *path)
2182 {
2183 ACQUIRE_LOCK(&linker_mutex);
2184 HsInt r = unloadObj_(path, rtsFalse);
2185 RELEASE_LOCK(&linker_mutex);
2186 return r;
2187 }
2188
2189 HsInt purgeObj (pathchar *path)
2190 {
2191 ACQUIRE_LOCK(&linker_mutex);
2192 HsInt r = unloadObj_(path, rtsTrue);
2193 RELEASE_LOCK(&linker_mutex);
2194 return r;
2195 }
2196
2197 /* -----------------------------------------------------------------------------
2198 * Sanity checking. For each ObjectCode, maintain a list of address ranges
2199 * which may be prodded during relocation, and abort if we try and write
2200 * outside any of these.
2201 */
2202 void
2203 addProddableBlock ( ObjectCode* oc, void* start, int size )
2204 {
2205 ProddableBlock* pb
2206 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
2207
2208 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
2209 ASSERT(size > 0);
2210 pb->start = start;
2211 pb->size = size;
2212 pb->next = oc->proddables;
2213 oc->proddables = pb;
2214 }
2215
2216 void
2217 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
2218 {
2219 ProddableBlock* pb;
2220
2221 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
2222 char* s = (char*)(pb->start);
2223 char* e = s + pb->size;
2224 char* a = (char*)addr;
2225 if (a >= s && (a+size) <= e) return;
2226 }
2227 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
2228 }
2229
2230 void freeProddableBlocks (ObjectCode *oc)
2231 {
2232 ProddableBlock *pb, *next;
2233
2234 for (pb = oc->proddables; pb != NULL; pb = next) {
2235 next = pb->next;
2236 stgFree(pb);
2237 }
2238 oc->proddables = NULL;
2239 }
2240
2241 /* -----------------------------------------------------------------------------
2242 * Section management.
2243 */
2244 void
2245 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
2246 void* start, StgWord size, StgWord mapped_offset,
2247 void* mapped_start, StgWord mapped_size)
2248 {
2249 s->start = start; /* actual start of section in memory */
2250 s->size = size; /* actual size of section in memory */
2251 s->kind = kind;
2252 s->alloc = alloc;
2253 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
2254
2255 s->mapped_start = mapped_start; /* start of mmap() block */
2256 s->mapped_size = mapped_size; /* size of mmap() block */
2257
2258 IF_DEBUG(linker,
2259 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
2260 start, (void*)((StgWord)start + size),
2261 size, kind ));
2262 }
2263