Revert "rts/linker: Move loadArchive to new source file"
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31 #include "PathUtils.h"
32
33 #if !defined(mingw32_HOST_OS)
34 #include "posix/Signals.h"
35 #endif
36
37 // get protos for is*()
38 #include <ctype.h>
39
40 #ifdef HAVE_SYS_TYPES_H
41 #include <sys/types.h>
42 #endif
43
44 #include <inttypes.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <stdio.h>
48 #include <assert.h>
49
50 #ifdef HAVE_SYS_STAT_H
51 #include <sys/stat.h>
52 #endif
53
54 #if defined(HAVE_DLFCN_H)
55 #include <dlfcn.h>
56 #endif
57
58 /* PowerPC and ARM have relative branch instructions with only 24 bit
59 * displacements and therefore need jump islands contiguous with each object
60 * code module.
61 */
62 #if defined(powerpc_HOST_ARCH)
63 #define SHORT_REL_BRANCH 1
64 #endif
65 #if defined(arm_HOST_ARCH)
66 #define SHORT_REL_BRANCH 1
67 #endif
68
69 #if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
70 #define USE_CONTIGUOUS_MMAP 1
71 #else
72 #define USE_CONTIGUOUS_MMAP 0
73 #endif
74
75 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
76 # define OBJFORMAT_ELF
77 # include <regex.h> // regex is already used by dlopen() so this is OK
78 // to use here without requiring an additional lib
79 #elif defined (mingw32_HOST_OS)
80 # define OBJFORMAT_PEi386
81 # include <windows.h>
82 # include <shfolder.h> /* SHGetFolderPathW */
83 # include <math.h>
84 # include <wchar.h>
85 #elif defined(darwin_HOST_OS)
86 # define OBJFORMAT_MACHO
87 # include <regex.h>
88 # include <mach/machine.h>
89 # include <mach-o/fat.h>
90 # include <mach-o/loader.h>
91 # include <mach-o/nlist.h>
92 # include <mach-o/reloc.h>
93 #if defined(powerpc_HOST_ARCH)
94 # include <mach-o/ppc/reloc.h>
95 #endif
96 #if defined(x86_64_HOST_ARCH)
97 # include <mach-o/x86_64/reloc.h>
98 #endif
99 #endif
100
101 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
102 #define ALWAYS_PIC
103 #endif
104
105 #if defined(dragonfly_HOST_OS)
106 #include <sys/tls.h>
107 #endif
108
109 /* SymbolInfo tracks a symbol's address, the object code from which
110 it originated, and whether or not it's weak.
111
112 RtsSymbolInfo is used to track the state of the symbols currently
113 loaded or to be loaded by the Linker.
114
115 Where the information in the `ObjectCode` is used to track the
116 original status of the symbol inside the `ObjectCode`.
117
118 A weak symbol that has been used will still be marked as weak
119 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
120 */
121 typedef struct _RtsSymbolInfo {
122 SymbolAddr* value;
123 ObjectCode *owner;
124 HsBool weak;
125 } RtsSymbolInfo;
126
127 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
128 This hashtable will contain information on all symbols
129 that we know of, however the .o they are in may not be loaded.
130
131 Until the ObjectCode the symbol belongs to is actually
132 loaded this symbol may be replaced. So do not rely on
133 addresses of unloaded symbols.
134
135 Note [runtime-linker-phases]
136 --------------------------------------
137 Broadly the behavior of the runtime linker can be
138 split into the following four phases:
139
140 - Indexing (e.g. ocVerifyImage and ocGetNames)
141 - Initialization (e.g. ocResolve and ocRunInit)
142 - Resolve (e.g. resolveObjs())
143 - Lookup (e.g. lookupSymbol)
144
145 This is to enable lazy loading of symbols. Eager loading is problematic
146 as it means that all symbols must be available, even those which we will
147 never use. This is especially painful of Windows, where the number of
148 libraries required to link things like mingwex grows to be quite high.
149
150 We proceed through these stages as follows,
151
152 * During Indexing we verify and open the ObjectCode and
153 perform a quick scan/indexing of the ObjectCode. All the work
154 required to actually load the ObjectCode is done.
155
156 All symbols from the ObjectCode is also inserted into
157 `symhash`, where possible duplicates are handled via the semantics
158 described in `ghciInsertSymbolTable`.
159
160 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
161 depending on whether they are an archive members or not.
162
163 * During initialization we load ObjectCode, perform relocations, execute
164 static constructors etc. This phase may trigger other ObjectCodes to
165 be loaded because of the calls to lookupSymbol.
166
167 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
168 previous status was `OBJECT_LOADED`.
169
170 * During resolve we attempt to resolve all the symbols needed for the
171 initial link. This essentially means, that for any ObjectCode given
172 directly to the command-line we perform lookupSymbols on the required
173 symbols. lookupSymbols may trigger the loading of additional ObjectCode
174 if required.
175
176 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
177 the previous status was `OBJECT_NEEDED`.
178
179 * Lookup symbols is used to lookup any symbols required, both during initial
180 link and during statement and expression compilations in the REPL.
181 Declaration of e.g. an foreign import, will eventually call lookupSymbol
182 which will either fail (symbol unknown) or succeed (and possibly triggered a
183 load).
184
185 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
186
187 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
188 by calling unloadObj and starting over.
189 When a new declaration or statement is performed ultimately lookupSymbol is called
190 without doing a re-link.
191
192 The goal of these different phases is to allow the linker to be able to perform
193 "lazy loading" of ObjectCode. The reason for this is that we want to only link
194 in symbols that are actually required for the link. This reduces:
195
196 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
197 then we don't need to load libB. This means the dependency chain for libraries
198 such as mingw32 and mingwex can be broken down.
199
200 2) The number of duplicate symbols, since now only symbols that are
201 true duplicates will display the error.
202 */
203 static /*Str*/HashTable *symhash;
204
205 /* List of currently loaded objects */
206 ObjectCode *objects = NULL; /* initially empty */
207
208 /* List of objects that have been unloaded via unloadObj(), but are waiting
209 to be actually freed via checkUnload() */
210 ObjectCode *unloaded_objects = NULL; /* initially empty */
211
212 #ifdef THREADED_RTS
213 /* This protects all the Linker's global state except unloaded_objects */
214 Mutex linker_mutex;
215 /*
216 * This protects unloaded_objects. We have a separate mutex for this, because
217 * the GC needs to access unloaded_objects in checkUnload, while the linker only
218 * needs to access unloaded_objects in unloadObj(), so this allows most linker
219 * operations proceed concurrently with the GC.
220 */
221 Mutex linker_unloaded_mutex;
222 #endif
223
224 /* Type of the initializer */
225 typedef void (*init_t) (int argc, char **argv, char **env);
226
227 static HsInt isAlreadyLoaded( pathchar *path );
228 static HsInt loadOc( ObjectCode* oc );
229 static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
230 rtsBool mapped, char *archiveMemberName,
231 int misalignment
232 );
233
234 /* Generic wrapper function to try and Resolve and RunInit oc files */
235 int ocTryLoad( ObjectCode* oc );
236
237 #if defined(OBJFORMAT_ELF)
238 static int ocVerifyImage_ELF ( ObjectCode* oc );
239 static int ocGetNames_ELF ( ObjectCode* oc );
240 static int ocResolve_ELF ( ObjectCode* oc );
241 static int ocRunInit_ELF ( ObjectCode* oc );
242 #if NEED_SYMBOL_EXTRAS
243 static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
244 #endif
245 #elif defined(OBJFORMAT_PEi386)
246 static int ocVerifyImage_PEi386 ( ObjectCode* oc );
247 static int ocGetNames_PEi386 ( ObjectCode* oc );
248 static int ocResolve_PEi386 ( ObjectCode* oc );
249 static int ocRunInit_PEi386 ( ObjectCode* oc );
250 static void *lookupSymbolInDLLs ( unsigned char *lbl );
251 /* See Note [mingw-w64 name decoration scheme] */
252 #ifndef x86_64_HOST_ARCH
253 static void zapTrailingAtSign ( unsigned char *sym );
254 #endif
255
256 #if defined(x86_64_HOST_ARCH)
257 #define USED_IF_x86_64_HOST_ARCH /* Nothing */
258 #else
259 #define USED_IF_x86_64_HOST_ARCH STG_UNUSED
260 #endif
261
262 static char *allocateImageAndTrampolines (
263 pathchar* arch_name, char* member_name,
264 FILE* f,
265 int size,
266 int isThin);
267 #if defined(x86_64_HOST_ARCH)
268 static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
269 static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
270 #define PEi386_IMAGE_OFFSET 4
271 #else
272 #define PEi386_IMAGE_OFFSET 0
273 #endif
274 #elif defined(OBJFORMAT_MACHO)
275 static int ocVerifyImage_MachO ( ObjectCode* oc );
276 static int ocGetNames_MachO ( ObjectCode* oc );
277 static int ocResolve_MachO ( ObjectCode* oc );
278 static int ocRunInit_MachO ( ObjectCode* oc );
279 static int machoGetMisalignment( FILE * );
280 #if NEED_SYMBOL_EXTRAS
281 static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
282 #endif
283 #ifdef powerpc_HOST_ARCH
284 static void machoInitSymbolsWithoutUnderscore( void );
285 #endif
286 #endif
287
288 #if defined(OBJFORMAT_PEi386)
289 static int checkAndLoadImportLibrary(
290 pathchar* arch_name,
291 char* member_name,
292 FILE* f);
293
294 static int findAndLoadImportLibrary(
295 ObjectCode* oc
296 );
297
298 static UChar *myindex(
299 int scale,
300 void* base,
301 int index);
302 static UChar *cstring_from_COFF_symbol_name(
303 UChar* name,
304 UChar* strtab);
305 static char *cstring_from_section_name(
306 UChar* name,
307 UChar* strtab);
308
309
310 /* Add ld symbol for PE image base. */
311 #if defined(__GNUC__)
312 #define __ImageBase __MINGW_LSYMBOL(_image_base__)
313 #endif
314
315 /* Get the base of the module. */
316 /* This symbol is defined by ld. */
317 extern IMAGE_DOS_HEADER __ImageBase;
318 #define __image_base (void*)((HINSTANCE)&__ImageBase)
319
320 // MingW-w64 is missing these from the implementation. So we have to look them up
321 typedef DLL_DIRECTORY_COOKIE(WINAPI *LPAddDLLDirectory)(PCWSTR NewDirectory);
322 typedef WINBOOL(WINAPI *LPRemoveDLLDirectory)(DLL_DIRECTORY_COOKIE Cookie);
323 #endif /* OBJFORMAT_PEi386 */
324
325 static void freeProddableBlocks (ObjectCode *oc);
326
327 /* on x86_64 we have a problem with relocating symbol references in
328 * code that was compiled without -fPIC. By default, the small memory
329 * model is used, which assumes that symbol references can fit in a
330 * 32-bit slot. The system dynamic linker makes this work for
331 * references to shared libraries by either (a) allocating a jump
332 * table slot for code references, or (b) moving the symbol at load
333 * time (and copying its contents, if necessary) for data references.
334 *
335 * We unfortunately can't tell whether symbol references are to code
336 * or data. So for now we assume they are code (the vast majority
337 * are), and allocate jump-table slots. Unfortunately this will
338 * SILENTLY generate crashing code for data references. This hack is
339 * enabled by X86_64_ELF_NONPIC_HACK.
340 *
341 * One workaround is to use shared Haskell libraries. This is
342 * coming. Another workaround is to keep the static libraries but
343 * compile them with -fPIC, because that will generate PIC references
344 * to data which can be relocated. The PIC code is still too green to
345 * do this systematically, though.
346 *
347 * See bug #781
348 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
349 *
350 * Naming Scheme for Symbol Macros
351 *
352 * SymI_*: symbol is internal to the RTS. It resides in an object
353 * file/library that is statically.
354 * SymE_*: symbol is external to the RTS library. It might be linked
355 * dynamically.
356 *
357 * Sym*_HasProto : the symbol prototype is imported in an include file
358 * or defined explicitly
359 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
360 * default proto extern void sym(void);
361 */
362 #define X86_64_ELF_NONPIC_HACK 1
363
364 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
365 * small memory model on this architecture (see gcc docs,
366 * -mcmodel=small).
367 *
368 * MAP_32BIT not available on OpenBSD/amd64
369 */
370 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
371 #define TRY_MAP_32BIT MAP_32BIT
372 #else
373 #define TRY_MAP_32BIT 0
374 #endif
375
376 /*
377 Note [The ARM/Thumb Story]
378 ~~~~~~~~~~~~~~~~~~~~~~~~~~
379
380 Support for the ARM architecture is complicated by the fact that ARM has not
381 one but several instruction encodings. The two relevant ones here are the original
382 ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
383 of the instruction set.
384
385 How the CPU decodes a particular instruction is determined by a mode bit. This
386 mode bit is set on jump instructions, the value being determined by the low
387 bit of the target address: An odd address means the target is a procedure
388 encoded in the Thumb encoding whereas an even address means it's a traditional
389 ARM procedure (the actual address jumped to is even regardless of the encoding bit).
390
391 Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
392 is tricky. If the linker needs to link a call by an ARM object into Thumb code
393 (or vice-versa) it will produce a jump island. This, however, is incompatible with
394 GHC's tables-next-to-code. For this reason, it is critical that GHC emit
395 exclusively ARM or Thumb objects for all Haskell code.
396
397 We still do, however, need to worry about foreign code.
398 */
399
400 /*
401 * Due to the small memory model (see above), on x86_64 we have to map
402 * all our non-PIC object files into the low 2Gb of the address space
403 * (why 2Gb and not 4Gb? Because all addresses must be reachable
404 * using a 32-bit signed PC-relative offset). On Linux we can do this
405 * using the MAP_32BIT flag to mmap(), however on other OSs
406 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
407 * can't do this. So on these systems, we have to pick a base address
408 * in the low 2Gb of the address space and try to allocate memory from
409 * there.
410 *
411 * We pick a default address based on the OS, but also make this
412 * configurable via an RTS flag (+RTS -xm)
413 */
414 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
415
416 #if defined(MAP_32BIT)
417 // Try to use MAP_32BIT
418 #define MMAP_32BIT_BASE_DEFAULT 0
419 #else
420 // A guess: 1Gb.
421 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
422 #endif
423
424 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
425 #endif
426
427 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
428 ObjectCode *owner)
429 {
430 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
431 if (!pinfo || owner != pinfo->owner) return;
432 removeStrHashTable(table, key, NULL);
433 stgFree(pinfo);
434 }
435
436 /* -----------------------------------------------------------------------------
437 * Insert symbols into hash tables, checking for duplicates.
438 *
439 * Returns: 0 on failure, nonzero on success
440 */
441 /*
442 Note [weak-symbols-support]
443 -------------------------------------
444 While ghciInsertSymbolTable does implement extensive
445 logic for weak symbol support, weak symbols are not currently
446 fully supported by the RTS. This code is mostly here for COMDAT
447 support which uses the weak symbols support.
448
449 Linking weak symbols defined purely in C code with other C code
450 should also work, probably. Observing weak symbols in Haskell
451 won't.
452
453 Some test have been written for weak symbols but have been disabled
454 mostly because it's unsure how the weak symbols support should look.
455 See Trac #11223
456 */
457 static int ghciInsertSymbolTable(
458 pathchar* obj_name,
459 HashTable *table,
460 const SymbolName* key,
461 SymbolAddr* data,
462 HsBool weak,
463 ObjectCode *owner)
464 {
465 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
466 if (!pinfo) /* new entry */
467 {
468 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
469 pinfo->value = data;
470 pinfo->owner = owner;
471 pinfo->weak = weak;
472 insertStrHashTable(table, key, pinfo);
473 return 1;
474 }
475 else if (weak && data && pinfo->weak && !pinfo->value)
476 {
477 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
478 pinfo->value = data;
479 pinfo->owner = owner;
480 return 1;
481 }
482 else if (weak)
483 {
484 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
485 already know of another copy throw this one away.
486
487 or both weak symbols have a nonzero value. Keep the existing one.
488
489 This also preserves the semantics of linking against
490 the first symbol we find. */
491 }
492 else if (pinfo->weak && !weak) /* weak symbol is in the table */
493 {
494 /* override the weak definition with the non-weak one */
495 pinfo->value = data;
496 pinfo->owner = owner;
497 pinfo->weak = HS_BOOL_FALSE;
498 return 1;
499 }
500 else if ( pinfo->owner
501 && pinfo->owner->status != OBJECT_RESOLVED
502 && pinfo->owner->status != OBJECT_NEEDED)
503 {
504 /* If the other symbol hasn't been loaded or will be loaded and we want to
505 explicitly load the new one, we can just swap it out and load the one
506 that has been requested. If not, just keep the first one encountered.
507
508 Because the `symHash' table consists symbols we've also not loaded but
509 found during the initial scan this is safe to do. If however the existing
510 symbol has been loaded then it means we have a duplicate.
511
512 This is essentially emulating the behavior of a linker wherein it will always
513 link in object files that are .o file arguments, but only take object files
514 from archives as needed. */
515 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
516 pinfo->value = data;
517 pinfo->owner = owner;
518 pinfo->weak = weak;
519 }
520
521 return 1;
522 }
523 else if (pinfo->owner == owner)
524 {
525 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
526 return 1;
527 }
528 else if (owner && owner->status == OBJECT_LOADED)
529 {
530 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
531 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
532 call this function again to trigger the duplicate error. */
533 return 1;
534 }
535
536 pathchar* archiveName = NULL;
537 debugBelch(
538 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
539 " %s\n"
540 "whilst processing object file\n"
541 " %" PATH_FMT "\n"
542 "The symbol was previously defined in\n"
543 " %" PATH_FMT "\n"
544 "This could be caused by:\n"
545 " * Loading two different object files which export the same symbol\n"
546 " * Specifying the same object file twice on the GHCi command line\n"
547 " * An incorrect `package.conf' entry, causing some object to be\n"
548 " loaded twice.\n",
549 (char*)key,
550 obj_name,
551 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
552 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
553 : pinfo->owner->fileName
554 );
555
556 if (archiveName)
557 {
558 stgFree(archiveName);
559 archiveName = NULL;
560 }
561 return 0;
562 }
563
564 /* -----------------------------------------------------------------------------
565 * Looks up symbols into hash tables.
566 *
567 * Returns: 0 on failure and result is not set,
568 * nonzero on success and result set to nonzero pointer
569 */
570 static HsBool ghciLookupSymbolInfo(HashTable *table,
571 const SymbolName* key, RtsSymbolInfo **result)
572 {
573 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
574 if (!pinfo) {
575 *result = NULL;
576 return HS_BOOL_FALSE;
577 }
578 if (pinfo->weak)
579 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
580 /* Once it's looked up, it can no longer be overridden */
581 pinfo->weak = HS_BOOL_FALSE;
582
583 *result = pinfo;
584 return HS_BOOL_TRUE;
585 }
586
587 /* -----------------------------------------------------------------------------
588 * initialize the object linker
589 */
590
591
592 static int linker_init_done = 0 ;
593
594 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
595 static void *dl_prog_handle;
596 static regex_t re_invalid;
597 static regex_t re_realso;
598 #ifdef THREADED_RTS
599 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
600 #endif
601 #elif defined(OBJFORMAT_PEi386)
602 void addDLLHandle(pathchar* dll_name, HINSTANCE instance);
603 #endif
604
605 void initLinker (void)
606 {
607 // default to retaining CAFs for backwards compatibility. Most
608 // users will want initLinker_(0): otherwise unloadObj() will not
609 // be able to unload object files when they contain CAFs.
610 initLinker_(1);
611 }
612
613 void
614 initLinker_ (int retain_cafs)
615 {
616 RtsSymbolVal *sym;
617 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
618 int compileResult;
619 #endif
620
621 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
622
623 /* Make initLinker idempotent, so we can call it
624 before every relevant operation; that means we
625 don't need to initialise the linker separately */
626 if (linker_init_done == 1) {
627 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
628 return;
629 } else {
630 linker_init_done = 1;
631 }
632
633 objects = NULL;
634 unloaded_objects = NULL;
635
636 #if defined(THREADED_RTS)
637 initMutex(&linker_mutex);
638 initMutex(&linker_unloaded_mutex);
639 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
640 initMutex(&dl_mutex);
641 #endif
642 #endif
643
644 symhash = allocStrHashTable();
645
646 /* populate the symbol table with stuff from the RTS */
647 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
648 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
649 symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
650 barf("ghciInsertSymbolTable failed");
651 }
652 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
653 }
654 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
655 machoInitSymbolsWithoutUnderscore();
656 # endif
657 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
658 referenced from a statically linked module. We need to mimic this, but
659 we cannot use NULL because we use it to mean nonexistent symbols. So we
660 use an arbitrary (hopefully unique) address here.
661 */
662 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
663 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
664 barf("ghciInsertSymbolTable failed");
665 }
666
667 #if defined(OBJFORMAT_PEi386)
668 if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
669 symhash, "__image_base__", __image_base, HS_BOOL_TRUE, NULL)) {
670 barf("ghciInsertSymbolTable failed");
671 }
672 #endif /* OBJFORMAT_PEi386 */
673
674
675 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
676 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
677 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
678 retain_cafs ? newRetainedCAF : newGCdCAF,
679 HS_BOOL_FALSE, NULL)) {
680 barf("ghciInsertSymbolTable failed");
681 }
682
683 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
684 # if defined(RTLD_DEFAULT)
685 dl_prog_handle = RTLD_DEFAULT;
686 # else
687 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
688 # endif /* RTLD_DEFAULT */
689
690 compileResult = regcomp(&re_invalid,
691 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
692 REG_EXTENDED);
693 if (compileResult != 0) {
694 barf("Compiling re_invalid failed");
695 }
696 compileResult = regcomp(&re_realso,
697 "(GROUP|INPUT) *\\( *([^ )]+)",
698 REG_EXTENDED);
699 if (compileResult != 0) {
700 barf("Compiling re_realso failed");
701 }
702 # endif
703
704 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
705 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
706 // User-override for mmap_32bit_base
707 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
708 }
709 #endif
710
711 #if defined(mingw32_HOST_OS)
712 /*
713 * These two libraries cause problems when added to the static link,
714 * but are necessary for resolving symbols in GHCi, hence we load
715 * them manually here.
716 */
717 addDLL(WSTR("msvcrt"));
718 addDLL(WSTR("kernel32"));
719 addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
720 #endif
721
722 if (RTS_LINKER_USE_MMAP)
723 m32_allocator_init();
724
725 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
726 return;
727 }
728
729 void
730 exitLinker( void ) {
731 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
732 if (linker_init_done == 1) {
733 regfree(&re_invalid);
734 regfree(&re_realso);
735 #ifdef THREADED_RTS
736 closeMutex(&dl_mutex);
737 #endif
738 }
739 #endif
740 if (linker_init_done == 1) {
741 freeHashTable(symhash, free);
742 }
743 #ifdef THREADED_RTS
744 closeMutex(&linker_mutex);
745 #endif
746 }
747
748 /* -----------------------------------------------------------------------------
749 * Loading DLL or .so dynamic libraries
750 * -----------------------------------------------------------------------------
751 *
752 * Add a DLL from which symbols may be found. In the ELF case, just
753 * do RTLD_GLOBAL-style add, so no further messing around needs to
754 * happen in order that symbols in the loaded .so are findable --
755 * lookupSymbol() will subsequently see them by dlsym on the program's
756 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
757 *
758 * In the PEi386 case, open the DLLs and put handles to them in a
759 * linked list. When looking for a symbol, try all handles in the
760 * list. This means that we need to load even DLLs that are guaranteed
761 * to be in the ghc.exe image already, just so we can get a handle
762 * to give to loadSymbol, so that we can find the symbols. For such
763 * libraries, the LoadLibrary call should be a no-op except for returning
764 * the handle.
765 *
766 */
767
768 #if defined(OBJFORMAT_PEi386)
769 /* A record for storing handles into DLLs. */
770
771 typedef
772 struct _OpenedDLL {
773 pathchar* name;
774 struct _OpenedDLL* next;
775 HINSTANCE instance;
776 }
777 OpenedDLL;
778
779 /* A list thereof. */
780 static OpenedDLL* opened_dlls = NULL;
781
782 /* A record for storing indirectly linked functions from DLLs. */
783 typedef
784 struct _IndirectAddr {
785 SymbolAddr* addr;
786 struct _IndirectAddr* next;
787 }
788 IndirectAddr;
789
790 /* A list thereof. */
791 static IndirectAddr* indirects = NULL;
792
793 /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
794 void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
795 OpenedDLL* o_dll;
796 o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );
797 o_dll->name = dll_name ? pathdup(dll_name) : NULL;
798 o_dll->instance = instance;
799 o_dll->next = opened_dlls;
800 opened_dlls = o_dll;
801 }
802
803 #endif
804
805 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
806
807 /* Suppose in ghci we load a temporary SO for a module containing
808 f = 1
809 and then modify the module, recompile, and load another temporary
810 SO with
811 f = 2
812 Then as we don't unload the first SO, dlsym will find the
813 f = 1
814 symbol whereas we want the
815 f = 2
816 symbol. We therefore need to keep our own SO handle list, and
817 try SOs in the right order. */
818
819 typedef
820 struct _OpenedSO {
821 struct _OpenedSO* next;
822 void *handle;
823 }
824 OpenedSO;
825
826 /* A list thereof. */
827 static OpenedSO* openedSOs = NULL;
828
829 static const char *
830 internal_dlopen(const char *dll_name)
831 {
832 OpenedSO* o_so;
833 void *hdl;
834 const char *errmsg;
835 char *errmsg_copy;
836
837 // omitted: RTLD_NOW
838 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
839 IF_DEBUG(linker,
840 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
841
842 //-------------- Begin critical section ------------------
843 // This critical section is necessary because dlerror() is not
844 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
845 // Also, the error message returned must be copied to preserve it
846 // (see POSIX also)
847
848 ACQUIRE_LOCK(&dl_mutex);
849 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
850
851 errmsg = NULL;
852 if (hdl == NULL) {
853 /* dlopen failed; return a ptr to the error msg. */
854 errmsg = dlerror();
855 if (errmsg == NULL) errmsg = "addDLL: unknown error";
856 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
857 strcpy(errmsg_copy, errmsg);
858 errmsg = errmsg_copy;
859 } else {
860 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
861 o_so->handle = hdl;
862 o_so->next = openedSOs;
863 openedSOs = o_so;
864 }
865
866 RELEASE_LOCK(&dl_mutex);
867 //--------------- End critical section -------------------
868
869 return errmsg;
870 }
871
872 /*
873 Note [RTLD_LOCAL]
874
875 In GHCi we want to be able to override previous .so's with newly
876 loaded .so's when we recompile something. This further implies that
877 when we look up a symbol in internal_dlsym() we have to iterate
878 through the loaded libraries (in order from most recently loaded to
879 oldest) looking up the symbol in each one until we find it.
880
881 However, this can cause problems for some symbols that are copied
882 by the linker into the executable image at runtime - see #8935 for a
883 lengthy discussion. To solve that problem we need to look up
884 symbols in the main executable *first*, before attempting to look
885 them up in the loaded .so's. But in order to make that work, we
886 have to always call dlopen with RTLD_LOCAL, so that the loaded
887 libraries don't populate the global symbol table.
888 */
889
890 static void *
891 internal_dlsym(const char *symbol) {
892 OpenedSO* o_so;
893 void *v;
894
895 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
896 ACQUIRE_LOCK(&dl_mutex);
897 dlerror();
898 // look in program first
899 v = dlsym(dl_prog_handle, symbol);
900 if (dlerror() == NULL) {
901 RELEASE_LOCK(&dl_mutex);
902 return v;
903 }
904
905 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
906 v = dlsym(o_so->handle, symbol);
907 if (dlerror() == NULL) {
908 RELEASE_LOCK(&dl_mutex);
909 return v;
910 }
911 }
912 RELEASE_LOCK(&dl_mutex);
913 return v;
914 }
915 # endif
916
917 const char *
918 addDLL( pathchar *dll_name )
919 {
920 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
921 /* ------------------- ELF DLL loader ------------------- */
922
923 #define NMATCH 5
924 regmatch_t match[NMATCH];
925 const char *errmsg;
926 FILE* fp;
927 size_t match_length;
928 #define MAXLINE 1000
929 char line[MAXLINE];
930 int result;
931
932 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
933 errmsg = internal_dlopen(dll_name);
934
935 if (errmsg == NULL) {
936 return NULL;
937 }
938
939 // GHC Trac ticket #2615
940 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
941 // contain linker scripts rather than ELF-format object code. This
942 // code handles the situation by recognizing the real object code
943 // file name given in the linker script.
944 //
945 // If an "invalid ELF header" error occurs, it is assumed that the
946 // .so file contains a linker script instead of ELF object code.
947 // In this case, the code looks for the GROUP ( ... ) linker
948 // directive. If one is found, the first file name inside the
949 // parentheses is treated as the name of a dynamic library and the
950 // code attempts to dlopen that file. If this is also unsuccessful,
951 // an error message is returned.
952
953 // see if the error message is due to an invalid ELF header
954 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
955 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
956 IF_DEBUG(linker, debugBelch("result = %i\n", result));
957 if (result == 0) {
958 // success -- try to read the named file as a linker script
959 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
960 MAXLINE-1);
961 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
962 line[match_length] = '\0'; // make sure string is null-terminated
963 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
964 if ((fp = fopen(line, "r")) == NULL) {
965 return errmsg; // return original error if open fails
966 }
967 // try to find a GROUP or INPUT ( ... ) command
968 while (fgets(line, MAXLINE, fp) != NULL) {
969 IF_DEBUG(linker, debugBelch("input line = %s", line));
970 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
971 // success -- try to dlopen the first named file
972 IF_DEBUG(linker, debugBelch("match%s\n",""));
973 line[match[2].rm_eo] = '\0';
974 stgFree((void*)errmsg); // Free old message before creating new one
975 errmsg = internal_dlopen(line+match[2].rm_so);
976 break;
977 }
978 // if control reaches here, no GROUP or INPUT ( ... ) directive
979 // was found and the original error message is returned to the
980 // caller
981 }
982 fclose(fp);
983 }
984 return errmsg;
985
986 # elif defined(OBJFORMAT_PEi386)
987 /* ------------------- Win32 DLL loader ------------------- */
988
989 pathchar* buf;
990 OpenedDLL* o_dll;
991 HINSTANCE instance;
992
993 IF_DEBUG(linker, debugBelch("\naddDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
994
995 /* See if we've already got it, and ignore if so. */
996 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
997 if (0 == pathcmp(o_dll->name, dll_name))
998 return NULL;
999 }
1000
1001 /* The file name has no suffix (yet) so that we can try
1002 both foo.dll and foo.drv
1003
1004 The documentation for LoadLibrary says:
1005 If no file name extension is specified in the lpFileName
1006 parameter, the default library extension .dll is
1007 appended. However, the file name string can include a trailing
1008 point character (.) to indicate that the module name has no
1009 extension. */
1010
1011 size_t bufsize = pathlen(dll_name) + 10;
1012 buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
1013
1014 /* These are ordered by probability of success and order we'd like them */
1015 const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
1016 const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
1017
1018 int cFormat;
1019 int cFlag;
1020 int flags_start = 1; // Assume we don't support the new API
1021
1022 /* Detect if newer API are available, if not, skip the first flags entry */
1023 if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
1024 flags_start = 0;
1025 }
1026
1027 /* Iterate through the possible flags and formats */
1028 for (cFlag = flags_start; cFlag < 2; cFlag++)
1029 {
1030 for (cFormat = 0; cFormat < 4; cFormat++)
1031 {
1032 snwprintf(buf, bufsize, formats[cFormat], dll_name);
1033 instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
1034 if (instance == NULL)
1035 {
1036 if (GetLastError() != ERROR_MOD_NOT_FOUND)
1037 {
1038 goto error;
1039 }
1040 }
1041 else
1042 {
1043 break; // We're done. DLL has been loaded.
1044 }
1045 }
1046 }
1047
1048 // Check if we managed to load the DLL
1049 if (instance == NULL) {
1050 goto error;
1051 }
1052
1053 stgFree(buf);
1054
1055 addDLLHandle(dll_name, instance);
1056
1057 return NULL;
1058
1059 error:
1060 stgFree(buf);
1061 sysErrorBelch("addDLL: %" PATH_FMT " (Win32 error %lu)", dll_name, GetLastError());
1062
1063 /* LoadLibrary failed; return a ptr to the error msg. */
1064 return "addDLL: could not load DLL";
1065
1066 # else
1067 barf("addDLL: not implemented on this platform");
1068 # endif
1069 }
1070
1071 /* -----------------------------------------------------------------------------
1072 * Searches the system directories to determine if there is a system DLL that
1073 * satisfies the given name. This prevent GHCi from linking against a static
1074 * library if a DLL is available.
1075 *
1076 * Returns: NULL on failure or no DLL found, else the full path to the DLL
1077 * that can be loaded.
1078 */
1079 pathchar* findSystemLibrary(pathchar* dll_name)
1080 {
1081 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));
1082
1083 #if defined(OBJFORMAT_PEi386)
1084 const unsigned int init_buf_size = 1024;
1085 unsigned int bufsize = init_buf_size;
1086 wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
1087 DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
1088
1089 if (wResult > bufsize) {
1090 result = realloc(result, sizeof(wchar_t) * wResult);
1091 wResult = SearchPathW(NULL, dll_name, NULL, wResult, result, NULL);
1092 }
1093
1094
1095 if (!wResult) {
1096 free(result);
1097 return NULL;
1098 }
1099
1100 return result;
1101 #else
1102 (void)(dll_name); // Function not implemented for other platforms.
1103 return NULL;
1104 #endif
1105 }
1106
1107 /* -----------------------------------------------------------------------------
1108 * Emits a warning determining that the system is missing a required security
1109 * update that we need to get access to the proper APIs
1110 */
1111 void warnMissingKBLibraryPaths( void )
1112 {
1113 static HsBool missing_update_warn = HS_BOOL_FALSE;
1114 if (!missing_update_warn) {
1115 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
1116 missing_update_warn = HS_BOOL_TRUE;
1117 }
1118 }
1119
1120 /* -----------------------------------------------------------------------------
1121 * appends a directory to the process DLL Load path so LoadLibrary can find it
1122 *
1123 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
1124 * restore the search path to what it was before this call.
1125 */
1126 HsPtr addLibrarySearchPath(pathchar* dll_path)
1127 {
1128 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));
1129
1130 #if defined(OBJFORMAT_PEi386)
1131 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1132 LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
1133
1134 HsPtr result = NULL;
1135
1136 const unsigned int init_buf_size = 4096;
1137 int bufsize = init_buf_size;
1138
1139 // Make sure the path is an absolute path
1140 WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
1141 DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
1142 if (!wResult){
1143 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1144 }
1145 else if (wResult > init_buf_size) {
1146 abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
1147 if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
1148 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1149 }
1150 }
1151
1152 if (AddDllDirectory) {
1153 result = AddDllDirectory(abs_path);
1154 }
1155 else
1156 {
1157 warnMissingKBLibraryPaths();
1158 WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
1159 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1160
1161 if (wResult > init_buf_size) {
1162 str = realloc(str, sizeof(WCHAR) * wResult);
1163 bufsize = wResult;
1164 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1165 if (!wResult) {
1166 sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1167 }
1168 }
1169
1170 bufsize = wResult + 2 + pathlen(abs_path);
1171 wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
1172
1173 wcscpy(newPath, abs_path);
1174 wcscat(newPath, L";");
1175 wcscat(newPath, str);
1176 if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
1177 sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1178 }
1179
1180 free(newPath);
1181 free(abs_path);
1182
1183 return str;
1184 }
1185
1186 if (!result) {
1187 sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1188 free(abs_path);
1189 return NULL;
1190 }
1191
1192 free(abs_path);
1193 return result;
1194 #else
1195 (void)(dll_path); // Function not implemented for other platforms.
1196 return NULL;
1197 #endif
1198 }
1199
1200 /* -----------------------------------------------------------------------------
1201 * removes a directory from the process DLL Load path
1202 *
1203 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
1204 */
1205 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
1206 {
1207 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n", dll_path_index));
1208
1209 #if defined(OBJFORMAT_PEi386)
1210 HsBool result = 0;
1211
1212 if (dll_path_index != NULL) {
1213 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1214 LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
1215
1216 if (RemoveDllDirectory) {
1217 result = RemoveDllDirectory(dll_path_index);
1218 // dll_path_index is now invalid, do not use it after this point.
1219 }
1220 else
1221 {
1222 warnMissingKBLibraryPaths();
1223 result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
1224 free(dll_path_index);
1225 }
1226
1227 if (!result) {
1228 sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
1229 return HS_BOOL_FALSE;
1230 }
1231 }
1232
1233 return result == 0 ? HS_BOOL_TRUE : HS_BOOL_FALSE;
1234 #else
1235 (void)(dll_path_index); // Function not implemented for other platforms.
1236 return HS_BOOL_FALSE;
1237 #endif
1238 }
1239
1240 /* -----------------------------------------------------------------------------
1241 * insert a symbol in the hash table
1242 *
1243 * Returns: 0 on failure, nozero on success
1244 */
1245 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
1246 {
1247 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
1248 }
1249
1250 /* -----------------------------------------------------------------------------
1251 * lookup a symbol in the hash table
1252 */
1253 static SymbolAddr* lookupSymbol_ (SymbolName* lbl)
1254 {
1255 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
1256
1257 ASSERT(symhash != NULL);
1258 RtsSymbolInfo *pinfo;
1259
1260 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
1261 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
1262
1263 # if defined(OBJFORMAT_ELF)
1264 return internal_dlsym(lbl);
1265 # elif defined(OBJFORMAT_MACHO)
1266
1267 /* HACK: On OS X, all symbols are prefixed with an underscore.
1268 However, dlsym wants us to omit the leading underscore from the
1269 symbol name -- the dlsym routine puts it back on before searching
1270 for the symbol. For now, we simply strip it off here (and ONLY
1271 here).
1272 */
1273 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
1274 ASSERT(lbl[0] == '_');
1275 return internal_dlsym(lbl + 1);
1276 # elif defined(OBJFORMAT_PEi386)
1277 SymbolAddr* sym;
1278
1279 /* See Note [mingw-w64 name decoration scheme] */
1280 #ifndef x86_64_HOST_ARCH
1281 zapTrailingAtSign ( (unsigned char*)lbl );
1282 #endif
1283 sym = lookupSymbolInDLLs((unsigned char*)lbl);
1284 return sym; // might be NULL if not found
1285
1286 # else
1287 ASSERT(2+2 == 5);
1288 return NULL;
1289 # endif
1290 } else {
1291 #if defined(mingw32_HOST_OS)
1292 // If Windows, perform initialization of uninitialized
1293 // Symbols from the C runtime which was loaded above.
1294 // We do this on lookup to prevent the hit when
1295 // The symbol isn't being used.
1296 if (pinfo->value == (void*)0xBAADF00D)
1297 {
1298 char symBuffer[50];
1299 sprintf(symBuffer, "_%s", lbl);
1300 pinfo->value = GetProcAddress(GetModuleHandle("msvcrt"), symBuffer);
1301 }
1302 #endif
1303 SymbolAddr* val = pinfo->value;
1304 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, val));
1305
1306 int r;
1307 ObjectCode* oc = pinfo->owner;
1308
1309 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
1310 See Note [runtime-linker-phases] */
1311 if (oc && oc->status == OBJECT_LOADED) {
1312 oc->status = OBJECT_NEEDED;
1313 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand loading symbol '%s'\n", lbl));
1314 r = ocTryLoad(oc);
1315
1316 if (!r) {
1317 errorBelch("Could not on-demand load symbol '%s'\n", lbl);
1318 return NULL;
1319 }
1320 #ifdef PROFILING
1321 // collect any new cost centres & CCSs
1322 // that were defined during runInit
1323 initProfiling2();
1324 #endif
1325 }
1326
1327 return val;
1328 }
1329 }
1330
1331 SymbolAddr* lookupSymbol( SymbolName* lbl )
1332 {
1333 ACQUIRE_LOCK(&linker_mutex);
1334 SymbolAddr* r = lookupSymbol_(lbl);
1335 RELEASE_LOCK(&linker_mutex);
1336 return r;
1337 }
1338
1339 /* -----------------------------------------------------------------------------
1340 Create a StablePtr for a foreign export. This is normally called by
1341 a C function with __attribute__((constructor)), which is generated
1342 by GHC and linked into the module.
1343
1344 If the object code is being loaded dynamically, then we remember
1345 which StablePtrs were allocated by the constructors and free them
1346 again in unloadObj().
1347 -------------------------------------------------------------------------- */
1348
1349 static ObjectCode *loading_obj = NULL;
1350
1351 StgStablePtr foreignExportStablePtr (StgPtr p)
1352 {
1353 ForeignExportStablePtr *fe_sptr;
1354 StgStablePtr *sptr;
1355
1356 sptr = getStablePtr(p);
1357
1358 if (loading_obj != NULL) {
1359 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
1360 "foreignExportStablePtr");
1361 fe_sptr->stable_ptr = sptr;
1362 fe_sptr->next = loading_obj->stable_ptrs;
1363 loading_obj->stable_ptrs = fe_sptr;
1364 }
1365
1366 return sptr;
1367 }
1368
1369
1370 /* -----------------------------------------------------------------------------
1371 * Debugging aid: look in GHCi's object symbol tables for symbols
1372 * within DELTA bytes of the specified address, and show their names.
1373 */
1374 #ifdef DEBUG
1375 void ghci_enquire ( SymbolAddr* addr );
1376
1377 void ghci_enquire(SymbolAddr* addr)
1378 {
1379 int i;
1380 SymbolName* sym;
1381 RtsSymbolInfo* a;
1382 const int DELTA = 64;
1383 ObjectCode* oc;
1384
1385 for (oc = objects; oc; oc = oc->next) {
1386 for (i = 0; i < oc->n_symbols; i++) {
1387 sym = oc->symbols[i];
1388 if (sym == NULL) continue;
1389 a = NULL;
1390 if (a == NULL) {
1391 ghciLookupSymbolInfo(symhash, sym, &a);
1392 }
1393 if (a == NULL) {
1394 // debugBelch("ghci_enquire: can't find %s\n", sym);
1395 }
1396 else if ( a->value
1397 && (char*)addr-DELTA <= (char*)a->value
1398 && (char*)a->value <= (char*)addr+DELTA) {
1399 debugBelch("%p + %3d == `%s'\n", addr, (int)((char*)a->value - (char*)addr), sym);
1400 }
1401 }
1402 }
1403 }
1404 #endif
1405
1406 #if RTS_LINKER_USE_MMAP
1407 //
1408 // Returns NULL on failure.
1409 //
1410 void *
1411 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1412 {
1413 void *map_addr = NULL;
1414 void *result;
1415 size_t size;
1416 static uint32_t fixed = 0;
1417
1418 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1419 size = roundUpToPage(bytes);
1420
1421 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1422 mmap_again:
1423
1424 if (mmap_32bit_base != 0) {
1425 map_addr = mmap_32bit_base;
1426 }
1427 #endif
1428
1429 IF_DEBUG(linker,
1430 debugBelch("mmapForLinker: \tprotection %#0x\n",
1431 PROT_EXEC | PROT_READ | PROT_WRITE));
1432 IF_DEBUG(linker,
1433 debugBelch("mmapForLinker: \tflags %#0x\n",
1434 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1435
1436 result = mmap(map_addr, size,
1437 PROT_EXEC|PROT_READ|PROT_WRITE,
1438 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1439
1440 if (result == MAP_FAILED) {
1441 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1442 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1443 return NULL;
1444 }
1445
1446 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1447 if (mmap_32bit_base != 0) {
1448 if (result == map_addr) {
1449 mmap_32bit_base = (StgWord8*)map_addr + size;
1450 } else {
1451 if ((W_)result > 0x80000000) {
1452 // oops, we were given memory over 2Gb
1453 munmap(result,size);
1454 #if defined(freebsd_HOST_OS) || \
1455 defined(kfreebsdgnu_HOST_OS) || \
1456 defined(dragonfly_HOST_OS)
1457 // Some platforms require MAP_FIXED. This is normally
1458 // a bad idea, because MAP_FIXED will overwrite
1459 // existing mappings.
1460 fixed = MAP_FIXED;
1461 goto mmap_again;
1462 #else
1463 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1464 "asked for %lu bytes at %p. "
1465 "Try specifying an address with +RTS -xm<addr> -RTS",
1466 size, map_addr);
1467 return NULL;
1468 #endif
1469 } else {
1470 // hmm, we were given memory somewhere else, but it's
1471 // still under 2Gb so we can use it. Next time, ask
1472 // for memory right after the place we just got some
1473 mmap_32bit_base = (StgWord8*)result + size;
1474 }
1475 }
1476 } else {
1477 if ((W_)result > 0x80000000) {
1478 // oops, we were given memory over 2Gb
1479 // ... try allocating memory somewhere else?;
1480 debugTrace(DEBUG_linker,
1481 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1482 bytes, result);
1483 munmap(result, size);
1484
1485 // Set a base address and try again... (guess: 1Gb)
1486 mmap_32bit_base = (void*)0x40000000;
1487 goto mmap_again;
1488 }
1489 }
1490 #endif
1491
1492 IF_DEBUG(linker,
1493 debugBelch("mmapForLinker: mapped %" FMT_Word
1494 " bytes starting at %p\n", (W_)size, result));
1495 IF_DEBUG(linker,
1496 debugBelch("mmapForLinker: done\n"));
1497
1498 return result;
1499 }
1500 #endif
1501
1502 /*
1503 * Remove symbols from the symbol table, and free oc->symbols.
1504 * This operation is idempotent.
1505 */
1506 static void removeOcSymbols (ObjectCode *oc)
1507 {
1508 if (oc->symbols == NULL) return;
1509
1510 // Remove all the mappings for the symbols within this object..
1511 int i;
1512 for (i = 0; i < oc->n_symbols; i++) {
1513 if (oc->symbols[i] != NULL) {
1514 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1515 }
1516 }
1517
1518 stgFree(oc->symbols);
1519 oc->symbols = NULL;
1520 }
1521
1522 /*
1523 * Release StablePtrs and free oc->stable_ptrs.
1524 * This operation is idempotent.
1525 */
1526 static void freeOcStablePtrs (ObjectCode *oc)
1527 {
1528 // Release any StablePtrs that were created when this
1529 // object module was initialized.
1530 ForeignExportStablePtr *fe_ptr, *next;
1531
1532 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1533 next = fe_ptr->next;
1534 freeStablePtr(fe_ptr->stable_ptr);
1535 stgFree(fe_ptr);
1536 }
1537 oc->stable_ptrs = NULL;
1538 }
1539
1540 static void
1541 freePreloadObjectFile (ObjectCode *oc)
1542 {
1543 #if defined(mingw32_HOST_OS)
1544
1545 VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
1546
1547 IndirectAddr *ia, *ia_next;
1548 ia = indirects;
1549 while (ia != NULL) {
1550 ia_next = ia->next;
1551 stgFree(ia);
1552 ia = ia_next;
1553 }
1554 indirects = NULL;
1555
1556 #else
1557
1558 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1559 munmap(oc->image, oc->fileSize);
1560 }
1561 else {
1562 stgFree(oc->image);
1563 }
1564
1565 #endif
1566
1567 oc->image = NULL;
1568 oc->fileSize = 0;
1569 }
1570
1571 /*
1572 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1573 * the GC when a previously unloaded ObjectCode has been determined to be
1574 * unused, and when an error occurs during loadObj().
1575 */
1576 void freeObjectCode (ObjectCode *oc)
1577 {
1578 freePreloadObjectFile(oc);
1579
1580 if (oc->symbols != NULL) {
1581 stgFree(oc->symbols);
1582 oc->symbols = NULL;
1583 }
1584
1585 if (oc->extraInfos != NULL) {
1586 freeHashTable(oc->extraInfos, NULL);
1587 oc->extraInfos = NULL;
1588 }
1589
1590 if (oc->sections != NULL) {
1591 int i;
1592 for (i=0; i < oc->n_sections; i++) {
1593 if (oc->sections[i].start != NULL) {
1594 switch(oc->sections[i].alloc){
1595 #if RTS_LINKER_USE_MMAP
1596 case SECTION_MMAP:
1597 munmap(oc->sections[i].mapped_start,
1598 oc->sections[i].mapped_size);
1599 break;
1600 case SECTION_M32:
1601 m32_free(oc->sections[i].start,
1602 oc->sections[i].size);
1603 break;
1604 #endif
1605 case SECTION_MALLOC:
1606 stgFree(oc->sections[i].start);
1607 break;
1608 default:
1609 break;
1610 }
1611 }
1612 }
1613 stgFree(oc->sections);
1614 }
1615
1616 freeProddableBlocks(oc);
1617
1618 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1619 * alongside the image, so we don't need to free. */
1620 #if NEED_SYMBOL_EXTRAS && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
1621 if (RTS_LINKER_USE_MMAP) {
1622 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1623 m32_free(oc->symbol_extras,
1624 sizeof(SymbolExtra) * oc->n_symbol_extras);
1625 }
1626 }
1627 else {
1628 stgFree(oc->symbol_extras);
1629 }
1630 #endif
1631
1632 stgFree(oc->fileName);
1633 stgFree(oc->archiveMemberName);
1634
1635 stgFree(oc);
1636 }
1637
1638 /* -----------------------------------------------------------------------------
1639 * Sets the initial status of a fresh ObjectCode
1640 */
1641 static void setOcInitialStatus(ObjectCode* oc) {
1642 if (oc->archiveMemberName == NULL) {
1643 oc->status = OBJECT_NEEDED;
1644 } else {
1645 oc->status = OBJECT_LOADED;
1646 }
1647 }
1648
1649 static ObjectCode*
1650 mkOc( pathchar *path, char *image, int imageSize,
1651 rtsBool mapped, char *archiveMemberName, int misalignment ) {
1652 ObjectCode* oc;
1653
1654 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1655 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1656
1657 # if defined(OBJFORMAT_ELF)
1658 oc->formatName = "ELF";
1659 # elif defined(OBJFORMAT_PEi386)
1660 oc->formatName = "PEi386";
1661 # elif defined(OBJFORMAT_MACHO)
1662 oc->formatName = "Mach-O";
1663 # else
1664 stgFree(oc);
1665 barf("loadObj: not implemented on this platform");
1666 # endif
1667
1668 oc->image = image;
1669 oc->fileName = pathdup(path);
1670
1671 if (archiveMemberName) {
1672 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1673 strcpy(oc->archiveMemberName, archiveMemberName);
1674 } else {
1675 oc->archiveMemberName = NULL;
1676 }
1677
1678 setOcInitialStatus( oc );
1679
1680 oc->fileSize = imageSize;
1681 oc->symbols = NULL;
1682 oc->n_sections = 0;
1683 oc->sections = NULL;
1684 oc->proddables = NULL;
1685 oc->stable_ptrs = NULL;
1686 #if NEED_SYMBOL_EXTRAS
1687 oc->symbol_extras = NULL;
1688 #endif
1689 oc->imageMapped = mapped;
1690
1691 oc->misalignment = misalignment;
1692 oc->extraInfos = NULL;
1693
1694 /* chain it onto the list of objects */
1695 oc->next = NULL;
1696
1697 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1698 return oc;
1699 }
1700
1701 /* -----------------------------------------------------------------------------
1702 * Check if an object or archive is already loaded.
1703 *
1704 * Returns: 1 if the path is already loaded, 0 otherwise.
1705 */
1706 static HsInt
1707 isAlreadyLoaded( pathchar *path )
1708 {
1709 ObjectCode *o;
1710 for (o = objects; o; o = o->next) {
1711 if (0 == pathcmp(o->fileName, path)) {
1712 return 1; /* already loaded */
1713 }
1714 }
1715 return 0; /* not loaded yet */
1716 }
1717
1718 static HsInt loadArchive_ (pathchar *path)
1719 {
1720 ObjectCode* oc;
1721 char *image;
1722 int memberSize;
1723 FILE *f;
1724 int n;
1725 size_t thisFileNameSize;
1726 char *fileName;
1727 size_t fileNameSize;
1728 int isObject, isGnuIndex, isThin, isImportLib;
1729 char tmp[20];
1730 char *gnuFileIndex;
1731 int gnuFileIndexSize;
1732 #if defined(darwin_HOST_OS)
1733 int i;
1734 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
1735 #if defined(i386_HOST_ARCH)
1736 const uint32_t mycputype = CPU_TYPE_X86;
1737 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
1738 #elif defined(x86_64_HOST_ARCH)
1739 const uint32_t mycputype = CPU_TYPE_X86_64;
1740 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
1741 #elif defined(powerpc_HOST_ARCH)
1742 const uint32_t mycputype = CPU_TYPE_POWERPC;
1743 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1744 #elif defined(powerpc64_HOST_ARCH)
1745 const uint32_t mycputype = CPU_TYPE_POWERPC64;
1746 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1747 #else
1748 #error Unknown Darwin architecture
1749 #endif
1750 #endif
1751 int misalignment = 0;
1752
1753 /* TODO: don't call barf() on error, instead return an error code, freeing
1754 * all resources correctly. This function is pretty complex, so it needs
1755 * to be refactored to make this practical. */
1756
1757 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
1758 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
1759
1760 /* Check that we haven't already loaded this archive.
1761 Ignore requests to load multiple times */
1762 if (isAlreadyLoaded(path)) {
1763 IF_DEBUG(linker,
1764 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1765 return 1; /* success */
1766 }
1767
1768 gnuFileIndex = NULL;
1769 gnuFileIndexSize = 0;
1770
1771 fileNameSize = 32;
1772 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
1773
1774 isThin = 0;
1775 isImportLib = 0;
1776
1777 f = pathopen(path, WSTR("rb"));
1778 if (!f)
1779 barf("loadObj: can't read `%" PATH_FMT "'", path);
1780
1781 /* Check if this is an archive by looking for the magic "!<arch>\n"
1782 * string. Usually, if this fails, we barf and quit. On Darwin however,
1783 * we may have a fat archive, which contains archives for more than
1784 * one architecture. Fat archives start with the magic number 0xcafebabe,
1785 * always stored big endian. If we find a fat_header, we scan through
1786 * the fat_arch structs, searching through for one for our host
1787 * architecture. If a matching struct is found, we read the offset
1788 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
1789 * from the start of the file.
1790 *
1791 * A subtlety is that all of the members of the fat_header and fat_arch
1792 * structs are stored big endian, so we need to call byte order
1793 * conversion functions.
1794 *
1795 * If we find the appropriate architecture in a fat archive, we gobble
1796 * its magic "!<arch>\n" string and continue processing just as if
1797 * we had a single architecture archive.
1798 */
1799
1800 n = fread ( tmp, 1, 8, f );
1801 if (n != 8)
1802 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
1803 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
1804 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
1805 *
1806 * ar thin libraries have the exact same format as normal archives except they
1807 * have a different magic string and they don't copy the object files into the
1808 * archive.
1809 *
1810 * Instead each header entry points to the location of the object file on disk.
1811 * This is useful when a library is only created to satisfy a compile time dependency
1812 * instead of to be distributed. This saves the time required for copying.
1813 *
1814 * Thin archives are always flattened. They always only contain simple headers
1815 * pointing to the object file and so we need not allocate more memory than needed
1816 * to find the object file.
1817 *
1818 */
1819 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
1820 isThin = 1;
1821 }
1822 #if defined(darwin_HOST_OS)
1823 /* Not a standard archive, look for a fat archive magic number: */
1824 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
1825 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
1826 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
1827 nfat_offset = 0;
1828
1829 for (i = 0; i < (int)nfat_arch; i++) {
1830 /* search for the right arch */
1831 n = fread( tmp, 1, 20, f );
1832 if (n != 8)
1833 barf("loadArchive: Failed reading arch from `%s'", path);
1834 cputype = ntohl(*(uint32_t *)tmp);
1835 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
1836
1837 if (cputype == mycputype && cpusubtype == mycpusubtype) {
1838 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
1839 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
1840 break;
1841 }
1842 }
1843
1844 if (nfat_offset == 0) {
1845 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
1846 }
1847 else {
1848 n = fseek( f, nfat_offset, SEEK_SET );
1849 if (n != 0)
1850 barf("loadArchive: Failed to seek to arch in `%s'", path);
1851 n = fread ( tmp, 1, 8, f );
1852 if (n != 8)
1853 barf("loadArchive: Failed reading header from `%s'", path);
1854 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1855 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
1856 }
1857 }
1858 }
1859 else {
1860 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
1861 }
1862 #else
1863 else {
1864 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
1865 }
1866 #endif
1867
1868 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
1869
1870 while (1) {
1871 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
1872 n = fread ( fileName, 1, 16, f );
1873 if (n != 16) {
1874 if (feof(f)) {
1875 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
1876 break;
1877 }
1878 else {
1879 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
1880 }
1881 }
1882
1883 #if defined(darwin_HOST_OS)
1884 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
1885 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
1886 break;
1887 }
1888 #endif
1889
1890 n = fread ( tmp, 1, 12, f );
1891 if (n != 12)
1892 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
1893 n = fread ( tmp, 1, 6, f );
1894 if (n != 6)
1895 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
1896 n = fread ( tmp, 1, 6, f );
1897 if (n != 6)
1898 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
1899 n = fread ( tmp, 1, 8, f );
1900 if (n != 8)
1901 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
1902 n = fread ( tmp, 1, 10, f );
1903 if (n != 10)
1904 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
1905 tmp[10] = '\0';
1906 for (n = 0; isdigit(tmp[n]); n++);
1907 tmp[n] = '\0';
1908 memberSize = atoi(tmp);
1909
1910 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
1911 n = fread ( tmp, 1, 2, f );
1912 if (n != 2)
1913 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
1914 if (strncmp(tmp, "\x60\x0A", 2) != 0)
1915 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
1916 path, ftell(f), tmp[0], tmp[1]);
1917
1918 isGnuIndex = 0;
1919 /* Check for BSD-variant large filenames */
1920 if (0 == strncmp(fileName, "#1/", 3)) {
1921 fileName[16] = '\0';
1922 if (isdigit(fileName[3])) {
1923 for (n = 4; isdigit(fileName[n]); n++);
1924 fileName[n] = '\0';
1925 thisFileNameSize = atoi(fileName + 3);
1926 memberSize -= thisFileNameSize;
1927 if (thisFileNameSize >= fileNameSize) {
1928 /* Double it to avoid potentially continually
1929 increasing it by 1 */
1930 fileNameSize = thisFileNameSize * 2;
1931 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
1932 }
1933 n = fread ( fileName, 1, thisFileNameSize, f );
1934 if (n != (int)thisFileNameSize) {
1935 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
1936 path);
1937 }
1938 fileName[thisFileNameSize] = 0;
1939
1940 /* On OS X at least, thisFileNameSize is the size of the
1941 fileName field, not the length of the fileName
1942 itself. */
1943 thisFileNameSize = strlen(fileName);
1944 }
1945 else {
1946 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
1947 }
1948 }
1949 /* Check for GNU file index file */
1950 else if (0 == strncmp(fileName, "//", 2)) {
1951 fileName[0] = '\0';
1952 thisFileNameSize = 0;
1953 isGnuIndex = 1;
1954 }
1955 /* Check for a file in the GNU file index */
1956 else if (fileName[0] == '/') {
1957 if (isdigit(fileName[1])) {
1958 int i;
1959
1960 for (n = 2; isdigit(fileName[n]); n++);
1961 fileName[n] = '\0';
1962 n = atoi(fileName + 1);
1963
1964 if (gnuFileIndex == NULL) {
1965 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
1966 }
1967 if (n < 0 || n > gnuFileIndexSize) {
1968 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
1969 }
1970 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
1971 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
1972 }
1973 for (i = n; gnuFileIndex[i] != '\n'; i++);
1974 thisFileNameSize = i - n - 1;
1975 if (thisFileNameSize >= fileNameSize) {
1976 /* Double it to avoid potentially continually
1977 increasing it by 1 */
1978 fileNameSize = thisFileNameSize * 2;
1979 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
1980 }
1981 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
1982 fileName[thisFileNameSize] = '\0';
1983 }
1984 else if (fileName[1] == ' ') {
1985 fileName[0] = '\0';
1986 thisFileNameSize = 0;
1987 }
1988 else {
1989 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
1990 }
1991 }
1992 /* Finally, the case where the filename field actually contains
1993 the filename */
1994 else {
1995 /* GNU ar terminates filenames with a '/', this allowing
1996 spaces in filenames. So first look to see if there is a
1997 terminating '/'. */
1998 for (thisFileNameSize = 0;
1999 thisFileNameSize < 16;
2000 thisFileNameSize++) {
2001 if (fileName[thisFileNameSize] == '/') {
2002 fileName[thisFileNameSize] = '\0';
2003 break;
2004 }
2005 }
2006 /* If we didn't find a '/', then a space teminates the
2007 filename. Note that if we don't find one, then
2008 thisFileNameSize ends up as 16, and we already have the
2009 '\0' at the end. */
2010 if (thisFileNameSize == 16) {
2011 for (thisFileNameSize = 0;
2012 thisFileNameSize < 16;
2013 thisFileNameSize++) {
2014 if (fileName[thisFileNameSize] == ' ') {
2015 fileName[thisFileNameSize] = '\0';
2016 break;
2017 }
2018 }
2019 }
2020 }
2021
2022 IF_DEBUG(linker,
2023 debugBelch("loadArchive: Found member file `%s'\n", fileName));
2024
2025 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
2026 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
2027
2028 #if defined(OBJFORMAT_PEi386)
2029 /*
2030 * Note [MSVC import files (ext .lib)]
2031 * MSVC compilers store the object files in
2032 * the import libraries with extension .dll
2033 * so on Windows we should look for those too.
2034 * The PE COFF format doesn't specify any specific file name
2035 * for sections. So on windows, just try to load it all.
2036 *
2037 * Linker members (e.g. filename / are skipped since they are not needed)
2038 */
2039 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
2040
2041 /*
2042 * Note [GCC import files (ext .dll.a)]
2043 * GCC stores import information in the same binary format
2044 * as the object file normally has. The only difference is that
2045 * all the information are put in .idata sections. The only real
2046 * way to tell if we're dealing with an import lib is by looking
2047 * at the file extension.
2048 */
2049 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
2050 #endif // windows
2051
2052 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
2053 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
2054
2055 if (isObject) {
2056 char *archiveMemberName;
2057
2058 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
2059
2060 #if defined(mingw32_HOST_OS)
2061 // TODO: We would like to use allocateExec here, but allocateExec
2062 // cannot currently allocate blocks large enough.
2063 image = allocateImageAndTrampolines(path, fileName, f, memberSize,
2064 isThin);
2065 #elif defined(darwin_HOST_OS)
2066 if (RTS_LINKER_USE_MMAP)
2067 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
2068 else {
2069 /* See loadObj() */
2070 misalignment = machoGetMisalignment(f);
2071 image = stgMallocBytes(memberSize + misalignment,
2072 "loadArchive(image)");
2073 image += misalignment;
2074 }
2075
2076 #else // not windows or darwin
2077 image = stgMallocBytes(memberSize, "loadArchive(image)");
2078 #endif
2079 if (isThin) {
2080 FILE *member;
2081 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
2082
2083 /* Allocate and setup the dirname of the archive. We'll need
2084 this to locate the thin member */
2085 pathCopy = pathdup(path); // Convert the char* to a pathchar*
2086 dirName = pathdir(pathCopy);
2087
2088 /* Append the relative member name to the dirname. This should be
2089 be the full path to the actual thin member. */
2090 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
2091 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
2092 objFileName = mkPath(fileName);
2093 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
2094 stgFree(objFileName);
2095 stgFree(dirName);
2096
2097 member = pathopen(memberPath, WSTR("rb"));
2098 if (!member)
2099 barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
2100
2101 n = fread ( image, 1, memberSize, member );
2102 if (n != memberSize) {
2103 barf("loadArchive: error whilst reading `%s'", fileName);
2104 }
2105
2106 fclose(member);
2107 stgFree(memberPath);
2108 stgFree(pathCopy);
2109 }
2110 else
2111 {
2112 n = fread ( image, 1, memberSize, f );
2113 if (n != memberSize) {
2114 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2115 }
2116 }
2117
2118 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
2119 "loadArchive(file)");
2120 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
2121 path, (int)thisFileNameSize, fileName);
2122
2123 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
2124 , misalignment);
2125
2126 stgFree(archiveMemberName);
2127
2128 if (0 == loadOc(oc)) {
2129 stgFree(fileName);
2130 fclose(f);
2131 return 0;
2132 } else {
2133 #if defined(OBJFORMAT_PEi386)
2134 if (isImportLib)
2135 {
2136 findAndLoadImportLibrary(oc);
2137 stgFree(oc);
2138 oc = NULL;
2139 break;
2140 } else {
2141 #endif
2142 oc->next = objects;
2143 objects = oc;
2144 #if defined(OBJFORMAT_PEi386)
2145 }
2146 #endif
2147 }
2148 }
2149 else if (isGnuIndex) {
2150 if (gnuFileIndex != NULL) {
2151 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
2152 }
2153 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
2154 #if RTS_LINKER_USE_MMAP
2155 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
2156 #else
2157 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
2158 #endif
2159 n = fread ( gnuFileIndex, 1, memberSize, f );
2160 if (n != memberSize) {
2161 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2162 }
2163 gnuFileIndex[memberSize] = '/';
2164 gnuFileIndexSize = memberSize;
2165 }
2166 else if (isImportLib) {
2167 #if defined(OBJFORMAT_PEi386)
2168 if (checkAndLoadImportLibrary(path, fileName, f)) {
2169 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
2170 }
2171 else {
2172 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
2173 n = fseek(f, memberSize, SEEK_CUR);
2174 if (n != 0)
2175 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2176 memberSize, path);
2177 }
2178 #endif
2179 }
2180 else {
2181 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
2182 if (!isThin || thisFileNameSize == 0) {
2183 n = fseek(f, memberSize, SEEK_CUR);
2184 if (n != 0)
2185 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2186 memberSize, path);
2187 }
2188 }
2189
2190 /* .ar files are 2-byte aligned */
2191 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
2192 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
2193 n = fread ( tmp, 1, 1, f );
2194 if (n != 1) {
2195 if (feof(f)) {
2196 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
2197 break;
2198 }
2199 else {
2200 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
2201 }
2202 }
2203 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
2204 }
2205 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
2206 }
2207
2208 fclose(f);
2209
2210 stgFree(fileName);
2211 if (gnuFileIndex != NULL) {
2212 #if RTS_LINKER_USE_MMAP
2213 munmap(gnuFileIndex, gnuFileIndexSize + 1);
2214 #else
2215 stgFree(gnuFileIndex);
2216 #endif
2217 }
2218
2219 if (RTS_LINKER_USE_MMAP)
2220 m32_allocator_flush();
2221
2222 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
2223 return 1;
2224 }
2225
2226 HsInt loadArchive (pathchar *path)
2227 {
2228 ACQUIRE_LOCK(&linker_mutex);
2229 HsInt r = loadArchive_(path);
2230 RELEASE_LOCK(&linker_mutex);
2231 return r;
2232 }
2233
2234 //
2235 // Load the object file into memory. This will not be its final resting place,
2236 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
2237 // address space, properly aligned.
2238 //
2239 static ObjectCode *
2240 preloadObjectFile (pathchar *path)
2241 {
2242 int fileSize;
2243 struct_stat st;
2244 int r;
2245 void *image;
2246 ObjectCode *oc;
2247 int misalignment = 0;
2248
2249 r = pathstat(path, &st);
2250 if (r == -1) {
2251 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
2252 return NULL;
2253 }
2254
2255 fileSize = st.st_size;
2256
2257 #if RTS_LINKER_USE_MMAP
2258 int fd;
2259
2260 /* On many architectures malloc'd memory isn't executable, so we need to use
2261 * mmap. */
2262
2263 #if defined(openbsd_HOST_OS)
2264 fd = open(path, O_RDONLY, S_IRUSR);
2265 #else
2266 fd = open(path, O_RDONLY);
2267 #endif
2268 if (fd == -1) {
2269 errorBelch("loadObj: can't open %s", path);
2270 return NULL;
2271 }
2272
2273 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
2274 MAP_PRIVATE, fd, 0);
2275 // not 32-bit yet, we'll remap later
2276 close(fd);
2277
2278 #else /* !RTS_LINKER_USE_MMAP */
2279 FILE *f;
2280
2281 /* load the image into memory */
2282 /* coverity[toctou] */
2283 f = pathopen(path, WSTR("rb"));
2284 if (!f) {
2285 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
2286 return NULL;
2287 }
2288
2289 # if defined(mingw32_HOST_OS)
2290
2291 // TODO: We would like to use allocateExec here, but allocateExec
2292 // cannot currently allocate blocks large enough.
2293 image = allocateImageAndTrampolines(path, "itself", f, fileSize,
2294 HS_BOOL_FALSE);
2295 if (image == NULL) {
2296 fclose(f);
2297 return NULL;
2298 }
2299
2300 # elif defined(darwin_HOST_OS)
2301
2302 // In a Mach-O .o file, all sections can and will be misaligned
2303 // if the total size of the headers is not a multiple of the
2304 // desired alignment. This is fine for .o files that only serve
2305 // as input for the static linker, but it's not fine for us,
2306 // as SSE (used by gcc for floating point) and Altivec require
2307 // 16-byte alignment.
2308 // We calculate the correct alignment from the header before
2309 // reading the file, and then we misalign image on purpose so
2310 // that the actual sections end up aligned again.
2311 misalignment = machoGetMisalignment(f);
2312 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
2313 image += misalignment;
2314
2315 # else /* !defined(mingw32_HOST_OS) */
2316
2317 image = stgMallocBytes(fileSize, "loadObj(image)");
2318
2319 #endif
2320
2321 int n;
2322 n = fread ( image, 1, fileSize, f );
2323 fclose(f);
2324 if (n != fileSize) {
2325 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
2326 stgFree(image);
2327 return NULL;
2328 }
2329
2330 #endif /* RTS_LINKER_USE_MMAP */
2331
2332 oc = mkOc(path, image, fileSize, rtsTrue, NULL, misalignment);
2333
2334 return oc;
2335 }
2336
2337 /* -----------------------------------------------------------------------------
2338 * Load an obj (populate the global symbol table, but don't resolve yet)
2339 *
2340 * Returns: 1 if ok, 0 on error.
2341 */
2342 static HsInt loadObj_ (pathchar *path)
2343 {
2344 ObjectCode* oc;
2345 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
2346
2347 /* debugBelch("loadObj %s\n", path ); */
2348
2349 /* Check that we haven't already loaded this object.
2350 Ignore requests to load multiple times */
2351
2352 if (isAlreadyLoaded(path)) {
2353 IF_DEBUG(linker,
2354 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
2355 return 1; /* success */
2356 }
2357
2358 oc = preloadObjectFile(path);
2359 if (oc == NULL) return 0;
2360
2361 if (! loadOc(oc)) {
2362 // failed; free everything we've allocated
2363 removeOcSymbols(oc);
2364 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
2365 freeObjectCode(oc);
2366 return 0;
2367 }
2368
2369 oc->next = objects;
2370 objects = oc;
2371 return 1;
2372 }
2373
2374 HsInt loadObj (pathchar *path)
2375 {
2376 ACQUIRE_LOCK(&linker_mutex);
2377 HsInt r = loadObj_(path);
2378 RELEASE_LOCK(&linker_mutex);
2379 return r;
2380 }
2381
2382 static HsInt loadOc (ObjectCode* oc)
2383 {
2384 int r;
2385
2386 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
2387
2388 /* verify the in-memory image */
2389 # if defined(OBJFORMAT_ELF)
2390 r = ocVerifyImage_ELF ( oc );
2391 # elif defined(OBJFORMAT_PEi386)
2392 r = ocVerifyImage_PEi386 ( oc );
2393 # elif defined(OBJFORMAT_MACHO)
2394 r = ocVerifyImage_MachO ( oc );
2395 # else
2396 barf("loadObj: no verify method");
2397 # endif
2398 if (!r) {
2399 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
2400 return r;
2401 }
2402
2403 #if NEED_SYMBOL_EXTRAS
2404 # if defined(OBJFORMAT_MACHO)
2405 r = ocAllocateSymbolExtras_MachO ( oc );
2406 if (!r) {
2407 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
2408 return r;
2409 }
2410 # elif defined(OBJFORMAT_ELF)
2411 r = ocAllocateSymbolExtras_ELF ( oc );
2412 if (!r) {
2413 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
2414 return r;
2415 }
2416 # elif defined(OBJFORMAT_PEi386)
2417 ocAllocateSymbolExtras_PEi386 ( oc );
2418 # endif
2419 #endif
2420
2421 /* build the symbol list for this image */
2422 # if defined(OBJFORMAT_ELF)
2423 r = ocGetNames_ELF ( oc );
2424 # elif defined(OBJFORMAT_PEi386)
2425 r = ocGetNames_PEi386 ( oc );
2426 # elif defined(OBJFORMAT_MACHO)
2427 r = ocGetNames_MachO ( oc );
2428 # else
2429 barf("loadObj: no getNames method");
2430 # endif
2431 if (!r) {
2432 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
2433 return r;
2434 }
2435
2436 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
2437 setOcInitialStatus( oc );
2438 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
2439
2440 return 1;
2441 }
2442
2443 /* -----------------------------------------------------------------------------
2444 * try to load and initialize an ObjectCode into memory
2445 *
2446 * Returns: 1 if ok, 0 on error.
2447 */
2448 int ocTryLoad (ObjectCode* oc) {
2449 int r;
2450
2451 if (oc->status != OBJECT_NEEDED) {
2452 return 1;
2453 }
2454
2455 /* Check for duplicate symbols by looking into `symhash`.
2456 Duplicate symbols are any symbols which exist
2457 in different ObjectCodes that have both been loaded, or
2458 are to be loaded by this call.
2459
2460 This call is intended to have no side-effects when a non-duplicate
2461 symbol is re-inserted.
2462
2463 We set the Address to NULL since that is not used to distinguish
2464 symbols. Duplicate symbols are distinguished by name and oc.
2465 */
2466 int x;
2467 SymbolName* symbol;
2468 for (x = 0; x < oc->n_symbols; x++) {
2469 symbol = oc->symbols[x];
2470 if ( symbol
2471 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL, isSymbolWeak(oc, symbol), oc)) {
2472 return 0;
2473 }
2474 }
2475
2476 # if defined(OBJFORMAT_ELF)
2477 r = ocResolve_ELF ( oc );
2478 # elif defined(OBJFORMAT_PEi386)
2479 r = ocResolve_PEi386 ( oc );
2480 # elif defined(OBJFORMAT_MACHO)
2481 r = ocResolve_MachO ( oc );
2482 # else
2483 barf("ocTryLoad: not implemented on this platform");
2484 # endif
2485 if (!r) { return r; }
2486
2487 // run init/init_array/ctors/mod_init_func
2488
2489 loading_obj = oc; // tells foreignExportStablePtr what to do
2490 #if defined(OBJFORMAT_ELF)
2491 r = ocRunInit_ELF ( oc );
2492 #elif defined(OBJFORMAT_PEi386)
2493 r = ocRunInit_PEi386 ( oc );
2494 #elif defined(OBJFORMAT_MACHO)
2495 r = ocRunInit_MachO ( oc );
2496 #else
2497 barf("ocTryLoad: initializers not implemented on this platform");
2498 #endif
2499 loading_obj = NULL;
2500
2501 if (!r) { return r; }
2502
2503 oc->status = OBJECT_RESOLVED;
2504
2505 return 1;
2506 }
2507
2508 /* -----------------------------------------------------------------------------
2509 * resolve all the currently unlinked objects in memory
2510 *
2511 * Returns: 1 if ok, 0 on error.
2512 */
2513 static HsInt resolveObjs_ (void)
2514 {
2515 ObjectCode *oc;
2516 int r;
2517
2518 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
2519
2520 for (oc = objects; oc; oc = oc->next) {
2521 r = ocTryLoad(oc);
2522 if (!r)
2523 {
2524 return r;
2525 }
2526 }
2527
2528 #ifdef PROFILING
2529 // collect any new cost centres & CCSs that were defined during runInit
2530 initProfiling2();
2531 #endif
2532
2533 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
2534 return 1;
2535 }
2536
2537 HsInt resolveObjs (void)
2538 {
2539 ACQUIRE_LOCK(&linker_mutex);
2540 HsInt r = resolveObjs_();
2541 RELEASE_LOCK(&linker_mutex);
2542 return r;
2543 }
2544
2545 /* -----------------------------------------------------------------------------
2546 * delete an object from the pool
2547 */
2548 static HsInt unloadObj_ (pathchar *path, rtsBool just_purge)
2549 {
2550 ObjectCode *oc, *prev, *next;
2551 HsBool unloadedAnyObj = HS_BOOL_FALSE;
2552
2553 ASSERT(symhash != NULL);
2554 ASSERT(objects != NULL);
2555
2556 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
2557
2558 prev = NULL;
2559 for (oc = objects; oc; oc = next) {
2560 next = oc->next; // oc might be freed
2561
2562 if (!pathcmp(oc->fileName,path)) {
2563
2564 // these are both idempotent, so in just_purge mode we can
2565 // later call unloadObj() to really unload the object.
2566 removeOcSymbols(oc);
2567 freeOcStablePtrs(oc);
2568
2569 if (!just_purge) {
2570 if (prev == NULL) {
2571 objects = oc->next;
2572 } else {
2573 prev->next = oc->next;
2574 }
2575 ACQUIRE_LOCK(&linker_unloaded_mutex);
2576 oc->next = unloaded_objects;
2577 unloaded_objects = oc;
2578 oc->status = OBJECT_UNLOADED;
2579 RELEASE_LOCK(&linker_unloaded_mutex);
2580 // We do not own oc any more; it can be released at any time by
2581 // the GC in checkUnload().
2582 } else {
2583 prev = oc;
2584 }
2585
2586 /* This could be a member of an archive so continue
2587 * unloading other members. */
2588 unloadedAnyObj = HS_BOOL_TRUE;
2589 } else {
2590 prev = oc;
2591 }
2592 }
2593
2594 if (unloadedAnyObj) {
2595 return 1;
2596 }
2597 else {
2598 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
2599 return 0;
2600 }
2601 }
2602
2603 HsInt unloadObj (pathchar *path)
2604 {
2605 ACQUIRE_LOCK(&linker_mutex);
2606 HsInt r = unloadObj_(path, rtsFalse);
2607 RELEASE_LOCK(&linker_mutex);
2608 return r;
2609 }
2610
2611 HsInt purgeObj (pathchar *path)
2612 {
2613 ACQUIRE_LOCK(&linker_mutex);
2614 HsInt r = unloadObj_(path, rtsTrue);
2615 RELEASE_LOCK(&linker_mutex);
2616 return r;
2617 }
2618
2619 /* -----------------------------------------------------------------------------
2620 * Sanity checking. For each ObjectCode, maintain a list of address ranges
2621 * which may be prodded during relocation, and abort if we try and write
2622 * outside any of these.
2623 */
2624 static void
2625 addProddableBlock ( ObjectCode* oc, void* start, int size )
2626 {
2627 ProddableBlock* pb
2628 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
2629
2630 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
2631 ASSERT(size > 0);
2632 pb->start = start;
2633 pb->size = size;
2634 pb->next = oc->proddables;
2635 oc->proddables = pb;
2636 }
2637
2638 static void
2639 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
2640 {
2641 ProddableBlock* pb;
2642
2643 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
2644 char* s = (char*)(pb->start);
2645 char* e = s + pb->size;
2646 char* a = (char*)addr;
2647 if (a >= s && (a+size) <= e) return;
2648 }
2649 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
2650 }
2651
2652 static void freeProddableBlocks (ObjectCode *oc)
2653 {
2654 ProddableBlock *pb, *next;
2655
2656 for (pb = oc->proddables; pb != NULL; pb = next) {
2657 next = pb->next;
2658 stgFree(pb);
2659 }
2660 oc->proddables = NULL;
2661 }
2662
2663 /* -----------------------------------------------------------------------------
2664 * Section management.
2665 */
2666 static void
2667 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
2668 void* start, StgWord size, StgWord mapped_offset,
2669 void* mapped_start, StgWord mapped_size)
2670 {
2671 s->start = start; /* actual start of section in memory */
2672 s->size = size; /* actual size of section in memory */
2673 s->kind = kind;
2674 s->alloc = alloc;
2675 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
2676
2677 s->mapped_start = mapped_start; /* start of mmap() block */
2678 s->mapped_size = mapped_size; /* size of mmap() block */
2679
2680 IF_DEBUG(linker,
2681 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
2682 start, (void*)((StgWord)start + size),
2683 size, kind ));
2684 }
2685
2686
2687 /* --------------------------------------------------------------------------
2688 * Symbol Extras.
2689 * This is about allocating a small chunk of memory for every symbol in the
2690 * object file. We make sure that the SymboLExtras are always "in range" of
2691 * limited-range PC-relative instructions on various platforms by allocating
2692 * them right next to the object code itself.
2693 */
2694
2695 #if NEED_SYMBOL_EXTRAS
2696 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2697
2698 /*
2699 ocAllocateSymbolExtras
2700
2701 Allocate additional space at the end of the object file image to make room
2702 for jump islands (powerpc, x86_64, arm) and GOT entries (x86_64).
2703
2704 PowerPC relative branch instructions have a 24 bit displacement field.
2705 As PPC code is always 4-byte-aligned, this yields a +-32MB range.
2706 If a particular imported symbol is outside this range, we have to redirect
2707 the jump to a short piece of new code that just loads the 32bit absolute
2708 address and jumps there.
2709 On x86_64, PC-relative jumps and PC-relative accesses to the GOT are limited
2710 to 32 bits (+-2GB).
2711
2712 This function just allocates space for one SymbolExtra for every
2713 undefined symbol in the object file. The code for the jump islands is
2714 filled in by makeSymbolExtra below.
2715 */
2716
2717 static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
2718 {
2719 size_t n;
2720
2721 if (RTS_LINKER_USE_MMAP && USE_CONTIGUOUS_MMAP) {
2722 n = roundUpToPage(oc->fileSize);
2723
2724 /* Keep image and symbol_extras contiguous */
2725 void *new = mmapForLinker(n + (sizeof(SymbolExtra) * count),
2726 MAP_ANONYMOUS, -1, 0);
2727 if (new) {
2728 memcpy(new, oc->image, oc->fileSize);
2729 if (oc->imageMapped) {
2730 munmap(oc->image, n);
2731 }
2732 oc->image = new;
2733 oc->imageMapped = rtsTrue;
2734 oc->fileSize = n + (sizeof(SymbolExtra) * count);
2735 oc->symbol_extras = (SymbolExtra *) (oc->image + n);
2736 }
2737 else {
2738 oc->symbol_extras = NULL;
2739 return 0;
2740 }
2741 }
2742 else if( count > 0 ) {
2743 if (RTS_LINKER_USE_MMAP) {
2744 n = roundUpToPage(oc->fileSize);
2745
2746 oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8);
2747 if (oc->symbol_extras == NULL) return 0;
2748 }
2749 else {
2750 // round up to the nearest 4
2751 int aligned = (oc->fileSize + 3) & ~3;
2752 int misalignment = oc->misalignment;
2753
2754 oc->image -= misalignment;
2755 oc->image = stgReallocBytes( oc->image,
2756 misalignment +
2757 aligned + sizeof (SymbolExtra) * count,
2758 "ocAllocateSymbolExtras" );
2759 oc->image += misalignment;
2760
2761 oc->symbol_extras = (SymbolExtra *) (oc->image + aligned);
2762 }
2763 }
2764
2765 if (oc->symbol_extras != NULL) {
2766 memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
2767 }
2768
2769 oc->first_symbol_extra = first;
2770 oc->n_symbol_extras = count;
2771
2772 return 1;
2773 }
2774
2775 #endif
2776 #endif // NEED_SYMBOL_EXTRAS
2777
2778 #if defined(arm_HOST_ARCH)
2779
2780 static void
2781 ocFlushInstructionCache( ObjectCode *oc )
2782 {
2783 int i;
2784 // Object code
2785 for (i=0; i < oc->n_sections; i++) {
2786 Section *s = &oc->sections[i];
2787 // This is a bit too broad but we don't have any way to determine what
2788 // is certainly code
2789 if (s->kind == SECTIONKIND_CODE_OR_RODATA)
2790 __clear_cache(s->start, (void*) ((uintptr_t) s->start + s->size));
2791 }
2792
2793 // Jump islands
2794 // Note the (+1) to ensure that the last symbol extra is covered by the
2795 // flush.
2796 __clear_cache(oc->symbol_extras, &oc->symbol_extras[oc->n_symbol_extras+1]);
2797 }
2798
2799 #endif
2800
2801 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2802 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2803
2804 static SymbolExtra* makeSymbolExtra( ObjectCode* oc,
2805 unsigned long symbolNumber,
2806 unsigned long target )
2807 {
2808 SymbolExtra *extra;
2809
2810 ASSERT( symbolNumber >= oc->first_symbol_extra
2811 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2812
2813 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2814
2815 #ifdef powerpc_HOST_ARCH
2816 // lis r12, hi16(target)
2817 extra->jumpIsland.lis_r12 = 0x3d80;
2818 extra->jumpIsland.hi_addr = target >> 16;
2819
2820 // ori r12, r12, lo16(target)
2821 extra->jumpIsland.ori_r12_r12 = 0x618c;
2822 extra->jumpIsland.lo_addr = target & 0xffff;
2823
2824 // mtctr r12
2825 extra->jumpIsland.mtctr_r12 = 0x7d8903a6;
2826
2827 // bctr
2828 extra->jumpIsland.bctr = 0x4e800420;
2829 #endif
2830 #ifdef x86_64_HOST_ARCH
2831 // jmp *-14(%rip)
2832 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
2833 extra->addr = target;
2834 memcpy(extra->jumpIsland, jmp, 6);
2835 #endif
2836
2837 return extra;
2838 }
2839
2840 #endif
2841 #endif // defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2842
2843 #ifdef arm_HOST_ARCH
2844 static SymbolExtra* makeArmSymbolExtra( ObjectCode* oc,
2845 unsigned long symbolNumber,
2846 unsigned long target,
2847 int fromThumb,
2848 int toThumb )
2849 {
2850 SymbolExtra *extra;
2851
2852 ASSERT( symbolNumber >= oc->first_symbol_extra
2853 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2854
2855 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2856
2857 // Make sure instruction mode bit is set properly
2858 if (toThumb)
2859 target |= 1;
2860 else
2861 target &= ~1;
2862
2863 if (!fromThumb) {
2864 // In ARM encoding:
2865 // movw r12, #0
2866 // movt r12, #0
2867 // bx r12
2868 uint32_t code[] = { 0xe300c000, 0xe340c000, 0xe12fff1c };
2869
2870 // Patch lower half-word into movw
2871 code[0] |= ((target>>12) & 0xf) << 16;
2872 code[0] |= target & 0xfff;
2873 // Patch upper half-word into movt
2874 target >>= 16;
2875 code[1] |= ((target>>12) & 0xf) << 16;
2876 code[1] |= target & 0xfff;
2877
2878 memcpy(extra->jumpIsland, code, 12);
2879
2880 } else {
2881 // In Thumb encoding:
2882 // movw r12, #0
2883 // movt r12, #0
2884 // bx r12
2885 uint16_t code[] = { 0xf240, 0x0c00,
2886 0xf2c0, 0x0c00,
2887 0x4760 };
2888
2889 // Patch lower half-word into movw
2890 code[0] |= (target>>12) & 0xf;
2891 code[0] |= ((target>>11) & 0x1) << 10;
2892 code[1] |= ((target>>8) & 0x7) << 12;
2893 code[1] |= target & 0xff;
2894 // Patch upper half-word into movt
2895 target >>= 16;
2896 code[2] |= (target>>12) & 0xf;
2897 code[2] |= ((target>>11) & 0x1) << 10;
2898 code[3] |= ((target>>8) & 0x7) << 12;
2899 code[3] |= target & 0xff;
2900
2901 memcpy(extra->jumpIsland, code, 10);
2902 }
2903
2904 return extra;
2905 }
2906 #endif // arm_HOST_ARCH
2907
2908 /* --------------------------------------------------------------------------
2909 * PowerPC specifics (instruction cache flushing)
2910 * ------------------------------------------------------------------------*/
2911
2912 #ifdef powerpc_HOST_ARCH
2913 /*
2914 ocFlushInstructionCache
2915
2916 Flush the data & instruction caches.
2917 Because the PPC has split data/instruction caches, we have to
2918 do that whenever we modify code at runtime.
2919 */
2920
2921 static void
2922 ocFlushInstructionCacheFrom(void* begin, size_t length)
2923 {
2924 size_t n = (length + 3) / 4;
2925 unsigned long* p = begin;
2926
2927 while (n--)
2928 {
2929 __asm__ volatile ( "dcbf 0,%0\n\t"
2930 "sync\n\t"
2931 "icbi 0,%0"
2932 :
2933 : "r" (p)
2934 );
2935 p++;
2936 }
2937 __asm__ volatile ( "sync\n\t"
2938 "isync"
2939 );
2940 }
2941
2942 static void
2943 ocFlushInstructionCache( ObjectCode *oc )
2944 {
2945 /* The main object code */
2946 ocFlushInstructionCacheFrom(oc->image + oc->misalignment, oc->fileSize);
2947
2948 /* Jump Islands */
2949 ocFlushInstructionCacheFrom(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
2950 }
2951 #endif /* powerpc_HOST_ARCH */
2952
2953
2954 /* --------------------------------------------------------------------------
2955 * PEi386(+) specifics (Win32 targets)
2956 * ------------------------------------------------------------------------*/
2957
2958 /* The information for this linker comes from
2959 Microsoft Portable Executable
2960 and Common Object File Format Specification
2961 revision 8.3 February 2013
2962
2963 It can be found online at:
2964
2965 https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx
2966
2967 Things move, so if that fails, try searching for it via
2968
2969 http://www.google.com/search?q=PE+COFF+specification
2970
2971 The ultimate reference for the PE format is the Winnt.h
2972 header file that comes with the Platform SDKs; as always,
2973 implementations will drift wrt their documentation.
2974
2975 A good background article on the PE format is Matt Pietrek's
2976 March 1994 article in Microsoft System Journal (MSJ)
2977 (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
2978 Win32 Portable Executable File Format." The info in there
2979 has recently been updated in a two part article in
2980 MSDN magazine, issues Feb and March 2002,
2981 "Inside Windows: An In-Depth Look into the Win32 Portable
2982 Executable File Format"
2983
2984 John Levine's book "Linkers and Loaders" contains useful
2985 info on PE too.
2986
2987 The PE specification doesn't specify how to do the actual
2988 relocations. For this reason, and because both PE and ELF are
2989 based on COFF, the relocations for the PEi386+ code is based on
2990 the ELF relocations for the equivalent relocation type.
2991
2992 The ELF ABI can be found at
2993
2994 http://www.x86-64.org/documentation/abi.pdf
2995
2996 The current code is based on version 0.99.6 - October 2013
2997 */
2998
2999
3000 #if defined(OBJFORMAT_PEi386)
3001
3002 static int verifyCOFFHeader ( COFF_header *hdr, pathchar *filename);
3003
3004 /* We assume file pointer is right at the
3005 beginning of COFF object.
3006 */
3007 static char *
3008 allocateImageAndTrampolines (
3009 pathchar* arch_name, char* member_name,
3010 FILE* f USED_IF_x86_64_HOST_ARCH,
3011 int size,
3012 int isThin USED_IF_x86_64_HOST_ARCH)
3013 {
3014 char* image;
3015 #if defined(x86_64_HOST_ARCH)
3016 if (!isThin)
3017 {
3018 /* PeCoff contains number of symbols right in it's header, so
3019 we can reserve the room for symbolExtras right here. */
3020 COFF_header hdr;
3021 size_t n;
3022
3023 n = fread(&hdr, 1, sizeof_COFF_header, f);
3024 if (n != sizeof(COFF_header)) {
3025 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%S'",
3026 member_name, arch_name);
3027 return NULL;
3028 }
3029 fseek(f, -sizeof_COFF_header, SEEK_CUR);
3030
3031 if (!verifyCOFFHeader(&hdr, arch_name)) {
3032 return 0;
3033 }
3034
3035 /* We get back 8-byte aligned memory (is that guaranteed?), but
3036 the offsets to the sections within the file are all 4 mod 8
3037 (is that guaranteed?). We therefore need to offset the image
3038 by 4, so that all the pointers are 8-byte aligned, so that
3039 pointer tagging works. */
3040 /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
3041 which equals to 4 for 64-bit case and 0 for 32-bit case. */
3042 /* We allocate trampolines area for all symbols right behind
3043 image data, aligned on 8. */
3044 size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
3045 + hdr.NumberOfSymbols * sizeof(SymbolExtra);
3046 }
3047 #endif
3048 image = VirtualAlloc(NULL, size,
3049 MEM_RESERVE | MEM_COMMIT,
3050 PAGE_EXECUTE_READWRITE);
3051
3052 if (image == NULL) {
3053 errorBelch("%" PATH_FMT ": failed to allocate memory for image for %s",
3054 arch_name, member_name);
3055 return NULL;
3056 }
3057
3058 return image + PEi386_IMAGE_OFFSET;
3059 }
3060
3061 static int findAndLoadImportLibrary(ObjectCode* oc)
3062 {
3063 int i;
3064
3065 COFF_header* hdr;
3066 COFF_section* sectab;
3067 COFF_symbol* symtab;
3068 UChar* strtab;
3069
3070 hdr = (COFF_header*)(oc->image);
3071 sectab = (COFF_section*)(
3072 ((UChar*)(oc->image))
3073 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3074 );
3075
3076 symtab = (COFF_symbol*)(
3077 ((UChar*)(oc->image))
3078 + hdr->PointerToSymbolTable
3079 );
3080
3081 strtab = ((UChar*)symtab)
3082 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3083
3084 for (i = 0; i < oc->n_sections; i++)
3085 {
3086 COFF_section* sectab_i
3087 = (COFF_section*)myindex(sizeof_COFF_section, sectab, i);
3088
3089 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3090
3091 // Find the first entry containing a valid .idata$7 section.
3092 if (strcmp(secname, ".idata$7") == 0) {
3093 /* First load the containing DLL if not loaded. */
3094 Section section = oc->sections[i];
3095
3096 pathchar* dirName = pathdir(oc->fileName);
3097 HsPtr token = addLibrarySearchPath(dirName);
3098 stgFree(dirName);
3099 char* dllName = (char*)section.start;
3100
3101 if (strlen(dllName) == 0 || dllName[0] == ' ')
3102 {
3103 continue;
3104 }
3105
3106 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand '%ls' => `%s'\n", oc->fileName, dllName));
3107
3108 pathchar* dll = mkPath(dllName);
3109 removeLibrarySearchPath(token);
3110
3111 const char* result = addDLL(dll);
3112 stgFree(dll);
3113
3114 if (result != NULL) {
3115 errorBelch("Could not load `%s'. Reason: %s\n", (char*)dllName, result);
3116 return 0;
3117 }
3118
3119 break;
3120 }
3121
3122 stgFree(secname);
3123 }
3124
3125 return 1;
3126 }
3127
3128 static int checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f)
3129 {
3130 char* image;
3131 static HsBool load_dll_warn = HS_BOOL_FALSE;
3132
3133 if (load_dll_warn) { return 0; }
3134
3135 /* Based on Import Library specification. PE Spec section 7.1 */
3136
3137 COFF_import_header hdr;
3138 size_t n;
3139
3140 n = fread(&hdr, 1, sizeof_COFF_import_Header, f);
3141 if (n != sizeof(COFF_header)) {
3142 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%" PATH_FMT "'\n",
3143 member_name, arch_name);
3144 return 0;
3145 }
3146
3147 if (hdr.Sig1 != 0x0 || hdr.Sig2 != 0xFFFF) {
3148 fseek(f, -sizeof_COFF_import_Header, SEEK_CUR);
3149 IF_DEBUG(linker, debugBelch("loadArchive: Object `%s` is not an import lib. Skipping...\n", member_name));
3150 return 0;
3151 }
3152
3153 IF_DEBUG(linker, debugBelch("loadArchive: reading %d bytes at %ld\n", hdr.SizeOfData, ftell(f)));
3154
3155 image = malloc(hdr.SizeOfData);
3156 n = fread(image, 1, hdr.SizeOfData, f);
3157 if (n != hdr.SizeOfData) {
3158 errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n",
3159 member_name, arch_name);
3160 }
3161
3162 char* symbol = strtok(image, "\0");
3163 int symLen = strlen(symbol) + 1;
3164 int nameLen = n - symLen;
3165 char* dllName = malloc(sizeof(char) * nameLen);
3166 dllName = strncpy(dllName, image + symLen, nameLen);
3167 pathchar* dll = malloc(sizeof(wchar_t) * nameLen);
3168 mbstowcs(dll, dllName, nameLen);
3169 free(dllName);
3170
3171 IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%ls'\n", symbol, dll));
3172 const char* result = addDLL(dll);
3173
3174 free(image);
3175
3176 if (result != NULL) {
3177 errorBelch("Could not load `%ls'. Reason: %s\n", dll, result);
3178 load_dll_warn = HS_BOOL_TRUE;
3179
3180 free(dll);
3181 fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
3182 return 0;
3183 }
3184
3185 free(dll);
3186 return 1;
3187 }
3188
3189 /* We use myindex to calculate array addresses, rather than
3190 simply doing the normal subscript thing. That's because
3191 some of the above structs have sizes which are not
3192 a whole number of words. GCC rounds their sizes up to a
3193 whole number of words, which means that the address calcs
3194 arising from using normal C indexing or pointer arithmetic
3195 are just plain wrong. Sigh.
3196 */
3197 static UChar *
3198 myindex ( int scale, void* base, int index )
3199 {
3200 return
3201 ((UChar*)base) + scale * index;
3202 }
3203
3204
3205 static void
3206 printName ( UChar* name, UChar* strtab )
3207 {
3208 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3209 UInt32 strtab_offset = * (UInt32*)(name+4);
3210 debugBelch("%s", strtab + strtab_offset );
3211 } else {
3212 int i;
3213 for (i = 0; i < 8; i++) {
3214 if (name[i] == 0) break;
3215 debugBelch("%c", name[i] );
3216 }
3217 }
3218 }
3219
3220
3221 static void
3222 copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
3223 {
3224 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3225 UInt32 strtab_offset = * (UInt32*)(name+4);
3226 strncpy ( (char*)dst, (char*)strtab+strtab_offset, dstSize );
3227 dst[dstSize-1] = 0;
3228 } else {
3229 int i = 0;
3230 while (1) {
3231 if (i >= 8) break;
3232 if (name[i] == 0) break;
3233 dst[i] = name[i];
3234 i++;
3235 }
3236 dst[i] = 0;
3237 }
3238 }
3239
3240
3241 static UChar *
3242 cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
3243 {
3244 UChar* newstr;
3245 /* If the string is longer than 8 bytes, look in the
3246 string table for it -- this will be correctly zero terminated.
3247 */
3248 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3249 UInt32 strtab_offset = * (UInt32*)(name+4);
3250 return ((UChar*)strtab) + strtab_offset;
3251 }
3252 /* Otherwise, if shorter than 8 bytes, return the original,
3253 which by defn is correctly terminated.
3254 */
3255 if (name[7]==0) return name;
3256 /* The annoying case: 8 bytes. Copy into a temporary
3257 (XXX which is never freed ...)
3258 */
3259 newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
3260 ASSERT(newstr);
3261 strncpy((char*)newstr,(char*)name,8);
3262 newstr[8] = 0;
3263 return newstr;
3264 }
3265
3266 /* Getting the name of a section is mildly tricky, so we make a
3267 function for it. Sadly, in one case we have to copy the string
3268 (when it is exactly 8 bytes long there's no trailing '\0'), so for
3269 consistency we *always* copy the string; the caller must free it
3270 */
3271 static char *
3272 cstring_from_section_name (UChar* name, UChar* strtab)
3273 {
3274 char *newstr;
3275
3276 if (name[0]=='/') {
3277 int strtab_offset = strtol((char*)name+1,NULL,10);
3278 int len = strlen(((char*)strtab) + strtab_offset);
3279
3280 newstr = stgMallocBytes(len+1, "cstring_from_section_symbol_name");
3281 strcpy((char*)newstr, (char*)((UChar*)strtab) + strtab_offset);
3282 return newstr;
3283 }
3284 else
3285 {
3286 newstr = stgMallocBytes(9, "cstring_from_section_symbol_name");
3287 ASSERT(newstr);
3288 strncpy((char*)newstr,(char*)name,8);
3289 newstr[8] = 0;
3290 return newstr;
3291 }
3292 }
3293
3294 /* See Note [mingw-w64 name decoration scheme] */
3295 #ifndef x86_64_HOST_ARCH
3296 static void
3297 zapTrailingAtSign ( UChar* sym )
3298 {
3299 # define my_isdigit(c) ((c) >= '0' && (c) <= '9')
3300 int i, j;
3301 if (sym[0] == 0) return;
3302 i = 0;
3303 while (sym[i] != 0) i++;
3304 i--;
3305 j = i;
3306 while (j > 0 && my_isdigit(sym[j])) j--;
3307 if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
3308 # undef my_isdigit
3309 }
3310 #endif
3311
3312 /* See Note [mingw-w64 name decoration scheme] */
3313 #ifndef x86_64_HOST_ARCH
3314 #define STRIP_LEADING_UNDERSCORE 1
3315 #else
3316 #define STRIP_LEADING_UNDERSCORE 0
3317 #endif
3318
3319 /*
3320 Note [mingw-w64 name decoration scheme]
3321
3322 What's going on with name decoration? Well, original code
3323 have some crufty and ad-hocish paths related mostly to very old
3324 mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty
3325 uniform and MS-compatible decoration scheme across its tools and runtime.
3326
3327 The scheme is pretty straightforward: on 32 bit objects symbols are exported
3328 with underscore prepended (and @ + stack size suffix appended for stdcall
3329 functions), on 64 bits no underscore is prepended and no suffix is appended
3330 because we have no stdcall convention on 64 bits.
3331
3332 See #9218
3333 */
3334
3335 static SymbolAddr*
3336 lookupSymbolInDLLs ( UChar *lbl )
3337 {
3338 OpenedDLL* o_dll;
3339 SymbolAddr* sym;
3340
3341 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
3342 /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
3343
3344 sym = GetProcAddress(o_dll->instance, (char*)(lbl+STRIP_LEADING_UNDERSCORE));
3345 if (sym != NULL) {
3346 /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
3347 return sym;
3348 }
3349
3350 /* Ticket #2283.
3351 Long description: http://support.microsoft.com/kb/132044
3352 tl;dr:
3353 If C/C++ compiler sees __declspec(dllimport) ... foo ...
3354 it generates call *__imp_foo, and __imp_foo here has exactly
3355 the same semantics as in __imp_foo = GetProcAddress(..., "foo")
3356 */
3357 if (sym == NULL && strncmp ((const char*)lbl, "__imp_", 6) == 0) {
3358 sym = GetProcAddress(o_dll->instance, (char*)(lbl+6+STRIP_LEADING_UNDERSCORE));
3359 if (sym != NULL) {
3360 IndirectAddr* ret;
3361 ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
3362 ret->addr = sym;
3363 ret->next = indirects;
3364 indirects = ret;
3365 IF_DEBUG(linker,
3366 debugBelch("warning: %s from %S is linked instead of %s\n",
3367 (char*)(lbl+6+STRIP_LEADING_UNDERSCORE), o_dll->name, (char*)lbl));
3368 return (void*) & ret->addr;
3369 }
3370 }
3371
3372 sym = GetProcAddress(o_dll->instance, (char*)lbl);
3373 if (sym != NULL) {
3374 /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
3375 return sym;
3376 }
3377 }
3378 return NULL;
3379 }
3380
3381 static int
3382 verifyCOFFHeader (COFF_header *hdr, pathchar *fileName)
3383 {
3384 #if defined(i386_HOST_ARCH)
3385 if (hdr->Machine != 0x14c) {
3386 errorBelch("%" PATH_FMT ": Not x86 PEi386", fileName);
3387 return 0;
3388 }
3389 #elif defined(x86_64_HOST_ARCH)
3390 if (hdr->Machine != 0x8664) {
3391 errorBelch("%" PATH_FMT ": Not x86_64 PEi386", fileName);
3392 return 0;
3393 }
3394 #else
3395 errorBelch("PEi386 not supported on this arch");
3396 #endif
3397
3398 if (hdr->SizeOfOptionalHeader != 0) {
3399 errorBelch("%" PATH_FMT ": PEi386 with nonempty optional header",
3400 fileName);
3401 return 0;
3402 }
3403 if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
3404 (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
3405 (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
3406 (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
3407 errorBelch("%" PATH_FMT ": Not a PEi386 object file", fileName);
3408 return 0;
3409 }
3410 if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
3411 /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
3412 errorBelch("%" PATH_FMT ": Invalid PEi386 word size or endiannness: %d",
3413 fileName,
3414 (int)(hdr->Characteristics));
3415 return 0;
3416 }
3417 return 1;
3418 }
3419
3420 static int
3421 ocVerifyImage_PEi386 ( ObjectCode* oc )
3422 {
3423 int i;
3424 UInt32 j, noRelocs;
3425 COFF_header* hdr;
3426 COFF_section* sectab;
3427 COFF_symbol* symtab;
3428 UChar* strtab;
3429 /* debugBelch("\nLOADING %s\n", oc->fileName); */
3430 hdr = (COFF_header*)(oc->image);
3431 sectab = (COFF_section*) (
3432 ((UChar*)(oc->image))
3433 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3434 );
3435 symtab = (COFF_symbol*) (
3436 ((UChar*)(oc->image))
3437 + hdr->PointerToSymbolTable
3438 );
3439 strtab = ((UChar*)symtab)
3440 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3441
3442 if (!verifyCOFFHeader(hdr, oc->fileName)) {
3443 return 0;
3444 }
3445
3446 /* If the string table size is way crazy, this might indicate that
3447 there are more than 64k relocations, despite claims to the
3448 contrary. Hence this test. */
3449 /* debugBelch("strtab size %d\n", * (UInt32*)strtab); */
3450 #if 0
3451 if ( (*(UInt32*)strtab) > 600000 ) {
3452 /* Note that 600k has no special significance other than being
3453 big enough to handle the almost-2MB-sized lumps that
3454 constitute HSwin32*.o. */
3455 debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
3456 return 0;
3457 }
3458 #endif
3459
3460 /* .BSS Section is initialized in ocGetNames_PEi386
3461 but we need the Sections array initialized here already. */
3462 Section *sections;
3463 sections = (Section*)stgCallocBytes(
3464 sizeof(Section),
3465 hdr->NumberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */
3466 "ocVerifyImage_PEi386(sections)");
3467 oc->sections = sections;
3468 oc->n_sections = hdr->NumberOfSections + 1;
3469
3470 /* Initialize the Sections */
3471 for (i = 0; i < hdr->NumberOfSections; i++) {
3472 COFF_section* sectab_i
3473 = (COFF_section*)
3474 myindex(sizeof_COFF_section, sectab, i);
3475
3476 /* Calculate the start of the data section */
3477 sections[i].start = oc->image + sectab_i->PointerToRawData;
3478 }
3479
3480 /* No further verification after this point; only debug printing. */
3481 i = 0;
3482 IF_DEBUG(linker, i=1);
3483 if (i == 0) return 1;
3484
3485 debugBelch("sectab offset = %" FMT_SizeT "\n",
3486 ((UChar*)sectab) - ((UChar*)hdr) );
3487 debugBelch("symtab offset = %" FMT_SizeT "\n",
3488 ((UChar*)symtab) - ((UChar*)hdr) );
3489 debugBelch("strtab offset = %" FMT_SizeT "\n",
3490 ((UChar*)strtab) - ((UChar*)hdr) );
3491
3492 debugBelch("\n" );
3493 debugBelch( "Machine: 0x%x\n", (UInt32)(hdr->Machine) );
3494 debugBelch( "# sections: %d\n", (UInt32)(hdr->NumberOfSections) );
3495 debugBelch( "time/date: 0x%x\n", (UInt32)(hdr->TimeDateStamp) );
3496 debugBelch( "symtab offset: %d\n", (UInt32)(hdr->PointerToSymbolTable) );
3497 debugBelch( "# symbols: %d\n", (UInt32)(hdr->NumberOfSymbols) );
3498 debugBelch( "sz of opt hdr: %d\n", (UInt32)(hdr->SizeOfOptionalHeader) );
3499 debugBelch( "characteristics: 0x%x\n", (UInt32)(hdr->Characteristics) );
3500
3501 /* Print the section table. */
3502 debugBelch("\n" );
3503 for (i = 0; i < hdr->NumberOfSections; i++) {
3504 COFF_reloc* reltab;
3505 COFF_section* sectab_i
3506 = (COFF_section*)
3507 myindex ( sizeof_COFF_section, sectab, i );
3508 Section section = sections[i];
3509 debugBelch(
3510 "\n"
3511 "section %d\n"
3512 " name `",
3513 i
3514 );
3515 printName ( sectab_i->Name, strtab );
3516 debugBelch(
3517 "'\n"
3518 " vsize %d\n"
3519 " vaddr %d\n"
3520 " data sz %d\n"
3521 " data off 0x%p\n"
3522 " num rel %d\n"
3523 " off rel %d\n"
3524 " ptr raw 0x%x\n",
3525 sectab_i->VirtualSize,
3526 sectab_i->VirtualAddress,
3527 sectab_i->SizeOfRawData,
3528 section.start,
3529 sectab_i->NumberOfRelocations,
3530 sectab_i->PointerToRelocations,
3531 sectab_i->PointerToRawData
3532 );
3533 reltab = (COFF_reloc*) (
3534 ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
3535 );
3536
3537 if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
3538 /* If the relocation field (a short) has overflowed, the
3539 * real count can be found in the first reloc entry.
3540 *
3541 * See Section 4.1 (last para) of the PE spec (rev6.0).
3542 */
3543 COFF_reloc* rel = (COFF_reloc*)
3544 myindex ( sizeof_COFF_reloc, reltab, 0 );
3545 noRelocs = rel->VirtualAddress;
3546 j = 1;
3547 } else {
3548 noRelocs = sectab_i->NumberOfRelocations;
3549 j = 0;
3550 }
3551
3552 for (; j < noRelocs; j++) {
3553 COFF_symbol* sym;
3554 COFF_reloc* rel = (COFF_reloc*)
3555 myindex ( sizeof_COFF_reloc, reltab, j );
3556 debugBelch(
3557 " type 0x%-4x vaddr 0x%-8x name `",
3558 (UInt32)rel->Type,
3559 rel->VirtualAddress );
3560 sym = (COFF_symbol*)
3561 myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
3562 /* Hmm..mysterious looking offset - what's it for? SOF */
3563 printName ( sym->Name, strtab -10 );
3564 debugBelch("'\n" );
3565 }
3566
3567 debugBelch("\n" );
3568 }
3569 debugBelch("\n" );
3570 debugBelch("string table has size 0x%x\n", * (UInt32*)strtab );
3571 debugBelch("---START of string table---\n");
3572 for (i = 4; i < *(Int32*)strtab; i++) {
3573 if (strtab[i] == 0)
3574 debugBelch("\n"); else
3575 debugBelch("%c", strtab[i] );
3576 }
3577 debugBelch("--- END of string table---\n");
3578
3579 debugBelch("\n" );
3580 i = 0;
3581 while (1) {
3582 COFF_symbol* symtab_i;
3583 if (i >= (Int32)(hdr->NumberOfSymbols)) break;
3584 symtab_i = (COFF_symbol*)
3585 myindex ( sizeof_COFF_symbol, symtab, i );
3586 debugBelch(
3587 "symbol %d\n"
3588 " name `",
3589 i
3590 );
3591 printName ( symtab_i->Name, strtab );
3592 debugBelch(
3593 "'\n"
3594 " value 0x%x\n"
3595 " 1+sec# %d\n"
3596 " type 0x%x\n"
3597 " sclass 0x%x\n"
3598 " nAux %d\n",
3599 symtab_i->Value,
3600 (Int32)(symtab_i->SectionNumber),
3601 (UInt32)symtab_i->Type,
3602 (UInt32)symtab_i->StorageClass,
3603 (UInt32)symtab_i->NumberOfAuxSymbols
3604 );
3605 i += symtab_i->NumberOfAuxSymbols;
3606 i++;
3607 }
3608
3609 debugBelch("\n" );
3610 return 1;
3611 }
3612
3613
3614 static int
3615 ocGetNames_PEi386 ( ObjectCode* oc )
3616 {
3617 COFF_header* hdr;
3618 COFF_section* sectab;
3619 COFF_symbol* symtab;
3620 UChar* strtab;
3621
3622 UChar* sname;
3623 SymbolAddr* addr;
3624 int i;
3625
3626 hdr = (COFF_header*)(oc->image);
3627 sectab = (COFF_section*) (
3628 ((UChar*)(oc->image))
3629 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3630 );
3631 symtab = (COFF_symbol*) (
3632 ((UChar*)(oc->image))
3633 + hdr->PointerToSymbolTable
3634 );
3635 strtab = ((UChar*)(oc->image))
3636 + hdr->PointerToSymbolTable
3637 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3638
3639 /* Allocate space for any (local, anonymous) .bss sections. */
3640
3641 for (i = 0; i < hdr->NumberOfSections; i++) {
3642 UInt32 bss_sz;
3643 UChar* zspace;
3644 COFF_section* sectab_i
3645 = (COFF_section*)
3646 myindex ( sizeof_COFF_section, sectab, i );
3647
3648 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3649
3650 if (0 != strcmp(secname, ".bss")) {
3651 stgFree(secname);
3652 continue;
3653 }
3654
3655 stgFree(secname);
3656
3657 /* sof 10/05: the PE spec text isn't too clear regarding what
3658 * the SizeOfRawData field is supposed to hold for object
3659 * file sections containing just uninitialized data -- for executables,
3660 * it is supposed to be zero; unclear what it's supposed to be
3661 * for object files. However, VirtualSize is guaranteed to be
3662 * zero for object files, which definitely suggests that SizeOfRawData
3663 * will be non-zero (where else would the size of this .bss section be
3664 * stored?) Looking at the COFF_section info for incoming object files,
3665 * this certainly appears to be the case.
3666 *
3667 * => I suspect we've been incorrectly handling .bss sections in (relocatable)
3668 * object files up until now. This turned out to bite us with ghc-6.4.1's use
3669 * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
3670 * variable decls into the .bss section. (The specific function in Q which
3671 * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
3672 */
3673 if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
3674 /* This is a non-empty .bss section.
3675 Allocate zeroed space for it */
3676 bss_sz = sectab_i->VirtualSize;
3677 if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
3678 zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
3679 oc->sections[i].start = zspace;
3680 addProddableBlock(oc, zspace, bss_sz);
3681 /* debugBelch("BSS anon section at 0x%x\n", zspace); */
3682 }
3683
3684 /* Copy section information into the ObjectCode. */
3685
3686 for (i = 0; i < hdr->NumberOfSections; i++) {
3687 UChar* start;
3688 UChar* end;
3689 UInt32 sz;
3690
3691 /* By default consider all section as CODE or DATA, which means we want to load them. */
3692 SectionKind kind
3693 = SECTIONKIND_CODE_OR_RODATA;
3694 COFF_section* sectab_i
3695 = (COFF_section*)
3696 myindex ( sizeof_COFF_section, sectab, i );
3697 Section section = oc->sections[i];
3698
3699 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3700
3701 IF_DEBUG(linker, debugBelch("section name = %s\n", secname ));
3702
3703 /* The PE file section flag indicates whether the section contains code or data. */
3704 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE ||
3705 sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
3706 kind = SECTIONKIND_CODE_OR_RODATA;
3707
3708 /* Check next if it contains any uninitialized data */
3709 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_UNINITIALIZED_DATA)
3710 kind = SECTIONKIND_RWDATA;
3711
3712 /* Finally check if it can be discarded. This will also ignore .debug sections */
3713 if (sectab_i->Characteristics & MYIMAGE_SCN_MEM_DISCARDABLE ||
3714 sectab_i->Characteristics & MYIMAGE_SCN_LNK_REMOVE)
3715 kind = SECTIONKIND_OTHER;
3716
3717 if (0==strcmp(".ctors", (char*)secname))
3718 kind = SECTIONKIND_INIT_ARRAY;
3719
3720 ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->VirtualSize == 0);
3721 sz = sectab_i->SizeOfRawData;
3722 if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;
3723
3724 start = section.start;
3725 end = start + sz - 1;
3726
3727 if (kind != SECTIONKIND_OTHER && end >= start) {
3728 addSection(&oc->sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
3729 addProddableBlock(oc, start, sz);
3730 }
3731
3732 stgFree(secname);
3733 }
3734
3735 /* Copy exported symbols into the ObjectCode. */
3736
3737 oc->n_symbols = hdr->NumberOfSymbols;
3738 oc->symbols = stgCallocBytes(sizeof(SymbolName*), oc->n_symbols,
3739 "ocGetNames_PEi386(oc->symbols)");
3740
3741 /* Work out the size of the global BSS section */
3742 StgWord globalBssSize = 0;
3743 for (i=0; i < (int)hdr->NumberOfSymbols; i++) {
3744 COFF_symbol* symtab_i;
3745 symtab_i = (COFF_symbol*)
3746 myindex ( sizeof_COFF_symbol, symtab, i );
3747 if (symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3748 && symtab_i->Value > 0
3749 && symtab_i->StorageClass != MYIMAGE_SYM_CLASS_SECTION) {
3750 globalBssSize += symtab_i->Value;
3751 }
3752 i += symtab_i->NumberOfAuxSymbols;
3753 }
3754
3755 /* Allocate BSS space */
3756 SymbolAddr* bss = NULL;
3757 if (globalBssSize > 0) {
3758 bss = stgCallocBytes(1, globalBssSize,
3759 "ocGetNames_PEi386(non-anonymous bss)");
3760 addSection(&oc->sections[oc->n_sections-1],
3761 SECTIONKIND_RWDATA, SECTION_MALLOC,
3762 bss, globalBssSize, 0, 0, 0);
3763 IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
3764 addProddableBlock(oc, bss, globalBssSize);
3765 } else {
3766 addSection(&oc->sections[oc->n_sections-1],
3767 SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
3768 }
3769
3770 for (i = 0; i < oc->n_symbols; i++) {
3771 COFF_symbol* symtab_i;
3772 symtab_i = (COFF_symbol*)
3773 myindex ( sizeof_COFF_symbol, symtab, i );
3774
3775 addr = NULL;
3776 HsBool isWeak = HS_BOOL_FALSE;
3777 if ( symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED
3778 && symtab_i->SectionNumber > 0) {
3779 /* This symbol is global and defined, viz, exported */
3780 /* for MYIMAGE_SYMCLASS_EXTERNAL
3781 && !MYIMAGE_SYM_UNDEFINED,
3782 the address of the symbol is:
3783 address of relevant section + offset in section
3784 */
3785 COFF_section* sectabent
3786 = (COFF_section*) myindex ( sizeof_COFF_section,
3787 sectab,
3788 symtab_i->SectionNumber-1 );
3789 if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL
3790 || ( symtab_i->StorageClass == MYIMAGE_SYM_CLASS_STATIC
3791 && sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT)
3792 ) {
3793 addr = (void*)((size_t)oc->sections[symtab_i->SectionNumber-1].start
3794 + symtab_i->Value);
3795 if (sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT) {
3796 isWeak = HS_BOOL_TRUE;
3797 }
3798 }
3799 }
3800 else if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_WEAK_EXTERNAL) {
3801 isWeak = HS_BOOL_TRUE;
3802 }
3803 else if ( symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3804 && symtab_i->Value > 0) {
3805 /* This symbol isn't in any section at all, ie, global bss.
3806 Allocate zeroed space for it from the BSS section */
3807 addr = bss;
3808 bss = (SymbolAddr*)((StgWord)bss + (StgWord)symtab_i->Value);
3809 IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symtab_i->Value));
3810 }
3811
3812 sname = cstring_from_COFF_symbol_name(symtab_i->Name, strtab);
3813 if (addr != NULL || isWeak == HS_BOOL_TRUE) {
3814
3815 /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
3816 IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname);)
3817 ASSERT(i >= 0 && i < oc->n_symbols);
3818 /* cstring_from_COFF_symbol_name always succeeds. */
3819 oc->symbols[i] = (SymbolName*)sname;
3820 if (isWeak == HS_BOOL_TRUE) {
3821 setWeakSymbol(oc, sname);
3822 }
3823
3824 if (! ghciInsertSymbolTable(oc->fileName, symhash, (SymbolName*)sname, addr,
3825 isWeak, oc)) {
3826 return 0;
3827 }
3828 } else {
3829 /* We're skipping the symbol, but if we ever load this
3830 object file we'll want to skip it then too. */
3831 oc->symbols[i] = NULL;
3832
3833 # if 0
3834 debugBelch(
3835 "IGNORING symbol %d\n"
3836 " name `",
3837 i
3838 );
3839 printName ( symtab_i->Name, strtab );
3840 debugBelch(
3841 "'\n"
3842 " value 0x%x\n"
3843 " 1+sec# %d\n"
3844 " type 0x%x\n"
3845 " sclass 0x%x\n"
3846 " nAux %d\n",
3847 symtab_i->Value,
3848 (Int32)(symtab_i->SectionNumber),
3849 (UInt32)symtab_i->Type,
3850 (UInt32)symtab_i->StorageClass,
3851 (UInt32)symtab_i->NumberOfAuxSymbols
3852 );
3853 # endif
3854 }
3855
3856 i += symtab_i->NumberOfAuxSymbols;
3857 }
3858
3859 return 1;
3860 }
3861
3862 #if defined(x86_64_HOST_ARCH)
3863
3864 /* We've already reserved a room for symbol extras in loadObj,
3865 * so simply set correct pointer here.
3866 */
3867 static int
3868 ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc )
3869 {
3870 oc->symbol_extras = (SymbolExtra*)(oc->image - PEi386_IMAGE_OFFSET
3871 + ((PEi386_IMAGE_OFFSET + oc->fileSize + 0x7) & ~0x7));
3872 oc->first_symbol_extra = 0;
3873 oc->n_symbol_extras = ((COFF_header*)oc->image)->NumberOfSymbols;
3874
3875 return 1;
3876 }
3877
3878 static size_t
3879 makeSymbolExtra_PEi386( ObjectCode* oc, size_t s, char* symbol )
3880 {
3881 unsigned int curr_thunk;
3882 SymbolExtra *extra;
3883
3884 curr_thunk = oc->first_symbol_extra;
3885 if (curr_thunk >= oc->n_symbol_extras) {
3886 barf("Can't allocate thunk for %s", symbol);
3887 }
3888
3889 extra = oc->symbol_extras + curr_thunk;
3890
3891 // jmp *-14(%rip)
3892 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
3893 extra->addr = (uint64_t)s;
3894 memcpy(extra->jumpIsland, jmp, 6);
3895
3896 oc->first_symbol_extra++;
3897
3898 return (size_t)extra->jumpIsland;
3899 }
3900
3901 #endif
3902
3903 static int
3904 ocResolve_PEi386 ( ObjectCode* oc )
3905 {
3906 COFF_header* hdr;
3907 COFF_section* sectab;
3908 COFF_symbol* symtab;
3909 UChar* strtab;
3910
3911 UInt32 A;
3912 size_t S;
3913 SymbolAddr* pP;
3914
3915 int i;
3916 UInt32 j, noRelocs;
3917
3918 /* ToDo: should be variable-sized? But is at least safe in the
3919 sense of buffer-overrun-proof. */
3920 UChar symbol[1000];
3921 /* debugBelch("resolving for %s\n", oc->fileName); */
3922
3923 hdr = (COFF_header*)(oc->image);
3924 sectab = (COFF_section*) (
3925 ((UChar*)(oc->image))
3926 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3927 );
3928 symtab = (COFF_symbol*) (