Fix incorrect calculated relocations on Windows x86_64
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31
32 #if !defined(mingw32_HOST_OS)
33 #include "posix/Signals.h"
34 #endif
35
36 // get protos for is*()
37 #include <ctype.h>
38
39 #ifdef HAVE_SYS_TYPES_H
40 #include <sys/types.h>
41 #endif
42
43 #include <inttypes.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <stdio.h>
47 #include <assert.h>
48 #include <libgen.h>
49
50 #ifdef HAVE_SYS_STAT_H
51 #include <sys/stat.h>
52 #endif
53
54 #if defined(HAVE_DLFCN_H)
55 #include <dlfcn.h>
56 #endif
57
58 /* PowerPC and ARM have relative branch instructions with only 24 bit
59 * displacements and therefore need jump islands contiguous with each object
60 * code module.
61 */
62 #if defined(powerpc_HOST_ARCH)
63 #define SHORT_REL_BRANCH 1
64 #endif
65 #if defined(arm_HOST_ARCH)
66 #define SHORT_REL_BRANCH 1
67 #endif
68
69 #if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
70 #define USE_CONTIGUOUS_MMAP 1
71 #else
72 #define USE_CONTIGUOUS_MMAP 0
73 #endif
74
75 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
76 # define OBJFORMAT_ELF
77 # include <regex.h> // regex is already used by dlopen() so this is OK
78 // to use here without requiring an additional lib
79 #elif defined (mingw32_HOST_OS)
80 # define OBJFORMAT_PEi386
81 # include <windows.h>
82 # include <shfolder.h> /* SHGetFolderPathW */
83 # include <math.h>
84 # include <wchar.h>
85 #elif defined(darwin_HOST_OS)
86 # define OBJFORMAT_MACHO
87 # include <regex.h>
88 # include <mach/machine.h>
89 # include <mach-o/fat.h>
90 # include <mach-o/loader.h>
91 # include <mach-o/nlist.h>
92 # include <mach-o/reloc.h>
93 #if defined(powerpc_HOST_ARCH)
94 # include <mach-o/ppc/reloc.h>
95 #endif
96 #if defined(x86_64_HOST_ARCH)
97 # include <mach-o/x86_64/reloc.h>
98 #endif
99 #endif
100
101 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
102 #define ALWAYS_PIC
103 #endif
104
105 #if defined(dragonfly_HOST_OS)
106 #include <sys/tls.h>
107 #endif
108
109 /* SymbolInfo tracks a symbol's address, the object code from which
110 it originated, and whether or not it's weak.
111
112 RtsSymbolInfo is used to track the state of the symbols currently
113 loaded or to be loaded by the Linker.
114
115 Where the information in the `ObjectCode` is used to track the
116 original status of the symbol inside the `ObjectCode`.
117
118 A weak symbol that has been used will still be marked as weak
119 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
120 */
121 typedef struct _RtsSymbolInfo {
122 SymbolAddr* value;
123 ObjectCode *owner;
124 HsBool weak;
125 } RtsSymbolInfo;
126
127 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
128 This hashtable will contain information on all symbols
129 that we know of, however the .o they are in may not be loaded.
130
131 Until the ObjectCode the symbol belongs to is actually
132 loaded this symbol may be replaced. So do not rely on
133 addresses of unloaded symbols.
134
135 Note [runtime-linker-phases]
136 --------------------------------------
137 Broadly the behavior of the runtime linker can be
138 split into the following four phases:
139
140 - Indexing (e.g. ocVerifyImage and ocGetNames)
141 - Initialization (e.g. ocResolve and ocRunInit)
142 - Resolve (e.g. resolveObjs())
143 - Lookup (e.g. lookupSymbol)
144
145 This is to enable lazy loading of symbols. Eager loading is problematic
146 as it means that all symbols must be available, even those which we will
147 never use. This is especially painful of Windows, where the number of
148 libraries required to link things like mingwex grows to be quite high.
149
150 We proceed through these stages as follows,
151
152 * During Indexing we verify and open the ObjectCode and
153 perform a quick scan/indexing of the ObjectCode. All the work
154 required to actually load the ObjectCode is done.
155
156 All symbols from the ObjectCode is also inserted into
157 `symhash`, where possible duplicates are handled via the semantics
158 described in `ghciInsertSymbolTable`.
159
160 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
161 depending on whether they are an archive members or not.
162
163 * During initialization we load ObjectCode, perform relocations, execute
164 static constructors etc. This phase may trigger other ObjectCodes to
165 be loaded because of the calls to lookupSymbol.
166
167 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
168 previous status was `OBJECT_LOADED`.
169
170 * During resolve we attempt to resolve all the symbols needed for the
171 initial link. This essentially means, that for any ObjectCode given
172 directly to the command-line we perform lookupSymbols on the required
173 symbols. lookupSymbols may trigger the loading of additional ObjectCode
174 if required.
175
176 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
177 the previous status was `OBJECT_NEEDED`.
178
179 * Lookup symbols is used to lookup any symbols required, both during initial
180 link and during statement and expression compilations in the REPL.
181 Declaration of e.g. an foreign import, will eventually call lookupSymbol
182 which will either fail (symbol unknown) or succeed (and possibly triggered a
183 load).
184
185 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
186
187 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
188 by calling unloadObj and starting over.
189 When a new declaration or statement is performed ultimately lookupSymbol is called
190 without doing a re-link.
191
192 The goal of these different phases is to allow the linker to be able to perform
193 "lazy loading" of ObjectCode. The reason for this is that we want to only link
194 in symbols that are actually required for the link. This reduces:
195
196 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
197 then we don't need to load libB. This means the dependency chain for libraries
198 such as mingw32 and mingwex can be broken down.
199
200 2) The number of duplicate symbols, since now only symbols that are
201 true duplicates will display the error.
202 */
203 static /*Str*/HashTable *symhash;
204
205 /* List of currently loaded objects */
206 ObjectCode *objects = NULL; /* initially empty */
207
208 /* List of objects that have been unloaded via unloadObj(), but are waiting
209 to be actually freed via checkUnload() */
210 ObjectCode *unloaded_objects = NULL; /* initially empty */
211
212 #ifdef THREADED_RTS
213 /* This protects all the Linker's global state except unloaded_objects */
214 Mutex linker_mutex;
215 /*
216 * This protects unloaded_objects. We have a separate mutex for this, because
217 * the GC needs to access unloaded_objects in checkUnload, while the linker only
218 * needs to access unloaded_objects in unloadObj(), so this allows most linker
219 * operations proceed concurrently with the GC.
220 */
221 Mutex linker_unloaded_mutex;
222 #endif
223
224 /* Type of the initializer */
225 typedef void (*init_t) (int argc, char **argv, char **env);
226
227 static HsInt isAlreadyLoaded( pathchar *path );
228 static HsInt loadOc( ObjectCode* oc );
229 static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
230 rtsBool mapped, char *archiveMemberName,
231 int misalignment
232 );
233
234 // Use wchar_t for pathnames on Windows (#5697)
235 #if defined(mingw32_HOST_OS)
236 #define pathcmp wcscmp
237 #define pathlen wcslen
238 #define pathopen _wfopen
239 #define pathstat _wstat
240 #define struct_stat struct _stat
241 #define open wopen
242 #define WSTR(s) L##s
243 #define pathprintf swprintf
244 #define pathsplit _wsplitpath_s
245 #define pathsize sizeof(wchar_t)
246 #else
247 #define pathcmp strcmp
248 #define pathlen strlen
249 #define pathopen fopen
250 #define pathstat stat
251 #define struct_stat struct stat
252 #define WSTR(s) s
253 #define pathprintf snprintf
254 #define pathsplit _splitpath_s
255 #define pathsize sizeof(char)
256 #endif
257
258 static pathchar* pathdup(pathchar *path)
259 {
260 pathchar *ret;
261 #if defined(mingw32_HOST_OS)
262 ret = wcsdup(path);
263 #else
264 /* sigh, strdup() isn't a POSIX function, so do it the long way */
265 ret = stgMallocBytes( strlen(path)+1, "pathdup" );
266 strcpy(ret, path);
267 #endif
268 return ret;
269 }
270
271 static pathchar* mkPath(char* path)
272 {
273 #if defined(mingw32_HOST_OS)
274 size_t required = mbstowcs(NULL, path, 0);
275 pathchar *ret = stgMallocBytes(sizeof(pathchar) * (required + 1), "mkPath");
276 if (mbstowcs(ret, path, required) == (size_t)-1)
277 {
278 barf("mkPath failed converting char* to wchar_t*");
279 }
280 ret[required] = '\0';
281 return ret;
282 #else
283 return pathdup(path);
284 #endif
285 }
286
287 /* Generic wrapper function to try and Resolve and RunInit oc files */
288 int ocTryLoad( ObjectCode* oc );
289
290 #if defined(OBJFORMAT_ELF)
291 static int ocVerifyImage_ELF ( ObjectCode* oc );
292 static int ocGetNames_ELF ( ObjectCode* oc );
293 static int ocResolve_ELF ( ObjectCode* oc );
294 static int ocRunInit_ELF ( ObjectCode* oc );
295 #if NEED_SYMBOL_EXTRAS
296 static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
297 #endif
298 #elif defined(OBJFORMAT_PEi386)
299 static int ocVerifyImage_PEi386 ( ObjectCode* oc );
300 static int ocGetNames_PEi386 ( ObjectCode* oc );
301 static int ocResolve_PEi386 ( ObjectCode* oc );
302 static int ocRunInit_PEi386 ( ObjectCode* oc );
303 static void *lookupSymbolInDLLs ( unsigned char *lbl );
304 /* See Note [mingw-w64 name decoration scheme] */
305 #ifndef x86_64_HOST_ARCH
306 static void zapTrailingAtSign ( unsigned char *sym );
307 #endif
308 static char *allocateImageAndTrampolines (
309 pathchar* arch_name, char* member_name,
310 #if defined(x86_64_HOST_ARCH)
311 FILE* f,
312 #endif
313 int size );
314 #if defined(x86_64_HOST_ARCH)
315 static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
316 static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
317 #define PEi386_IMAGE_OFFSET 4
318 #else
319 #define PEi386_IMAGE_OFFSET 0
320 #endif
321 #elif defined(OBJFORMAT_MACHO)
322 static int ocVerifyImage_MachO ( ObjectCode* oc );
323 static int ocGetNames_MachO ( ObjectCode* oc );
324 static int ocResolve_MachO ( ObjectCode* oc );
325 static int ocRunInit_MachO ( ObjectCode* oc );
326 static int machoGetMisalignment( FILE * );
327 #if NEED_SYMBOL_EXTRAS
328 static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
329 #endif
330 #ifdef powerpc_HOST_ARCH
331 static void machoInitSymbolsWithoutUnderscore( void );
332 #endif
333 #endif
334
335 #if defined(OBJFORMAT_PEi386)
336 /* string utility function */
337 static HsBool endsWithPath(pathchar* base, pathchar* str) {
338 int blen = pathlen(base);
339 int slen = pathlen(str);
340 return (blen >= slen) && (0 == pathcmp(base + blen - slen, str));
341 }
342
343 static int checkAndLoadImportLibrary(
344 pathchar* arch_name,
345 char* member_name,
346 FILE* f);
347
348 static int findAndLoadImportLibrary(
349 ObjectCode* oc
350 );
351
352 static UChar *myindex(
353 int scale,
354 void* base,
355 int index);
356 static UChar *cstring_from_COFF_symbol_name(
357 UChar* name,
358 UChar* strtab);
359 static char *cstring_from_section_name(
360 UChar* name,
361 UChar* strtab);
362
363
364 /* Add ld symbol for PE image base. */
365 #if defined(__GNUC__)
366 #define __ImageBase __MINGW_LSYMBOL(_image_base__)
367 #endif
368
369 /* Get the base of the module. */
370 /* This symbol is defined by ld. */
371 extern IMAGE_DOS_HEADER __ImageBase;
372 #define __image_base (void*)((HINSTANCE)&__ImageBase)
373
374 // MingW-w64 is missing these from the implementation. So we have to look them up
375 typedef DLL_DIRECTORY_COOKIE(WINAPI *LPAddDLLDirectory)(PCWSTR NewDirectory);
376 typedef WINBOOL(WINAPI *LPRemoveDLLDirectory)(DLL_DIRECTORY_COOKIE Cookie);
377 #endif /* OBJFORMAT_PEi386 */
378
379 static void freeProddableBlocks (ObjectCode *oc);
380
381 /* on x86_64 we have a problem with relocating symbol references in
382 * code that was compiled without -fPIC. By default, the small memory
383 * model is used, which assumes that symbol references can fit in a
384 * 32-bit slot. The system dynamic linker makes this work for
385 * references to shared libraries by either (a) allocating a jump
386 * table slot for code references, or (b) moving the symbol at load
387 * time (and copying its contents, if necessary) for data references.
388 *
389 * We unfortunately can't tell whether symbol references are to code
390 * or data. So for now we assume they are code (the vast majority
391 * are), and allocate jump-table slots. Unfortunately this will
392 * SILENTLY generate crashing code for data references. This hack is
393 * enabled by X86_64_ELF_NONPIC_HACK.
394 *
395 * One workaround is to use shared Haskell libraries. This is
396 * coming. Another workaround is to keep the static libraries but
397 * compile them with -fPIC, because that will generate PIC references
398 * to data which can be relocated. The PIC code is still too green to
399 * do this systematically, though.
400 *
401 * See bug #781
402 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
403 *
404 * Naming Scheme for Symbol Macros
405 *
406 * SymI_*: symbol is internal to the RTS. It resides in an object
407 * file/library that is statically.
408 * SymE_*: symbol is external to the RTS library. It might be linked
409 * dynamically.
410 *
411 * Sym*_HasProto : the symbol prototype is imported in an include file
412 * or defined explicitly
413 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
414 * default proto extern void sym(void);
415 */
416 #define X86_64_ELF_NONPIC_HACK 1
417
418 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
419 * small memory model on this architecture (see gcc docs,
420 * -mcmodel=small).
421 *
422 * MAP_32BIT not available on OpenBSD/amd64
423 */
424 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
425 #define TRY_MAP_32BIT MAP_32BIT
426 #else
427 #define TRY_MAP_32BIT 0
428 #endif
429
430 /*
431 Note [The ARM/Thumb Story]
432 ~~~~~~~~~~~~~~~~~~~~~~~~~~
433
434 Support for the ARM architecture is complicated by the fact that ARM has not
435 one but several instruction encodings. The two relevant ones here are the original
436 ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
437 of the instruction set.
438
439 How the CPU decodes a particular instruction is determined by a mode bit. This
440 mode bit is set on jump instructions, the value being determined by the low
441 bit of the target address: An odd address means the target is a procedure
442 encoded in the Thumb encoding whereas an even address means it's a traditional
443 ARM procedure (the actual address jumped to is even regardless of the encoding bit).
444
445 Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
446 is tricky. If the linker needs to link a call by an ARM object into Thumb code
447 (or vice-versa) it will produce a jump island. This, however, is incompatible with
448 GHC's tables-next-to-code. For this reason, it is critical that GHC emit
449 exclusively ARM or Thumb objects for all Haskell code.
450
451 We still do, however, need to worry about foreign code.
452 */
453
454 /*
455 * Due to the small memory model (see above), on x86_64 we have to map
456 * all our non-PIC object files into the low 2Gb of the address space
457 * (why 2Gb and not 4Gb? Because all addresses must be reachable
458 * using a 32-bit signed PC-relative offset). On Linux we can do this
459 * using the MAP_32BIT flag to mmap(), however on other OSs
460 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
461 * can't do this. So on these systems, we have to pick a base address
462 * in the low 2Gb of the address space and try to allocate memory from
463 * there.
464 *
465 * We pick a default address based on the OS, but also make this
466 * configurable via an RTS flag (+RTS -xm)
467 */
468 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
469
470 #if defined(MAP_32BIT)
471 // Try to use MAP_32BIT
472 #define MMAP_32BIT_BASE_DEFAULT 0
473 #else
474 // A guess: 1Gb.
475 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
476 #endif
477
478 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
479 #endif
480
481 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
482 ObjectCode *owner)
483 {
484 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
485 if (!pinfo || owner != pinfo->owner) return;
486 removeStrHashTable(table, key, NULL);
487 stgFree(pinfo);
488 }
489
490 /* -----------------------------------------------------------------------------
491 * Insert symbols into hash tables, checking for duplicates.
492 *
493 * Returns: 0 on failure, nonzero on success
494 */
495 /*
496 Note [weak-symbols-support]
497 -------------------------------------
498 While ghciInsertSymbolTable does implement extensive
499 logic for weak symbol support, weak symbols are not currently
500 fully supported by the RTS. This code is mostly here for COMDAT
501 support which uses the weak symbols support.
502
503 Linking weak symbols defined purely in C code with other C code
504 should also work, probably. Observing weak symbols in Haskell
505 won't.
506
507 Some test have been written for weak symbols but have been disabled
508 mostly because it's unsure how the weak symbols support should look.
509 See Trac #11223
510 */
511 static int ghciInsertSymbolTable(
512 pathchar* obj_name,
513 HashTable *table,
514 const SymbolName* key,
515 SymbolAddr* data,
516 HsBool weak,
517 ObjectCode *owner)
518 {
519 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
520 if (!pinfo) /* new entry */
521 {
522 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
523 pinfo->value = data;
524 pinfo->owner = owner;
525 pinfo->weak = weak;
526 insertStrHashTable(table, key, pinfo);
527 return 1;
528 }
529 else if (weak && data && pinfo->weak && !pinfo->value)
530 {
531 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
532 pinfo->value = data;
533 pinfo->owner = owner;
534 return 1;
535 }
536 else if (weak)
537 {
538 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
539 already know of another copy throw this one away.
540
541 or both weak symbols have a nonzero value. Keep the existing one.
542
543 This also preserves the semantics of linking against
544 the first symbol we find. */
545 }
546 else if (pinfo->weak && !weak) /* weak symbol is in the table */
547 {
548 /* override the weak definition with the non-weak one */
549 pinfo->value = data;
550 pinfo->owner = owner;
551 pinfo->weak = HS_BOOL_FALSE;
552 return 1;
553 }
554 else if ( pinfo->owner
555 && pinfo->owner->status != OBJECT_RESOLVED
556 && pinfo->owner->status != OBJECT_NEEDED)
557 {
558 /* If the other symbol hasn't been loaded or will be loaded and we want to
559 explicitly load the new one, we can just swap it out and load the one
560 that has been requested. If not, just keep the first one encountered.
561
562 Because the `symHash' table consists symbols we've also not loaded but
563 found during the initial scan this is safe to do. If however the existing
564 symbol has been loaded then it means we have a duplicate.
565
566 This is essentially emulating the behavior of a linker wherein it will always
567 link in object files that are .o file arguments, but only take object files
568 from archives as needed. */
569 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
570 pinfo->value = data;
571 pinfo->owner = owner;
572 pinfo->weak = weak;
573 }
574
575 return 1;
576 }
577 else if (pinfo->owner == owner)
578 {
579 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
580 return 1;
581 }
582 else if (owner && owner->status == OBJECT_LOADED)
583 {
584 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
585 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
586 call this function again to trigger the duplicate error. */
587 return 1;
588 }
589
590 pathchar* archiveName = NULL;
591 debugBelch(
592 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
593 " %s\n"
594 "whilst processing object file\n"
595 " %" PATH_FMT "\n"
596 "The symbol was previously defined in\n"
597 " %" PATH_FMT "\n"
598 "This could be caused by:\n"
599 " * Loading two different object files which export the same symbol\n"
600 " * Specifying the same object file twice on the GHCi command line\n"
601 " * An incorrect `package.conf' entry, causing some object to be\n"
602 " loaded twice.\n",
603 (char*)key,
604 obj_name,
605 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
606 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
607 : pinfo->owner->fileName
608 );
609
610 if (archiveName)
611 {
612 stgFree(archiveName);
613 archiveName = NULL;
614 }
615 return 0;
616 }
617
618 /* -----------------------------------------------------------------------------
619 * Looks up symbols into hash tables.
620 *
621 * Returns: 0 on failure and result is not set,
622 * nonzero on success and result set to nonzero pointer
623 */
624 static HsBool ghciLookupSymbolInfo(HashTable *table,
625 const SymbolName* key, RtsSymbolInfo **result)
626 {
627 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
628 if (!pinfo) {
629 *result = NULL;
630 return HS_BOOL_FALSE;
631 }
632 if (pinfo->weak)
633 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
634 /* Once it's looked up, it can no longer be overridden */
635 pinfo->weak = HS_BOOL_FALSE;
636
637 *result = pinfo;
638 return HS_BOOL_TRUE;
639 }
640
641 /* -----------------------------------------------------------------------------
642 * initialize the object linker
643 */
644
645
646 static int linker_init_done = 0 ;
647
648 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
649 static void *dl_prog_handle;
650 static regex_t re_invalid;
651 static regex_t re_realso;
652 #ifdef THREADED_RTS
653 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
654 #endif
655 #elif defined(OBJFORMAT_PEi386)
656 void addDLLHandle(pathchar* dll_name, HINSTANCE instance);
657 #endif
658
659 void initLinker (void)
660 {
661 // default to retaining CAFs for backwards compatibility. Most
662 // users will want initLinker_(0): otherwise unloadObj() will not
663 // be able to unload object files when they contain CAFs.
664 initLinker_(1);
665 }
666
667 void
668 initLinker_ (int retain_cafs)
669 {
670 RtsSymbolVal *sym;
671 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
672 int compileResult;
673 #endif
674
675 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
676
677 /* Make initLinker idempotent, so we can call it
678 before every relevant operation; that means we
679 don't need to initialise the linker separately */
680 if (linker_init_done == 1) {
681 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
682 return;
683 } else {
684 linker_init_done = 1;
685 }
686
687 objects = NULL;
688 unloaded_objects = NULL;
689
690 #if defined(THREADED_RTS)
691 initMutex(&linker_mutex);
692 initMutex(&linker_unloaded_mutex);
693 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
694 initMutex(&dl_mutex);
695 #endif
696 #endif
697 symhash = allocStrHashTable();
698
699 /* populate the symbol table with stuff from the RTS */
700 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
701 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
702 symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
703 barf("ghciInsertSymbolTable failed");
704 }
705 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
706 }
707 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
708 machoInitSymbolsWithoutUnderscore();
709 # endif
710 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
711 referenced from a statically linked module. We need to mimic this, but
712 we cannot use NULL because we use it to mean nonexistent symbols. So we
713 use an arbitrary (hopefully unique) address here.
714 */
715 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
716 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
717 barf("ghciInsertSymbolTable failed");
718 }
719
720 #if defined(OBJFORMAT_PEi386)
721 if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
722 symhash, "__image_base__", __image_base, HS_BOOL_TRUE, NULL)) {
723 barf("ghciInsertSymbolTable failed");
724 }
725 #endif /* OBJFORMAT_PEi386 */
726
727
728 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
729 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
730 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
731 retain_cafs ? newRetainedCAF : newGCdCAF,
732 HS_BOOL_FALSE, NULL)) {
733 barf("ghciInsertSymbolTable failed");
734 }
735
736 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
737 # if defined(RTLD_DEFAULT)
738 dl_prog_handle = RTLD_DEFAULT;
739 # else
740 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
741 # endif /* RTLD_DEFAULT */
742
743 compileResult = regcomp(&re_invalid,
744 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
745 REG_EXTENDED);
746 if (compileResult != 0) {
747 barf("Compiling re_invalid failed");
748 }
749 compileResult = regcomp(&re_realso,
750 "(GROUP|INPUT) *\\( *([^ )]+)",
751 REG_EXTENDED);
752 if (compileResult != 0) {
753 barf("Compiling re_realso failed");
754 }
755 # endif
756
757 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
758 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
759 // User-override for mmap_32bit_base
760 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
761 }
762 #endif
763
764 #if defined(mingw32_HOST_OS)
765 /*
766 * These two libraries cause problems when added to the static link,
767 * but are necessary for resolving symbols in GHCi, hence we load
768 * them manually here.
769 */
770 addDLL(WSTR("msvcrt"));
771 addDLL(WSTR("kernel32"));
772 addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
773 #endif
774
775 if (RTS_LINKER_USE_MMAP)
776 m32_allocator_init();
777
778 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
779 return;
780 }
781
782 void
783 exitLinker( void ) {
784 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
785 if (linker_init_done == 1) {
786 regfree(&re_invalid);
787 regfree(&re_realso);
788 #ifdef THREADED_RTS
789 closeMutex(&dl_mutex);
790 #endif
791 }
792 #endif
793 if (linker_init_done == 1) {
794 freeHashTable(symhash, free);
795 }
796 #ifdef THREADED_RTS
797 closeMutex(&linker_mutex);
798 #endif
799 }
800
801 /* -----------------------------------------------------------------------------
802 * Loading DLL or .so dynamic libraries
803 * -----------------------------------------------------------------------------
804 *
805 * Add a DLL from which symbols may be found. In the ELF case, just
806 * do RTLD_GLOBAL-style add, so no further messing around needs to
807 * happen in order that symbols in the loaded .so are findable --
808 * lookupSymbol() will subsequently see them by dlsym on the program's
809 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
810 *
811 * In the PEi386 case, open the DLLs and put handles to them in a
812 * linked list. When looking for a symbol, try all handles in the
813 * list. This means that we need to load even DLLs that are guaranteed
814 * to be in the ghc.exe image already, just so we can get a handle
815 * to give to loadSymbol, so that we can find the symbols. For such
816 * libraries, the LoadLibrary call should be a no-op except for returning
817 * the handle.
818 *
819 */
820
821 #if defined(OBJFORMAT_PEi386)
822 /* A record for storing handles into DLLs. */
823
824 typedef
825 struct _OpenedDLL {
826 pathchar* name;
827 struct _OpenedDLL* next;
828 HINSTANCE instance;
829 }
830 OpenedDLL;
831
832 /* A list thereof. */
833 static OpenedDLL* opened_dlls = NULL;
834
835 /* A record for storing indirectly linked functions from DLLs. */
836 typedef
837 struct _IndirectAddr {
838 SymbolAddr* addr;
839 struct _IndirectAddr* next;
840 }
841 IndirectAddr;
842
843 /* A list thereof. */
844 static IndirectAddr* indirects = NULL;
845
846 /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
847 void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
848 OpenedDLL* o_dll;
849 o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );
850 o_dll->name = dll_name ? pathdup(dll_name) : NULL;
851 o_dll->instance = instance;
852 o_dll->next = opened_dlls;
853 opened_dlls = o_dll;
854 }
855
856 #endif
857
858 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
859
860 /* Suppose in ghci we load a temporary SO for a module containing
861 f = 1
862 and then modify the module, recompile, and load another temporary
863 SO with
864 f = 2
865 Then as we don't unload the first SO, dlsym will find the
866 f = 1
867 symbol whereas we want the
868 f = 2
869 symbol. We therefore need to keep our own SO handle list, and
870 try SOs in the right order. */
871
872 typedef
873 struct _OpenedSO {
874 struct _OpenedSO* next;
875 void *handle;
876 }
877 OpenedSO;
878
879 /* A list thereof. */
880 static OpenedSO* openedSOs = NULL;
881
882 static const char *
883 internal_dlopen(const char *dll_name)
884 {
885 OpenedSO* o_so;
886 void *hdl;
887 const char *errmsg;
888 char *errmsg_copy;
889
890 // omitted: RTLD_NOW
891 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
892 IF_DEBUG(linker,
893 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
894
895 //-------------- Begin critical section ------------------
896 // This critical section is necessary because dlerror() is not
897 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
898 // Also, the error message returned must be copied to preserve it
899 // (see POSIX also)
900
901 ACQUIRE_LOCK(&dl_mutex);
902 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
903
904 errmsg = NULL;
905 if (hdl == NULL) {
906 /* dlopen failed; return a ptr to the error msg. */
907 errmsg = dlerror();
908 if (errmsg == NULL) errmsg = "addDLL: unknown error";
909 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
910 strcpy(errmsg_copy, errmsg);
911 errmsg = errmsg_copy;
912 } else {
913 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
914 o_so->handle = hdl;
915 o_so->next = openedSOs;
916 openedSOs = o_so;
917 }
918
919 RELEASE_LOCK(&dl_mutex);
920 //--------------- End critical section -------------------
921
922 return errmsg;
923 }
924
925 /*
926 Note [RTLD_LOCAL]
927
928 In GHCi we want to be able to override previous .so's with newly
929 loaded .so's when we recompile something. This further implies that
930 when we look up a symbol in internal_dlsym() we have to iterate
931 through the loaded libraries (in order from most recently loaded to
932 oldest) looking up the symbol in each one until we find it.
933
934 However, this can cause problems for some symbols that are copied
935 by the linker into the executable image at runtime - see #8935 for a
936 lengthy discussion. To solve that problem we need to look up
937 symbols in the main executable *first*, before attempting to look
938 them up in the loaded .so's. But in order to make that work, we
939 have to always call dlopen with RTLD_LOCAL, so that the loaded
940 libraries don't populate the global symbol table.
941 */
942
943 static void *
944 internal_dlsym(const char *symbol) {
945 OpenedSO* o_so;
946 void *v;
947
948 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
949 ACQUIRE_LOCK(&dl_mutex);
950 dlerror();
951 // look in program first
952 v = dlsym(dl_prog_handle, symbol);
953 if (dlerror() == NULL) {
954 RELEASE_LOCK(&dl_mutex);
955 return v;
956 }
957
958 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
959 v = dlsym(o_so->handle, symbol);
960 if (dlerror() == NULL) {
961 RELEASE_LOCK(&dl_mutex);
962 return v;
963 }
964 }
965 RELEASE_LOCK(&dl_mutex);
966 return v;
967 }
968 # endif
969
970 const char *
971 addDLL( pathchar *dll_name )
972 {
973 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
974 /* ------------------- ELF DLL loader ------------------- */
975
976 #define NMATCH 5
977 regmatch_t match[NMATCH];
978 const char *errmsg;
979 FILE* fp;
980 size_t match_length;
981 #define MAXLINE 1000
982 char line[MAXLINE];
983 int result;
984
985 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
986 errmsg = internal_dlopen(dll_name);
987
988 if (errmsg == NULL) {
989 return NULL;
990 }
991
992 // GHC Trac ticket #2615
993 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
994 // contain linker scripts rather than ELF-format object code. This
995 // code handles the situation by recognizing the real object code
996 // file name given in the linker script.
997 //
998 // If an "invalid ELF header" error occurs, it is assumed that the
999 // .so file contains a linker script instead of ELF object code.
1000 // In this case, the code looks for the GROUP ( ... ) linker
1001 // directive. If one is found, the first file name inside the
1002 // parentheses is treated as the name of a dynamic library and the
1003 // code attempts to dlopen that file. If this is also unsuccessful,
1004 // an error message is returned.
1005
1006 // see if the error message is due to an invalid ELF header
1007 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
1008 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
1009 IF_DEBUG(linker, debugBelch("result = %i\n", result));
1010 if (result == 0) {
1011 // success -- try to read the named file as a linker script
1012 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
1013 MAXLINE-1);
1014 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
1015 line[match_length] = '\0'; // make sure string is null-terminated
1016 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
1017 if ((fp = fopen(line, "r")) == NULL) {
1018 return errmsg; // return original error if open fails
1019 }
1020 // try to find a GROUP or INPUT ( ... ) command
1021 while (fgets(line, MAXLINE, fp) != NULL) {
1022 IF_DEBUG(linker, debugBelch("input line = %s", line));
1023 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
1024 // success -- try to dlopen the first named file
1025 IF_DEBUG(linker, debugBelch("match%s\n",""));
1026 line[match[2].rm_eo] = '\0';
1027 stgFree((void*)errmsg); // Free old message before creating new one
1028 errmsg = internal_dlopen(line+match[2].rm_so);
1029 break;
1030 }
1031 // if control reaches here, no GROUP or INPUT ( ... ) directive
1032 // was found and the original error message is returned to the
1033 // caller
1034 }
1035 fclose(fp);
1036 }
1037 return errmsg;
1038
1039 # elif defined(OBJFORMAT_PEi386)
1040 /* ------------------- Win32 DLL loader ------------------- */
1041
1042 pathchar* buf;
1043 OpenedDLL* o_dll;
1044 HINSTANCE instance;
1045
1046 IF_DEBUG(linker, debugBelch("\naddDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
1047
1048 /* See if we've already got it, and ignore if so. */
1049 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
1050 if (0 == pathcmp(o_dll->name, dll_name))
1051 return NULL;
1052 }
1053
1054 /* The file name has no suffix (yet) so that we can try
1055 both foo.dll and foo.drv
1056
1057 The documentation for LoadLibrary says:
1058 If no file name extension is specified in the lpFileName
1059 parameter, the default library extension .dll is
1060 appended. However, the file name string can include a trailing
1061 point character (.) to indicate that the module name has no
1062 extension. */
1063
1064 size_t bufsize = pathlen(dll_name) + 10;
1065 buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
1066
1067 /* These are ordered by probability of success and order we'd like them */
1068 const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
1069 const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
1070
1071 int cFormat;
1072 int cFlag;
1073 int flags_start = 1; // Assume we don't support the new API
1074
1075 /* Detect if newer API are available, if not, skip the first flags entry */
1076 if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
1077 flags_start = 0;
1078 }
1079
1080 /* Iterate through the possible flags and formats */
1081 for (cFlag = flags_start; cFlag < 2; cFlag++)
1082 {
1083 for (cFormat = 0; cFormat < 4; cFormat++)
1084 {
1085 snwprintf(buf, bufsize, formats[cFormat], dll_name);
1086 instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
1087 if (instance == NULL)
1088 {
1089 if (GetLastError() != ERROR_MOD_NOT_FOUND)
1090 {
1091 goto error;
1092 }
1093 }
1094 else
1095 {
1096 break; // We're done. DLL has been loaded.
1097 }
1098 }
1099 }
1100
1101 // Check if we managed to load the DLL
1102 if (instance == NULL) {
1103 goto error;
1104 }
1105
1106 stgFree(buf);
1107
1108 addDLLHandle(dll_name, instance);
1109
1110 return NULL;
1111
1112 error:
1113 stgFree(buf);
1114 sysErrorBelch("addDLL: %" PATH_FMT " (Win32 error %lu)", dll_name, GetLastError());
1115
1116 /* LoadLibrary failed; return a ptr to the error msg. */
1117 return "addDLL: could not load DLL";
1118
1119 # else
1120 barf("addDLL: not implemented on this platform");
1121 # endif
1122 }
1123
1124 /* -----------------------------------------------------------------------------
1125 * Searches the system directories to determine if there is a system DLL that
1126 * satisfies the given name. This prevent GHCi from linking against a static
1127 * library if a DLL is available.
1128 *
1129 * Returns: NULL on failure or no DLL found, else the full path to the DLL
1130 * that can be loaded.
1131 */
1132 pathchar* findSystemLibrary(pathchar* dll_name)
1133 {
1134 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));
1135
1136 #if defined(OBJFORMAT_PEi386)
1137 const unsigned int init_buf_size = 1024;
1138 unsigned int bufsize = init_buf_size;
1139 wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
1140 DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
1141
1142 if (wResult > bufsize) {
1143 result = realloc(result, sizeof(wchar_t) * wResult);
1144 wResult = SearchPathW(NULL, dll_name, NULL, wResult, result, NULL);
1145 }
1146
1147
1148 if (!wResult) {
1149 free(result);
1150 return NULL;
1151 }
1152
1153 return result;
1154 #else
1155 (void)(dll_name); // Function not implemented for other platforms.
1156 return NULL;
1157 #endif
1158 }
1159
1160 /* -----------------------------------------------------------------------------
1161 * Emits a warning determining that the system is missing a required security
1162 * update that we need to get access to the proper APIs
1163 */
1164 void warnMissingKBLibraryPaths( void )
1165 {
1166 static HsBool missing_update_warn = HS_BOOL_FALSE;
1167 if (!missing_update_warn) {
1168 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
1169 missing_update_warn = HS_BOOL_TRUE;
1170 }
1171 }
1172
1173 /* -----------------------------------------------------------------------------
1174 * appends a directory to the process DLL Load path so LoadLibrary can find it
1175 *
1176 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
1177 * restore the search path to what it was before this call.
1178 */
1179 HsPtr addLibrarySearchPath(pathchar* dll_path)
1180 {
1181 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));
1182
1183 #if defined(OBJFORMAT_PEi386)
1184 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1185 LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
1186
1187 HsPtr result = NULL;
1188
1189 const unsigned int init_buf_size = 4096;
1190 int bufsize = init_buf_size;
1191
1192 // Make sure the path is an absolute path
1193 WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
1194 DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
1195 if (!wResult){
1196 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1197 }
1198 else if (wResult > init_buf_size) {
1199 abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
1200 if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
1201 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1202 }
1203 }
1204
1205 if (AddDllDirectory) {
1206 result = AddDllDirectory(abs_path);
1207 }
1208 else
1209 {
1210 warnMissingKBLibraryPaths();
1211 WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
1212 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1213
1214 if (wResult > init_buf_size) {
1215 str = realloc(str, sizeof(WCHAR) * wResult);
1216 bufsize = wResult;
1217 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1218 if (!wResult) {
1219 sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1220 }
1221 }
1222
1223 bufsize = wResult + 2 + pathlen(abs_path);
1224 wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
1225
1226 wcscpy(newPath, abs_path);
1227 wcscat(newPath, L";");
1228 wcscat(newPath, str);
1229 if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
1230 sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1231 }
1232
1233 free(newPath);
1234 free(abs_path);
1235
1236 return str;
1237 }
1238
1239 if (!result) {
1240 sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1241 free(abs_path);
1242 return NULL;
1243 }
1244
1245 free(abs_path);
1246 return result;
1247 #else
1248 (void)(dll_path); // Function not implemented for other platforms.
1249 return NULL;
1250 #endif
1251 }
1252
1253 /* -----------------------------------------------------------------------------
1254 * removes a directory from the process DLL Load path
1255 *
1256 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
1257 */
1258 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
1259 {
1260 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n", dll_path_index));
1261
1262 #if defined(OBJFORMAT_PEi386)
1263 HsBool result = 0;
1264
1265 if (dll_path_index != NULL) {
1266 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1267 LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
1268
1269 if (RemoveDllDirectory) {
1270 result = RemoveDllDirectory(dll_path_index);
1271 // dll_path_index is now invalid, do not use it after this point.
1272 }
1273 else
1274 {
1275 warnMissingKBLibraryPaths();
1276 result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
1277 free(dll_path_index);
1278 }
1279
1280 if (!result) {
1281 sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
1282 return HS_BOOL_FALSE;
1283 }
1284 }
1285
1286 return result == 0 ? HS_BOOL_TRUE : HS_BOOL_FALSE;
1287 #else
1288 (void)(dll_path_index); // Function not implemented for other platforms.
1289 return HS_BOOL_FALSE;
1290 #endif
1291 }
1292
1293 /* -----------------------------------------------------------------------------
1294 * insert a symbol in the hash table
1295 *
1296 * Returns: 0 on failure, nozero on success
1297 */
1298 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
1299 {
1300 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
1301 }
1302
1303 /* -----------------------------------------------------------------------------
1304 * lookup a symbol in the hash table
1305 */
1306 static SymbolAddr* lookupSymbol_ (SymbolName* lbl)
1307 {
1308 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
1309
1310 ASSERT(symhash != NULL);
1311 RtsSymbolInfo *pinfo;
1312
1313 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
1314 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
1315
1316 # if defined(OBJFORMAT_ELF)
1317 return internal_dlsym(lbl);
1318 # elif defined(OBJFORMAT_MACHO)
1319
1320 /* HACK: On OS X, all symbols are prefixed with an underscore.
1321 However, dlsym wants us to omit the leading underscore from the
1322 symbol name -- the dlsym routine puts it back on before searching
1323 for the symbol. For now, we simply strip it off here (and ONLY
1324 here).
1325 */
1326 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
1327 ASSERT(lbl[0] == '_');
1328 return internal_dlsym(lbl + 1);
1329 # elif defined(OBJFORMAT_PEi386)
1330 SymbolAddr* sym;
1331
1332 /* See Note [mingw-w64 name decoration scheme] */
1333 #ifndef x86_64_HOST_ARCH
1334 zapTrailingAtSign ( (unsigned char*)lbl );
1335 #endif
1336 sym = lookupSymbolInDLLs((unsigned char*)lbl);
1337 return sym; // might be NULL if not found
1338
1339 # else
1340 ASSERT(2+2 == 5);
1341 return NULL;
1342 # endif
1343 } else {
1344 SymbolAddr* val = pinfo->value;
1345 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, val));
1346
1347 int r;
1348 ObjectCode* oc = pinfo->owner;
1349
1350 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
1351 See Note [runtime-linker-phases] */
1352 if (oc && oc->status == OBJECT_LOADED) {
1353 oc->status = OBJECT_NEEDED;
1354 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand loading symbol '%s'\n", lbl));
1355 r = ocTryLoad(oc);
1356
1357 if (!r) {
1358 errorBelch("Could not on-demand load symbol '%s'\n", lbl);
1359 return NULL;
1360 }
1361 }
1362
1363 return val;
1364 }
1365 }
1366
1367 SymbolAddr* lookupSymbol( SymbolName* lbl )
1368 {
1369 ACQUIRE_LOCK(&linker_mutex);
1370 SymbolAddr* r = lookupSymbol_(lbl);
1371 RELEASE_LOCK(&linker_mutex);
1372 return r;
1373 }
1374
1375 /* -----------------------------------------------------------------------------
1376 Create a StablePtr for a foreign export. This is normally called by
1377 a C function with __attribute__((constructor)), which is generated
1378 by GHC and linked into the module.
1379
1380 If the object code is being loaded dynamically, then we remember
1381 which StablePtrs were allocated by the constructors and free them
1382 again in unloadObj().
1383 -------------------------------------------------------------------------- */
1384
1385 static ObjectCode *loading_obj = NULL;
1386
1387 StgStablePtr foreignExportStablePtr (StgPtr p)
1388 {
1389 ForeignExportStablePtr *fe_sptr;
1390 StgStablePtr *sptr;
1391
1392 sptr = getStablePtr(p);
1393
1394 if (loading_obj != NULL) {
1395 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
1396 "foreignExportStablePtr");
1397 fe_sptr->stable_ptr = sptr;
1398 fe_sptr->next = loading_obj->stable_ptrs;
1399 loading_obj->stable_ptrs = fe_sptr;
1400 }
1401
1402 return sptr;
1403 }
1404
1405
1406 /* -----------------------------------------------------------------------------
1407 * Debugging aid: look in GHCi's object symbol tables for symbols
1408 * within DELTA bytes of the specified address, and show their names.
1409 */
1410 #ifdef DEBUG
1411 void ghci_enquire ( SymbolAddr* addr );
1412
1413 void ghci_enquire(SymbolAddr* addr)
1414 {
1415 int i;
1416 SymbolName* sym;
1417 RtsSymbolInfo* a;
1418 const int DELTA = 64;
1419 ObjectCode* oc;
1420
1421 for (oc = objects; oc; oc = oc->next) {
1422 for (i = 0; i < oc->n_symbols; i++) {
1423 sym = oc->symbols[i];
1424 if (sym == NULL) continue;
1425 a = NULL;
1426 if (a == NULL) {
1427 ghciLookupSymbolInfo(symhash, sym, &a);
1428 }
1429 if (a == NULL) {
1430 // debugBelch("ghci_enquire: can't find %s\n", sym);
1431 }
1432 else if ( a->value
1433 && (char*)addr-DELTA <= (char*)a->value
1434 && (char*)a->value <= (char*)addr+DELTA) {
1435 debugBelch("%p + %3d == `%s'\n", addr, (int)((char*)a->value - (char*)addr), sym);
1436 }
1437 }
1438 }
1439 }
1440 #endif
1441
1442 #if RTS_LINKER_USE_MMAP
1443 //
1444 // Returns NULL on failure.
1445 //
1446 void *
1447 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1448 {
1449 void *map_addr = NULL;
1450 void *result;
1451 size_t size;
1452 static uint32_t fixed = 0;
1453
1454 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1455 size = roundUpToPage(bytes);
1456
1457 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1458 mmap_again:
1459
1460 if (mmap_32bit_base != 0) {
1461 map_addr = mmap_32bit_base;
1462 }
1463 #endif
1464
1465 IF_DEBUG(linker,
1466 debugBelch("mmapForLinker: \tprotection %#0x\n",
1467 PROT_EXEC | PROT_READ | PROT_WRITE));
1468 IF_DEBUG(linker,
1469 debugBelch("mmapForLinker: \tflags %#0x\n",
1470 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1471
1472 result = mmap(map_addr, size,
1473 PROT_EXEC|PROT_READ|PROT_WRITE,
1474 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1475
1476 if (result == MAP_FAILED) {
1477 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1478 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1479 return NULL;
1480 }
1481
1482 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1483 if (mmap_32bit_base != 0) {
1484 if (result == map_addr) {
1485 mmap_32bit_base = (StgWord8*)map_addr + size;
1486 } else {
1487 if ((W_)result > 0x80000000) {
1488 // oops, we were given memory over 2Gb
1489 munmap(result,size);
1490 #if defined(freebsd_HOST_OS) || \
1491 defined(kfreebsdgnu_HOST_OS) || \
1492 defined(dragonfly_HOST_OS)
1493 // Some platforms require MAP_FIXED. This is normally
1494 // a bad idea, because MAP_FIXED will overwrite
1495 // existing mappings.
1496 fixed = MAP_FIXED;
1497 goto mmap_again;
1498 #else
1499 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1500 "asked for %lu bytes at %p. "
1501 "Try specifying an address with +RTS -xm<addr> -RTS",
1502 size, map_addr);
1503 return NULL;
1504 #endif
1505 } else {
1506 // hmm, we were given memory somewhere else, but it's
1507 // still under 2Gb so we can use it. Next time, ask
1508 // for memory right after the place we just got some
1509 mmap_32bit_base = (StgWord8*)result + size;
1510 }
1511 }
1512 } else {
1513 if ((W_)result > 0x80000000) {
1514 // oops, we were given memory over 2Gb
1515 // ... try allocating memory somewhere else?;
1516 debugTrace(DEBUG_linker,
1517 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1518 bytes, result);
1519 munmap(result, size);
1520
1521 // Set a base address and try again... (guess: 1Gb)
1522 mmap_32bit_base = (void*)0x40000000;
1523 goto mmap_again;
1524 }
1525 }
1526 #endif
1527
1528 IF_DEBUG(linker,
1529 debugBelch("mmapForLinker: mapped %" FMT_Word
1530 " bytes starting at %p\n", (W_)size, result));
1531 IF_DEBUG(linker,
1532 debugBelch("mmapForLinker: done\n"));
1533
1534 return result;
1535 }
1536 #endif
1537
1538 /*
1539 * Remove symbols from the symbol table, and free oc->symbols.
1540 * This operation is idempotent.
1541 */
1542 static void removeOcSymbols (ObjectCode *oc)
1543 {
1544 if (oc->symbols == NULL) return;
1545
1546 // Remove all the mappings for the symbols within this object..
1547 int i;
1548 for (i = 0; i < oc->n_symbols; i++) {
1549 if (oc->symbols[i] != NULL) {
1550 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1551 }
1552 }
1553
1554 stgFree(oc->symbols);
1555 oc->symbols = NULL;
1556 }
1557
1558 /*
1559 * Release StablePtrs and free oc->stable_ptrs.
1560 * This operation is idempotent.
1561 */
1562 static void freeOcStablePtrs (ObjectCode *oc)
1563 {
1564 // Release any StablePtrs that were created when this
1565 // object module was initialized.
1566 ForeignExportStablePtr *fe_ptr, *next;
1567
1568 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1569 next = fe_ptr->next;
1570 freeStablePtr(fe_ptr->stable_ptr);
1571 stgFree(fe_ptr);
1572 }
1573 oc->stable_ptrs = NULL;
1574 }
1575
1576 static void
1577 freePreloadObjectFile (ObjectCode *oc)
1578 {
1579 #if defined(mingw32_HOST_OS)
1580
1581 VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
1582
1583 IndirectAddr *ia, *ia_next;
1584 ia = indirects;
1585 while (ia != NULL) {
1586 ia_next = ia->next;
1587 stgFree(ia);
1588 ia = ia_next;
1589 }
1590 indirects = NULL;
1591
1592 #else
1593
1594 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1595 munmap(oc->image, oc->fileSize);
1596 }
1597 else {
1598 stgFree(oc->image);
1599 }
1600
1601 #endif
1602
1603 oc->image = NULL;
1604 oc->fileSize = 0;
1605 }
1606
1607 /*
1608 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1609 * the GC when a previously unloaded ObjectCode has been determined to be
1610 * unused, and when an error occurs during loadObj().
1611 */
1612 void freeObjectCode (ObjectCode *oc)
1613 {
1614 freePreloadObjectFile(oc);
1615
1616 if (oc->symbols != NULL) {
1617 stgFree(oc->symbols);
1618 oc->symbols = NULL;
1619 }
1620
1621 if (oc->extraInfos != NULL) {
1622 freeHashTable(oc->extraInfos, NULL);
1623 oc->extraInfos = NULL;
1624 }
1625
1626 if (oc->sections != NULL) {
1627 int i;
1628 for (i=0; i < oc->n_sections; i++) {
1629 if (oc->sections[i].start != NULL) {
1630 switch(oc->sections[i].alloc){
1631 #if RTS_LINKER_USE_MMAP
1632 case SECTION_MMAP:
1633 munmap(oc->sections[i].mapped_start,
1634 oc->sections[i].mapped_size);
1635 break;
1636 case SECTION_M32:
1637 m32_free(oc->sections[i].start,
1638 oc->sections[i].size);
1639 break;
1640 #endif
1641 case SECTION_MALLOC:
1642 stgFree(oc->sections[i].start);
1643 break;
1644 default:
1645 break;
1646 }
1647 }
1648 }
1649 stgFree(oc->sections);
1650 }
1651
1652 freeProddableBlocks(oc);
1653
1654 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1655 * alongside the image, so we don't need to free. */
1656 #if NEED_SYMBOL_EXTRAS && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
1657 if (RTS_LINKER_USE_MMAP) {
1658 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1659 m32_free(oc->symbol_extras,
1660 sizeof(SymbolExtra) * oc->n_symbol_extras);
1661 }
1662 }
1663 else {
1664 stgFree(oc->symbol_extras);
1665 }
1666 #endif
1667
1668 stgFree(oc->fileName);
1669 stgFree(oc->archiveMemberName);
1670
1671 stgFree(oc);
1672 }
1673
1674 /* -----------------------------------------------------------------------------
1675 * Sets the initial status of a fresh ObjectCode
1676 */
1677 static void setOcInitialStatus(ObjectCode* oc) {
1678 if (oc->isImportLib == HS_BOOL_TRUE) {
1679 oc->status = OBJECT_DONT_RESOLVE;
1680 } else if (oc->archiveMemberName == NULL) {
1681 oc->status = OBJECT_NEEDED;
1682 } else {
1683 oc->status = OBJECT_LOADED;
1684 }
1685 }
1686
1687 static ObjectCode*
1688 mkOc( pathchar *path, char *image, int imageSize,
1689 rtsBool mapped, char *archiveMemberName, int misalignment ) {
1690 ObjectCode* oc;
1691
1692 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1693 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1694
1695 # if defined(OBJFORMAT_ELF)
1696 oc->formatName = "ELF";
1697 # elif defined(OBJFORMAT_PEi386)
1698 oc->formatName = "PEi386";
1699 # elif defined(OBJFORMAT_MACHO)
1700 oc->formatName = "Mach-O";
1701 # else
1702 stgFree(oc);
1703 barf("loadObj: not implemented on this platform");
1704 # endif
1705
1706 oc->image = image;
1707 oc->fileName = pathdup(path);
1708
1709 if (archiveMemberName) {
1710 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1711 strcpy(oc->archiveMemberName, archiveMemberName);
1712 } else {
1713 oc->archiveMemberName = NULL;
1714 }
1715
1716 setOcInitialStatus( oc );
1717
1718 oc->fileSize = imageSize;
1719 oc->symbols = NULL;
1720 oc->n_sections = 0;
1721 oc->sections = NULL;
1722 oc->proddables = NULL;
1723 oc->stable_ptrs = NULL;
1724 #if NEED_SYMBOL_EXTRAS
1725 oc->symbol_extras = NULL;
1726 #endif
1727 oc->imageMapped = mapped;
1728
1729 oc->misalignment = misalignment;
1730 oc->extraInfos = NULL;
1731
1732 /* chain it onto the list of objects */
1733 oc->next = NULL;
1734
1735 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1736 return oc;
1737 }
1738
1739 /* -----------------------------------------------------------------------------
1740 * Check if an object or archive is already loaded.
1741 *
1742 * Returns: 1 if the path is already loaded, 0 otherwise.
1743 */
1744 static HsInt
1745 isAlreadyLoaded( pathchar *path )
1746 {
1747 ObjectCode *o;
1748 for (o = objects; o; o = o->next) {
1749 if (0 == pathcmp(o->fileName, path)) {
1750 return 1; /* already loaded */
1751 }
1752 }
1753 return 0; /* not loaded yet */
1754 }
1755
1756 static HsInt loadArchive_ (pathchar *path)
1757 {
1758 ObjectCode* oc;
1759 char *image;
1760 int memberSize;
1761 FILE *f;
1762 int n;
1763 size_t thisFileNameSize;
1764 char *fileName;
1765 size_t fileNameSize;
1766 int isObject, isGnuIndex, isThin, isImportLib;
1767 char tmp[20];
1768 char *gnuFileIndex;
1769 int gnuFileIndexSize;
1770 #if defined(darwin_HOST_OS)
1771 int i;
1772 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
1773 #if defined(i386_HOST_ARCH)
1774 const uint32_t mycputype = CPU_TYPE_X86;
1775 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
1776 #elif defined(x86_64_HOST_ARCH)
1777 const uint32_t mycputype = CPU_TYPE_X86_64;
1778 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
1779 #elif defined(powerpc_HOST_ARCH)
1780 const uint32_t mycputype = CPU_TYPE_POWERPC;
1781 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1782 #elif defined(powerpc64_HOST_ARCH)
1783 const uint32_t mycputype = CPU_TYPE_POWERPC64;
1784 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1785 #else
1786 #error Unknown Darwin architecture
1787 #endif
1788 #endif
1789 int misalignment = 0;
1790
1791 /* TODO: don't call barf() on error, instead return an error code, freeing
1792 * all resources correctly. This function is pretty complex, so it needs
1793 * to be refactored to make this practical. */
1794
1795 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
1796 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
1797
1798 /* Check that we haven't already loaded this archive.
1799 Ignore requests to load multiple times */
1800 if (isAlreadyLoaded(path)) {
1801 IF_DEBUG(linker,
1802 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1803 return 1; /* success */
1804 }
1805
1806 gnuFileIndex = NULL;
1807 gnuFileIndexSize = 0;
1808
1809 fileNameSize = 32;
1810 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
1811
1812 isThin = 0;
1813 isImportLib = 0;
1814
1815 f = pathopen(path, WSTR("rb"));
1816 if (!f)
1817 barf("loadObj: can't read `%" PATH_FMT "'", path);
1818
1819 /* Check if this is an archive by looking for the magic "!<arch>\n"
1820 * string. Usually, if this fails, we barf and quit. On Darwin however,
1821 * we may have a fat archive, which contains archives for more than
1822 * one architecture. Fat archives start with the magic number 0xcafebabe,
1823 * always stored big endian. If we find a fat_header, we scan through
1824 * the fat_arch structs, searching through for one for our host
1825 * architecture. If a matching struct is found, we read the offset
1826 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
1827 * from the start of the file.
1828 *
1829 * A subtlety is that all of the members of the fat_header and fat_arch
1830 * structs are stored big endian, so we need to call byte order
1831 * conversion functions.
1832 *
1833 * If we find the appropriate architecture in a fat archive, we gobble
1834 * its magic "!<arch>\n" string and continue processing just as if
1835 * we had a single architecture archive.
1836 */
1837
1838 n = fread ( tmp, 1, 8, f );
1839 if (n != 8)
1840 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
1841 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
1842 #if !defined(mingw32_HOST_OS)
1843 /* See Note [thin archives on Windows] */
1844 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
1845 isThin = 1;
1846 }
1847 #endif
1848 #if defined(darwin_HOST_OS)
1849 /* Not a standard archive, look for a fat archive magic number: */
1850 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
1851 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
1852 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
1853 nfat_offset = 0;
1854
1855 for (i = 0; i < (int)nfat_arch; i++) {
1856 /* search for the right arch */
1857 n = fread( tmp, 1, 20, f );
1858 if (n != 8)
1859 barf("loadArchive: Failed reading arch from `%s'", path);
1860 cputype = ntohl(*(uint32_t *)tmp);
1861 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
1862
1863 if (cputype == mycputype && cpusubtype == mycpusubtype) {
1864 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
1865 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
1866 break;
1867 }
1868 }
1869
1870 if (nfat_offset == 0) {
1871 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
1872 }
1873 else {
1874 n = fseek( f, nfat_offset, SEEK_SET );
1875 if (n != 0)
1876 barf("loadArchive: Failed to seek to arch in `%s'", path);
1877 n = fread ( tmp, 1, 8, f );
1878 if (n != 8)
1879 barf("loadArchive: Failed reading header from `%s'", path);
1880 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1881 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
1882 }
1883 }
1884 }
1885 else {
1886 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
1887 }
1888 #else
1889 else {
1890 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
1891 }
1892 #endif
1893
1894 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
1895
1896 while (1) {
1897 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
1898 n = fread ( fileName, 1, 16, f );
1899 if (n != 16) {
1900 if (feof(f)) {
1901 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
1902 break;
1903 }
1904 else {
1905 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
1906 }
1907 }
1908
1909 #if defined(darwin_HOST_OS)
1910 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
1911 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
1912 break;
1913 }
1914 #endif
1915
1916 n = fread ( tmp, 1, 12, f );
1917 if (n != 12)
1918 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
1919 n = fread ( tmp, 1, 6, f );
1920 if (n != 6)
1921 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
1922 n = fread ( tmp, 1, 6, f );
1923 if (n != 6)
1924 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
1925 n = fread ( tmp, 1, 8, f );
1926 if (n != 8)
1927 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
1928 n = fread ( tmp, 1, 10, f );
1929 if (n != 10)
1930 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
1931 tmp[10] = '\0';
1932 for (n = 0; isdigit(tmp[n]); n++);
1933 tmp[n] = '\0';
1934 memberSize = atoi(tmp);
1935
1936 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
1937 n = fread ( tmp, 1, 2, f );
1938 if (n != 2)
1939 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
1940 if (strncmp(tmp, "\x60\x0A", 2) != 0)
1941 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
1942 path, ftell(f), tmp[0], tmp[1]);
1943
1944 isGnuIndex = 0;
1945 /* Check for BSD-variant large filenames */
1946 if (0 == strncmp(fileName, "#1/", 3)) {
1947 fileName[16] = '\0';
1948 if (isdigit(fileName[3])) {
1949 for (n = 4; isdigit(fileName[n]); n++);
1950 fileName[n] = '\0';
1951 thisFileNameSize = atoi(fileName + 3);
1952 memberSize -= thisFileNameSize;
1953 if (thisFileNameSize >= fileNameSize) {
1954 /* Double it to avoid potentially continually
1955 increasing it by 1 */
1956 fileNameSize = thisFileNameSize * 2;
1957 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
1958 }
1959 n = fread ( fileName, 1, thisFileNameSize, f );
1960 if (n != (int)thisFileNameSize) {
1961 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
1962 path);
1963 }
1964 fileName[thisFileNameSize] = 0;
1965
1966 /* On OS X at least, thisFileNameSize is the size of the
1967 fileName field, not the length of the fileName
1968 itself. */
1969 thisFileNameSize = strlen(fileName);
1970 }
1971 else {
1972 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
1973 }
1974 }
1975 /* Check for GNU file index file */
1976 else if (0 == strncmp(fileName, "//", 2)) {
1977 fileName[0] = '\0';
1978 thisFileNameSize = 0;
1979 isGnuIndex = 1;
1980 }
1981 /* Check for a file in the GNU file index */
1982 else if (fileName[0] == '/') {
1983 if (isdigit(fileName[1])) {
1984 int i;
1985
1986 for (n = 2; isdigit(fileName[n]); n++);
1987 fileName[n] = '\0';
1988 n = atoi(fileName + 1);
1989
1990 if (gnuFileIndex == NULL) {
1991 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
1992 }
1993 if (n < 0 || n > gnuFileIndexSize) {
1994 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
1995 }
1996 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
1997 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
1998 }
1999 for (i = n; gnuFileIndex[i] != '\n'; i++);
2000 thisFileNameSize = i - n - 1;
2001 if (thisFileNameSize >= fileNameSize) {
2002 /* Double it to avoid potentially continually
2003 increasing it by 1 */
2004 fileNameSize = thisFileNameSize * 2;
2005 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2006 }
2007 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
2008 fileName[thisFileNameSize] = '\0';
2009 }
2010 else if (fileName[1] == ' ') {
2011 fileName[0] = '\0';
2012 thisFileNameSize = 0;
2013 }
2014 else {
2015 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
2016 }
2017 }
2018 /* Finally, the case where the filename field actually contains
2019 the filename */
2020 else {
2021 /* GNU ar terminates filenames with a '/', this allowing
2022 spaces in filenames. So first look to see if there is a
2023 terminating '/'. */
2024 for (thisFileNameSize = 0;
2025 thisFileNameSize < 16;
2026 thisFileNameSize++) {
2027 if (fileName[thisFileNameSize] == '/') {
2028 fileName[thisFileNameSize] = '\0';
2029 break;
2030 }
2031 }
2032 /* If we didn't find a '/', then a space teminates the
2033 filename. Note that if we don't find one, then
2034 thisFileNameSize ends up as 16, and we already have the
2035 '\0' at the end. */
2036 if (thisFileNameSize == 16) {
2037 for (thisFileNameSize = 0;
2038 thisFileNameSize < 16;
2039 thisFileNameSize++) {
2040 if (fileName[thisFileNameSize] == ' ') {
2041 fileName[thisFileNameSize] = '\0';
2042 break;
2043 }
2044 }
2045 }
2046 }
2047
2048 IF_DEBUG(linker,
2049 debugBelch("loadArchive: Found member file `%s'\n", fileName));
2050
2051 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
2052 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
2053
2054 #if defined(OBJFORMAT_PEi386)
2055 /*
2056 * Note [MSVC import files (ext .lib)]
2057 * MSVC compilers store the object files in
2058 * the import libraries with extension .dll
2059 * so on Windows we should look for those too.
2060 * The PE COFF format doesn't specify any specific file name
2061 * for sections. So on windows, just try to load it all.
2062 *
2063 * Linker members (e.g. filename / are skipped since they are not needed)
2064 */
2065 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
2066
2067 /*
2068 * Note [GCC import files (ext .dll.a)]
2069 * GCC stores import information in the same binary format
2070 * as the object file normally has. The only difference is that
2071 * all the information are put in .idata sections. The only real
2072 * way to tell if we're dealing with an import lib is by looking
2073 * at the file extension.
2074 */
2075 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
2076 #endif // windows
2077
2078 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
2079 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
2080
2081 if (isObject) {
2082 char *archiveMemberName;
2083
2084 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
2085
2086 #if defined(mingw32_HOST_OS)
2087 // TODO: We would like to use allocateExec here, but allocateExec
2088 // cannot currently allocate blocks large enough.
2089 image = allocateImageAndTrampolines(path, fileName,
2090 #if defined(x86_64_HOST_ARCH)
2091 f,
2092 #endif
2093 memberSize);
2094 #elif defined(darwin_HOST_OS)
2095 if (RTS_LINKER_USE_MMAP)
2096 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
2097 else {
2098 /* See loadObj() */
2099 misalignment = machoGetMisalignment(f);
2100 image = stgMallocBytes(memberSize + misalignment,
2101 "loadArchive(image)");
2102 image += misalignment;
2103 }
2104
2105 #else // not windows or darwin
2106 image = stgMallocBytes(memberSize, "loadArchive(image)");
2107 #endif
2108
2109 #if !defined(mingw32_HOST_OS)
2110 /*
2111 * Note [thin archives on Windows]
2112 * This doesn't compile on Windows because it assumes
2113 * char* pathnames, and we use wchar_t* on Windows. It's
2114 * not trivial to fix, so I'm leaving it disabled on
2115 * Windows for now --SDM
2116 */
2117 if (isThin) {
2118 FILE *member;
2119 char *pathCopy, *dirName, *memberPath;
2120
2121 /* Allocate and setup the dirname of the archive. We'll need
2122 this to locate the thin member */
2123 pathCopy = stgMallocBytes(strlen(path) + 1, "loadArchive(file)");
2124 strcpy(pathCopy, path);
2125 dirName = dirname(pathCopy);
2126
2127 /* Append the relative member name to the dirname. This should be
2128 be the full path to the actual thin member. */
2129 memberPath = stgMallocBytes(
2130 strlen(path) + 1 + strlen(fileName) + 1, "loadArchive(file)");
2131 strcpy(memberPath, dirName);
2132 memberPath[strlen(dirName)] = '/';
2133 strcpy(memberPath + strlen(dirName) + 1, fileName);
2134
2135 member = pathopen(memberPath, WSTR("rb"));
2136 if (!member)
2137 barf("loadObj: can't read `%s'", path);
2138
2139 n = fread ( image, 1, memberSize, member );
2140 if (n != memberSize) {
2141 barf("loadArchive: error whilst reading `%s'", fileName);
2142 }
2143
2144 fclose(member);
2145 stgFree(memberPath);
2146 stgFree(pathCopy);
2147 }
2148 else
2149 #endif
2150 {
2151 n = fread ( image, 1, memberSize, f );
2152 if (n != memberSize) {
2153 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2154 }
2155 }
2156
2157 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
2158 "loadArchive(file)");
2159 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
2160 path, (int)thisFileNameSize, fileName);
2161
2162 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
2163 , misalignment);
2164
2165 stgFree(archiveMemberName);
2166
2167 if (0 == loadOc(oc)) {
2168 stgFree(fileName);
2169 fclose(f);
2170 return 0;
2171 } else {
2172 #if defined(OBJFORMAT_PEi386)
2173 if (isImportLib)
2174 {
2175 findAndLoadImportLibrary(oc);
2176 stgFree(oc);
2177 oc = NULL;
2178 break;
2179 } else {
2180 #endif
2181 oc->next = objects;
2182 objects = oc;
2183 #if defined(OBJFORMAT_PEi386)
2184 }
2185 #endif
2186 }
2187 }
2188 else if (isGnuIndex) {
2189 if (gnuFileIndex != NULL) {
2190 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
2191 }
2192 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
2193 #if RTS_LINKER_USE_MMAP
2194 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
2195 #else
2196 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
2197 #endif
2198 n = fread ( gnuFileIndex, 1, memberSize, f );
2199 if (n != memberSize) {
2200 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2201 }
2202 gnuFileIndex[memberSize] = '/';
2203 gnuFileIndexSize = memberSize;
2204 }
2205 else if (isImportLib) {
2206 #if defined(OBJFORMAT_PEi386)
2207 if (checkAndLoadImportLibrary(path, fileName, f)) {
2208 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
2209 }
2210 else {
2211 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
2212 n = fseek(f, memberSize, SEEK_CUR);
2213 if (n != 0)
2214 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2215 memberSize, path);
2216 }
2217 #endif
2218 }
2219 else {
2220 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
2221 if (!isThin || thisFileNameSize == 0) {
2222 n = fseek(f, memberSize, SEEK_CUR);
2223 if (n != 0)
2224 barf("loadArchive: error whilst seeking by %d in `%s'",
2225 memberSize, path);
2226 }
2227 }
2228
2229 /* .ar files are 2-byte aligned */
2230 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
2231 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
2232 n = fread ( tmp, 1, 1, f );
2233 if (n != 1) {
2234 if (feof(f)) {
2235 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
2236 break;
2237 }
2238 else {
2239 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
2240 }
2241 }
2242 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
2243 }
2244 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
2245 }
2246
2247 fclose(f);
2248
2249 stgFree(fileName);
2250 if (gnuFileIndex != NULL) {
2251 #if RTS_LINKER_USE_MMAP
2252 munmap(gnuFileIndex, gnuFileIndexSize + 1);
2253 #else
2254 stgFree(gnuFileIndex);
2255 #endif
2256 }
2257
2258 if (RTS_LINKER_USE_MMAP)
2259 m32_allocator_flush();
2260
2261 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
2262 return 1;
2263 }
2264
2265 HsInt loadArchive (pathchar *path)
2266 {
2267 ACQUIRE_LOCK(&linker_mutex);
2268 HsInt r = loadArchive_(path);
2269 RELEASE_LOCK(&linker_mutex);
2270 return r;
2271 }
2272
2273 //
2274 // Load the object file into memory. This will not be its final resting place,
2275 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
2276 // address space, properly aligned.
2277 //
2278 static ObjectCode *
2279 preloadObjectFile (pathchar *path)
2280 {
2281 int fileSize;
2282 struct_stat st;
2283 int r;
2284 void *image;
2285 ObjectCode *oc;
2286 int misalignment = 0;
2287
2288 r = pathstat(path, &st);
2289 if (r == -1) {
2290 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
2291 return NULL;
2292 }
2293
2294 fileSize = st.st_size;
2295
2296 #if RTS_LINKER_USE_MMAP
2297 int fd;
2298
2299 /* On many architectures malloc'd memory isn't executable, so we need to use
2300 * mmap. */
2301
2302 #if defined(openbsd_HOST_OS)
2303 fd = open(path, O_RDONLY, S_IRUSR);
2304 #else
2305 fd = open(path, O_RDONLY);
2306 #endif
2307 if (fd == -1) {
2308 errorBelch("loadObj: can't open %s", path);
2309 return NULL;
2310 }
2311
2312 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
2313 MAP_PRIVATE, fd, 0);
2314 // not 32-bit yet, we'll remap later
2315 close(fd);
2316
2317 #else /* !RTS_LINKER_USE_MMAP */
2318 FILE *f;
2319
2320 /* load the image into memory */
2321 /* coverity[toctou] */
2322 f = pathopen(path, WSTR("rb"));
2323 if (!f) {
2324 errorBelch("loadObj: can't read `%" PATH_FMT "'", path);
2325 return NULL;
2326 }
2327
2328 # if defined(mingw32_HOST_OS)
2329
2330 // TODO: We would like to use allocateExec here, but allocateExec
2331 // cannot currently allocate blocks large enough.
2332 image = allocateImageAndTrampolines(path, "itself",
2333 #if defined(x86_64_HOST_ARCH)
2334 f,
2335 #endif
2336 fileSize);
2337 if (image == NULL) {
2338 fclose(f);
2339 return NULL;
2340 }
2341
2342 # elif defined(darwin_HOST_OS)
2343
2344 // In a Mach-O .o file, all sections can and will be misaligned
2345 // if the total size of the headers is not a multiple of the
2346 // desired alignment. This is fine for .o files that only serve
2347 // as input for the static linker, but it's not fine for us,
2348 // as SSE (used by gcc for floating point) and Altivec require
2349 // 16-byte alignment.
2350 // We calculate the correct alignment from the header before
2351 // reading the file, and then we misalign image on purpose so
2352 // that the actual sections end up aligned again.
2353 misalignment = machoGetMisalignment(f);
2354 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
2355 image += misalignment;
2356
2357 # else /* !defined(mingw32_HOST_OS) */
2358
2359 image = stgMallocBytes(fileSize, "loadObj(image)");
2360
2361 #endif
2362
2363 int n;
2364 n = fread ( image, 1, fileSize, f );
2365 fclose(f);
2366 if (n != fileSize) {
2367 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
2368 stgFree(image);
2369 return NULL;
2370 }
2371
2372 #endif /* RTS_LINKER_USE_MMAP */
2373
2374 oc = mkOc(path, image, fileSize, rtsTrue, NULL, misalignment);
2375
2376 return oc;
2377 }
2378
2379 /* -----------------------------------------------------------------------------
2380 * Load an obj (populate the global symbol table, but don't resolve yet)
2381 *
2382 * Returns: 1 if ok, 0 on error.
2383 */
2384 static HsInt loadObj_ (pathchar *path)
2385 {
2386 ObjectCode* oc;
2387 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
2388
2389 /* debugBelch("loadObj %s\n", path ); */
2390
2391 /* Check that we haven't already loaded this object.
2392 Ignore requests to load multiple times */
2393
2394 if (isAlreadyLoaded(path)) {
2395 IF_DEBUG(linker,
2396 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
2397 return 1; /* success */
2398 }
2399
2400 oc = preloadObjectFile(path);
2401 if (oc == NULL) return 0;
2402
2403 if (! loadOc(oc)) {
2404 // failed; free everything we've allocated
2405 removeOcSymbols(oc);
2406 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
2407 freeObjectCode(oc);
2408 return 0;
2409 }
2410
2411 oc->next = objects;
2412 objects = oc;
2413 return 1;
2414 }
2415
2416 HsInt loadObj (pathchar *path)
2417 {
2418 ACQUIRE_LOCK(&linker_mutex);
2419 HsInt r = loadObj_(path);
2420 RELEASE_LOCK(&linker_mutex);
2421 return r;
2422 }
2423
2424 static HsInt loadOc (ObjectCode* oc)
2425 {
2426 int r;
2427
2428 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
2429
2430 /* verify the in-memory image */
2431 # if defined(OBJFORMAT_ELF)
2432 r = ocVerifyImage_ELF ( oc );
2433 # elif defined(OBJFORMAT_PEi386)
2434 r = ocVerifyImage_PEi386 ( oc );
2435 # elif defined(OBJFORMAT_MACHO)
2436 r = ocVerifyImage_MachO ( oc );
2437 # else
2438 barf("loadObj: no verify method");
2439 # endif
2440 if (!r) {
2441 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
2442 return r;
2443 }
2444
2445 #if NEED_SYMBOL_EXTRAS
2446 # if defined(OBJFORMAT_MACHO)
2447 r = ocAllocateSymbolExtras_MachO ( oc );
2448 if (!r) {
2449 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
2450 return r;
2451 }
2452 # elif defined(OBJFORMAT_ELF)
2453 r = ocAllocateSymbolExtras_ELF ( oc );
2454 if (!r) {
2455 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
2456 return r;
2457 }
2458 # elif defined(OBJFORMAT_PEi386)
2459 ocAllocateSymbolExtras_PEi386 ( oc );
2460 # endif
2461 #endif
2462
2463 /* build the symbol list for this image */
2464 # if defined(OBJFORMAT_ELF)
2465 r = ocGetNames_ELF ( oc );
2466 # elif defined(OBJFORMAT_PEi386)
2467 r = ocGetNames_PEi386 ( oc );
2468 # elif defined(OBJFORMAT_MACHO)
2469 r = ocGetNames_MachO ( oc );
2470 # else
2471 barf("loadObj: no getNames method");
2472 # endif
2473 if (!r) {
2474 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
2475 return r;
2476 }
2477
2478 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
2479 setOcInitialStatus( oc );
2480 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
2481
2482 return 1;
2483 }
2484
2485 /* -----------------------------------------------------------------------------
2486 * try to load and initialize an ObjectCode into memory
2487 *
2488 * Returns: 1 if ok, 0 on error.
2489 */
2490 int ocTryLoad (ObjectCode* oc) {
2491 int r;
2492
2493 if (oc->status != OBJECT_NEEDED) {
2494 return 1;
2495 }
2496
2497 /* Check for duplicate symbols by looking into `symhash`.
2498 Duplicate symbols are any symbols which exist
2499 in different ObjectCodes that have both been loaded, or
2500 are to be loaded by this call.
2501
2502 This call is intended to have no side-effects when a non-duplicate
2503 symbol is re-inserted.
2504
2505 We set the Address to NULL since that is not used to distinguish
2506 symbols. Duplicate symbols are distinguished by name and oc.
2507 */
2508 int x;
2509 SymbolName* symbol;
2510 for (x = 0; x < oc->n_symbols; x++) {
2511 symbol = oc->symbols[x];
2512 if ( symbol
2513 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL, isSymbolWeak(oc, symbol), oc)) {
2514 return 0;
2515 }
2516 }
2517
2518 # if defined(OBJFORMAT_ELF)
2519 r = ocResolve_ELF ( oc );
2520 # elif defined(OBJFORMAT_PEi386)
2521 r = ocResolve_PEi386 ( oc );
2522 # elif defined(OBJFORMAT_MACHO)
2523 r = ocResolve_MachO ( oc );
2524 # else
2525 barf("ocTryLoad: not implemented on this platform");
2526 # endif
2527 if (!r) { return r; }
2528
2529 // run init/init_array/ctors/mod_init_func
2530
2531 loading_obj = oc; // tells foreignExportStablePtr what to do
2532 #if defined(OBJFORMAT_ELF)
2533 r = ocRunInit_ELF ( oc );
2534 #elif defined(OBJFORMAT_PEi386)
2535 r = ocRunInit_PEi386 ( oc );
2536 #elif defined(OBJFORMAT_MACHO)
2537 r = ocRunInit_MachO ( oc );
2538 #else
2539 barf("ocTryLoad: initializers not implemented on this platform");
2540 #endif
2541 loading_obj = NULL;
2542
2543 if (!r) { return r; }
2544
2545 oc->status = OBJECT_RESOLVED;
2546
2547 return 1;
2548 }
2549
2550 /* -----------------------------------------------------------------------------
2551 * resolve all the currently unlinked objects in memory
2552 *
2553 * Returns: 1 if ok, 0 on error.
2554 */
2555 static HsInt resolveObjs_ (void)
2556 {
2557 ObjectCode *oc;
2558 int r;
2559
2560 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
2561
2562 for (oc = objects; oc; oc = oc->next) {
2563 r = ocTryLoad(oc);
2564 if (!r)
2565 {
2566 return r;
2567 }
2568 }
2569
2570 #ifdef PROFILING
2571 // collect any new cost centres & CCSs that were defined during runInit
2572 initProfiling2();
2573 #endif
2574
2575 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
2576 return 1;
2577 }
2578
2579 HsInt resolveObjs (void)
2580 {
2581 ACQUIRE_LOCK(&linker_mutex);
2582 HsInt r = resolveObjs_();
2583 RELEASE_LOCK(&linker_mutex);
2584 return r;
2585 }
2586
2587 /* -----------------------------------------------------------------------------
2588 * delete an object from the pool
2589 */
2590 static HsInt unloadObj_ (pathchar *path, rtsBool just_purge)
2591 {
2592 ObjectCode *oc, *prev, *next;
2593 HsBool unloadedAnyObj = HS_BOOL_FALSE;
2594
2595 ASSERT(symhash != NULL);
2596 ASSERT(objects != NULL);
2597
2598 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
2599
2600 prev = NULL;
2601 for (oc = objects; oc; oc = next) {
2602 next = oc->next; // oc might be freed
2603
2604 if (!pathcmp(oc->fileName,path)) {
2605
2606 // these are both idempotent, so in just_purge mode we can
2607 // later call unloadObj() to really unload the object.
2608 removeOcSymbols(oc);
2609 freeOcStablePtrs(oc);
2610
2611 if (!just_purge) {
2612 if (prev == NULL) {
2613 objects = oc->next;
2614 } else {
2615 prev->next = oc->next;
2616 }
2617 ACQUIRE_LOCK(&linker_unloaded_mutex);
2618 oc->next = unloaded_objects;
2619 unloaded_objects = oc;
2620 oc->status = OBJECT_UNLOADED;
2621 RELEASE_LOCK(&linker_unloaded_mutex);
2622 // We do not own oc any more; it can be released at any time by
2623 // the GC in checkUnload().
2624 } else {
2625 prev = oc;
2626 }
2627
2628 /* This could be a member of an archive so continue
2629 * unloading other members. */
2630 unloadedAnyObj = HS_BOOL_TRUE;
2631 } else {
2632 prev = oc;
2633 }
2634 }
2635
2636 if (unloadedAnyObj) {
2637 return 1;
2638 }
2639 else {
2640 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
2641 return 0;
2642 }
2643 }
2644
2645 HsInt unloadObj (pathchar *path)
2646 {
2647 ACQUIRE_LOCK(&linker_mutex);
2648 HsInt r = unloadObj_(path, rtsFalse);
2649 RELEASE_LOCK(&linker_mutex);
2650 return r;
2651 }
2652
2653 HsInt purgeObj (pathchar *path)
2654 {
2655 ACQUIRE_LOCK(&linker_mutex);
2656 HsInt r = unloadObj_(path, rtsTrue);
2657 RELEASE_LOCK(&linker_mutex);
2658 return r;
2659 }
2660
2661 /* -----------------------------------------------------------------------------
2662 * Sanity checking. For each ObjectCode, maintain a list of address ranges
2663 * which may be prodded during relocation, and abort if we try and write
2664 * outside any of these.
2665 */
2666 static void
2667 addProddableBlock ( ObjectCode* oc, void* start, int size )
2668 {
2669 ProddableBlock* pb
2670 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
2671
2672 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
2673 ASSERT(size > 0);
2674 pb->start = start;
2675 pb->size = size;
2676 pb->next = oc->proddables;
2677 oc->proddables = pb;
2678 }
2679
2680 static void
2681 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
2682 {
2683 ProddableBlock* pb;
2684
2685 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
2686 char* s = (char*)(pb->start);
2687 char* e = s + pb->size;
2688 char* a = (char*)addr;
2689 if (a >= s && (a+size) <= e) return;
2690 }
2691 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
2692 }
2693
2694 static void freeProddableBlocks (ObjectCode *oc)
2695 {
2696 ProddableBlock *pb, *next;
2697
2698 for (pb = oc->proddables; pb != NULL; pb = next) {
2699 next = pb->next;
2700 stgFree(pb);
2701 }
2702 oc->proddables = NULL;
2703 }
2704
2705 /* -----------------------------------------------------------------------------
2706 * Section management.
2707 */
2708 static void
2709 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
2710 void* start, StgWord size, StgWord mapped_offset,
2711 void* mapped_start, StgWord mapped_size)
2712 {
2713 s->start = start; /* actual start of section in memory */
2714 s->size = size; /* actual size of section in memory */
2715 s->kind = kind;
2716 s->alloc = alloc;
2717 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
2718
2719 s->mapped_start = mapped_start; /* start of mmap() block */
2720 s->mapped_size = mapped_size; /* size of mmap() block */
2721
2722 IF_DEBUG(linker,
2723 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
2724 start, (void*)((StgWord)start + size),
2725 size, kind ));
2726 }
2727
2728
2729 /* --------------------------------------------------------------------------
2730 * Symbol Extras.
2731 * This is about allocating a small chunk of memory for every symbol in the
2732 * object file. We make sure that the SymboLExtras are always "in range" of
2733 * limited-range PC-relative instructions on various platforms by allocating
2734 * them right next to the object code itself.
2735 */
2736
2737 #if NEED_SYMBOL_EXTRAS
2738 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2739
2740 /*
2741 ocAllocateSymbolExtras
2742
2743 Allocate additional space at the end of the object file image to make room
2744 for jump islands (powerpc, x86_64, arm) and GOT entries (x86_64).
2745
2746 PowerPC relative branch instructions have a 24 bit displacement field.
2747 As PPC code is always 4-byte-aligned, this yields a +-32MB range.
2748 If a particular imported symbol is outside this range, we have to redirect
2749 the jump to a short piece of new code that just loads the 32bit absolute
2750 address and jumps there.
2751 On x86_64, PC-relative jumps and PC-relative accesses to the GOT are limited
2752 to 32 bits (+-2GB).
2753
2754 This function just allocates space for one SymbolExtra for every
2755 undefined symbol in the object file. The code for the jump islands is
2756 filled in by makeSymbolExtra below.
2757 */
2758
2759 static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
2760 {
2761 size_t n;
2762
2763 if (RTS_LINKER_USE_MMAP && USE_CONTIGUOUS_MMAP) {
2764 n = roundUpToPage(oc->fileSize);
2765
2766 /* Keep image and symbol_extras contiguous */
2767 void *new = mmapForLinker(n + (sizeof(SymbolExtra) * count),
2768 MAP_ANONYMOUS, -1, 0);
2769 if (new) {
2770 memcpy(new, oc->image, oc->fileSize);
2771 if (oc->imageMapped) {
2772 munmap(oc->image, n);
2773 }
2774 oc->image = new;
2775 oc->imageMapped = rtsTrue;
2776 oc->fileSize = n + (sizeof(SymbolExtra) * count);
2777 oc->symbol_extras = (SymbolExtra *) (oc->image + n);
2778 }
2779 else {
2780 oc->symbol_extras = NULL;
2781 return 0;
2782 }
2783 }
2784 else if( count > 0 ) {
2785 if (RTS_LINKER_USE_MMAP) {
2786 n = roundUpToPage(oc->fileSize);
2787
2788 oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8);
2789 if (oc->symbol_extras == NULL) return 0;
2790 }
2791 else {
2792 // round up to the nearest 4
2793 int aligned = (oc->fileSize + 3) & ~3;
2794 int misalignment = oc->misalignment;
2795
2796 oc->image -= misalignment;
2797 oc->image = stgReallocBytes( oc->image,
2798 misalignment +
2799 aligned + sizeof (SymbolExtra) * count,
2800 "ocAllocateSymbolExtras" );
2801 oc->image += misalignment;
2802
2803 oc->symbol_extras = (SymbolExtra *) (oc->image + aligned);
2804 }
2805 }
2806
2807 if (oc->symbol_extras != NULL) {
2808 memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
2809 }
2810
2811 oc->first_symbol_extra = first;
2812 oc->n_symbol_extras = count;
2813
2814 return 1;
2815 }
2816
2817 #endif
2818 #endif // NEED_SYMBOL_EXTRAS
2819
2820 #if defined(arm_HOST_ARCH)
2821
2822 static void
2823 ocFlushInstructionCache( ObjectCode *oc )
2824 {
2825 int i;
2826 // Object code
2827 for (i=0; i < oc->n_sections; i++) {
2828 Section *s = &oc->sections[i];
2829 // This is a bit too broad but we don't have any way to determine what
2830 // is certainly code
2831 if (s->kind == SECTIONKIND_CODE_OR_RODATA)
2832 __clear_cache(s->start, (void*) ((uintptr_t) s->start + s->size));
2833 }
2834
2835 // Jump islands
2836 // Note the (+1) to ensure that the last symbol extra is covered by the
2837 // flush.
2838 __clear_cache(oc->symbol_extras, &oc->symbol_extras[oc->n_symbol_extras+1]);
2839 }
2840
2841 #endif
2842
2843 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2844 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2845
2846 static SymbolExtra* makeSymbolExtra( ObjectCode* oc,
2847 unsigned long symbolNumber,
2848 unsigned long target )
2849 {
2850 SymbolExtra *extra;
2851
2852 ASSERT( symbolNumber >= oc->first_symbol_extra
2853 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2854
2855 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2856
2857 #ifdef powerpc_HOST_ARCH
2858 // lis r12, hi16(target)
2859 extra->jumpIsland.lis_r12 = 0x3d80;
2860 extra->jumpIsland.hi_addr = target >> 16;
2861
2862 // ori r12, r12, lo16(target)
2863 extra->jumpIsland.ori_r12_r12 = 0x618c;
2864 extra->jumpIsland.lo_addr = target & 0xffff;
2865
2866 // mtctr r12
2867 extra->jumpIsland.mtctr_r12 = 0x7d8903a6;
2868
2869 // bctr
2870 extra->jumpIsland.bctr = 0x4e800420;
2871 #endif
2872 #ifdef x86_64_HOST_ARCH
2873 // jmp *-14(%rip)
2874 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
2875 extra->addr = target;
2876 memcpy(extra->jumpIsland, jmp, 6);
2877 #endif
2878
2879 return extra;
2880 }
2881
2882 #endif
2883 #endif // defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2884
2885 #ifdef arm_HOST_ARCH
2886 static SymbolExtra* makeArmSymbolExtra( ObjectCode* oc,
2887 unsigned long symbolNumber,
2888 unsigned long target,
2889 int fromThumb,
2890 int toThumb )
2891 {
2892 SymbolExtra *extra;
2893
2894 ASSERT( symbolNumber >= oc->first_symbol_extra
2895 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2896
2897 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2898
2899 // Make sure instruction mode bit is set properly
2900 if (toThumb)
2901 target |= 1;
2902 else
2903 target &= ~1;
2904
2905 if (!fromThumb) {
2906 // In ARM encoding:
2907 // movw r12, #0
2908 // movt r12, #0
2909 // bx r12
2910 uint32_t code[] = { 0xe300c000, 0xe340c000, 0xe12fff1c };
2911
2912 // Patch lower half-word into movw
2913 code[0] |= ((target>>12) & 0xf) << 16;
2914 code[0] |= target & 0xfff;
2915 // Patch upper half-word into movt
2916 target >>= 16;
2917 code[1] |= ((target>>12) & 0xf) << 16;
2918 code[1] |= target & 0xfff;
2919
2920 memcpy(extra->jumpIsland, code, 12);
2921
2922 } else {
2923 // In Thumb encoding:
2924 // movw r12, #0
2925 // movt r12, #0
2926 // bx r12
2927 uint16_t code[] = { 0xf240, 0x0c00,
2928 0xf2c0, 0x0c00,
2929 0x4760 };
2930
2931 // Patch lower half-word into movw
2932 code[0] |= (target>>12) & 0xf;
2933 code[0] |= ((target>>11) & 0x1) << 10;
2934 code[1] |= ((target>>8) & 0x7) << 12;
2935 code[1] |= target & 0xff;
2936 // Patch upper half-word into movt
2937 target >>= 16;
2938 code[2] |= (target>>12) & 0xf;
2939 code[2] |= ((target>>11) & 0x1) << 10;
2940 code[3] |= ((target>>8) & 0x7) << 12;
2941 code[3] |= target & 0xff;
2942
2943 memcpy(extra->jumpIsland, code, 10);
2944 }
2945
2946 return extra;
2947 }
2948 #endif // arm_HOST_ARCH
2949
2950 /* --------------------------------------------------------------------------
2951 * PowerPC specifics (instruction cache flushing)
2952 * ------------------------------------------------------------------------*/
2953
2954 #ifdef powerpc_HOST_ARCH
2955 /*
2956 ocFlushInstructionCache
2957
2958 Flush the data & instruction caches.
2959 Because the PPC has split data/instruction caches, we have to
2960 do that whenever we modify code at runtime.
2961 */
2962
2963 static void
2964 ocFlushInstructionCacheFrom(void* begin, size_t length)
2965 {
2966 size_t n = (length + 3) / 4;
2967 unsigned long* p = begin;
2968
2969 while (n--)
2970 {
2971 __asm__ volatile ( "dcbf 0,%0\n\t"
2972 "sync\n\t"
2973 "icbi 0,%0"
2974 :
2975 : "r" (p)
2976 );
2977 p++;
2978 }
2979 __asm__ volatile ( "sync\n\t"
2980 "isync"
2981 );
2982 }
2983
2984 static void
2985 ocFlushInstructionCache( ObjectCode *oc )
2986 {
2987 /* The main object code */
2988 ocFlushInstructionCacheFrom(oc->image + oc->misalignment, oc->fileSize);
2989
2990 /* Jump Islands */
2991 ocFlushInstructionCacheFrom(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
2992 }
2993 #endif /* powerpc_HOST_ARCH */
2994
2995
2996 /* --------------------------------------------------------------------------
2997 * PEi386(+) specifics (Win32 targets)
2998 * ------------------------------------------------------------------------*/
2999
3000 /* The information for this linker comes from
3001 Microsoft Portable Executable
3002 and Common Object File Format Specification
3003 revision 8.3 February 2013
3004
3005 It can be found online at:
3006
3007 https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx
3008
3009 Things move, so if that fails, try searching for it via
3010
3011 http://www.google.com/search?q=PE+COFF+specification
3012
3013 The ultimate reference for the PE format is the Winnt.h
3014 header file that comes with the Platform SDKs; as always,
3015 implementations will drift wrt their documentation.
3016
3017 A good background article on the PE format is Matt Pietrek's
3018 March 1994 article in Microsoft System Journal (MSJ)
3019 (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
3020 Win32 Portable Executable File Format." The info in there
3021 has recently been updated in a two part article in
3022 MSDN magazine, issues Feb and March 2002,
3023 "Inside Windows: An In-Depth Look into the Win32 Portable
3024 Executable File Format"
3025
3026 John Levine's book "Linkers and Loaders" contains useful
3027 info on PE too.
3028
3029 The PE specification doesn't specify how to do the actual
3030 relocations. For this reason, and because both PE and ELF are
3031 based on COFF, the relocations for the PEi386+ code is based on
3032 the ELF relocations for the equivalent relocation type.
3033
3034 The ELF ABI can be found at
3035
3036 http://www.x86-64.org/documentation/abi.pdf
3037
3038 The current code is based on version 0.99.6 - October 2013
3039 */
3040
3041
3042 #if defined(OBJFORMAT_PEi386)
3043
3044 static int verifyCOFFHeader ( COFF_header *hdr, pathchar *filename);
3045
3046 /* We assume file pointer is right at the
3047 beginning of COFF object.
3048 */
3049 static char *
3050 allocateImageAndTrampolines (
3051 pathchar* arch_name, char* member_name,
3052 #if defined(x86_64_HOST_ARCH)
3053 FILE* f,
3054 #endif
3055 int size )
3056 {
3057 char* image;
3058 #if defined(x86_64_HOST_ARCH)
3059 /* PeCoff contains number of symbols right in it's header, so
3060 we can reserve the room for symbolExtras right here. */
3061 COFF_header hdr;
3062 size_t n;
3063
3064 n = fread ( &hdr, 1, sizeof_COFF_header, f );
3065 if (n != sizeof( COFF_header )) {
3066 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%S'",
3067 member_name, arch_name);
3068 return NULL;
3069 }
3070 fseek( f, -sizeof_COFF_header, SEEK_CUR );
3071
3072 if (!verifyCOFFHeader(&hdr, arch_name)) {
3073 return 0;
3074 }
3075
3076 /* We get back 8-byte aligned memory (is that guaranteed?), but
3077 the offsets to the sections within the file are all 4 mod 8
3078 (is that guaranteed?). We therefore need to offset the image
3079 by 4, so that all the pointers are 8-byte aligned, so that
3080 pointer tagging works. */
3081 /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
3082 which equals to 4 for 64-bit case and 0 for 32-bit case. */
3083 /* We allocate trampolines area for all symbols right behind
3084 image data, aligned on 8. */
3085 size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
3086 + hdr.NumberOfSymbols * sizeof(SymbolExtra);
3087 #endif
3088 image = VirtualAlloc(NULL, size,
3089 MEM_RESERVE | MEM_COMMIT,
3090 PAGE_EXECUTE_READWRITE);
3091
3092 if (image == NULL) {
3093 errorBelch("%" PATH_FMT ": failed to allocate memory for image for %s",
3094 arch_name, member_name);
3095 return NULL;
3096 }
3097
3098 return image + PEi386_IMAGE_OFFSET;
3099 }
3100
3101 static int findAndLoadImportLibrary(ObjectCode* oc)
3102 {
3103 int i;
3104
3105 COFF_header* hdr;
3106 COFF_section* sectab;
3107 COFF_symbol* symtab;
3108 UChar* strtab;
3109
3110 hdr = (COFF_header*)(oc->image);
3111 sectab = (COFF_section*)(
3112 ((UChar*)(oc->image))
3113 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3114 );
3115
3116 symtab = (COFF_symbol*)(
3117 ((UChar*)(oc->image))
3118 + hdr->PointerToSymbolTable
3119 );
3120
3121 strtab = ((UChar*)symtab)
3122 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3123
3124 for (i = 0; i < oc->n_sections; i++)
3125 {
3126 COFF_section* sectab_i
3127 = (COFF_section*)myindex(sizeof_COFF_section, sectab, i);
3128
3129 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3130
3131 // Find the first entry containing a valid .idata$7 section.
3132 if (strcmp(secname, ".idata$7") == 0) {
3133 /* First load the containing DLL if not loaded. */
3134 Section section = oc->sections[i];
3135
3136 pathchar* dirName = stgMallocBytes(pathsize * pathlen(oc->fileName), "findAndLoadImportLibrary(oc)");
3137 pathsplit(oc->fileName, NULL, 0, dirName, pathsize * pathlen(oc->fileName), NULL, 0, NULL, 0);
3138 HsPtr token = addLibrarySearchPath(dirName);
3139 char* dllName = (char*)section.start;
3140
3141 if (strlen(dllName) == 0 || dllName[0] == ' ')
3142 {
3143 continue;
3144 }
3145
3146 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand '%ls' => `%s'\n", oc->fileName, dllName));
3147
3148 pathchar* dll = mkPath(dllName);
3149 removeLibrarySearchPath(token);
3150
3151 const char* result = addDLL(dll);
3152 stgFree(dll);
3153
3154 if (result != NULL) {
3155 errorBelch("Could not load `%s'. Reason: %s\n", (char*)dllName, result);
3156 return 0;
3157 }
3158
3159 break;
3160 }
3161
3162 stgFree(secname);
3163 }
3164
3165 return 1;
3166 }
3167
3168 static int checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f)
3169 {
3170 char* image;
3171 static HsBool load_dll_warn = HS_BOOL_FALSE;
3172
3173 if (load_dll_warn) { return 0; }
3174
3175 /* Based on Import Library specification. PE Spec section 7.1 */
3176
3177 COFF_import_header hdr;
3178 size_t n;
3179
3180 n = fread(&hdr, 1, sizeof_COFF_import_Header, f);
3181 if (n != sizeof(COFF_header)) {
3182 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%" PATH_FMT "'\n",
3183 member_name, arch_name);
3184 return 0;
3185 }
3186
3187 if (hdr.Sig1 != 0x0 || hdr.Sig2 != 0xFFFF) {
3188 fseek(f, -sizeof_COFF_import_Header, SEEK_CUR);
3189 IF_DEBUG(linker, debugBelch("loadArchive: Object `%s` is not an import lib. Skipping...\n", member_name));
3190 return 0;
3191 }
3192
3193 IF_DEBUG(linker, debugBelch("loadArchive: reading %d bytes at %ld\n", hdr.SizeOfData, ftell(f)));
3194
3195 image = malloc(hdr.SizeOfData);
3196 n = fread(image, 1, hdr.SizeOfData, f);
3197 if (n != hdr.SizeOfData) {
3198 errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n",
3199 member_name, arch_name);
3200 }
3201
3202 char* symbol = strtok(image, "\0");
3203 int symLen = strlen(symbol) + 1;
3204 int nameLen = n - symLen;
3205 char* dllName = malloc(sizeof(char) * nameLen);
3206 dllName = strncpy(dllName, image + symLen, nameLen);
3207 pathchar* dll = malloc(sizeof(wchar_t) * nameLen);
3208 mbstowcs(dll, dllName, nameLen);
3209 free(dllName);
3210
3211 IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%ls'\n", symbol, dll));
3212 const char* result = addDLL(dll);
3213
3214 free(image);
3215
3216 if (result != NULL) {
3217 errorBelch("Could not load `%ls'. Reason: %s\n", dll, result);
3218 load_dll_warn = HS_BOOL_TRUE;
3219
3220 free(dll);
3221 fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
3222 return 0;
3223 }
3224
3225 free(dll);
3226 return 1;
3227 }
3228
3229 /* We use myindex to calculate array addresses, rather than
3230 simply doing the normal subscript thing. That's because
3231 some of the above structs have sizes which are not
3232 a whole number of words. GCC rounds their sizes up to a
3233 whole number of words, which means that the address calcs
3234 arising from using normal C indexing or pointer arithmetic
3235 are just plain wrong. Sigh.
3236 */
3237 static UChar *
3238 myindex ( int scale, void* base, int index )
3239 {
3240 return
3241 ((UChar*)base) + scale * index;
3242 }
3243
3244
3245 static void
3246 printName ( UChar* name, UChar* strtab )
3247 {
3248 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3249 UInt32 strtab_offset = * (UInt32*)(name+4);
3250 debugBelch("%s", strtab + strtab_offset );
3251 } else {
3252 int i;
3253 for (i = 0; i < 8; i++) {
3254 if (name[i] == 0) break;
3255 debugBelch("%c", name[i] );
3256 }
3257 }
3258 }
3259
3260
3261 static void
3262 copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
3263 {
3264 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3265 UInt32 strtab_offset = * (UInt32*)(name+4);
3266 strncpy ( (char*)dst, (char*)strtab+strtab_offset, dstSize );
3267 dst[dstSize-1] = 0;
3268 } else {
3269 int i = 0;
3270 while (1) {
3271 if (i >= 8) break;
3272 if (name[i] == 0) break;
3273 dst[i] = name[i];
3274 i++;
3275 }
3276 dst[i] = 0;
3277 }
3278 }
3279
3280
3281 static UChar *
3282 cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
3283 {
3284 UChar* newstr;
3285 /* If the string is longer than 8 bytes, look in the
3286 string table for it -- this will be correctly zero terminated.
3287 */
3288 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3289 UInt32 strtab_offset = * (UInt32*)(name+4);
3290 return ((UChar*)strtab) + strtab_offset;
3291 }
3292 /* Otherwise, if shorter than 8 bytes, return the original,
3293 which by defn is correctly terminated.
3294 */
3295 if (name[7]==0) return name;
3296 /* The annoying case: 8 bytes. Copy into a temporary
3297 (XXX which is never freed ...)
3298 */
3299 newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
3300 ASSERT(newstr);
3301 strncpy((char*)newstr,(char*)name,8);
3302 newstr[8] = 0;
3303 return newstr;
3304 }
3305
3306 /* Getting the name of a section is mildly tricky, so we make a
3307 function for it. Sadly, in one case we have to copy the string
3308 (when it is exactly 8 bytes long there's no trailing '\0'), so for
3309 consistency we *always* copy the string; the caller must free it
3310 */
3311 static char *
3312 cstring_from_section_name (UChar* name, UChar* strtab)
3313 {
3314 char *newstr;
3315
3316 if (name[0]=='/') {
3317 int strtab_offset = strtol((char*)name+1,NULL,10);
3318 int len = strlen(((char*)strtab) + strtab_offset);
3319
3320 newstr = stgMallocBytes(len+1, "cstring_from_section_symbol_name");
3321 strcpy((char*)newstr, (char*)((UChar*)strtab) + strtab_offset);
3322 return newstr;
3323 }
3324 else
3325 {
3326 newstr = stgMallocBytes(9, "cstring_from_section_symbol_name");
3327 ASSERT(newstr);
3328 strncpy((char*)newstr,(char*)name,8);
3329 newstr[8] = 0;
3330 return newstr;
3331 }
3332 }
3333
3334 /* See Note [mingw-w64 name decoration scheme] */
3335 #ifndef x86_64_HOST_ARCH
3336 static void
3337 zapTrailingAtSign ( UChar* sym )
3338 {
3339 # define my_isdigit(c) ((c) >= '0' && (c) <= '9')
3340 int i, j;
3341 if (sym[0] == 0) return;
3342 i = 0;
3343 while (sym[i] != 0) i++;
3344 i--;
3345 j = i;
3346 while (j > 0 && my_isdigit(sym[j])) j--;
3347 if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
3348 # undef my_isdigit
3349 }
3350 #endif
3351
3352 /* See Note [mingw-w64 name decoration scheme] */
3353 #ifndef x86_64_HOST_ARCH
3354 #define STRIP_LEADING_UNDERSCORE 1
3355 #else
3356 #define STRIP_LEADING_UNDERSCORE 0
3357 #endif
3358
3359 /*
3360 Note [mingw-w64 name decoration scheme]
3361
3362 What's going on with name decoration? Well, original code
3363 have some crufty and ad-hocish paths related mostly to very old
3364 mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty
3365 uniform and MS-compatible decoration scheme across its tools and runtime.
3366
3367 The scheme is pretty straightforward: on 32 bit objects symbols are exported
3368 with underscore prepended (and @ + stack size suffix appended for stdcall
3369 functions), on 64 bits no underscore is prepended and no suffix is appended
3370 because we have no stdcall convention on 64 bits.
3371
3372 See #9218
3373 */
3374
3375 static SymbolAddr*
3376 lookupSymbolInDLLs ( UChar *lbl )
3377 {
3378 OpenedDLL* o_dll;
3379 SymbolAddr* sym;
3380
3381 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
3382 /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
3383
3384 sym = GetProcAddress(o_dll->instance, (char*)(lbl+STRIP_LEADING_UNDERSCORE));
3385 if (sym != NULL) {
3386 /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
3387 return sym;
3388 }
3389
3390 /* Ticket #2283.
3391 Long description: http://support.microsoft.com/kb/132044
3392 tl;dr:
3393 If C/C++ compiler sees __declspec(dllimport) ... foo ...
3394 it generates call *__imp_foo, and __imp_foo here has exactly
3395 the same semantics as in __imp_foo = GetProcAddress(..., "foo")
3396 */
3397 if (sym == NULL && strncmp ((const char*)lbl, "__imp_", 6) == 0) {
3398 sym = GetProcAddress(o_dll->instance, (char*)(lbl+6+STRIP_LEADING_UNDERSCORE));
3399 if (sym != NULL) {
3400 IndirectAddr* ret;
3401 ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
3402 ret->addr = sym;
3403 ret->next = indirects;
3404 indirects = ret;
3405 IF_DEBUG(linker,
3406 debugBelch("warning: %s from %S is linked instead of %s\n",
3407 (char*)(lbl+6+STRIP_LEADING_UNDERSCORE), o_dll->name, (char*)lbl));
3408 return (void*) & ret->addr;
3409 }
3410 }
3411
3412 sym = GetProcAddress(o_dll->instance, (char*)lbl);
3413 if (sym != NULL) {
3414 /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
3415 return sym;
3416 }
3417 }
3418 return NULL;
3419 }
3420
3421 static int
3422 verifyCOFFHeader (COFF_header *hdr, pathchar *fileName)
3423 {
3424 #if defined(i386_HOST_ARCH)
3425 if (hdr->Machine != 0x14c) {
3426 errorBelch("%" PATH_FMT ": Not x86 PEi386", fileName);
3427 return 0;
3428 }
3429 #elif defined(x86_64_HOST_ARCH)
3430 if (hdr->Machine != 0x8664) {
3431 errorBelch("%" PATH_FMT ": Not x86_64 PEi386", fileName);
3432 return 0;
3433 }
3434 #else
3435 errorBelch("PEi386 not supported on this arch");
3436 #endif
3437
3438 if (hdr->SizeOfOptionalHeader != 0) {
3439 errorBelch("%" PATH_FMT ": PEi386 with nonempty optional header",
3440 fileName);
3441 return 0;
3442 }
3443 if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
3444 (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
3445 (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
3446 (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
3447 errorBelch("%" PATH_FMT ": Not a PEi386 object file", fileName);
3448 return 0;
3449 }
3450 if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
3451 /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
3452 errorBelch("%" PATH_FMT ": Invalid PEi386 word size or endiannness: %d",
3453 fileName,
3454 (int)(hdr->Characteristics));
3455 return 0;
3456 }
3457 return 1;
3458 }
3459
3460 static int
3461 ocVerifyImage_PEi386 ( ObjectCode* oc )
3462 {
3463 int i;
3464 UInt32 j, noRelocs;
3465 COFF_header* hdr;
3466 COFF_section* sectab;
3467 COFF_symbol* symtab;
3468 UChar* strtab;
3469 /* debugBelch("\nLOADING %s\n", oc->fileName); */
3470 hdr = (COFF_header*)(oc->image);
3471 sectab = (COFF_section*) (
3472 ((UChar*)(oc->image))
3473 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3474 );
3475 symtab = (COFF_symbol*) (
3476 ((UChar*)(oc->image))
3477 + hdr->PointerToSymbolTable
3478 );
3479 strtab = ((UChar*)symtab)
3480 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3481
3482 if (!verifyCOFFHeader(hdr, oc->fileName)) {
3483 return 0;
3484 }
3485
3486 /* If the string table size is way crazy, this might indicate that
3487 there are more than 64k relocations, despite claims to the
3488 contrary. Hence this test. */
3489 /* debugBelch("strtab size %d\n", * (UInt32*)strtab); */
3490 #if 0
3491 if ( (*(UInt32*)strtab) > 600000 ) {
3492 /* Note that 600k has no special significance other than being
3493 big enough to handle the almost-2MB-sized lumps that
3494 constitute HSwin32*.o. */
3495 debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
3496 return 0;
3497 }
3498 #endif
3499
3500 /* .BSS Section is initialized in ocGetNames_PEi386
3501 but we need the Sections array initialized here already. */
3502 Section *sections;
3503 sections = (Section*)stgCallocBytes(
3504 sizeof(Section),
3505 hdr->NumberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */
3506 "ocVerifyImage_PEi386(sections)");
3507 oc->sections = sections;
3508 oc->n_sections = hdr->NumberOfSections + 1;
3509
3510 /* Initialize the Sections */
3511 for (i = 0; i < hdr->NumberOfSections; i++) {
3512 COFF_section* sectab_i
3513 = (COFF_section*)
3514 myindex(sizeof_COFF_section, sectab, i);
3515
3516 /* Calculate the start of the data section */
3517 sections[i].start = oc->image + sectab_i->PointerToRawData;
3518 }
3519
3520 /* No further verification after this point; only debug printing. */
3521 i = 0;
3522 IF_DEBUG(linker, i=1);
3523 if (i == 0) return 1;
3524
3525 debugBelch("sectab offset = %" FMT_SizeT "\n",
3526 ((UChar*)sectab) - ((UChar*)hdr) );
3527 debugBelch("symtab offset = %" FMT_SizeT "\n",
3528 ((UChar*)symtab) - ((UChar*)hdr) );
3529 debugBelch("strtab offset = %" FMT_SizeT "\n",
3530 ((UChar*)strtab) - ((UChar*)hdr) );
3531
3532 debugBelch("\n" );
3533 debugBelch( "Machine: 0x%x\n", (UInt32)(hdr->Machine) );
3534 debugBelch( "# sections: %d\n", (UInt32)(hdr->NumberOfSections) );
3535 debugBelch( "time/date: 0x%x\n", (UInt32)(hdr->TimeDateStamp) );
3536 debugBelch( "symtab offset: %d\n", (UInt32)(hdr->PointerToSymbolTable) );
3537 debugBelch( "# symbols: %d\n", (UInt32)(hdr->NumberOfSymbols) );
3538 debugBelch( "sz of opt hdr: %d\n", (UInt32)(hdr->SizeOfOptionalHeader) );
3539 debugBelch( "characteristics: 0x%x\n", (UInt32)(hdr->Characteristics) );
3540
3541 /* Print the section table. */
3542 debugBelch("\n" );
3543 for (i = 0; i < hdr->NumberOfSections; i++) {
3544 COFF_reloc* reltab;
3545 COFF_section* sectab_i
3546 = (COFF_section*)
3547 myindex ( sizeof_COFF_section, sectab, i );
3548 Section section = sections[i];
3549 debugBelch(
3550 "\n"
3551 "section %d\n"
3552 " name `",
3553 i
3554 );
3555 printName ( sectab_i->Name, strtab );
3556 debugBelch(
3557 "'\n"
3558 " vsize %d\n"
3559 " vaddr %d\n"
3560 " data sz %d\n"
3561 " data off 0x%p\n"
3562 " num rel %d\n"
3563 " off rel %d\n"
3564 " ptr raw 0x%x\n",
3565 sectab_i->VirtualSize,
3566 sectab_i->VirtualAddress,
3567 sectab_i->SizeOfRawData,
3568 section.start,
3569 sectab_i->NumberOfRelocations,
3570 sectab_i->PointerToRelocations,
3571 sectab_i->PointerToRawData
3572 );
3573 reltab = (COFF_reloc*) (
3574 ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
3575 );
3576
3577 if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
3578 /* If the relocation field (a short) has overflowed, the
3579 * real count can be found in the first reloc entry.
3580 *
3581 * See Section 4.1 (last para) of the PE spec (rev6.0).
3582 */
3583 COFF_reloc* rel = (COFF_reloc*)
3584 myindex ( sizeof_COFF_reloc, reltab, 0 );
3585 noRelocs = rel->VirtualAddress;
3586 j = 1;
3587 } else {
3588 noRelocs = sectab_i->NumberOfRelocations;
3589 j = 0;
3590 }
3591
3592 for (; j < noRelocs; j++) {
3593 COFF_symbol* sym;
3594 COFF_reloc* rel = (COFF_reloc*)
3595 myindex ( sizeof_COFF_reloc, reltab, j );
3596 debugBelch(
3597 " type 0x%-4x vaddr 0x%-8x name `",
3598 (UInt32)rel->Type,
3599 rel->VirtualAddress );
3600 sym = (COFF_symbol*)
3601 myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
3602 /* Hmm..mysterious looking offset - what's it for? SOF */
3603 printName ( sym->Name, strtab -10 );
3604 debugBelch("'\n" );
3605 }
3606
3607 debugBelch("\n" );
3608 }
3609 debugBelch("\n" );
3610 debugBelch("string table has size 0x%x\n", * (UInt32*)strtab );
3611 debugBelch("---START of string table---\n");
3612 for (i = 4; i < *(Int32*)strtab; i++) {
3613 if (strtab[i] == 0)
3614 debugBelch("\n"); else
3615 debugBelch("%c", strtab[i] );
3616 }
3617 debugBelch("--- END of string table---\n");
3618
3619 debugBelch("\n" );
3620 i = 0;
3621 while (1) {
3622 COFF_symbol* symtab_i;
3623 if (i >= (Int32)(hdr->NumberOfSymbols)) break;
3624 symtab_i = (COFF_symbol*)
3625 myindex ( sizeof_COFF_symbol, symtab, i );
3626 debugBelch(
3627 "symbol %d\n"
3628 " name `",
3629 i
3630 );
3631 printName ( symtab_i->Name, strtab );
3632 debugBelch(
3633 "'\n"
3634 " value 0x%x\n"
3635 " 1+sec# %d\n"
3636 " type 0x%x\n"
3637 " sclass 0x%x\n"
3638 " nAux %d\n",
3639 symtab_i->Value,
3640 (Int32)(symtab_i->SectionNumber),
3641 (UInt32)symtab_i->Type,
3642 (UInt32)symtab_i->StorageClass,
3643 (UInt32)symtab_i->NumberOfAuxSymbols
3644 );
3645 i += symtab_i->NumberOfAuxSymbols;
3646 i++;
3647 }
3648
3649 debugBelch("\n" );
3650 return 1;
3651 }
3652
3653
3654 static int
3655 ocGetNames_PEi386 ( ObjectCode* oc )
3656 {
3657 COFF_header* hdr;
3658 COFF_section* sectab;
3659 COFF_symbol* symtab;
3660 UChar* strtab;
3661
3662 UChar* sname;
3663 SymbolAddr* addr;
3664 int i;
3665
3666 hdr = (COFF_header*)(oc->image);
3667 sectab = (COFF_section*) (
3668 ((UChar*)(oc->image))
3669 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3670 );
3671 symtab = (COFF_symbol*) (
3672 ((UChar*)(oc->image))
3673 + hdr->PointerToSymbolTable
3674 );
3675 strtab = ((UChar*)(oc->image))
3676 + hdr->PointerToSymbolTable
3677 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3678
3679 /* Allocate space for any (local, anonymous) .bss sections. */
3680
3681 for (i = 0; i < hdr->NumberOfSections; i++) {
3682 UInt32 bss_sz;
3683 UChar* zspace;
3684 COFF_section* sectab_i
3685 = (COFF_section*)
3686 myindex ( sizeof_COFF_section, sectab, i );
3687
3688 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3689
3690 if (0 != strcmp(secname, ".bss")) {
3691 stgFree(secname);
3692 continue;
3693 }
3694
3695 stgFree(secname);
3696
3697 /* sof 10/05: the PE spec text isn't too clear regarding what
3698 * the SizeOfRawData field is supposed to hold for object
3699 * file sections containing just uninitialized data -- for executables,
3700 * it is supposed to be zero; unclear what it's supposed to be
3701 * for object files. However, VirtualSize is guaranteed to be
3702 * zero for object files, which definitely suggests that SizeOfRawData
3703 * will be non-zero (where else would the size of this .bss section be
3704 * stored?) Looking at the COFF_section info for incoming object files,
3705 * this certainly appears to be the case.
3706 *
3707 * => I suspect we've been incorrectly handling .bss sections in (relocatable)
3708 * object files up until now. This turned out to bite us with ghc-6.4.1's use
3709 * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
3710 * variable decls into the .bss section. (The specific function in Q which
3711 * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
3712 */
3713 if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
3714 /* This is a non-empty .bss section.
3715 Allocate zeroed space for it */
3716 bss_sz = sectab_i->VirtualSize;
3717 if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
3718 zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
3719 oc->sections[i].start = zspace;
3720 addProddableBlock(oc, zspace, bss_sz);
3721 /* debugBelch("BSS anon section at 0x%x\n", zspace); */
3722 }
3723
3724 /* Copy section information into the ObjectCode. */
3725
3726 for (i = 0; i < hdr->NumberOfSections; i++) {
3727 UChar* start;
3728 UChar* end;
3729 UInt32 sz;
3730
3731 /* By default consider all section as CODE or DATA, which means we want to load them. */
3732 SectionKind kind
3733 = SECTIONKIND_CODE_OR_RODATA;
3734 COFF_section* sectab_i
3735 = (COFF_section*)
3736 myindex ( sizeof_COFF_section, sectab, i );
3737 Section section = oc->sections[i];
3738
3739 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3740
3741 IF_DEBUG(linker, debugBelch("section name = %s\n", secname ));
3742
3743 /* The PE file section flag indicates whether the section contains code or data. */
3744 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE ||
3745 sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
3746 kind = SECTIONKIND_CODE_OR_RODATA;
3747
3748 /* Check next if it contains any uninitialized data */
3749 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_UNINITIALIZED_DATA)
3750 kind = SECTIONKIND_RWDATA;
3751
3752 /* Finally check if it can be discarded. This will also ignore .debug sections */
3753 if (sectab_i->Characteristics & MYIMAGE_SCN_MEM_DISCARDABLE ||
3754 sectab_i->Characteristics & MYIMAGE_SCN_LNK_REMOVE)
3755 kind = SECTIONKIND_OTHER;
3756
3757 if (0==strcmp(".ctors", (char*)secname))
3758 kind = SECTIONKIND_INIT_ARRAY;
3759
3760 ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->VirtualSize == 0);
3761 sz = sectab_i->SizeOfRawData;
3762 if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;
3763
3764 start = section.start;
3765 end = start + sz - 1;
3766
3767 if (kind != SECTIONKIND_OTHER && end >= start) {
3768 addSection(&oc->sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
3769 addProddableBlock(oc, start, sz);
3770 }
3771
3772 stgFree(secname);
3773 }
3774
3775 /* Copy exported symbols into the ObjectCode. */
3776
3777 oc->n_symbols = hdr->NumberOfSymbols;
3778 oc->symbols = stgCallocBytes(sizeof(SymbolName*), oc->n_symbols,
3779 "ocGetNames_PEi386(oc->symbols)");
3780
3781 /* Work out the size of the global BSS section */
3782 StgWord globalBssSize = 0;
3783 for (i=0; i < (int)hdr->NumberOfSymbols; i++) {
3784 COFF_symbol* symtab_i;
3785 symtab_i = (COFF_symbol*)
3786 myindex ( sizeof_COFF_symbol, symtab, i );
3787 if (symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3788 && symtab_i->Value > 0
3789 && symtab_i->StorageClass != MYIMAGE_SYM_CLASS_SECTION) {
3790 globalBssSize += symtab_i->Value;
3791 }
3792 i += symtab_i->NumberOfAuxSymbols;
3793 }
3794
3795 /* Allocate BSS space */
3796 SymbolAddr* bss = NULL;
3797 if (globalBssSize > 0) {
3798 bss = stgCallocBytes(1, globalBssSize,
3799 "ocGetNames_PEi386(non-anonymous bss)");
3800 addSection(&oc->sections[oc->n_sections-1],
3801 SECTIONKIND_RWDATA, SECTION_MALLOC,
3802 bss, globalBssSize, 0, 0, 0);
3803 IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
3804 addProddableBlock(oc, bss, globalBssSize);
3805 } else {
3806 addSection(&oc->sections[oc->n_sections-1],
3807 SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
3808 }
3809
3810 for (i = 0; i < oc->n_symbols; i++) {
3811 COFF_symbol* symtab_i;
3812 symtab_i = (COFF_symbol*)
3813 myindex ( sizeof_COFF_symbol, symtab, i );
3814
3815 addr = NULL;
3816 HsBool isWeak = HS_BOOL_FALSE;
3817 if ( symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED
3818 && symtab_i->SectionNumber > 0) {
3819 /* This symbol is global and defined, viz, exported */
3820 /* for MYIMAGE_SYMCLASS_EXTERNAL
3821 && !MYIMAGE_SYM_UNDEFINED,
3822 the address of the symbol is:
3823 address of relevant section + offset in section
3824 */
3825 COFF_section* sectabent
3826 = (COFF_section*) myindex ( sizeof_COFF_section,
3827 sectab,
3828 symtab_i->SectionNumber-1 );
3829 if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL
3830 || ( symtab_i->StorageClass == MYIMAGE_SYM_CLASS_STATIC
3831 && sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT)
3832 ) {
3833 addr = (void*)((size_t)oc->sections[symtab_i->SectionNumber-1].start
3834 + symtab_i->Value);
3835 if (sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT) {
3836 isWeak = HS_BOOL_TRUE;
3837 }
3838 }
3839 }
3840 else if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_WEAK_EXTERNAL) {
3841 isWeak = HS_BOOL_TRUE;
3842 }
3843 else if ( symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3844 && symtab_i->Value > 0) {
3845 /* This symbol isn't in any section at all, ie, global bss.
3846 Allocate zeroed space for it from the BSS section */
3847 addr = bss;
3848 bss = (SymbolAddr*)((StgWord)bss + (StgWord)symtab_i->Value);
3849 IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symtab_i->Value));
3850 }
3851
3852 sname = cstring_from_COFF_symbol_name(symtab_i->Name, strtab);
3853 if (addr != NULL || isWeak == HS_BOOL_TRUE) {
3854
3855 /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
3856 IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname);)
3857 ASSERT(i >= 0 && i < oc->n_symbols);
3858 /* cstring_from_COFF_symbol_name always succeeds. */
3859 oc->symbols[i] = (SymbolName*)sname;
3860 if (isWeak == HS_BOOL_TRUE) {
3861 setWeakSymbol(oc, sname);
3862 }
3863
3864 if (! ghciInsertSymbolTable(oc->fileName, symhash, (SymbolName*)sname, addr,
3865 isWeak, oc)) {
3866 return 0;
3867 }
3868 } else {
3869 /* We're skipping the symbol, but if we ever load this
3870 object file we'll want to skip it then too. */
3871 oc->symbols[i] = NULL;
3872
3873 # if 0
3874 debugBelch(
3875 "IGNORING symbol %d\n"
3876 " name `",
3877 i
3878 );
3879 printName ( symtab_i->Name, strtab );
3880 debugBelch(
3881 "'\n"
3882 " value 0x%x\n"
3883 " 1+sec# %d\n"
3884 " type 0x%x\n"
3885 " sclass 0x%x\n"
3886 " nAux %d\n",
3887 symtab_i->Value,
3888 (Int32)(symtab_i->SectionNumber),
3889 (UInt32)symtab_i->Type,
3890 (UInt32)symtab_i->StorageClass,
3891 (UInt32)symtab_i->NumberOfAuxSymbols
3892 );
3893 # endif
3894 }
3895
3896 i += symtab_i->NumberOfAuxSymbols;
3897 }
3898
3899 return 1;
3900 }
3901
3902 #if defined(x86_64_HOST_ARCH)
3903
3904 /* We've already reserved a room for symbol extras in loadObj,
3905 * so simply set correct pointer here.
3906 */
3907 static int
3908 ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc )
3909 {
3910 oc->symbol_extras = (SymbolExtra*)(oc->image - PEi386_IMAGE_OFFSET
3911 + ((PEi386_IMAGE_OFFSET + oc->fileSize + 0x7) & ~0x7));
3912 oc->first_symbol_extra = 0;
3913 oc->n_symbol_extras = ((COFF_header*)oc->image)->NumberOfSymbols;
3914
3915 return 1;
3916 }
3917
3918 static size_t
3919 makeSymbolExtra_PEi386( ObjectCode* oc, size_t s, char* symbol )
3920 {
3921 unsigned int curr_thunk;
3922 SymbolExtra *extra;
3923
3924 curr_thunk = oc->first_symbol_extra;
3925 if (curr_thunk >= oc->n_symbol_extras) {
3926 barf("Can't allocate thunk for %s", symbol);