Revert "Fix 32-bit build failures"
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31
32 #if !defined(mingw32_HOST_OS)
33 #include "posix/Signals.h"
34 #endif
35
36 // get protos for is*()
37 #include <ctype.h>
38
39 #ifdef HAVE_SYS_TYPES_H
40 #include <sys/types.h>
41 #endif
42
43 #include <inttypes.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <stdio.h>
47 #include <assert.h>
48 #include <libgen.h>
49
50 #ifdef HAVE_SYS_STAT_H
51 #include <sys/stat.h>
52 #endif
53
54 #if defined(HAVE_DLFCN_H)
55 #include <dlfcn.h>
56 #endif
57
58 /* PowerPC and ARM have relative branch instructions with only 24 bit
59 * displacements and therefore need jump islands contiguous with each object
60 * code module.
61 */
62 #if defined(powerpc_HOST_ARCH)
63 #define SHORT_REL_BRANCH 1
64 #endif
65 #if defined(arm_HOST_ARCH)
66 #define SHORT_REL_BRANCH 1
67 #endif
68
69 #if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
70 #define USE_CONTIGUOUS_MMAP 1
71 #else
72 #define USE_CONTIGUOUS_MMAP 0
73 #endif
74
75 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
76 # define OBJFORMAT_ELF
77 # include <regex.h> // regex is already used by dlopen() so this is OK
78 // to use here without requiring an additional lib
79 #elif defined (mingw32_HOST_OS)
80 # define OBJFORMAT_PEi386
81 # include <windows.h>
82 # include <shfolder.h> /* SHGetFolderPathW */
83 # include <math.h>
84 # include <wchar.h>
85 #elif defined(darwin_HOST_OS)
86 # define OBJFORMAT_MACHO
87 # include <regex.h>
88 # include <mach/machine.h>
89 # include <mach-o/fat.h>
90 # include <mach-o/loader.h>
91 # include <mach-o/nlist.h>
92 # include <mach-o/reloc.h>
93 #if defined(powerpc_HOST_ARCH)
94 # include <mach-o/ppc/reloc.h>
95 #endif
96 #if defined(x86_64_HOST_ARCH)
97 # include <mach-o/x86_64/reloc.h>
98 #endif
99 #endif
100
101 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
102 #define ALWAYS_PIC
103 #endif
104
105 #if defined(dragonfly_HOST_OS)
106 #include <sys/tls.h>
107 #endif
108
109 /* SymbolInfo tracks a symbol's address, the object code from which
110 it originated, and whether or not it's weak.
111
112 RtsSymbolInfo is used to track the state of the symbols currently
113 loaded or to be loaded by the Linker.
114
115 Where the information in the `ObjectCode` is used to track the
116 original status of the symbol inside the `ObjectCode`.
117
118 A weak symbol that has been used will still be marked as weak
119 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
120 */
121 typedef struct _RtsSymbolInfo {
122 SymbolAddr* value;
123 ObjectCode *owner;
124 HsBool weak;
125 } RtsSymbolInfo;
126
127 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
128 This hashtable will contain information on all symbols
129 that we know of, however the .o they are in may not be loaded.
130
131 Until the ObjectCode the symbol belongs to is actually
132 loaded this symbol may be replaced. So do not rely on
133 addresses of unloaded symbols.
134
135 Note [runtime-linker-phases]
136 --------------------------------------
137 Broadly the behavior of the runtime linker can be
138 split into the following four phases:
139
140 - Indexing (e.g. ocVerifyImage and ocGetNames)
141 - Initialization (e.g. ocResolve and ocRunInit)
142 - Resolve (e.g. resolveObjs())
143 - Lookup (e.g. lookupSymbol)
144
145 This is to enable lazy loading of symbols. Eager loading is problematic
146 as it means that all symbols must be available, even those which we will
147 never use. This is especially painful of Windows, where the number of
148 libraries required to link things like mingwex grows to be quite high.
149
150 We proceed through these stages as follows,
151
152 * During Indexing we verify and open the ObjectCode and
153 perform a quick scan/indexing of the ObjectCode. All the work
154 required to actually load the ObjectCode is done.
155
156 All symbols from the ObjectCode is also inserted into
157 `symhash`, where possible duplicates are handled via the semantics
158 described in `ghciInsertSymbolTable`.
159
160 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
161 depending on whether they are an archive members or not.
162
163 * During initialization we load ObjectCode, perform relocations, execute
164 static constructors etc. This phase may trigger other ObjectCodes to
165 be loaded because of the calls to lookupSymbol.
166
167 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
168 previous status was `OBJECT_LOADED`.
169
170 * During resolve we attempt to resolve all the symbols needed for the
171 initial link. This essentially means, that for any ObjectCode given
172 directly to the command-line we perform lookupSymbols on the required
173 symbols. lookupSymbols may trigger the loading of additional ObjectCode
174 if required.
175
176 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
177 the previous status was `OBJECT_NEEDED`.
178
179 * Lookup symbols is used to lookup any symbols required, both during initial
180 link and during statement and expression compilations in the REPL.
181 Declaration of e.g. an foreign import, will eventually call lookupSymbol
182 which will either fail (symbol unknown) or succeed (and possibly triggered a
183 load).
184
185 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
186
187 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
188 by calling unloadObj and starting over.
189 When a new declaration or statement is performed ultimately lookupSymbol is called
190 without doing a re-link.
191
192 The goal of these different phases is to allow the linker to be able to perform
193 "lazy loading" of ObjectCode. The reason for this is that we want to only link
194 in symbols that are actually required for the link. This reduces:
195
196 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
197 then we don't need to load libB. This means the dependency chain for libraries
198 such as mingw32 and mingwex can be broken down.
199
200 2) The number of duplicate symbols, since now only symbols that are
201 true duplicates will display the error.
202 */
203 static /*Str*/HashTable *symhash;
204
205 /* List of currently loaded objects */
206 ObjectCode *objects = NULL; /* initially empty */
207
208 /* List of objects that have been unloaded via unloadObj(), but are waiting
209 to be actually freed via checkUnload() */
210 ObjectCode *unloaded_objects = NULL; /* initially empty */
211
212 #ifdef THREADED_RTS
213 /* This protects all the Linker's global state except unloaded_objects */
214 Mutex linker_mutex;
215 /*
216 * This protects unloaded_objects. We have a separate mutex for this, because
217 * the GC needs to access unloaded_objects in checkUnload, while the linker only
218 * needs to access unloaded_objects in unloadObj(), so this allows most linker
219 * operations proceed concurrently with the GC.
220 */
221 Mutex linker_unloaded_mutex;
222 #endif
223
224 /* Type of the initializer */
225 typedef void (*init_t) (int argc, char **argv, char **env);
226
227 static HsInt isAlreadyLoaded( pathchar *path );
228 static HsInt loadOc( ObjectCode* oc );
229 static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
230 rtsBool mapped, char *archiveMemberName,
231 int misalignment
232 );
233
234 // Use wchar_t for pathnames on Windows (#5697)
235 #if defined(mingw32_HOST_OS)
236 #define pathcmp wcscmp
237 #define pathlen wcslen
238 #define pathopen _wfopen
239 #define pathstat _wstat
240 #define struct_stat struct _stat
241 #define open wopen
242 #define WSTR(s) L##s
243 #define pathprintf swprintf
244 #define pathsize sizeof(wchar_t)
245 #else
246 #define pathcmp strcmp
247 #define pathlen strlen
248 #define pathopen fopen
249 #define pathstat stat
250 #define struct_stat struct stat
251 #define WSTR(s) s
252 #define pathprintf snprintf
253 #define pathsize sizeof(char)
254 #endif
255
256 static pathchar* pathdup(pathchar *path)
257 {
258 pathchar *ret;
259 #if defined(mingw32_HOST_OS)
260 ret = wcsdup(path);
261 #else
262 /* sigh, strdup() isn't a POSIX function, so do it the long way */
263 ret = stgMallocBytes( strlen(path)+1, "pathdup" );
264 strcpy(ret, path);
265 #endif
266 return ret;
267 }
268
269 static pathchar* pathdir(pathchar *path)
270 {
271 pathchar *ret;
272 #if defined(mingw32_HOST_OS)
273 pathchar *drive, *dirName;
274 size_t memberLen = pathlen(path) + 1;
275 dirName = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
276 ret = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
277 drive = stgMallocBytes(pathsize * _MAX_DRIVE, "pathdir(path)");
278 _wsplitpath_s(path, drive, _MAX_DRIVE, dirName, pathsize * pathlen(path), NULL, 0, NULL, 0);
279 pathprintf(ret, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), drive, dirName);
280 stgFree(drive);
281 stgFree(dirName);
282 #else
283 pathchar* dirName = dirname(path);
284 size_t memberLen = pathlen(dirName);
285 ret = stgMallocBytes(pathsize * (memberLen + 2), "pathdir(path)");
286 strcpy(ret, dirName);
287 ret[memberLen ] = '/';
288 ret[memberLen+1] = '\0';
289 #endif
290 return ret;
291 }
292
293 static pathchar* mkPath(char* path)
294 {
295 #if defined(mingw32_HOST_OS)
296 size_t required = mbstowcs(NULL, path, 0);
297 pathchar *ret = stgMallocBytes(sizeof(pathchar) * (required + 1), "mkPath");
298 if (mbstowcs(ret, path, required) == (size_t)-1)
299 {
300 barf("mkPath failed converting char* to wchar_t*");
301 }
302 ret[required] = '\0';
303 return ret;
304 #else
305 return pathdup(path);
306 #endif
307 }
308
309 /* Generic wrapper function to try and Resolve and RunInit oc files */
310 int ocTryLoad( ObjectCode* oc );
311
312 #if defined(OBJFORMAT_ELF)
313 static int ocVerifyImage_ELF ( ObjectCode* oc );
314 static int ocGetNames_ELF ( ObjectCode* oc );
315 static int ocResolve_ELF ( ObjectCode* oc );
316 static int ocRunInit_ELF ( ObjectCode* oc );
317 #if NEED_SYMBOL_EXTRAS
318 static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
319 #endif
320 #elif defined(OBJFORMAT_PEi386)
321 static int ocVerifyImage_PEi386 ( ObjectCode* oc );
322 static int ocGetNames_PEi386 ( ObjectCode* oc );
323 static int ocResolve_PEi386 ( ObjectCode* oc );
324 static int ocRunInit_PEi386 ( ObjectCode* oc );
325 static void *lookupSymbolInDLLs ( unsigned char *lbl );
326 /* See Note [mingw-w64 name decoration scheme] */
327 #ifndef x86_64_HOST_ARCH
328 static void zapTrailingAtSign ( unsigned char *sym );
329 #endif
330
331 #if defined(x86_64_HOST_ARCH)
332 #define USED_IF_x86_64_HOST_ARCH /* Nothing */
333 #else
334 #define USED_IF_x86_64_HOST_ARCH STG_UNUSED
335 #endif
336
337 static char *allocateImageAndTrampolines (
338 pathchar* arch_name, char* member_name,
339 FILE* f,
340 int size,
341 int isThin);
342 #if defined(x86_64_HOST_ARCH)
343 static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
344 static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
345 #define PEi386_IMAGE_OFFSET 4
346 #else
347 #define PEi386_IMAGE_OFFSET 0
348 #endif
349 #elif defined(OBJFORMAT_MACHO)
350 static int ocVerifyImage_MachO ( ObjectCode* oc );
351 static int ocGetNames_MachO ( ObjectCode* oc );
352 static int ocResolve_MachO ( ObjectCode* oc );
353 static int ocRunInit_MachO ( ObjectCode* oc );
354 static int machoGetMisalignment( FILE * );
355 #if NEED_SYMBOL_EXTRAS
356 static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
357 #endif
358 #ifdef powerpc_HOST_ARCH
359 static void machoInitSymbolsWithoutUnderscore( void );
360 #endif
361 #endif
362
363 #if defined(OBJFORMAT_PEi386)
364 /* string utility function */
365 static HsBool endsWithPath(pathchar* base, pathchar* str) {
366 int blen = pathlen(base);
367 int slen = pathlen(str);
368 return (blen >= slen) && (0 == pathcmp(base + blen - slen, str));
369 }
370
371 static int checkAndLoadImportLibrary(
372 pathchar* arch_name,
373 char* member_name,
374 FILE* f);
375
376 static int findAndLoadImportLibrary(
377 ObjectCode* oc
378 );
379
380 static UChar *myindex(
381 int scale,
382 void* base,
383 int index);
384 static UChar *cstring_from_COFF_symbol_name(
385 UChar* name,
386 UChar* strtab);
387 static char *cstring_from_section_name(
388 UChar* name,
389 UChar* strtab);
390
391
392 /* Add ld symbol for PE image base. */
393 #if defined(__GNUC__)
394 #define __ImageBase __MINGW_LSYMBOL(_image_base__)
395 #endif
396
397 /* Get the base of the module. */
398 /* This symbol is defined by ld. */
399 extern IMAGE_DOS_HEADER __ImageBase;
400 #define __image_base (void*)((HINSTANCE)&__ImageBase)
401
402 // MingW-w64 is missing these from the implementation. So we have to look them up
403 typedef DLL_DIRECTORY_COOKIE(WINAPI *LPAddDLLDirectory)(PCWSTR NewDirectory);
404 typedef WINBOOL(WINAPI *LPRemoveDLLDirectory)(DLL_DIRECTORY_COOKIE Cookie);
405 #endif /* OBJFORMAT_PEi386 */
406
407 static void freeProddableBlocks (ObjectCode *oc);
408
409 /* on x86_64 we have a problem with relocating symbol references in
410 * code that was compiled without -fPIC. By default, the small memory
411 * model is used, which assumes that symbol references can fit in a
412 * 32-bit slot. The system dynamic linker makes this work for
413 * references to shared libraries by either (a) allocating a jump
414 * table slot for code references, or (b) moving the symbol at load
415 * time (and copying its contents, if necessary) for data references.
416 *
417 * We unfortunately can't tell whether symbol references are to code
418 * or data. So for now we assume they are code (the vast majority
419 * are), and allocate jump-table slots. Unfortunately this will
420 * SILENTLY generate crashing code for data references. This hack is
421 * enabled by X86_64_ELF_NONPIC_HACK.
422 *
423 * One workaround is to use shared Haskell libraries. This is
424 * coming. Another workaround is to keep the static libraries but
425 * compile them with -fPIC, because that will generate PIC references
426 * to data which can be relocated. The PIC code is still too green to
427 * do this systematically, though.
428 *
429 * See bug #781
430 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
431 *
432 * Naming Scheme for Symbol Macros
433 *
434 * SymI_*: symbol is internal to the RTS. It resides in an object
435 * file/library that is statically.
436 * SymE_*: symbol is external to the RTS library. It might be linked
437 * dynamically.
438 *
439 * Sym*_HasProto : the symbol prototype is imported in an include file
440 * or defined explicitly
441 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
442 * default proto extern void sym(void);
443 */
444 #define X86_64_ELF_NONPIC_HACK 1
445
446 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
447 * small memory model on this architecture (see gcc docs,
448 * -mcmodel=small).
449 *
450 * MAP_32BIT not available on OpenBSD/amd64
451 */
452 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
453 #define TRY_MAP_32BIT MAP_32BIT
454 #else
455 #define TRY_MAP_32BIT 0
456 #endif
457
458 /*
459 Note [The ARM/Thumb Story]
460 ~~~~~~~~~~~~~~~~~~~~~~~~~~
461
462 Support for the ARM architecture is complicated by the fact that ARM has not
463 one but several instruction encodings. The two relevant ones here are the original
464 ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
465 of the instruction set.
466
467 How the CPU decodes a particular instruction is determined by a mode bit. This
468 mode bit is set on jump instructions, the value being determined by the low
469 bit of the target address: An odd address means the target is a procedure
470 encoded in the Thumb encoding whereas an even address means it's a traditional
471 ARM procedure (the actual address jumped to is even regardless of the encoding bit).
472
473 Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
474 is tricky. If the linker needs to link a call by an ARM object into Thumb code
475 (or vice-versa) it will produce a jump island. This, however, is incompatible with
476 GHC's tables-next-to-code. For this reason, it is critical that GHC emit
477 exclusively ARM or Thumb objects for all Haskell code.
478
479 We still do, however, need to worry about foreign code.
480 */
481
482 /*
483 * Due to the small memory model (see above), on x86_64 we have to map
484 * all our non-PIC object files into the low 2Gb of the address space
485 * (why 2Gb and not 4Gb? Because all addresses must be reachable
486 * using a 32-bit signed PC-relative offset). On Linux we can do this
487 * using the MAP_32BIT flag to mmap(), however on other OSs
488 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
489 * can't do this. So on these systems, we have to pick a base address
490 * in the low 2Gb of the address space and try to allocate memory from
491 * there.
492 *
493 * We pick a default address based on the OS, but also make this
494 * configurable via an RTS flag (+RTS -xm)
495 */
496 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
497
498 #if defined(MAP_32BIT)
499 // Try to use MAP_32BIT
500 #define MMAP_32BIT_BASE_DEFAULT 0
501 #else
502 // A guess: 1Gb.
503 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
504 #endif
505
506 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
507 #endif
508
509 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
510 ObjectCode *owner)
511 {
512 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
513 if (!pinfo || owner != pinfo->owner) return;
514 removeStrHashTable(table, key, NULL);
515 stgFree(pinfo);
516 }
517
518 /* -----------------------------------------------------------------------------
519 * Insert symbols into hash tables, checking for duplicates.
520 *
521 * Returns: 0 on failure, nonzero on success
522 */
523 /*
524 Note [weak-symbols-support]
525 -------------------------------------
526 While ghciInsertSymbolTable does implement extensive
527 logic for weak symbol support, weak symbols are not currently
528 fully supported by the RTS. This code is mostly here for COMDAT
529 support which uses the weak symbols support.
530
531 Linking weak symbols defined purely in C code with other C code
532 should also work, probably. Observing weak symbols in Haskell
533 won't.
534
535 Some test have been written for weak symbols but have been disabled
536 mostly because it's unsure how the weak symbols support should look.
537 See Trac #11223
538 */
539 static int ghciInsertSymbolTable(
540 pathchar* obj_name,
541 HashTable *table,
542 const SymbolName* key,
543 SymbolAddr* data,
544 HsBool weak,
545 ObjectCode *owner)
546 {
547 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
548 if (!pinfo) /* new entry */
549 {
550 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
551 pinfo->value = data;
552 pinfo->owner = owner;
553 pinfo->weak = weak;
554 insertStrHashTable(table, key, pinfo);
555 return 1;
556 }
557 else if (weak && data && pinfo->weak && !pinfo->value)
558 {
559 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
560 pinfo->value = data;
561 pinfo->owner = owner;
562 return 1;
563 }
564 else if (weak)
565 {
566 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
567 already know of another copy throw this one away.
568
569 or both weak symbols have a nonzero value. Keep the existing one.
570
571 This also preserves the semantics of linking against
572 the first symbol we find. */
573 }
574 else if (pinfo->weak && !weak) /* weak symbol is in the table */
575 {
576 /* override the weak definition with the non-weak one */
577 pinfo->value = data;
578 pinfo->owner = owner;
579 pinfo->weak = HS_BOOL_FALSE;
580 return 1;
581 }
582 else if ( pinfo->owner
583 && pinfo->owner->status != OBJECT_RESOLVED
584 && pinfo->owner->status != OBJECT_NEEDED)
585 {
586 /* If the other symbol hasn't been loaded or will be loaded and we want to
587 explicitly load the new one, we can just swap it out and load the one
588 that has been requested. If not, just keep the first one encountered.
589
590 Because the `symHash' table consists symbols we've also not loaded but
591 found during the initial scan this is safe to do. If however the existing
592 symbol has been loaded then it means we have a duplicate.
593
594 This is essentially emulating the behavior of a linker wherein it will always
595 link in object files that are .o file arguments, but only take object files
596 from archives as needed. */
597 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
598 pinfo->value = data;
599 pinfo->owner = owner;
600 pinfo->weak = weak;
601 }
602
603 return 1;
604 }
605 else if (pinfo->owner == owner)
606 {
607 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
608 return 1;
609 }
610 else if (owner && owner->status == OBJECT_LOADED)
611 {
612 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
613 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
614 call this function again to trigger the duplicate error. */
615 return 1;
616 }
617
618 pathchar* archiveName = NULL;
619 debugBelch(
620 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
621 " %s\n"
622 "whilst processing object file\n"
623 " %" PATH_FMT "\n"
624 "The symbol was previously defined in\n"
625 " %" PATH_FMT "\n"
626 "This could be caused by:\n"
627 " * Loading two different object files which export the same symbol\n"
628 " * Specifying the same object file twice on the GHCi command line\n"
629 " * An incorrect `package.conf' entry, causing some object to be\n"
630 " loaded twice.\n",
631 (char*)key,
632 obj_name,
633 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
634 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
635 : pinfo->owner->fileName
636 );
637
638 if (archiveName)
639 {
640 stgFree(archiveName);
641 archiveName = NULL;
642 }
643 return 0;
644 }
645
646 /* -----------------------------------------------------------------------------
647 * Looks up symbols into hash tables.
648 *
649 * Returns: 0 on failure and result is not set,
650 * nonzero on success and result set to nonzero pointer
651 */
652 static HsBool ghciLookupSymbolInfo(HashTable *table,
653 const SymbolName* key, RtsSymbolInfo **result)
654 {
655 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
656 if (!pinfo) {
657 *result = NULL;
658 return HS_BOOL_FALSE;
659 }
660 if (pinfo->weak)
661 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
662 /* Once it's looked up, it can no longer be overridden */
663 pinfo->weak = HS_BOOL_FALSE;
664
665 *result = pinfo;
666 return HS_BOOL_TRUE;
667 }
668
669 /* -----------------------------------------------------------------------------
670 * initialize the object linker
671 */
672
673
674 static int linker_init_done = 0 ;
675
676 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
677 static void *dl_prog_handle;
678 static regex_t re_invalid;
679 static regex_t re_realso;
680 #ifdef THREADED_RTS
681 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
682 #endif
683 #elif defined(OBJFORMAT_PEi386)
684 void addDLLHandle(pathchar* dll_name, HINSTANCE instance);
685 #endif
686
687 void initLinker (void)
688 {
689 // default to retaining CAFs for backwards compatibility. Most
690 // users will want initLinker_(0): otherwise unloadObj() will not
691 // be able to unload object files when they contain CAFs.
692 initLinker_(1);
693 }
694
695 void
696 initLinker_ (int retain_cafs)
697 {
698 RtsSymbolVal *sym;
699 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
700 int compileResult;
701 #endif
702
703 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
704
705 /* Make initLinker idempotent, so we can call it
706 before every relevant operation; that means we
707 don't need to initialise the linker separately */
708 if (linker_init_done == 1) {
709 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
710 return;
711 } else {
712 linker_init_done = 1;
713 }
714
715 objects = NULL;
716 unloaded_objects = NULL;
717
718 #if defined(THREADED_RTS)
719 initMutex(&linker_mutex);
720 initMutex(&linker_unloaded_mutex);
721 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
722 initMutex(&dl_mutex);
723 #endif
724 #endif
725 symhash = allocStrHashTable();
726
727 /* populate the symbol table with stuff from the RTS */
728 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
729 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
730 symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
731 barf("ghciInsertSymbolTable failed");
732 }
733 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
734 }
735 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
736 machoInitSymbolsWithoutUnderscore();
737 # endif
738 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
739 referenced from a statically linked module. We need to mimic this, but
740 we cannot use NULL because we use it to mean nonexistent symbols. So we
741 use an arbitrary (hopefully unique) address here.
742 */
743 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
744 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
745 barf("ghciInsertSymbolTable failed");
746 }
747
748 #if defined(OBJFORMAT_PEi386)
749 if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
750 symhash, "__image_base__", __image_base, HS_BOOL_TRUE, NULL)) {
751 barf("ghciInsertSymbolTable failed");
752 }
753 #endif /* OBJFORMAT_PEi386 */
754
755
756 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
757 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
758 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
759 retain_cafs ? newRetainedCAF : newGCdCAF,
760 HS_BOOL_FALSE, NULL)) {
761 barf("ghciInsertSymbolTable failed");
762 }
763
764 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
765 # if defined(RTLD_DEFAULT)
766 dl_prog_handle = RTLD_DEFAULT;
767 # else
768 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
769 # endif /* RTLD_DEFAULT */
770
771 compileResult = regcomp(&re_invalid,
772 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
773 REG_EXTENDED);
774 if (compileResult != 0) {
775 barf("Compiling re_invalid failed");
776 }
777 compileResult = regcomp(&re_realso,
778 "(GROUP|INPUT) *\\( *([^ )]+)",
779 REG_EXTENDED);
780 if (compileResult != 0) {
781 barf("Compiling re_realso failed");
782 }
783 # endif
784
785 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
786 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
787 // User-override for mmap_32bit_base
788 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
789 }
790 #endif
791
792 #if defined(mingw32_HOST_OS)
793 /*
794 * These two libraries cause problems when added to the static link,
795 * but are necessary for resolving symbols in GHCi, hence we load
796 * them manually here.
797 */
798 addDLL(WSTR("msvcrt"));
799 addDLL(WSTR("kernel32"));
800 addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
801 #endif
802
803 if (RTS_LINKER_USE_MMAP)
804 m32_allocator_init();
805
806 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
807 return;
808 }
809
810 void
811 exitLinker( void ) {
812 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
813 if (linker_init_done == 1) {
814 regfree(&re_invalid);
815 regfree(&re_realso);
816 #ifdef THREADED_RTS
817 closeMutex(&dl_mutex);
818 #endif
819 }
820 #endif
821 if (linker_init_done == 1) {
822 freeHashTable(symhash, free);
823 }
824 #ifdef THREADED_RTS
825 closeMutex(&linker_mutex);
826 #endif
827 }
828
829 /* -----------------------------------------------------------------------------
830 * Loading DLL or .so dynamic libraries
831 * -----------------------------------------------------------------------------
832 *
833 * Add a DLL from which symbols may be found. In the ELF case, just
834 * do RTLD_GLOBAL-style add, so no further messing around needs to
835 * happen in order that symbols in the loaded .so are findable --
836 * lookupSymbol() will subsequently see them by dlsym on the program's
837 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
838 *
839 * In the PEi386 case, open the DLLs and put handles to them in a
840 * linked list. When looking for a symbol, try all handles in the
841 * list. This means that we need to load even DLLs that are guaranteed
842 * to be in the ghc.exe image already, just so we can get a handle
843 * to give to loadSymbol, so that we can find the symbols. For such
844 * libraries, the LoadLibrary call should be a no-op except for returning
845 * the handle.
846 *
847 */
848
849 #if defined(OBJFORMAT_PEi386)
850 /* A record for storing handles into DLLs. */
851
852 typedef
853 struct _OpenedDLL {
854 pathchar* name;
855 struct _OpenedDLL* next;
856 HINSTANCE instance;
857 }
858 OpenedDLL;
859
860 /* A list thereof. */
861 static OpenedDLL* opened_dlls = NULL;
862
863 /* A record for storing indirectly linked functions from DLLs. */
864 typedef
865 struct _IndirectAddr {
866 SymbolAddr* addr;
867 struct _IndirectAddr* next;
868 }
869 IndirectAddr;
870
871 /* A list thereof. */
872 static IndirectAddr* indirects = NULL;
873
874 /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
875 void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
876 OpenedDLL* o_dll;
877 o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );
878 o_dll->name = dll_name ? pathdup(dll_name) : NULL;
879 o_dll->instance = instance;
880 o_dll->next = opened_dlls;
881 opened_dlls = o_dll;
882 }
883
884 #endif
885
886 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
887
888 /* Suppose in ghci we load a temporary SO for a module containing
889 f = 1
890 and then modify the module, recompile, and load another temporary
891 SO with
892 f = 2
893 Then as we don't unload the first SO, dlsym will find the
894 f = 1
895 symbol whereas we want the
896 f = 2
897 symbol. We therefore need to keep our own SO handle list, and
898 try SOs in the right order. */
899
900 typedef
901 struct _OpenedSO {
902 struct _OpenedSO* next;
903 void *handle;
904 }
905 OpenedSO;
906
907 /* A list thereof. */
908 static OpenedSO* openedSOs = NULL;
909
910 static const char *
911 internal_dlopen(const char *dll_name)
912 {
913 OpenedSO* o_so;
914 void *hdl;
915 const char *errmsg;
916 char *errmsg_copy;
917
918 // omitted: RTLD_NOW
919 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
920 IF_DEBUG(linker,
921 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
922
923 //-------------- Begin critical section ------------------
924 // This critical section is necessary because dlerror() is not
925 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
926 // Also, the error message returned must be copied to preserve it
927 // (see POSIX also)
928
929 ACQUIRE_LOCK(&dl_mutex);
930 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
931
932 errmsg = NULL;
933 if (hdl == NULL) {
934 /* dlopen failed; return a ptr to the error msg. */
935 errmsg = dlerror();
936 if (errmsg == NULL) errmsg = "addDLL: unknown error";
937 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
938 strcpy(errmsg_copy, errmsg);
939 errmsg = errmsg_copy;
940 } else {
941 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
942 o_so->handle = hdl;
943 o_so->next = openedSOs;
944 openedSOs = o_so;
945 }
946
947 RELEASE_LOCK(&dl_mutex);
948 //--------------- End critical section -------------------
949
950 return errmsg;
951 }
952
953 /*
954 Note [RTLD_LOCAL]
955
956 In GHCi we want to be able to override previous .so's with newly
957 loaded .so's when we recompile something. This further implies that
958 when we look up a symbol in internal_dlsym() we have to iterate
959 through the loaded libraries (in order from most recently loaded to
960 oldest) looking up the symbol in each one until we find it.
961
962 However, this can cause problems for some symbols that are copied
963 by the linker into the executable image at runtime - see #8935 for a
964 lengthy discussion. To solve that problem we need to look up
965 symbols in the main executable *first*, before attempting to look
966 them up in the loaded .so's. But in order to make that work, we
967 have to always call dlopen with RTLD_LOCAL, so that the loaded
968 libraries don't populate the global symbol table.
969 */
970
971 static void *
972 internal_dlsym(const char *symbol) {
973 OpenedSO* o_so;
974 void *v;
975
976 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
977 ACQUIRE_LOCK(&dl_mutex);
978 dlerror();
979 // look in program first
980 v = dlsym(dl_prog_handle, symbol);
981 if (dlerror() == NULL) {
982 RELEASE_LOCK(&dl_mutex);
983 return v;
984 }
985
986 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
987 v = dlsym(o_so->handle, symbol);
988 if (dlerror() == NULL) {
989 RELEASE_LOCK(&dl_mutex);
990 return v;
991 }
992 }
993 RELEASE_LOCK(&dl_mutex);
994 return v;
995 }
996 # endif
997
998 const char *
999 addDLL( pathchar *dll_name )
1000 {
1001 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1002 /* ------------------- ELF DLL loader ------------------- */
1003
1004 #define NMATCH 5
1005 regmatch_t match[NMATCH];
1006 const char *errmsg;
1007 FILE* fp;
1008 size_t match_length;
1009 #define MAXLINE 1000
1010 char line[MAXLINE];
1011 int result;
1012
1013 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
1014 errmsg = internal_dlopen(dll_name);
1015
1016 if (errmsg == NULL) {
1017 return NULL;
1018 }
1019
1020 // GHC Trac ticket #2615
1021 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
1022 // contain linker scripts rather than ELF-format object code. This
1023 // code handles the situation by recognizing the real object code
1024 // file name given in the linker script.
1025 //
1026 // If an "invalid ELF header" error occurs, it is assumed that the
1027 // .so file contains a linker script instead of ELF object code.
1028 // In this case, the code looks for the GROUP ( ... ) linker
1029 // directive. If one is found, the first file name inside the
1030 // parentheses is treated as the name of a dynamic library and the
1031 // code attempts to dlopen that file. If this is also unsuccessful,
1032 // an error message is returned.
1033
1034 // see if the error message is due to an invalid ELF header
1035 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
1036 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
1037 IF_DEBUG(linker, debugBelch("result = %i\n", result));
1038 if (result == 0) {
1039 // success -- try to read the named file as a linker script
1040 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
1041 MAXLINE-1);
1042 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
1043 line[match_length] = '\0'; // make sure string is null-terminated
1044 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
1045 if ((fp = fopen(line, "r")) == NULL) {
1046 return errmsg; // return original error if open fails
1047 }
1048 // try to find a GROUP or INPUT ( ... ) command
1049 while (fgets(line, MAXLINE, fp) != NULL) {
1050 IF_DEBUG(linker, debugBelch("input line = %s", line));
1051 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
1052 // success -- try to dlopen the first named file
1053 IF_DEBUG(linker, debugBelch("match%s\n",""));
1054 line[match[2].rm_eo] = '\0';
1055 stgFree((void*)errmsg); // Free old message before creating new one
1056 errmsg = internal_dlopen(line+match[2].rm_so);
1057 break;
1058 }
1059 // if control reaches here, no GROUP or INPUT ( ... ) directive
1060 // was found and the original error message is returned to the
1061 // caller
1062 }
1063 fclose(fp);
1064 }
1065 return errmsg;
1066
1067 # elif defined(OBJFORMAT_PEi386)
1068 /* ------------------- Win32 DLL loader ------------------- */
1069
1070 pathchar* buf;
1071 OpenedDLL* o_dll;
1072 HINSTANCE instance;
1073
1074 IF_DEBUG(linker, debugBelch("\naddDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
1075
1076 /* See if we've already got it, and ignore if so. */
1077 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
1078 if (0 == pathcmp(o_dll->name, dll_name))
1079 return NULL;
1080 }
1081
1082 /* The file name has no suffix (yet) so that we can try
1083 both foo.dll and foo.drv
1084
1085 The documentation for LoadLibrary says:
1086 If no file name extension is specified in the lpFileName
1087 parameter, the default library extension .dll is
1088 appended. However, the file name string can include a trailing
1089 point character (.) to indicate that the module name has no
1090 extension. */
1091
1092 size_t bufsize = pathlen(dll_name) + 10;
1093 buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
1094
1095 /* These are ordered by probability of success and order we'd like them */
1096 const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
1097 const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
1098
1099 int cFormat;
1100 int cFlag;
1101 int flags_start = 1; // Assume we don't support the new API
1102
1103 /* Detect if newer API are available, if not, skip the first flags entry */
1104 if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
1105 flags_start = 0;
1106 }
1107
1108 /* Iterate through the possible flags and formats */
1109 for (cFlag = flags_start; cFlag < 2; cFlag++)
1110 {
1111 for (cFormat = 0; cFormat < 4; cFormat++)
1112 {
1113 snwprintf(buf, bufsize, formats[cFormat], dll_name);
1114 instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
1115 if (instance == NULL)
1116 {
1117 if (GetLastError() != ERROR_MOD_NOT_FOUND)
1118 {
1119 goto error;
1120 }
1121 }
1122 else
1123 {
1124 break; // We're done. DLL has been loaded.
1125 }
1126 }
1127 }
1128
1129 // Check if we managed to load the DLL
1130 if (instance == NULL) {
1131 goto error;
1132 }
1133
1134 stgFree(buf);
1135
1136 addDLLHandle(dll_name, instance);
1137
1138 return NULL;
1139
1140 error:
1141 stgFree(buf);
1142 sysErrorBelch("addDLL: %" PATH_FMT " (Win32 error %lu)", dll_name, GetLastError());
1143
1144 /* LoadLibrary failed; return a ptr to the error msg. */
1145 return "addDLL: could not load DLL";
1146
1147 # else
1148 barf("addDLL: not implemented on this platform");
1149 # endif
1150 }
1151
1152 /* -----------------------------------------------------------------------------
1153 * Searches the system directories to determine if there is a system DLL that
1154 * satisfies the given name. This prevent GHCi from linking against a static
1155 * library if a DLL is available.
1156 *
1157 * Returns: NULL on failure or no DLL found, else the full path to the DLL
1158 * that can be loaded.
1159 */
1160 pathchar* findSystemLibrary(pathchar* dll_name)
1161 {
1162 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));
1163
1164 #if defined(OBJFORMAT_PEi386)
1165 const unsigned int init_buf_size = 1024;
1166 unsigned int bufsize = init_buf_size;
1167 wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
1168 DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
1169
1170 if (wResult > bufsize) {
1171 result = realloc(result, sizeof(wchar_t) * wResult);
1172 wResult = SearchPathW(NULL, dll_name, NULL, wResult, result, NULL);
1173 }
1174
1175
1176 if (!wResult) {
1177 free(result);
1178 return NULL;
1179 }
1180
1181 return result;
1182 #else
1183 (void)(dll_name); // Function not implemented for other platforms.
1184 return NULL;
1185 #endif
1186 }
1187
1188 /* -----------------------------------------------------------------------------
1189 * Emits a warning determining that the system is missing a required security
1190 * update that we need to get access to the proper APIs
1191 */
1192 void warnMissingKBLibraryPaths( void )
1193 {
1194 static HsBool missing_update_warn = HS_BOOL_FALSE;
1195 if (!missing_update_warn) {
1196 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
1197 missing_update_warn = HS_BOOL_TRUE;
1198 }
1199 }
1200
1201 /* -----------------------------------------------------------------------------
1202 * appends a directory to the process DLL Load path so LoadLibrary can find it
1203 *
1204 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
1205 * restore the search path to what it was before this call.
1206 */
1207 HsPtr addLibrarySearchPath(pathchar* dll_path)
1208 {
1209 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));
1210
1211 #if defined(OBJFORMAT_PEi386)
1212 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1213 LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
1214
1215 HsPtr result = NULL;
1216
1217 const unsigned int init_buf_size = 4096;
1218 int bufsize = init_buf_size;
1219
1220 // Make sure the path is an absolute path
1221 WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
1222 DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
1223 if (!wResult){
1224 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1225 }
1226 else if (wResult > init_buf_size) {
1227 abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
1228 if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
1229 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1230 }
1231 }
1232
1233 if (AddDllDirectory) {
1234 result = AddDllDirectory(abs_path);
1235 }
1236 else
1237 {
1238 warnMissingKBLibraryPaths();
1239 WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
1240 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1241
1242 if (wResult > init_buf_size) {
1243 str = realloc(str, sizeof(WCHAR) * wResult);
1244 bufsize = wResult;
1245 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1246 if (!wResult) {
1247 sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1248 }
1249 }
1250
1251 bufsize = wResult + 2 + pathlen(abs_path);
1252 wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
1253
1254 wcscpy(newPath, abs_path);
1255 wcscat(newPath, L";");
1256 wcscat(newPath, str);
1257 if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
1258 sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1259 }
1260
1261 free(newPath);
1262 free(abs_path);
1263
1264 return str;
1265 }
1266
1267 if (!result) {
1268 sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1269 free(abs_path);
1270 return NULL;
1271 }
1272
1273 free(abs_path);
1274 return result;
1275 #else
1276 (void)(dll_path); // Function not implemented for other platforms.
1277 return NULL;
1278 #endif
1279 }
1280
1281 /* -----------------------------------------------------------------------------
1282 * removes a directory from the process DLL Load path
1283 *
1284 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
1285 */
1286 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
1287 {
1288 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n", dll_path_index));
1289
1290 #if defined(OBJFORMAT_PEi386)
1291 HsBool result = 0;
1292
1293 if (dll_path_index != NULL) {
1294 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1295 LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
1296
1297 if (RemoveDllDirectory) {
1298 result = RemoveDllDirectory(dll_path_index);
1299 // dll_path_index is now invalid, do not use it after this point.
1300 }
1301 else
1302 {
1303 warnMissingKBLibraryPaths();
1304 result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
1305 free(dll_path_index);
1306 }
1307
1308 if (!result) {
1309 sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
1310 return HS_BOOL_FALSE;
1311 }
1312 }
1313
1314 return result == 0 ? HS_BOOL_TRUE : HS_BOOL_FALSE;
1315 #else
1316 (void)(dll_path_index); // Function not implemented for other platforms.
1317 return HS_BOOL_FALSE;
1318 #endif
1319 }
1320
1321 /* -----------------------------------------------------------------------------
1322 * insert a symbol in the hash table
1323 *
1324 * Returns: 0 on failure, nozero on success
1325 */
1326 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
1327 {
1328 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
1329 }
1330
1331 /* -----------------------------------------------------------------------------
1332 * lookup a symbol in the hash table
1333 */
1334 static SymbolAddr* lookupSymbol_ (SymbolName* lbl)
1335 {
1336 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
1337
1338 ASSERT(symhash != NULL);
1339 RtsSymbolInfo *pinfo;
1340
1341 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
1342 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
1343
1344 # if defined(OBJFORMAT_ELF)
1345 return internal_dlsym(lbl);
1346 # elif defined(OBJFORMAT_MACHO)
1347
1348 /* HACK: On OS X, all symbols are prefixed with an underscore.
1349 However, dlsym wants us to omit the leading underscore from the
1350 symbol name -- the dlsym routine puts it back on before searching
1351 for the symbol. For now, we simply strip it off here (and ONLY
1352 here).
1353 */
1354 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
1355 ASSERT(lbl[0] == '_');
1356 return internal_dlsym(lbl + 1);
1357 # elif defined(OBJFORMAT_PEi386)
1358 SymbolAddr* sym;
1359
1360 /* See Note [mingw-w64 name decoration scheme] */
1361 #ifndef x86_64_HOST_ARCH
1362 zapTrailingAtSign ( (unsigned char*)lbl );
1363 #endif
1364 sym = lookupSymbolInDLLs((unsigned char*)lbl);
1365 return sym; // might be NULL if not found
1366
1367 # else
1368 ASSERT(2+2 == 5);
1369 return NULL;
1370 # endif
1371 } else {
1372 SymbolAddr* val = pinfo->value;
1373 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, val));
1374
1375 int r;
1376 ObjectCode* oc = pinfo->owner;
1377
1378 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
1379 See Note [runtime-linker-phases] */
1380 if (oc && oc->status == OBJECT_LOADED) {
1381 oc->status = OBJECT_NEEDED;
1382 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand loading symbol '%s'\n", lbl));
1383 r = ocTryLoad(oc);
1384
1385 if (!r) {
1386 errorBelch("Could not on-demand load symbol '%s'\n", lbl);
1387 return NULL;
1388 }
1389 }
1390
1391 return val;
1392 }
1393 }
1394
1395 SymbolAddr* lookupSymbol( SymbolName* lbl )
1396 {
1397 ACQUIRE_LOCK(&linker_mutex);
1398 SymbolAddr* r = lookupSymbol_(lbl);
1399 RELEASE_LOCK(&linker_mutex);
1400 return r;
1401 }
1402
1403 /* -----------------------------------------------------------------------------
1404 Create a StablePtr for a foreign export. This is normally called by
1405 a C function with __attribute__((constructor)), which is generated
1406 by GHC and linked into the module.
1407
1408 If the object code is being loaded dynamically, then we remember
1409 which StablePtrs were allocated by the constructors and free them
1410 again in unloadObj().
1411 -------------------------------------------------------------------------- */
1412
1413 static ObjectCode *loading_obj = NULL;
1414
1415 StgStablePtr foreignExportStablePtr (StgPtr p)
1416 {
1417 ForeignExportStablePtr *fe_sptr;
1418 StgStablePtr *sptr;
1419
1420 sptr = getStablePtr(p);
1421
1422 if (loading_obj != NULL) {
1423 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
1424 "foreignExportStablePtr");
1425 fe_sptr->stable_ptr = sptr;
1426 fe_sptr->next = loading_obj->stable_ptrs;
1427 loading_obj->stable_ptrs = fe_sptr;
1428 }
1429
1430 return sptr;
1431 }
1432
1433
1434 /* -----------------------------------------------------------------------------
1435 * Debugging aid: look in GHCi's object symbol tables for symbols
1436 * within DELTA bytes of the specified address, and show their names.
1437 */
1438 #ifdef DEBUG
1439 void ghci_enquire ( SymbolAddr* addr );
1440
1441 void ghci_enquire(SymbolAddr* addr)
1442 {
1443 int i;
1444 SymbolName* sym;
1445 RtsSymbolInfo* a;
1446 const int DELTA = 64;
1447 ObjectCode* oc;
1448
1449 for (oc = objects; oc; oc = oc->next) {
1450 for (i = 0; i < oc->n_symbols; i++) {
1451 sym = oc->symbols[i];
1452 if (sym == NULL) continue;
1453 a = NULL;
1454 if (a == NULL) {
1455 ghciLookupSymbolInfo(symhash, sym, &a);
1456 }
1457 if (a == NULL) {
1458 // debugBelch("ghci_enquire: can't find %s\n", sym);
1459 }
1460 else if ( a->value
1461 && (char*)addr-DELTA <= (char*)a->value
1462 && (char*)a->value <= (char*)addr+DELTA) {
1463 debugBelch("%p + %3d == `%s'\n", addr, (int)((char*)a->value - (char*)addr), sym);
1464 }
1465 }
1466 }
1467 }
1468
1469 void ghci_find(SymbolAddr *addr);
1470 void ghci_find(SymbolAddr *addr)
1471 {
1472 ObjectCode *oc;
1473 uint32_t i;
1474
1475 for (oc = objects; oc != NULL; oc = oc->next) {
1476 for (i = 0; i < (uint32_t)oc->n_sections; i++) {
1477 Section *section = &oc->sections[i];
1478 if (addr > section->start &&
1479 (StgWord)addr < (StgWord)section->start+section->size) {
1480 debugBelch("%p is in %" PATH_FMT, addr,
1481 oc->archiveMemberName ?
1482 oc->archiveMemberName : oc->fileName);
1483 debugBelch(", section %d, offset %lx\n", i,
1484 (StgWord)addr - (StgWord)section->start);
1485 }
1486 }
1487 }
1488 }
1489 #endif
1490
1491 #if RTS_LINKER_USE_MMAP
1492 //
1493 // Returns NULL on failure.
1494 //
1495 void *
1496 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1497 {
1498 void *map_addr = NULL;
1499 void *result;
1500 size_t size;
1501 static uint32_t fixed = 0;
1502
1503 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1504 size = roundUpToPage(bytes);
1505
1506 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1507 mmap_again:
1508
1509 if (mmap_32bit_base != 0) {
1510 map_addr = mmap_32bit_base;
1511 }
1512 #endif
1513
1514 IF_DEBUG(linker,
1515 debugBelch("mmapForLinker: \tprotection %#0x\n",
1516 PROT_EXEC | PROT_READ | PROT_WRITE));
1517 IF_DEBUG(linker,
1518 debugBelch("mmapForLinker: \tflags %#0x\n",
1519 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1520
1521 result = mmap(map_addr, size,
1522 PROT_EXEC|PROT_READ|PROT_WRITE,
1523 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1524
1525 if (result == MAP_FAILED) {
1526 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1527 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1528 return NULL;
1529 }
1530
1531 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1532 if (mmap_32bit_base != 0) {
1533 if (result == map_addr) {
1534 mmap_32bit_base = (StgWord8*)map_addr + size;
1535 } else {
1536 if ((W_)result > 0x80000000) {
1537 // oops, we were given memory over 2Gb
1538 munmap(result,size);
1539 #if defined(freebsd_HOST_OS) || \
1540 defined(kfreebsdgnu_HOST_OS) || \
1541 defined(dragonfly_HOST_OS)
1542 // Some platforms require MAP_FIXED. This is normally
1543 // a bad idea, because MAP_FIXED will overwrite
1544 // existing mappings.
1545 fixed = MAP_FIXED;
1546 goto mmap_again;
1547 #else
1548 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1549 "asked for %lu bytes at %p. "
1550 "Try specifying an address with +RTS -xm<addr> -RTS",
1551 size, map_addr);
1552 return NULL;
1553 #endif
1554 } else {
1555 // hmm, we were given memory somewhere else, but it's
1556 // still under 2Gb so we can use it. Next time, ask
1557 // for memory right after the place we just got some
1558 mmap_32bit_base = (StgWord8*)result + size;
1559 }
1560 }
1561 } else {
1562 if ((W_)result > 0x80000000) {
1563 // oops, we were given memory over 2Gb
1564 // ... try allocating memory somewhere else?;
1565 debugTrace(DEBUG_linker,
1566 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1567 bytes, result);
1568 munmap(result, size);
1569
1570 // Set a base address and try again... (guess: 1Gb)
1571 mmap_32bit_base = (void*)0x40000000;
1572 goto mmap_again;
1573 }
1574 }
1575 #endif
1576
1577 IF_DEBUG(linker,
1578 debugBelch("mmapForLinker: mapped %" FMT_Word
1579 " bytes starting at %p\n", (W_)size, result));
1580 IF_DEBUG(linker,
1581 debugBelch("mmapForLinker: done\n"));
1582
1583 return result;
1584 }
1585 #endif
1586
1587 /*
1588 * Remove symbols from the symbol table, and free oc->symbols.
1589 * This operation is idempotent.
1590 */
1591 static void removeOcSymbols (ObjectCode *oc)
1592 {
1593 if (oc->symbols == NULL) return;
1594
1595 // Remove all the mappings for the symbols within this object..
1596 int i;
1597 for (i = 0; i < oc->n_symbols; i++) {
1598 if (oc->symbols[i] != NULL) {
1599 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1600 }
1601 }
1602
1603 stgFree(oc->symbols);
1604 oc->symbols = NULL;
1605 }
1606
1607 /*
1608 * Release StablePtrs and free oc->stable_ptrs.
1609 * This operation is idempotent.
1610 */
1611 static void freeOcStablePtrs (ObjectCode *oc)
1612 {
1613 // Release any StablePtrs that were created when this
1614 // object module was initialized.
1615 ForeignExportStablePtr *fe_ptr, *next;
1616
1617 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1618 next = fe_ptr->next;
1619 freeStablePtr(fe_ptr->stable_ptr);
1620 stgFree(fe_ptr);
1621 }
1622 oc->stable_ptrs = NULL;
1623 }
1624
1625 static void
1626 freePreloadObjectFile (ObjectCode *oc)
1627 {
1628 #if defined(mingw32_HOST_OS)
1629
1630 VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
1631
1632 IndirectAddr *ia, *ia_next;
1633 ia = indirects;
1634 while (ia != NULL) {
1635 ia_next = ia->next;
1636 stgFree(ia);
1637 ia = ia_next;
1638 }
1639 indirects = NULL;
1640
1641 #else
1642
1643 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1644 munmap(oc->image, oc->fileSize);
1645 }
1646 else {
1647 stgFree(oc->image);
1648 }
1649
1650 #endif
1651
1652 oc->image = NULL;
1653 oc->fileSize = 0;
1654 }
1655
1656 /*
1657 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1658 * the GC when a previously unloaded ObjectCode has been determined to be
1659 * unused, and when an error occurs during loadObj().
1660 */
1661 void freeObjectCode (ObjectCode *oc)
1662 {
1663 freePreloadObjectFile(oc);
1664
1665 if (oc->symbols != NULL) {
1666 stgFree(oc->symbols);
1667 oc->symbols = NULL;
1668 }
1669
1670 if (oc->extraInfos != NULL) {
1671 freeHashTable(oc->extraInfos, NULL);
1672 oc->extraInfos = NULL;
1673 }
1674
1675 if (oc->sections != NULL) {
1676 int i;
1677 for (i=0; i < oc->n_sections; i++) {
1678 if (oc->sections[i].start != NULL) {
1679 switch(oc->sections[i].alloc){
1680 #if RTS_LINKER_USE_MMAP
1681 case SECTION_MMAP:
1682 munmap(oc->sections[i].mapped_start,
1683 oc->sections[i].mapped_size);
1684 break;
1685 case SECTION_M32:
1686 m32_free(oc->sections[i].start,
1687 oc->sections[i].size);
1688 break;
1689 #endif
1690 case SECTION_MALLOC:
1691 stgFree(oc->sections[i].start);
1692 break;
1693 default:
1694 break;
1695 }
1696 }
1697 }
1698 stgFree(oc->sections);
1699 }
1700
1701 freeProddableBlocks(oc);
1702
1703 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1704 * alongside the image, so we don't need to free. */
1705 #if NEED_SYMBOL_EXTRAS && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
1706 if (RTS_LINKER_USE_MMAP) {
1707 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1708 m32_free(oc->symbol_extras,
1709 sizeof(SymbolExtra) * oc->n_symbol_extras);
1710 }
1711 }
1712 else {
1713 stgFree(oc->symbol_extras);
1714 }
1715 #endif
1716
1717 stgFree(oc->fileName);
1718 stgFree(oc->archiveMemberName);
1719
1720 stgFree(oc);
1721 }
1722
1723 /* -----------------------------------------------------------------------------
1724 * Sets the initial status of a fresh ObjectCode
1725 */
1726 static void setOcInitialStatus(ObjectCode* oc) {
1727 if (oc->archiveMemberName == NULL) {
1728 oc->status = OBJECT_NEEDED;
1729 } else {
1730 oc->status = OBJECT_LOADED;
1731 }
1732 }
1733
1734 static ObjectCode*
1735 mkOc( pathchar *path, char *image, int imageSize,
1736 rtsBool mapped, char *archiveMemberName, int misalignment ) {
1737 ObjectCode* oc;
1738
1739 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1740 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1741
1742 # if defined(OBJFORMAT_ELF)
1743 oc->formatName = "ELF";
1744 # elif defined(OBJFORMAT_PEi386)
1745 oc->formatName = "PEi386";
1746 # elif defined(OBJFORMAT_MACHO)
1747 oc->formatName = "Mach-O";
1748 # else
1749 stgFree(oc);
1750 barf("loadObj: not implemented on this platform");
1751 # endif
1752
1753 oc->image = image;
1754 oc->fileName = pathdup(path);
1755
1756 if (archiveMemberName) {
1757 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1758 strcpy(oc->archiveMemberName, archiveMemberName);
1759 } else {
1760 oc->archiveMemberName = NULL;
1761 }
1762
1763 setOcInitialStatus( oc );
1764
1765 oc->fileSize = imageSize;
1766 oc->symbols = NULL;
1767 oc->n_sections = 0;
1768 oc->sections = NULL;
1769 oc->proddables = NULL;
1770 oc->stable_ptrs = NULL;
1771 #if NEED_SYMBOL_EXTRAS
1772 oc->symbol_extras = NULL;
1773 #endif
1774 oc->imageMapped = mapped;
1775
1776 oc->misalignment = misalignment;
1777 oc->extraInfos = NULL;
1778
1779 /* chain it onto the list of objects */
1780 oc->next = NULL;
1781
1782 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1783 return oc;
1784 }
1785
1786 /* -----------------------------------------------------------------------------
1787 * Check if an object or archive is already loaded.
1788 *
1789 * Returns: 1 if the path is already loaded, 0 otherwise.
1790 */
1791 static HsInt
1792 isAlreadyLoaded( pathchar *path )
1793 {
1794 ObjectCode *o;
1795 for (o = objects; o; o = o->next) {
1796 if (0 == pathcmp(o->fileName, path)) {
1797 return 1; /* already loaded */
1798 }
1799 }
1800 return 0; /* not loaded yet */
1801 }
1802
1803 static HsInt loadArchive_ (pathchar *path)
1804 {
1805 ObjectCode* oc;
1806 char *image;
1807 int memberSize;
1808 FILE *f;
1809 int n;
1810 size_t thisFileNameSize;
1811 char *fileName;
1812 size_t fileNameSize;
1813 int isObject, isGnuIndex, isThin, isImportLib;
1814 char tmp[20];
1815 char *gnuFileIndex;
1816 int gnuFileIndexSize;
1817 #if defined(darwin_HOST_OS)
1818 int i;
1819 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
1820 #if defined(i386_HOST_ARCH)
1821 const uint32_t mycputype = CPU_TYPE_X86;
1822 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
1823 #elif defined(x86_64_HOST_ARCH)
1824 const uint32_t mycputype = CPU_TYPE_X86_64;
1825 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
1826 #elif defined(powerpc_HOST_ARCH)
1827 const uint32_t mycputype = CPU_TYPE_POWERPC;
1828 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1829 #elif defined(powerpc64_HOST_ARCH)
1830 const uint32_t mycputype = CPU_TYPE_POWERPC64;
1831 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1832 #else
1833 #error Unknown Darwin architecture
1834 #endif
1835 #endif
1836 int misalignment = 0;
1837
1838 /* TODO: don't call barf() on error, instead return an error code, freeing
1839 * all resources correctly. This function is pretty complex, so it needs
1840 * to be refactored to make this practical. */
1841
1842 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
1843 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
1844
1845 /* Check that we haven't already loaded this archive.
1846 Ignore requests to load multiple times */
1847 if (isAlreadyLoaded(path)) {
1848 IF_DEBUG(linker,
1849 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1850 return 1; /* success */
1851 }
1852
1853 gnuFileIndex = NULL;
1854 gnuFileIndexSize = 0;
1855
1856 fileNameSize = 32;
1857 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
1858
1859 isThin = 0;
1860 isImportLib = 0;
1861
1862 f = pathopen(path, WSTR("rb"));
1863 if (!f)
1864 barf("loadObj: can't read `%" PATH_FMT "'", path);
1865
1866 /* Check if this is an archive by looking for the magic "!<arch>\n"
1867 * string. Usually, if this fails, we barf and quit. On Darwin however,
1868 * we may have a fat archive, which contains archives for more than
1869 * one architecture. Fat archives start with the magic number 0xcafebabe,
1870 * always stored big endian. If we find a fat_header, we scan through
1871 * the fat_arch structs, searching through for one for our host
1872 * architecture. If a matching struct is found, we read the offset
1873 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
1874 * from the start of the file.
1875 *
1876 * A subtlety is that all of the members of the fat_header and fat_arch
1877 * structs are stored big endian, so we need to call byte order
1878 * conversion functions.
1879 *
1880 * If we find the appropriate architecture in a fat archive, we gobble
1881 * its magic "!<arch>\n" string and continue processing just as if
1882 * we had a single architecture archive.
1883 */
1884
1885 n = fread ( tmp, 1, 8, f );
1886 if (n != 8)
1887 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
1888 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
1889 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
1890 *
1891 * ar thin libraries have the exact same format as normal archives except they
1892 * have a different magic string and they don't copy the object files into the
1893 * archive.
1894 *
1895 * Instead each header entry points to the location of the object file on disk.
1896 * This is useful when a library is only created to satisfy a compile time dependency
1897 * instead of to be distributed. This saves the time required for copying.
1898 *
1899 * Thin archives are always flattened. They always only contain simple headers
1900 * pointing to the object file and so we need not allocate more memory than needed
1901 * to find the object file.
1902 *
1903 */
1904 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
1905 isThin = 1;
1906 }
1907 #if defined(darwin_HOST_OS)
1908 /* Not a standard archive, look for a fat archive magic number: */
1909 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
1910 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
1911 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
1912 nfat_offset = 0;
1913
1914 for (i = 0; i < (int)nfat_arch; i++) {
1915 /* search for the right arch */
1916 n = fread( tmp, 1, 20, f );
1917 if (n != 8)
1918 barf("loadArchive: Failed reading arch from `%s'", path);
1919 cputype = ntohl(*(uint32_t *)tmp);
1920 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
1921
1922 if (cputype == mycputype && cpusubtype == mycpusubtype) {
1923 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
1924 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
1925 break;
1926 }
1927 }
1928
1929 if (nfat_offset == 0) {
1930 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
1931 }
1932 else {
1933 n = fseek( f, nfat_offset, SEEK_SET );
1934 if (n != 0)
1935 barf("loadArchive: Failed to seek to arch in `%s'", path);
1936 n = fread ( tmp, 1, 8, f );
1937 if (n != 8)
1938 barf("loadArchive: Failed reading header from `%s'", path);
1939 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1940 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
1941 }
1942 }
1943 }
1944 else {
1945 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
1946 }
1947 #else
1948 else {
1949 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
1950 }
1951 #endif
1952
1953 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
1954
1955 while (1) {
1956 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
1957 n = fread ( fileName, 1, 16, f );
1958 if (n != 16) {
1959 if (feof(f)) {
1960 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
1961 break;
1962 }
1963 else {
1964 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
1965 }
1966 }
1967
1968 #if defined(darwin_HOST_OS)
1969 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
1970 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
1971 break;
1972 }
1973 #endif
1974
1975 n = fread ( tmp, 1, 12, f );
1976 if (n != 12)
1977 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
1978 n = fread ( tmp, 1, 6, f );
1979 if (n != 6)
1980 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
1981 n = fread ( tmp, 1, 6, f );
1982 if (n != 6)
1983 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
1984 n = fread ( tmp, 1, 8, f );
1985 if (n != 8)
1986 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
1987 n = fread ( tmp, 1, 10, f );
1988 if (n != 10)
1989 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
1990 tmp[10] = '\0';
1991 for (n = 0; isdigit(tmp[n]); n++);
1992 tmp[n] = '\0';
1993 memberSize = atoi(tmp);
1994
1995 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
1996 n = fread ( tmp, 1, 2, f );
1997 if (n != 2)
1998 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
1999 if (strncmp(tmp, "\x60\x0A", 2) != 0)
2000 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
2001 path, ftell(f), tmp[0], tmp[1]);
2002
2003 isGnuIndex = 0;
2004 /* Check for BSD-variant large filenames */
2005 if (0 == strncmp(fileName, "#1/", 3)) {
2006 fileName[16] = '\0';
2007 if (isdigit(fileName[3])) {
2008 for (n = 4; isdigit(fileName[n]); n++);
2009 fileName[n] = '\0';
2010 thisFileNameSize = atoi(fileName + 3);
2011 memberSize -= thisFileNameSize;
2012 if (thisFileNameSize >= fileNameSize) {
2013 /* Double it to avoid potentially continually
2014 increasing it by 1 */
2015 fileNameSize = thisFileNameSize * 2;
2016 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2017 }
2018 n = fread ( fileName, 1, thisFileNameSize, f );
2019 if (n != (int)thisFileNameSize) {
2020 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
2021 path);
2022 }
2023 fileName[thisFileNameSize] = 0;
2024
2025 /* On OS X at least, thisFileNameSize is the size of the
2026 fileName field, not the length of the fileName
2027 itself. */
2028 thisFileNameSize = strlen(fileName);
2029 }
2030 else {
2031 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
2032 }
2033 }
2034 /* Check for GNU file index file */
2035 else if (0 == strncmp(fileName, "//", 2)) {
2036 fileName[0] = '\0';
2037 thisFileNameSize = 0;
2038 isGnuIndex = 1;
2039 }
2040 /* Check for a file in the GNU file index */
2041 else if (fileName[0] == '/') {
2042 if (isdigit(fileName[1])) {
2043 int i;
2044
2045 for (n = 2; isdigit(fileName[n]); n++);
2046 fileName[n] = '\0';
2047 n = atoi(fileName + 1);
2048
2049 if (gnuFileIndex == NULL) {
2050 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
2051 }
2052 if (n < 0 || n > gnuFileIndexSize) {
2053 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
2054 }
2055 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
2056 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
2057 }
2058 for (i = n; gnuFileIndex[i] != '\n'; i++);
2059 thisFileNameSize = i - n - 1;
2060 if (thisFileNameSize >= fileNameSize) {
2061 /* Double it to avoid potentially continually
2062 increasing it by 1 */
2063 fileNameSize = thisFileNameSize * 2;
2064 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2065 }
2066 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
2067 fileName[thisFileNameSize] = '\0';
2068 }
2069 else if (fileName[1] == ' ') {
2070 fileName[0] = '\0';
2071 thisFileNameSize = 0;
2072 }
2073 else {
2074 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
2075 }
2076 }
2077 /* Finally, the case where the filename field actually contains
2078 the filename */
2079 else {
2080 /* GNU ar terminates filenames with a '/', this allowing
2081 spaces in filenames. So first look to see if there is a
2082 terminating '/'. */
2083 for (thisFileNameSize = 0;
2084 thisFileNameSize < 16;
2085 thisFileNameSize++) {
2086 if (fileName[thisFileNameSize] == '/') {
2087 fileName[thisFileNameSize] = '\0';
2088 break;
2089 }
2090 }
2091 /* If we didn't find a '/', then a space teminates the
2092 filename. Note that if we don't find one, then
2093 thisFileNameSize ends up as 16, and we already have the
2094 '\0' at the end. */
2095 if (thisFileNameSize == 16) {
2096 for (thisFileNameSize = 0;
2097 thisFileNameSize < 16;
2098 thisFileNameSize++) {
2099 if (fileName[thisFileNameSize] == ' ') {
2100 fileName[thisFileNameSize] = '\0';
2101 break;
2102 }
2103 }
2104 }
2105 }
2106
2107 IF_DEBUG(linker,
2108 debugBelch("loadArchive: Found member file `%s'\n", fileName));
2109
2110 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
2111 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
2112
2113 #if defined(OBJFORMAT_PEi386)
2114 /*
2115 * Note [MSVC import files (ext .lib)]
2116 * MSVC compilers store the object files in
2117 * the import libraries with extension .dll
2118 * so on Windows we should look for those too.
2119 * The PE COFF format doesn't specify any specific file name
2120 * for sections. So on windows, just try to load it all.
2121 *
2122 * Linker members (e.g. filename / are skipped since they are not needed)
2123 */
2124 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
2125
2126 /*
2127 * Note [GCC import files (ext .dll.a)]
2128 * GCC stores import information in the same binary format
2129 * as the object file normally has. The only difference is that
2130 * all the information are put in .idata sections. The only real
2131 * way to tell if we're dealing with an import lib is by looking
2132 * at the file extension.
2133 */
2134 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
2135 #endif // windows
2136
2137 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
2138 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
2139
2140 if (isObject) {
2141 char *archiveMemberName;
2142
2143 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
2144
2145 #if defined(mingw32_HOST_OS)
2146 // TODO: We would like to use allocateExec here, but allocateExec
2147 // cannot currently allocate blocks large enough.
2148 image = allocateImageAndTrampolines(path, fileName, f, memberSize,
2149 isThin);
2150 #elif defined(darwin_HOST_OS)
2151 if (RTS_LINKER_USE_MMAP)
2152 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
2153 else {
2154 /* See loadObj() */
2155 misalignment = machoGetMisalignment(f);
2156 image = stgMallocBytes(memberSize + misalignment,
2157 "loadArchive(image)");
2158 image += misalignment;
2159 }
2160
2161 #else // not windows or darwin
2162 image = stgMallocBytes(memberSize, "loadArchive(image)");
2163 #endif
2164 if (isThin) {
2165 FILE *member;
2166 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
2167
2168 /* Allocate and setup the dirname of the archive. We'll need
2169 this to locate the thin member */
2170 pathCopy = pathdup(path); // Convert the char* to a pathchar*
2171 dirName = pathdir(pathCopy);
2172
2173 /* Append the relative member name to the dirname. This should be
2174 be the full path to the actual thin member. */
2175 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
2176 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
2177 objFileName = mkPath(fileName);
2178 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
2179 stgFree(objFileName);
2180 stgFree(dirName);
2181
2182 member = pathopen(memberPath, WSTR("rb"));
2183 if (!member)
2184 barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
2185
2186 n = fread ( image, 1, memberSize, member );
2187 if (n != memberSize) {
2188 barf("loadArchive: error whilst reading `%s'", fileName);
2189 }
2190
2191 fclose(member);
2192 stgFree(memberPath);
2193 stgFree(pathCopy);
2194 }
2195 else
2196 {
2197 n = fread ( image, 1, memberSize, f );
2198 if (n != memberSize) {
2199 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2200 }
2201 }
2202
2203 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
2204 "loadArchive(file)");
2205 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
2206 path, (int)thisFileNameSize, fileName);
2207
2208 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
2209 , misalignment);
2210
2211 stgFree(archiveMemberName);
2212
2213 if (0 == loadOc(oc)) {
2214 stgFree(fileName);
2215 fclose(f);
2216 return 0;
2217 } else {
2218 #if defined(OBJFORMAT_PEi386)
2219 if (isImportLib)
2220 {
2221 findAndLoadImportLibrary(oc);
2222 stgFree(oc);
2223 oc = NULL;
2224 break;
2225 } else {
2226 #endif
2227 oc->next = objects;
2228 objects = oc;
2229 #if defined(OBJFORMAT_PEi386)
2230 }
2231 #endif
2232 }
2233 }
2234 else if (isGnuIndex) {
2235 if (gnuFileIndex != NULL) {
2236 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
2237 }
2238 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
2239 #if RTS_LINKER_USE_MMAP
2240 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
2241 #else
2242 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
2243 #endif
2244 n = fread ( gnuFileIndex, 1, memberSize, f );
2245 if (n != memberSize) {
2246 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2247 }
2248 gnuFileIndex[memberSize] = '/';
2249 gnuFileIndexSize = memberSize;
2250 }
2251 else if (isImportLib) {
2252 #if defined(OBJFORMAT_PEi386)
2253 if (checkAndLoadImportLibrary(path, fileName, f)) {
2254 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
2255 }
2256 else {
2257 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
2258 n = fseek(f, memberSize, SEEK_CUR);
2259 if (n != 0)
2260 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2261 memberSize, path);
2262 }
2263 #endif
2264 }
2265 else {
2266 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
2267 if (!isThin || thisFileNameSize == 0) {
2268 n = fseek(f, memberSize, SEEK_CUR);
2269 if (n != 0)
2270 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2271 memberSize, path);
2272 }
2273 }
2274
2275 /* .ar files are 2-byte aligned */
2276 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
2277 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
2278 n = fread ( tmp, 1, 1, f );
2279 if (n != 1) {
2280 if (feof(f)) {
2281 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
2282 break;
2283 }
2284 else {
2285 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
2286 }
2287 }
2288 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
2289 }
2290 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
2291 }
2292
2293 fclose(f);
2294
2295 stgFree(fileName);
2296 if (gnuFileIndex != NULL) {
2297 #if RTS_LINKER_USE_MMAP
2298 munmap(gnuFileIndex, gnuFileIndexSize + 1);
2299 #else
2300 stgFree(gnuFileIndex);
2301 #endif
2302 }
2303
2304 if (RTS_LINKER_USE_MMAP)
2305 m32_allocator_flush();
2306
2307 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
2308 return 1;
2309 }
2310
2311 HsInt loadArchive (pathchar *path)
2312 {
2313 ACQUIRE_LOCK(&linker_mutex);
2314 HsInt r = loadArchive_(path);
2315 RELEASE_LOCK(&linker_mutex);
2316 return r;
2317 }
2318
2319 //
2320 // Load the object file into memory. This will not be its final resting place,
2321 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
2322 // address space, properly aligned.
2323 //
2324 static ObjectCode *
2325 preloadObjectFile (pathchar *path)
2326 {
2327 int fileSize;
2328 struct_stat st;
2329 int r;
2330 void *image;
2331 ObjectCode *oc;
2332 int misalignment = 0;
2333
2334 r = pathstat(path, &st);
2335 if (r == -1) {
2336 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
2337 return NULL;
2338 }
2339
2340 fileSize = st.st_size;
2341
2342 #if RTS_LINKER_USE_MMAP
2343 int fd;
2344
2345 /* On many architectures malloc'd memory isn't executable, so we need to use
2346 * mmap. */
2347
2348 #if defined(openbsd_HOST_OS)
2349 fd = open(path, O_RDONLY, S_IRUSR);
2350 #else
2351 fd = open(path, O_RDONLY);
2352 #endif
2353 if (fd == -1) {
2354 errorBelch("loadObj: can't open %s", path);
2355 return NULL;
2356 }
2357
2358 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
2359 MAP_PRIVATE, fd, 0);
2360 // not 32-bit yet, we'll remap later
2361 close(fd);
2362
2363 #else /* !RTS_LINKER_USE_MMAP */
2364 FILE *f;
2365
2366 /* load the image into memory */
2367 /* coverity[toctou] */
2368 f = pathopen(path, WSTR("rb"));
2369 if (!f) {
2370 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
2371 return NULL;
2372 }
2373
2374 # if defined(mingw32_HOST_OS)
2375
2376 // TODO: We would like to use allocateExec here, but allocateExec
2377 // cannot currently allocate blocks large enough.
2378 image = allocateImageAndTrampolines(path, "itself", f, fileSize,
2379 HS_BOOL_FALSE);
2380 if (image == NULL) {
2381 fclose(f);
2382 return NULL;
2383 }
2384
2385 # elif defined(darwin_HOST_OS)
2386
2387 // In a Mach-O .o file, all sections can and will be misaligned
2388 // if the total size of the headers is not a multiple of the
2389 // desired alignment. This is fine for .o files that only serve
2390 // as input for the static linker, but it's not fine for us,
2391 // as SSE (used by gcc for floating point) and Altivec require
2392 // 16-byte alignment.
2393 // We calculate the correct alignment from the header before
2394 // reading the file, and then we misalign image on purpose so
2395 // that the actual sections end up aligned again.
2396 misalignment = machoGetMisalignment(f);
2397 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
2398 image += misalignment;
2399
2400 # else /* !defined(mingw32_HOST_OS) */
2401
2402 image = stgMallocBytes(fileSize, "loadObj(image)");
2403
2404 #endif
2405
2406 int n;
2407 n = fread ( image, 1, fileSize, f );
2408 fclose(f);
2409 if (n != fileSize) {
2410 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
2411 stgFree(image);
2412 return NULL;
2413 }
2414
2415 #endif /* RTS_LINKER_USE_MMAP */
2416
2417 oc = mkOc(path, image, fileSize, rtsTrue, NULL, misalignment);
2418
2419 return oc;
2420 }
2421
2422 /* -----------------------------------------------------------------------------
2423 * Load an obj (populate the global symbol table, but don't resolve yet)
2424 *
2425 * Returns: 1 if ok, 0 on error.
2426 */
2427 static HsInt loadObj_ (pathchar *path)
2428 {
2429 ObjectCode* oc;
2430 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
2431
2432 /* debugBelch("loadObj %s\n", path ); */
2433
2434 /* Check that we haven't already loaded this object.
2435 Ignore requests to load multiple times */
2436
2437 if (isAlreadyLoaded(path)) {
2438 IF_DEBUG(linker,
2439 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
2440 return 1; /* success */
2441 }
2442
2443 oc = preloadObjectFile(path);
2444 if (oc == NULL) return 0;
2445
2446 if (! loadOc(oc)) {
2447 // failed; free everything we've allocated
2448 removeOcSymbols(oc);
2449 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
2450 freeObjectCode(oc);
2451 return 0;
2452 }
2453
2454 oc->next = objects;
2455 objects = oc;
2456 return 1;
2457 }
2458
2459 HsInt loadObj (pathchar *path)
2460 {
2461 ACQUIRE_LOCK(&linker_mutex);
2462 HsInt r = loadObj_(path);
2463 RELEASE_LOCK(&linker_mutex);
2464 return r;
2465 }
2466
2467 static HsInt loadOc (ObjectCode* oc)
2468 {
2469 int r;
2470
2471 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
2472
2473 /* verify the in-memory image */
2474 # if defined(OBJFORMAT_ELF)
2475 r = ocVerifyImage_ELF ( oc );
2476 # elif defined(OBJFORMAT_PEi386)
2477 r = ocVerifyImage_PEi386 ( oc );
2478 # elif defined(OBJFORMAT_MACHO)
2479 r = ocVerifyImage_MachO ( oc );
2480 # else
2481 barf("loadObj: no verify method");
2482 # endif
2483 if (!r) {
2484 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
2485 return r;
2486 }
2487
2488 #if NEED_SYMBOL_EXTRAS
2489 # if defined(OBJFORMAT_MACHO)
2490 r = ocAllocateSymbolExtras_MachO ( oc );
2491 if (!r) {
2492 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
2493 return r;
2494 }
2495 # elif defined(OBJFORMAT_ELF)
2496 r = ocAllocateSymbolExtras_ELF ( oc );
2497 if (!r) {
2498 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
2499 return r;
2500 }
2501 # elif defined(OBJFORMAT_PEi386)
2502 ocAllocateSymbolExtras_PEi386 ( oc );
2503 # endif
2504 #endif
2505
2506 /* build the symbol list for this image */
2507 # if defined(OBJFORMAT_ELF)
2508 r = ocGetNames_ELF ( oc );
2509 # elif defined(OBJFORMAT_PEi386)
2510 r = ocGetNames_PEi386 ( oc );
2511 # elif defined(OBJFORMAT_MACHO)
2512 r = ocGetNames_MachO ( oc );
2513 # else
2514 barf("loadObj: no getNames method");
2515 # endif
2516 if (!r) {
2517 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
2518 return r;
2519 }
2520
2521 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
2522 setOcInitialStatus( oc );
2523 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
2524
2525 return 1;
2526 }
2527
2528 /* -----------------------------------------------------------------------------
2529 * try to load and initialize an ObjectCode into memory
2530 *
2531 * Returns: 1 if ok, 0 on error.
2532 */
2533 int ocTryLoad (ObjectCode* oc) {
2534 int r;
2535
2536 if (oc->status != OBJECT_NEEDED) {
2537 return 1;
2538 }
2539
2540 /* Check for duplicate symbols by looking into `symhash`.
2541 Duplicate symbols are any symbols which exist
2542 in different ObjectCodes that have both been loaded, or
2543 are to be loaded by this call.
2544
2545 This call is intended to have no side-effects when a non-duplicate
2546 symbol is re-inserted.
2547
2548 We set the Address to NULL since that is not used to distinguish
2549 symbols. Duplicate symbols are distinguished by name and oc.
2550 */
2551 int x;
2552 SymbolName* symbol;
2553 for (x = 0; x < oc->n_symbols; x++) {
2554 symbol = oc->symbols[x];
2555 if ( symbol
2556 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL, isSymbolWeak(oc, symbol), oc)) {
2557 return 0;
2558 }
2559 }
2560
2561 IF_DEBUG(linker, debugBelch("Resolving %" PATH_FMT "\n",
2562 oc->archiveMemberName ?
2563 oc->archiveMemberName : oc->fileName));
2564
2565 # if defined(OBJFORMAT_ELF)
2566 r = ocResolve_ELF ( oc );
2567 # elif defined(OBJFORMAT_PEi386)
2568 r = ocResolve_PEi386 ( oc );
2569 # elif defined(OBJFORMAT_MACHO)
2570 r = ocResolve_MachO ( oc );
2571 # else
2572 barf("ocTryLoad: not implemented on this platform");
2573 # endif
2574 if (!r) { return r; }
2575
2576 // run init/init_array/ctors/mod_init_func
2577
2578 loading_obj = oc; // tells foreignExportStablePtr what to do
2579 #if defined(OBJFORMAT_ELF)
2580 r = ocRunInit_ELF ( oc );
2581 #elif defined(OBJFORMAT_PEi386)
2582 r = ocRunInit_PEi386 ( oc );
2583 #elif defined(OBJFORMAT_MACHO)
2584 r = ocRunInit_MachO ( oc );
2585 #else
2586 barf("ocTryLoad: initializers not implemented on this platform");
2587 #endif
2588 loading_obj = NULL;
2589
2590 if (!r) { return r; }
2591
2592 oc->status = OBJECT_RESOLVED;
2593
2594 return 1;
2595 }
2596
2597 /* -----------------------------------------------------------------------------
2598 * resolve all the currently unlinked objects in memory
2599 *
2600 * Returns: 1 if ok, 0 on error.
2601 */
2602 static HsInt resolveObjs_ (void)
2603 {
2604 ObjectCode *oc;
2605 int r;
2606
2607 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
2608
2609 for (oc = objects; oc; oc = oc->next) {
2610 r = ocTryLoad(oc);
2611 if (!r)
2612 {
2613 return r;
2614 }
2615 }
2616
2617 #ifdef PROFILING
2618 // collect any new cost centres & CCSs that were defined during runInit
2619 initProfiling2();
2620 #endif
2621
2622 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
2623 return 1;
2624 }
2625
2626 HsInt resolveObjs (void)
2627 {
2628 ACQUIRE_LOCK(&linker_mutex);
2629 HsInt r = resolveObjs_();
2630 RELEASE_LOCK(&linker_mutex);
2631 return r;
2632 }
2633
2634 /* -----------------------------------------------------------------------------
2635 * delete an object from the pool
2636 */
2637 static HsInt unloadObj_ (pathchar *path, rtsBool just_purge)
2638 {
2639 ObjectCode *oc, *prev, *next;
2640 HsBool unloadedAnyObj = HS_BOOL_FALSE;
2641
2642 ASSERT(symhash != NULL);
2643 ASSERT(objects != NULL);
2644
2645 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
2646
2647 prev = NULL;
2648 for (oc = objects; oc; oc = next) {
2649 next = oc->next; // oc might be freed
2650
2651 if (!pathcmp(oc->fileName,path)) {
2652
2653 // these are both idempotent, so in just_purge mode we can
2654 // later call unloadObj() to really unload the object.
2655 removeOcSymbols(oc);
2656 freeOcStablePtrs(oc);
2657
2658 if (!just_purge) {
2659 if (prev == NULL) {
2660 objects = oc->next;
2661 } else {
2662 prev->next = oc->next;
2663 }
2664 ACQUIRE_LOCK(&linker_unloaded_mutex);
2665 oc->next = unloaded_objects;
2666 unloaded_objects = oc;
2667 oc->status = OBJECT_UNLOADED;
2668 RELEASE_LOCK(&linker_unloaded_mutex);
2669 // We do not own oc any more; it can be released at any time by
2670 // the GC in checkUnload().
2671 } else {
2672 prev = oc;
2673 }
2674
2675 /* This could be a member of an archive so continue
2676 * unloading other members. */
2677 unloadedAnyObj = HS_BOOL_TRUE;
2678 } else {
2679 prev = oc;
2680 }
2681 }
2682
2683 if (unloadedAnyObj) {
2684 return 1;
2685 }
2686 else {
2687 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
2688 return 0;
2689 }
2690 }
2691
2692 HsInt unloadObj (pathchar *path)
2693 {
2694 ACQUIRE_LOCK(&linker_mutex);
2695 HsInt r = unloadObj_(path, rtsFalse);
2696 RELEASE_LOCK(&linker_mutex);
2697 return r;
2698 }
2699
2700 HsInt purgeObj (pathchar *path)
2701 {
2702 ACQUIRE_LOCK(&linker_mutex);
2703 HsInt r = unloadObj_(path, rtsTrue);
2704 RELEASE_LOCK(&linker_mutex);
2705 return r;
2706 }
2707
2708 /* -----------------------------------------------------------------------------
2709 * Sanity checking. For each ObjectCode, maintain a list of address ranges
2710 * which may be prodded during relocation, and abort if we try and write
2711 * outside any of these.
2712 */
2713 static void
2714 addProddableBlock ( ObjectCode* oc, void* start, int size )
2715 {
2716 ProddableBlock* pb
2717 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
2718
2719 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
2720 ASSERT(size > 0);
2721 pb->start = start;
2722 pb->size = size;
2723 pb->next = oc->proddables;
2724 oc->proddables = pb;
2725 }
2726
2727 static void
2728 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
2729 {
2730 ProddableBlock* pb;
2731
2732 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
2733 char* s = (char*)(pb->start);
2734 char* e = s + pb->size;
2735 char* a = (char*)addr;
2736 if (a >= s && (a+size) <= e) return;
2737 }
2738 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
2739 }
2740
2741 static void freeProddableBlocks (ObjectCode *oc)
2742 {
2743 ProddableBlock *pb, *next;
2744
2745 for (pb = oc->proddables; pb != NULL; pb = next) {
2746 next = pb->next;
2747 stgFree(pb);
2748 }
2749 oc->proddables = NULL;
2750 }
2751
2752 /* -----------------------------------------------------------------------------
2753 * Section management.
2754 */
2755 static void
2756 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
2757 void* start, StgWord size, StgWord mapped_offset,
2758 void* mapped_start, StgWord mapped_size)
2759 {
2760 s->start = start; /* actual start of section in memory */
2761 s->size = size; /* actual size of section in memory */
2762 s->kind = kind;
2763 s->alloc = alloc;
2764 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
2765
2766 s->mapped_start = mapped_start; /* start of mmap() block */
2767 s->mapped_size = mapped_size; /* size of mmap() block */
2768
2769 IF_DEBUG(linker,
2770 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
2771 start, (void*)((StgWord)start + size),
2772 size, kind ));
2773 }
2774
2775
2776 /* --------------------------------------------------------------------------
2777 * Symbol Extras.
2778 * This is about allocating a small chunk of memory for every symbol in the
2779 * object file. We make sure that the SymboLExtras are always "in range" of
2780 * limited-range PC-relative instructions on various platforms by allocating
2781 * them right next to the object code itself.
2782 */
2783
2784 #if NEED_SYMBOL_EXTRAS
2785 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2786
2787 /*
2788 ocAllocateSymbolExtras
2789
2790 Allocate additional space at the end of the object file image to make room
2791 for jump islands (powerpc, x86_64, arm) and GOT entries (x86_64).
2792
2793 PowerPC relative branch instructions have a 24 bit displacement field.
2794 As PPC code is always 4-byte-aligned, this yields a +-32MB range.
2795 If a particular imported symbol is outside this range, we have to redirect
2796 the jump to a short piece of new code that just loads the 32bit absolute
2797 address and jumps there.
2798 On x86_64, PC-relative jumps and PC-relative accesses to the GOT are limited
2799 to 32 bits (+-2GB).
2800
2801 This function just allocates space for one SymbolExtra for every
2802 undefined symbol in the object file. The code for the jump islands is
2803 filled in by makeSymbolExtra below.
2804 */
2805
2806 static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
2807 {
2808 size_t n;
2809
2810 if (RTS_LINKER_USE_MMAP && USE_CONTIGUOUS_MMAP) {
2811 n = roundUpToPage(oc->fileSize);
2812
2813 /* Keep image and symbol_extras contiguous */
2814 void *new = mmapForLinker(n + (sizeof(SymbolExtra) * count),
2815 MAP_ANONYMOUS, -1, 0);
2816 if (new) {
2817 memcpy(new, oc->image, oc->fileSize);
2818 if (oc->imageMapped) {
2819 munmap(oc->image, n);
2820 }
2821 oc->image = new;
2822 oc->imageMapped = rtsTrue;
2823 oc->fileSize = n + (sizeof(SymbolExtra) * count);
2824 oc->symbol_extras = (SymbolExtra *) (oc->image + n);
2825 }
2826 else {
2827 oc->symbol_extras = NULL;
2828 return 0;
2829 }
2830 }
2831 else if( count > 0 ) {
2832 if (RTS_LINKER_USE_MMAP) {
2833 n = roundUpToPage(oc->fileSize);
2834
2835 oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8);
2836 if (oc->symbol_extras == NULL) return 0;
2837 }
2838 else {
2839 // round up to the nearest 4
2840 int aligned = (oc->fileSize + 3) & ~3;
2841 int misalignment = oc->misalignment;
2842
2843 oc->image -= misalignment;
2844 oc->image = stgReallocBytes( oc->image,
2845 misalignment +
2846 aligned + sizeof (SymbolExtra) * count,
2847 "ocAllocateSymbolExtras" );
2848 oc->image += misalignment;
2849
2850 oc->symbol_extras = (SymbolExtra *) (oc->image + aligned);
2851 }
2852 }
2853
2854 if (oc->symbol_extras != NULL) {
2855 memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
2856 }
2857
2858 oc->first_symbol_extra = first;
2859 oc->n_symbol_extras = count;
2860
2861 return 1;
2862 }
2863
2864 #endif
2865 #endif // NEED_SYMBOL_EXTRAS
2866
2867 #if defined(arm_HOST_ARCH)
2868
2869 static void
2870 ocFlushInstructionCache( ObjectCode *oc )
2871 {
2872 int i;
2873 // Object code
2874 for (i=0; i < oc->n_sections; i++) {
2875 Section *s = &oc->sections[i];
2876 // This is a bit too broad but we don't have any way to determine what
2877 // is certainly code
2878 if (s->kind == SECTIONKIND_CODE_OR_RODATA)
2879 __clear_cache(s->start, (void*) ((uintptr_t) s->start + s->size));
2880 }
2881
2882 // Jump islands
2883 // Note the (+1) to ensure that the last symbol extra is covered by the
2884 // flush.
2885 __clear_cache(oc->symbol_extras, &oc->symbol_extras[oc->n_symbol_extras+1]);
2886 }
2887
2888 #endif
2889
2890 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2891 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2892
2893 static SymbolExtra* makeSymbolExtra( ObjectCode* oc,
2894 unsigned long symbolNumber,
2895 unsigned long target )
2896 {
2897 SymbolExtra *extra;
2898
2899 ASSERT( symbolNumber >= oc->first_symbol_extra
2900 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2901
2902 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2903
2904 #ifdef powerpc_HOST_ARCH
2905 // lis r12, hi16(target)
2906 extra->jumpIsland.lis_r12 = 0x3d80;
2907 extra->jumpIsland.hi_addr = target >> 16;
2908
2909 // ori r12, r12, lo16(target)
2910 extra->jumpIsland.ori_r12_r12 = 0x618c;
2911 extra->jumpIsland.lo_addr = target & 0xffff;
2912
2913 // mtctr r12
2914 extra->jumpIsland.mtctr_r12 = 0x7d8903a6;
2915
2916 // bctr
2917 extra->jumpIsland.bctr = 0x4e800420;
2918 #endif
2919 #ifdef x86_64_HOST_ARCH
2920 // jmp *-14(%rip)
2921 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
2922 extra->addr = target;
2923 memcpy(extra->jumpIsland, jmp, 6);
2924 #endif
2925
2926 return extra;
2927 }
2928
2929 #endif
2930 #endif // defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2931
2932 #ifdef arm_HOST_ARCH
2933 static SymbolExtra* makeArmSymbolExtra( ObjectCode* oc,
2934 unsigned long symbolNumber,
2935 unsigned long target,
2936 int fromThumb,
2937 int toThumb )
2938 {
2939 SymbolExtra *extra;
2940
2941 ASSERT( symbolNumber >= oc->first_symbol_extra
2942 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2943
2944 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2945
2946 // Make sure instruction mode bit is set properly
2947 if (toThumb)
2948 target |= 1;
2949 else
2950 target &= ~1;
2951
2952 if (!fromThumb) {
2953 // In ARM encoding:
2954 // movw r12, #0
2955 // movt r12, #0
2956 // bx r12
2957 uint32_t code[] = { 0xe300c000, 0xe340c000, 0xe12fff1c };
2958
2959 // Patch lower half-word into movw
2960 code[0] |= ((target>>12) & 0xf) << 16;
2961 code[0] |= target & 0xfff;
2962 // Patch upper half-word into movt
2963 target >>= 16;
2964 code[1] |= ((target>>12) & 0xf) << 16;
2965 code[1] |= target & 0xfff;
2966
2967 memcpy(extra->jumpIsland, code, 12);
2968
2969 } else {
2970 // In Thumb encoding:
2971 // movw r12, #0
2972 // movt r12, #0
2973 // bx r12
2974 uint16_t code[] = { 0xf240, 0x0c00,
2975 0xf2c0, 0x0c00,
2976 0x4760 };
2977
2978 // Patch lower half-word into movw
2979 code[0] |= (target>>12) & 0xf;
2980 code[0] |= ((target>>11) & 0x1) << 10;
2981 code[1] |= ((target>>8) & 0x7) << 12;
2982 code[1] |= target & 0xff;
2983 // Patch upper half-word into movt
2984 target >>= 16;
2985 code[2] |= (target>>12) & 0xf;
2986 code[2] |= ((target>>11) & 0x1) << 10;
2987 code[3] |= ((target>>8) & 0x7) << 12;
2988 code[3] |= target & 0xff;
2989
2990 memcpy(extra->jumpIsland, code, 10);
2991 }
2992
2993 return extra;
2994 }
2995 #endif // arm_HOST_ARCH
2996
2997 /* --------------------------------------------------------------------------
2998 * PowerPC specifics (instruction cache flushing)
2999 * ------------------------------------------------------------------------*/
3000
3001 #ifdef powerpc_HOST_ARCH
3002 /*
3003 ocFlushInstructionCache
3004
3005 Flush the data & instruction caches.
3006 Because the PPC has split data/instruction caches, we have to
3007 do that whenever we modify code at runtime.
3008 */
3009
3010 static void
3011 ocFlushInstructionCacheFrom(void* begin, size_t length)
3012 {
3013 size_t n = (length + 3) / 4;
3014 unsigned long* p = begin;
3015
3016 while (n--)
3017 {
3018 __asm__ volatile ( "dcbf 0,%0\n\t"
3019 "sync\n\t"
3020 "icbi 0,%0"
3021 :
3022 : "r" (p)
3023 );
3024 p++;
3025 }
3026 __asm__ volatile ( "sync\n\t"
3027 "isync"
3028 );
3029 }
3030
3031 static void
3032 ocFlushInstructionCache( ObjectCode *oc )
3033 {
3034 /* The main object code */
3035 ocFlushInstructionCacheFrom(oc->image + oc->misalignment, oc->fileSize);
3036
3037 /* Jump Islands */
3038 ocFlushInstructionCacheFrom(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
3039 }
3040 #endif /* powerpc_HOST_ARCH */
3041
3042
3043 /* --------------------------------------------------------------------------
3044 * PEi386(+) specifics (Win32 targets)
3045 * ------------------------------------------------------------------------*/
3046
3047 /* The information for this linker comes from
3048 Microsoft Portable Executable
3049 and Common Object File Format Specification
3050 revision 8.3 February 2013
3051
3052 It can be found online at:
3053
3054 https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx
3055
3056 Things move, so if that fails, try searching for it via
3057
3058 http://www.google.com/search?q=PE+COFF+specification
3059
3060 The ultimate reference for the PE format is the Winnt.h
3061 header file that comes with the Platform SDKs; as always,
3062 implementations will drift wrt their documentation.
3063
3064 A good background article on the PE format is Matt Pietrek's
3065 March 1994 article in Microsoft System Journal (MSJ)
3066 (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
3067 Win32 Portable Executable File Format." The info in there
3068 has recently been updated in a two part article in
3069 MSDN magazine, issues Feb and March 2002,
3070 "Inside Windows: An In-Depth Look into the Win32 Portable
3071 Executable File Format"
3072
3073 John Levine's book "Linkers and Loaders" contains useful
3074 info on PE too.
3075
3076 The PE specification doesn't specify how to do the actual
3077 relocations. For this reason, and because both PE and ELF are
3078 based on COFF, the relocations for the PEi386+ code is based on
3079 the ELF relocations for the equivalent relocation type.
3080
3081 The ELF ABI can be found at
3082
3083 http://www.x86-64.org/documentation/abi.pdf
3084
3085 The current code is based on version 0.99.6 - October 2013
3086 */
3087
3088
3089 #if defined(OBJFORMAT_PEi386)
3090
3091 static int verifyCOFFHeader ( COFF_header *hdr, pathchar *filename);
3092
3093 /* We assume file pointer is right at the
3094 beginning of COFF object.
3095 */
3096 static char *
3097 allocateImageAndTrampolines (
3098 pathchar* arch_name, char* member_name,
3099 FILE* f USED_IF_x86_64_HOST_ARCH,
3100 int size,
3101 int isThin USED_IF_x86_64_HOST_ARCH)
3102 {
3103 char* image;
3104 #if defined(x86_64_HOST_ARCH)
3105 if (!isThin)
3106 {
3107 /* PeCoff contains number of symbols right in it's header, so
3108 we can reserve the room for symbolExtras right here. */
3109 COFF_header hdr;
3110 size_t n;
3111
3112 n = fread(&hdr, 1, sizeof_COFF_header, f);
3113 if (n != sizeof(COFF_header)) {
3114 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%S'",
3115 member_name, arch_name);
3116 return NULL;
3117 }
3118 fseek(f, -sizeof_COFF_header, SEEK_CUR);
3119
3120 if (!verifyCOFFHeader(&hdr, arch_name)) {
3121 return 0;
3122 }
3123
3124 /* We get back 8-byte aligned memory (is that guaranteed?), but
3125 the offsets to the sections within the file are all 4 mod 8
3126 (is that guaranteed?). We therefore need to offset the image
3127 by 4, so that all the pointers are 8-byte aligned, so that
3128 pointer tagging works. */
3129 /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
3130 which equals to 4 for 64-bit case and 0 for 32-bit case. */
3131 /* We allocate trampolines area for all symbols right behind
3132 image data, aligned on 8. */
3133 size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
3134 + hdr.NumberOfSymbols * sizeof(SymbolExtra);
3135 }
3136 #endif
3137 image = VirtualAlloc(NULL, size,
3138 MEM_RESERVE | MEM_COMMIT,
3139 PAGE_EXECUTE_READWRITE);
3140
3141 if (image == NULL) {
3142 errorBelch("%" PATH_FMT ": failed to allocate memory for image for %s",
3143 arch_name, member_name);
3144 return NULL;
3145 }
3146
3147 return image + PEi386_IMAGE_OFFSET;
3148 }
3149
3150 static int findAndLoadImportLibrary(ObjectCode* oc)
3151 {
3152 int i;
3153
3154 COFF_header* hdr;
3155 COFF_section* sectab;
3156 COFF_symbol* symtab;
3157 UChar* strtab;
3158
3159 hdr = (COFF_header*)(oc->image);
3160 sectab = (COFF_section*)(
3161 ((UChar*)(oc->image))
3162 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3163 );
3164
3165 symtab = (COFF_symbol*)(
3166 ((UChar*)(oc->image))
3167 + hdr->PointerToSymbolTable
3168 );
3169
3170 strtab = ((UChar*)symtab)
3171 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3172
3173 for (i = 0; i < oc->n_sections; i++)
3174 {
3175 COFF_section* sectab_i
3176 = (COFF_section*)myindex(sizeof_COFF_section, sectab, i);
3177
3178 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3179
3180 // Find the first entry containing a valid .idata$7 section.
3181 if (strcmp(secname, ".idata$7") == 0) {
3182 /* First load the containing DLL if not loaded. */
3183 Section section = oc->sections[i];
3184
3185 pathchar* dirName = pathdir(oc->fileName);
3186 HsPtr token = addLibrarySearchPath(dirName);
3187 stgFree(dirName);
3188 char* dllName = (char*)section.start;
3189
3190 if (strlen(dllName) == 0 || dllName[0] == ' ')
3191 {
3192 continue;
3193 }
3194
3195 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand '%ls' => `%s'\n", oc->fileName, dllName));
3196
3197 pathchar* dll = mkPath(dllName);
3198 removeLibrarySearchPath(token);
3199
3200 const char* result = addDLL(dll);
3201 stgFree(dll);
3202
3203 if (result != NULL) {
3204 errorBelch("Could not load `%s'. Reason: %s\n", (char*)dllName, result);
3205 return 0;
3206 }
3207
3208 break;
3209 }
3210
3211 stgFree(secname);
3212 }
3213
3214 return 1;
3215 }
3216
3217 static int checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f)
3218 {
3219 char* image;
3220 static HsBool load_dll_warn = HS_BOOL_FALSE;
3221
3222 if (load_dll_warn) { return 0; }
3223
3224 /* Based on Import Library specification. PE Spec section 7.1 */
3225
3226 COFF_import_header hdr;
3227 size_t n;
3228
3229 n = fread(&hdr, 1, sizeof_COFF_import_Header, f);
3230 if (n != sizeof(COFF_header)) {
3231 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%" PATH_FMT "'\n",
3232 member_name, arch_name);
3233 return 0;
3234 }
3235
3236 if (hdr.Sig1 != 0x0 || hdr.Sig2 != 0xFFFF) {
3237 fseek(f, -sizeof_COFF_import_Header, SEEK_CUR);
3238 IF_DEBUG(linker, debugBelch("loadArchive: Object `%s` is not an import lib. Skipping...\n", member_name));
3239 return 0;
3240 }
3241
3242 IF_DEBUG(linker, debugBelch("loadArchive: reading %d bytes at %ld\n", hdr.SizeOfData, ftell(f)));
3243
3244 image = malloc(hdr.SizeOfData);
3245 n = fread(image, 1, hdr.SizeOfData, f);
3246 if (n != hdr.SizeOfData) {
3247 errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n",
3248 member_name, arch_name);
3249 }
3250
3251 char* symbol = strtok(image, "\0");
3252 int symLen = strlen(symbol) + 1;
3253 int nameLen = n - symLen;
3254 char* dllName = malloc(sizeof(char) * nameLen);
3255 dllName = strncpy(dllName, image + symLen, nameLen);
3256 pathchar* dll = malloc(sizeof(wchar_t) * nameLen);
3257 mbstowcs(dll, dllName, nameLen);
3258 free(dllName);
3259
3260 IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%ls'\n", symbol, dll));
3261 const char* result = addDLL(dll);
3262
3263 free(image);
3264
3265 if (result != NULL) {
3266 errorBelch("Could not load `%ls'. Reason: %s\n", dll, result);
3267 load_dll_warn = HS_BOOL_TRUE;
3268
3269 free(dll);
3270 fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
3271 return 0;
3272 }
3273
3274 free(dll);
3275 return 1;
3276 }
3277
3278 /* We use myindex to calculate array addresses, rather than
3279 simply doing the normal subscript thing. That's because
3280 some of the above structs have sizes which are not
3281 a whole number of words. GCC rounds their sizes up to a
3282 whole number of words, which means that the address calcs
3283 arising from using normal C indexing or pointer arithmetic
3284 are just plain wrong. Sigh.
3285 */
3286 static UChar *
3287 myindex ( int scale, void* base, int index )
3288 {
3289 return
3290 ((UChar*)base) + scale * index;
3291 }
3292
3293
3294 static void
3295 printName ( UChar* name, UChar* strtab )
3296 {
3297 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3298 UInt32 strtab_offset = * (UInt32*)(name+4);
3299 debugBelch("%s", strtab + strtab_offset );
3300 } else {
3301 int i;
3302 for (i = 0; i < 8; i++) {
3303 if (name[i] == 0) break;
3304 debugBelch("%c", name[i] );
3305 }
3306 }
3307 }
3308
3309
3310 static void
3311 copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
3312 {
3313 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3314 UInt32 strtab_offset = * (UInt32*)(name+4);
3315 strncpy ( (char*)dst, (char*)strtab+strtab_offset, dstSize );
3316 dst[dstSize-1] = 0;
3317 } else {
3318 int i = 0;
3319 while (1) {
3320 if (i >= 8) break;
3321 if (name[i] == 0) break;
3322 dst[i] = name[i];
3323 i++;
3324 }
3325 dst[i] = 0;
3326 }
3327 }
3328
3329
3330 static UChar *
3331 cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
3332 {
3333 UChar* newstr;
3334 /* If the string is longer than 8 bytes, look in the
3335 string table for it -- this will be correctly zero terminated.
3336 */
3337 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3338 UInt32 strtab_offset = * (UInt32*)(name+4);
3339 return ((UChar*)strtab) + strtab_offset;
3340 }
3341 /* Otherwise, if shorter than 8 bytes, return the original,
3342 which by defn is correctly terminated.
3343 */
3344 if (name[7]==0) return name;
3345 /* The annoying case: 8 bytes. Copy into a temporary
3346 (XXX which is never freed ...)
3347 */
3348 newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
3349 ASSERT(newstr);
3350 strncpy((char*)newstr,(char*)name,8);
3351 newstr[8] = 0;
3352 return newstr;
3353 }
3354
3355 /* Getting the name of a section is mildly tricky, so we make a
3356 function for it. Sadly, in one case we have to copy the string
3357 (when it is exactly 8 bytes long there's no trailing '\0'), so for
3358 consistency we *always* copy the string; the caller must free it
3359 */
3360 static char *
3361 cstring_from_section_name (UChar* name, UChar* strtab)
3362 {
3363 char *newstr;
3364
3365 if (name[0]=='/') {
3366 int strtab_offset = strtol((char*)name+1,NULL,10);
3367 int len = strlen(((char*)strtab) + strtab_offset);
3368
3369 newstr = stgMallocBytes(len+1, "cstring_from_section_symbol_name");
3370 strcpy((char*)newstr, (char*)((UChar*)strtab) + strtab_offset);
3371 return newstr;
3372 }
3373 else
3374 {
3375 newstr = stgMallocBytes(9, "cstring_from_section_symbol_name");
3376 ASSERT(newstr);
3377 strncpy((char*)newstr,(char*)name,8);
3378 newstr[8] = 0;
3379 return newstr;
3380 }
3381 }
3382
3383 /* See Note [mingw-w64 name decoration scheme] */
3384 #ifndef x86_64_HOST_ARCH
3385 static void
3386 zapTrailingAtSign ( UChar* sym )
3387 {
3388 # define my_isdigit(c) ((c) >= '0' && (c) <= '9')
3389 int i, j;
3390 if (sym[0] == 0) return;
3391 i = 0;
3392 while (sym[i] != 0) i++;
3393 i--;
3394 j = i;
3395 while (j > 0 && my_isdigit(sym[j])) j--;
3396 if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
3397 # undef my_isdigit
3398 }
3399 #endif
3400
3401 /* See Note [mingw-w64 name decoration scheme] */
3402 #ifndef x86_64_HOST_ARCH
3403 #define STRIP_LEADING_UNDERSCORE 1
3404 #else
3405 #define STRIP_LEADING_UNDERSCORE 0
3406 #endif
3407
3408 /*
3409 Note [mingw-w64 name decoration scheme]
3410
3411 What's going on with name decoration? Well, original code
3412 have some crufty and ad-hocish paths related mostly to very old
3413 mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty
3414 uniform and MS-compatible decoration scheme across its tools and runtime.
3415
3416 The scheme is pretty straightforward: on 32 bit objects symbols are exported
3417 with underscore prepended (and @ + stack size suffix appended for stdcall
3418 functions), on 64 bits no underscore is prepended and no suffix is appended
3419 because we have no stdcall convention on 64 bits.
3420
3421 See #9218
3422 */
3423
3424 static SymbolAddr*
3425 lookupSymbolInDLLs ( UChar *lbl )
3426 {
3427 OpenedDLL* o_dll;
3428 SymbolAddr* sym;
3429
3430 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
3431 /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
3432
3433 sym = GetProcAddress(o_dll->instance, (char*)(lbl+STRIP_LEADING_UNDERSCORE));
3434 if (sym != NULL) {
3435 /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
3436 return sym;
3437 }
3438
3439 /* Ticket #2283.
3440 Long description: http://support.microsoft.com/kb/132044
3441 tl;dr:
3442 If C/C++ compiler sees __declspec(dllimport) ... foo ...
3443 it generates call *__imp_foo, and __imp_foo here has exactly
3444 the same semantics as in __imp_foo = GetProcAddress(..., "foo")
3445 */
3446 if (sym == NULL && strncmp ((const char*)lbl, "__imp_", 6) == 0) {
3447 sym = GetProcAddress(o_dll->instance, (char*)(lbl+6+STRIP_LEADING_UNDERSCORE));
3448 if (sym != NULL) {
3449 IndirectAddr* ret;
3450 ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
3451 ret->addr = sym;
3452 ret->next = indirects;
3453 indirects = ret;
3454 IF_DEBUG(linker,
3455 debugBelch("warning: %s from %S is linked instead of %s\n",
3456 (char*)(lbl+6+STRIP_LEADING_UNDERSCORE), o_dll->name, (char*)lbl));
3457 return (void*) & ret->addr;
3458 }
3459 }
3460
3461 sym = GetProcAddress(o_dll->instance, (char*)lbl);
3462 if (sym != NULL) {
3463 /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
3464 return sym;
3465 }
3466 }
3467 return NULL;
3468 }
3469
3470 static int
3471 verifyCOFFHeader (COFF_header *hdr, pathchar *fileName)
3472 {
3473 #if defined(i386_HOST_ARCH)
3474 if (hdr->Machine != 0x14c) {
3475 errorBelch("%" PATH_FMT ": Not x86 PEi386", fileName);
3476 return 0;
3477 }
3478 #elif defined(x86_64_HOST_ARCH)
3479 if (hdr->Machine != 0x8664) {
3480 errorBelch("%" PATH_FMT ": Not x86_64 PEi386", fileName);
3481 return 0;
3482 }
3483 #else
3484 errorBelch("PEi386 not supported on this arch");
3485 #endif
3486
3487 if (hdr->SizeOfOptionalHeader != 0) {
3488 errorBelch("%" PATH_FMT ": PEi386 with nonempty optional header",
3489 fileName);
3490 return 0;
3491 }
3492 if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
3493 (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
3494 (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
3495 (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
3496 errorBelch("%" PATH_FMT ": Not a PEi386 object file", fileName);
3497 return 0;
3498 }
3499 if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
3500 /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
3501 errorBelch("%" PATH_FMT ": Invalid PEi386 word size or endiannness: %d",
3502 fileName,
3503 (int)(hdr->Characteristics));
3504 return 0;
3505 }
3506 return 1;
3507 }
3508
3509 static int
3510 ocVerifyImage_PEi386 ( ObjectCode* oc )
3511 {
3512 int i;
3513 UInt32 j, noRelocs;
3514 COFF_header* hdr;
3515 COFF_section* sectab;
3516 COFF_symbol* symtab;
3517 UChar* strtab;
3518 /* debugBelch("\nLOADING %s\n", oc->fileName); */
3519 hdr = (COFF_header*)(oc->image);
3520 sectab = (COFF_section*) (
3521 ((UChar*)(oc->image))
3522 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3523 );
3524 symtab = (COFF_symbol*) (
3525 ((UChar*)(oc->image))
3526 + hdr->PointerToSymbolTable
3527 );
3528 strtab = ((UChar*)symtab)
3529 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3530
3531 if (!verifyCOFFHeader(hdr, oc->fileName)) {
3532 return 0;
3533 }
3534
3535 /* If the string table size is way crazy, this might indicate that
3536 there are more than 64k relocations, despite claims to the
3537 contrary. Hence this test. */
3538 /* debugBelch("strtab size %d\n", * (UInt32*)strtab); */
3539 #if 0
3540 if ( (*(UInt32*)strtab) > 600000 ) {
3541 /* Note that 600k has no special significance other than being
3542 big enough to handle the almost-2MB-sized lumps that
3543 constitute HSwin32*.o. */
3544 debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
3545 return 0;
3546 }
3547 #endif
3548
3549 /* .BSS Section is initialized in ocGetNames_PEi386
3550 but we need the Sections array initialized here already. */
3551 Section *sections;
3552 sections = (Section*)stgCallocBytes(
3553 sizeof(Section),
3554 hdr->NumberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */
3555 "ocVerifyImage_PEi386(sections)");
3556 oc->sections = sections;
3557 oc->n_sections = hdr->NumberOfSections + 1;
3558
3559 /* Initialize the Sections */
3560 for (i = 0; i < hdr->NumberOfSections; i++) {
3561 COFF_section* sectab_i
3562 = (COFF_section*)
3563 myindex(sizeof_COFF_section, sectab, i);
3564
3565 /* Calculate the start of the data section */
3566 sections[i].start = oc->image + sectab_i->PointerToRawData;
3567 }
3568
3569 /* No further verification after this point; only debug printing. */
3570 i = 0;
3571 IF_DEBUG(linker, i=1);
3572 if (i == 0) return 1;
3573
3574 debugBelch("sectab offset = %" FMT_SizeT "\n",
3575 ((UChar*)sectab) - ((UChar*)hdr) );
3576 debugBelch("symtab offset = %" FMT_SizeT "\n",
3577 ((UChar*)symtab) - ((UChar*)hdr) );
3578 debugBelch("strtab offset = %" FMT_SizeT "\n",
3579 ((UChar*)strtab) - ((UChar*)hdr) );
3580
3581 debugBelch("\n" );
3582 debugBelch( "Machine: 0x%x\n", (UInt32)(hdr->Machine) );
3583 debugBelch( "# sections: %d\n", (UInt32)(hdr->NumberOfSections) );
3584 debugBelch( "time/date: 0x%x\n", (UInt32)(hdr->TimeDateStamp) );
3585 debugBelch( "symtab offset: %d\n", (UInt32)(hdr->PointerToSymbolTable) );
3586 debugBelch( "# symbols: %d\n", (UInt32)(hdr->NumberOfSymbols) );
3587 debugBelch( "sz of opt hdr: %d\n", (UInt32)(hdr->SizeOfOptionalHeader) );
3588 debugBelch( "characteristics: 0x%x\n", (UInt32)(hdr->Characteristics) );
3589
3590 /* Print the section table. */
3591 debugBelch("\n" );
3592 for (i = 0; i < hdr->NumberOfSections; i++) {
3593 COFF_reloc* reltab;
3594 COFF_section* sectab_i
3595 = (COFF_section*)
3596 myindex ( sizeof_COFF_section, sectab, i );
3597 Section section = sections[i];
3598 debugBelch(
3599 "\n"
3600 "section %d\n"
3601 " name `",
3602 i
3603 );
3604 printName ( sectab_i->Name, strtab );
3605 debugBelch(
3606 "'\n"
3607 " vsize %d\n"
3608 " vaddr %d\n"
3609 " data sz %d\n"
3610 " data off 0x%p\n"
3611 " num rel %d\n"
3612 " off rel %d\n"
3613 " ptr raw 0x%x\n",
3614 sectab_i->VirtualSize,
3615 sectab_i->VirtualAddress,
3616 sectab_i->SizeOfRawData,
3617 section.start,
3618 sectab_i->NumberOfRelocations,
3619 sectab_i->PointerToRelocations,
3620 sectab_i->PointerToRawData
3621 );
3622 reltab = (COFF_reloc*) (
3623 ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
3624 );
3625
3626 if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
3627 /* If the relocation field (a short) has overflowed, the
3628 * real count can be found in the first reloc entry.
3629 *
3630 * See Section 4.1 (last para) of the PE spec (rev6.0).
3631 */
3632 COFF_reloc* rel = (COFF_reloc*)
3633 myindex ( sizeof_COFF_reloc, reltab, 0 );
3634 noRelocs = rel->VirtualAddress;
3635 j = 1;
3636 } else {
3637 noRelocs = sectab_i->NumberOfRelocations;
3638 j = 0;
3639 }
3640
3641 for (; j < noRelocs; j++) {
3642 COFF_symbol* sym;
3643 COFF_reloc* rel = (COFF_reloc*)
3644 myindex ( sizeof_COFF_reloc, reltab, j );
3645 debugBelch(
3646 " type 0x%-4x vaddr 0x%-8x name `",
3647 (UInt32)rel->Type,
3648 rel->VirtualAddress );
3649 sym = (COFF_symbol*)
3650 myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
3651 /* Hmm..mysterious looking offset - what's it for? SOF */
3652 printName ( sym->Name, strtab -10 );
3653 debugBelch("'\n" );
3654 }
3655
3656 debugBelch("\n" );
3657 }
3658 debugBelch("\n" );
3659 debugBelch("string table has size 0x%x\n", * (UInt32*)strtab );
3660 debugBelch("---START of string table---\n");
3661 for (i = 4; i < *(Int32*)strtab; i++) {
3662 if (strtab[i] == 0)
3663 debugBelch("\n"); else
3664 debugBelch("%c", strtab[i] );
3665 }
3666 debugBelch("--- END of string table---\n");
3667
3668 debugBelch("\n" );
3669 i = 0;
3670 while (1) {
3671 COFF_symbol* symtab_i;
3672 if (i >= (Int32)(hdr->NumberOfSymbols)) break;
3673 symtab_i = (COFF_symbol*)
3674 myindex ( sizeof_COFF_symbol, symtab, i );
3675 debugBelch(
3676 "symbol %d\n"
3677 " name `",
3678 i
3679 );
3680 printName ( symtab_i->Name, strtab );
3681 debugBelch(
3682 "'\n"
3683 " value 0x%x\n"
3684 " 1+sec# %d\n"
3685 " type 0x%x\n"
3686 " sclass 0x%x\n"
3687 " nAux %d\n",
3688 symtab_i->Value,
3689 (Int32)(symtab_i->SectionNumber),
3690 (UInt32)symtab_i->Type,
3691 (UInt32)symtab_i->StorageClass,
3692 (UInt32)symtab_i->NumberOfAuxSymbols
3693 );
3694 i += symtab_i->NumberOfAuxSymbols;
3695 i++;
3696 }
3697
3698 debugBelch("\n" );
3699 return 1;
3700 }
3701
3702
3703 static int
3704 ocGetNames_PEi386 ( ObjectCode* oc )
3705 {
3706 COFF_header* hdr;
3707 COFF_section* sectab;
3708 COFF_symbol* symtab;
3709 UChar* strtab;
3710
3711 UChar* sname;
3712 SymbolAddr* addr;
3713 int i;
3714
3715 hdr = (COFF_header*)(oc->image);
3716 sectab = (COFF_section*) (
3717 ((UChar*)(oc->image))
3718 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3719 );
3720 symtab = (COFF_symbol*) (
3721 ((UChar*)(oc->image))
3722 + hdr->PointerToSymbolTable
3723 );
3724 strtab = ((UChar*)(oc->image))
3725 + hdr->PointerToSymbolTable
3726 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3727
3728 /* Allocate space for any (local, anonymous) .bss sections. */
3729
3730 for (i = 0; i < hdr->NumberOfSections; i++) {
3731 UInt32 bss_sz;
3732 UChar* zspace;
3733 COFF_section* sectab_i
3734 = (COFF_section*)
3735 myindex ( sizeof_COFF_section, sectab, i );
3736
3737 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3738
3739 if (0 != strcmp(secname, ".bss")) {
3740 stgFree(secname);
3741 continue;
3742 }
3743
3744 stgFree(secname);
3745
3746 /* sof 10/05: the PE spec text isn't too clear regarding what
3747 * the SizeOfRawData field is supposed to hold for object
3748 * file sections containing just uninitialized data -- for executables,
3749 * it is supposed to be zero; unclear what it's supposed to be
3750 * for object files. However, VirtualSize is guaranteed to be
3751 * zero for object files, which definitely suggests that SizeOfRawData
3752 * will be non-zero (where else would the size of this .bss section be
3753 * stored?) Looking at the COFF_section info for incoming object files,
3754 * this certainly appears to be the case.
3755 *
3756 * => I suspect we've been incorrectly handling .bss sections in (relocatable)
3757 * object files up until now. This turned out to bite us with ghc-6.4.1's use
3758 * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
3759 * variable decls into the .bss section. (The specific function in Q which
3760 * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
3761 */
3762 if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
3763 /* This is a non-empty .bss section.
3764 Allocate zeroed space for it */
3765 bss_sz = sectab_i->VirtualSize;
3766 if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
3767 zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
3768 oc->sections[i].start = zspace;
3769 addProddableBlock(oc, zspace, bss_sz);
3770 /* debugBelch("BSS anon section at 0x%x\n", zspace); */
3771 }
3772
3773 /* Copy section information into the ObjectCode. */
3774
3775 for (i = 0; i < hdr->NumberOfSections; i++) {
3776 UChar* start;
3777 UChar* end;
3778 UInt32 sz;
3779
3780 /* By default consider all section as CODE or DATA, which means we want to load them. */
3781 SectionKind kind
3782 = SECTIONKIND_CODE_OR_RODATA;
3783 COFF_section* sectab_i
3784 = (COFF_section*)
3785 myindex ( sizeof_COFF_section, sectab, i );
3786 Section section = oc->sections[i];
3787
3788 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3789
3790 IF_DEBUG(linker, debugBelch("section name = %s\n", secname ));
3791
3792 /* The PE file section flag indicates whether the section contains code or data. */
3793 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE ||
3794 sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
3795 kind = SECTIONKIND_CODE_OR_RODATA;
3796
3797 /* Check next if it contains any uninitialized data */
3798 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_UNINITIALIZED_DATA)
3799 kind = SECTIONKIND_RWDATA;
3800
3801 /* Finally check if it can be discarded. This will also ignore .debug sections */
3802 if (sectab_i->Characteristics & MYIMAGE_SCN_MEM_DISCARDABLE ||
3803 sectab_i->Characteristics & MYIMAGE_SCN_LNK_REMOVE)
3804 kind = SECTIONKIND_OTHER;
3805
3806 if (0==strcmp(".ctors", (char*)secname))
3807 kind = SECTIONKIND_INIT_ARRAY;
3808
3809 ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->VirtualSize == 0);
3810 sz = sectab_i->SizeOfRawData;
3811 if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;
3812
3813 start = section.start;
3814 end = start + sz - 1;
3815
3816 if (kind != SECTIONKIND_OTHER && end >= start) {
3817 addSection(&oc->sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
3818 addProddableBlock(oc, start, sz);
3819 }
3820
3821 stgFree(secname);
3822 }
3823
3824 /* Copy exported symbols into the ObjectCode. */
3825
3826 oc->n_symbols = hdr->NumberOfSymbols;
3827 oc->symbols = stgCallocBytes(sizeof(SymbolName*), oc->n_symbols,
3828 "ocGetNames_PEi386(oc->symbols)");
3829
3830 /* Work out the size of the global BSS section */
3831 StgWord globalBssSize = 0;
3832 for (i=0; i < (int)hdr->NumberOfSymbols; i++) {
3833 COFF_symbol* symtab_i;
3834 symtab_i = (COFF_symbol*)
3835 myindex ( sizeof_COFF_symbol, symtab, i );
3836 if (symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3837 && symtab_i->Value > 0
3838 && symtab_i->StorageClass != MYIMAGE_SYM_CLASS_SECTION) {
3839 globalBssSize += symtab_i->Value;
3840 }
3841 i += symtab_i->NumberOfAuxSymbols;
3842 }
3843
3844 /* Allocate BSS space */
3845 SymbolAddr* bss = NULL;
3846 if (globalBssSize > 0) {
3847 bss = stgCallocBytes(1, globalBssSize,
3848 "ocGetNames_PEi386(non-anonymous bss)");
3849 addSection(&oc->sections[oc->n_sections-1],
3850 SECTIONKIND_RWDATA, SECTION_MALLOC,
3851 bss, globalBssSize, 0, 0, 0);
3852 IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
3853 addProddableBlock(oc, bss, globalBssSize);
3854 } else {
3855 addSection(&oc->sections[oc->n_sections-1],
3856 SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
3857 }
3858
3859 for (i = 0; i < oc->n_symbols; i++) {
3860 COFF_symbol* symtab_i;
3861 symtab_i = (COFF_symbol*)
3862 myindex ( sizeof_COFF_symbol, symtab, i );
3863
3864 addr = NULL;
3865 HsBool isWeak = HS_BOOL_FALSE;
3866 if ( symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED
3867 && symtab_i->SectionNumber > 0) {
3868 /* This symbol is global and defined, viz, exported */
3869 /* for MYIMAGE_SYMCLASS_EXTERNAL
3870 && !MYIMAGE_SYM_UNDEFINED,
3871 the address of the symbol is:
3872 address of relevant section + offset in section
3873 */
3874 COFF_section* sectabent
3875 = (COFF_section*) myindex ( sizeof_COFF_section,
3876 sectab,
3877 symtab_i->SectionNumber-1 );
3878 if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL
3879 || ( symtab_i->StorageClass == MYIMAGE_SYM_CLASS_STATIC
3880 && sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT)
3881 ) {
3882 addr = (void*)((size_t)oc->sections[symtab_i->SectionNumber-1].start
3883 + symtab_i->Value);
3884 if (sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT) {
3885 isWeak = HS_BOOL_TRUE;
3886 }
3887 }
3888 }
3889 else if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_WEAK_EXTERNAL) {
3890 isWeak = HS_BOOL_TRUE;
3891 }
3892 else if ( symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3893 && symtab_i->Value > 0) {
3894 /* This symbol isn't in any section at all, ie, global bss.
3895 Allocate zeroed space for it from the BSS section */
3896 addr = bss;
3897 bss = (SymbolAddr*)((StgWord)bss + (StgWord)symtab_i->Value);
3898 IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symtab_i->Value));
3899 }
3900
3901 sname = cstring_from_COFF_symbol_name(symtab_i->Name, strtab);
3902 if (addr != NULL || isWeak == HS_BOOL_TRUE) {
3903
3904 /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
3905 IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname);)
3906 ASSERT(i >= 0 && i < oc->n_symbols);
3907 /* cstring_from_COFF_symbol_name always succeeds. */
3908 oc->symbols[i] = (SymbolName*)sname;
3909 if (isWeak == HS_BOOL_TRUE) {
3910 setWeakSymbol(oc, sname);
3911 }
3912
3913 if (! ghciInsertSymbolTable(oc->fileName, symhash, (SymbolName*)sname, addr,
3914 isWeak, oc)) {
3915 return 0