Fix 32-bit build failures
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31
32 #if !defined(mingw32_HOST_OS)
33 #include "posix/Signals.h"
34 #endif
35
36 // get protos for is*()
37 #include <ctype.h>
38
39 #ifdef HAVE_SYS_TYPES_H
40 #include <sys/types.h>
41 #endif
42
43 #include <inttypes.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <stdio.h>
47 #include <assert.h>
48 #include <libgen.h>
49
50 #ifdef HAVE_SYS_STAT_H
51 #include <sys/stat.h>
52 #endif
53
54 #if defined(HAVE_DLFCN_H)
55 #include <dlfcn.h>
56 #endif
57
58 /* PowerPC and ARM have relative branch instructions with only 24 bit
59 * displacements and therefore need jump islands contiguous with each object
60 * code module.
61 */
62 #if defined(powerpc_HOST_ARCH)
63 #define SHORT_REL_BRANCH 1
64 #endif
65 #if defined(arm_HOST_ARCH)
66 #define SHORT_REL_BRANCH 1
67 #endif
68
69 #if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
70 #define USE_CONTIGUOUS_MMAP 1
71 #else
72 #define USE_CONTIGUOUS_MMAP 0
73 #endif
74
75 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
76 # define OBJFORMAT_ELF
77 # include <regex.h> // regex is already used by dlopen() so this is OK
78 // to use here without requiring an additional lib
79 #elif defined (mingw32_HOST_OS)
80 # define OBJFORMAT_PEi386
81 # include <windows.h>
82 # include <shfolder.h> /* SHGetFolderPathW */
83 # include <math.h>
84 # include <wchar.h>
85 #elif defined(darwin_HOST_OS)
86 # define OBJFORMAT_MACHO
87 # include <regex.h>
88 # include <mach/machine.h>
89 # include <mach-o/fat.h>
90 # include <mach-o/loader.h>
91 # include <mach-o/nlist.h>
92 # include <mach-o/reloc.h>
93 #if defined(powerpc_HOST_ARCH)
94 # include <mach-o/ppc/reloc.h>
95 #endif
96 #if defined(x86_64_HOST_ARCH)
97 # include <mach-o/x86_64/reloc.h>
98 #endif
99 #endif
100
101 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
102 #define ALWAYS_PIC
103 #endif
104
105 #if defined(dragonfly_HOST_OS)
106 #include <sys/tls.h>
107 #endif
108
109 /* SymbolInfo tracks a symbol's address, the object code from which
110 it originated, and whether or not it's weak.
111
112 RtsSymbolInfo is used to track the state of the symbols currently
113 loaded or to be loaded by the Linker.
114
115 Where the information in the `ObjectCode` is used to track the
116 original status of the symbol inside the `ObjectCode`.
117
118 A weak symbol that has been used will still be marked as weak
119 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
120 */
121 typedef struct _RtsSymbolInfo {
122 SymbolAddr* value;
123 ObjectCode *owner;
124 HsBool weak;
125 } RtsSymbolInfo;
126
127 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
128 This hashtable will contain information on all symbols
129 that we know of, however the .o they are in may not be loaded.
130
131 Until the ObjectCode the symbol belongs to is actually
132 loaded this symbol may be replaced. So do not rely on
133 addresses of unloaded symbols.
134
135 Note [runtime-linker-phases]
136 --------------------------------------
137 Broadly the behavior of the runtime linker can be
138 split into the following four phases:
139
140 - Indexing (e.g. ocVerifyImage and ocGetNames)
141 - Initialization (e.g. ocResolve and ocRunInit)
142 - Resolve (e.g. resolveObjs())
143 - Lookup (e.g. lookupSymbol)
144
145 This is to enable lazy loading of symbols. Eager loading is problematic
146 as it means that all symbols must be available, even those which we will
147 never use. This is especially painful of Windows, where the number of
148 libraries required to link things like mingwex grows to be quite high.
149
150 We proceed through these stages as follows,
151
152 * During Indexing we verify and open the ObjectCode and
153 perform a quick scan/indexing of the ObjectCode. All the work
154 required to actually load the ObjectCode is done.
155
156 All symbols from the ObjectCode is also inserted into
157 `symhash`, where possible duplicates are handled via the semantics
158 described in `ghciInsertSymbolTable`.
159
160 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
161 depending on whether they are an archive members or not.
162
163 * During initialization we load ObjectCode, perform relocations, execute
164 static constructors etc. This phase may trigger other ObjectCodes to
165 be loaded because of the calls to lookupSymbol.
166
167 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
168 previous status was `OBJECT_LOADED`.
169
170 * During resolve we attempt to resolve all the symbols needed for the
171 initial link. This essentially means, that for any ObjectCode given
172 directly to the command-line we perform lookupSymbols on the required
173 symbols. lookupSymbols may trigger the loading of additional ObjectCode
174 if required.
175
176 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
177 the previous status was `OBJECT_NEEDED`.
178
179 * Lookup symbols is used to lookup any symbols required, both during initial
180 link and during statement and expression compilations in the REPL.
181 Declaration of e.g. an foreign import, will eventually call lookupSymbol
182 which will either fail (symbol unknown) or succeed (and possibly triggered a
183 load).
184
185 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
186
187 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
188 by calling unloadObj and starting over.
189 When a new declaration or statement is performed ultimately lookupSymbol is called
190 without doing a re-link.
191
192 The goal of these different phases is to allow the linker to be able to perform
193 "lazy loading" of ObjectCode. The reason for this is that we want to only link
194 in symbols that are actually required for the link. This reduces:
195
196 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
197 then we don't need to load libB. This means the dependency chain for libraries
198 such as mingw32 and mingwex can be broken down.
199
200 2) The number of duplicate symbols, since now only symbols that are
201 true duplicates will display the error.
202 */
203 static /*Str*/HashTable *symhash;
204
205 /* List of currently loaded objects */
206 ObjectCode *objects = NULL; /* initially empty */
207
208 /* List of objects that have been unloaded via unloadObj(), but are waiting
209 to be actually freed via checkUnload() */
210 ObjectCode *unloaded_objects = NULL; /* initially empty */
211
212 #ifdef THREADED_RTS
213 /* This protects all the Linker's global state except unloaded_objects */
214 Mutex linker_mutex;
215 /*
216 * This protects unloaded_objects. We have a separate mutex for this, because
217 * the GC needs to access unloaded_objects in checkUnload, while the linker only
218 * needs to access unloaded_objects in unloadObj(), so this allows most linker
219 * operations proceed concurrently with the GC.
220 */
221 Mutex linker_unloaded_mutex;
222 #endif
223
224 /* Type of the initializer */
225 typedef void (*init_t) (int argc, char **argv, char **env);
226
227 static HsInt isAlreadyLoaded( pathchar *path );
228 static HsInt loadOc( ObjectCode* oc );
229 static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
230 rtsBool mapped, char *archiveMemberName,
231 int misalignment
232 );
233
234 // Use wchar_t for pathnames on Windows (#5697)
235 #if defined(mingw32_HOST_OS)
236 #define pathcmp wcscmp
237 #define pathlen wcslen
238 #define pathopen _wfopen
239 #define pathstat _wstat
240 #define struct_stat struct _stat
241 #define open wopen
242 #define WSTR(s) L##s
243 #define pathprintf swprintf
244 #define pathsize sizeof(wchar_t)
245 #else
246 #define pathcmp strcmp
247 #define pathlen strlen
248 #define pathopen fopen
249 #define pathstat stat
250 #define struct_stat struct stat
251 #define WSTR(s) s
252 #define pathprintf snprintf
253 #define pathsize sizeof(char)
254 #endif
255
256 static pathchar* pathdup(pathchar *path)
257 {
258 pathchar *ret;
259 #if defined(mingw32_HOST_OS)
260 ret = wcsdup(path);
261 #else
262 /* sigh, strdup() isn't a POSIX function, so do it the long way */
263 ret = stgMallocBytes( strlen(path)+1, "pathdup" );
264 strcpy(ret, path);
265 #endif
266 return ret;
267 }
268
269 static pathchar* pathdir(pathchar *path)
270 {
271 pathchar *ret;
272 #if defined(mingw32_HOST_OS)
273 pathchar *drive, *dirName;
274 size_t memberLen = pathlen(path) + 1;
275 dirName = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
276 ret = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
277 drive = stgMallocBytes(pathsize * _MAX_DRIVE, "pathdir(path)");
278 _wsplitpath_s(path, drive, _MAX_DRIVE, dirName, pathsize * pathlen(path), NULL, 0, NULL, 0);
279 pathprintf(ret, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), drive, dirName);
280 stgFree(drive);
281 stgFree(dirName);
282 #else
283 pathchar* dirName = dirname(path);
284 size_t memberLen = pathlen(dirName);
285 ret = stgMallocBytes(pathsize * (memberLen + 2), "pathdir(path)");
286 strcpy(ret, dirName);
287 ret[memberLen ] = '/';
288 ret[memberLen+1] = '\0';
289 #endif
290 return ret;
291 }
292
293 static pathchar* mkPath(char* path)
294 {
295 #if defined(mingw32_HOST_OS)
296 size_t required = mbstowcs(NULL, path, 0);
297 pathchar *ret = stgMallocBytes(sizeof(pathchar) * (required + 1), "mkPath");
298 if (mbstowcs(ret, path, required) == (size_t)-1)
299 {
300 barf("mkPath failed converting char* to wchar_t*");
301 }
302 ret[required] = '\0';
303 return ret;
304 #else
305 return pathdup(path);
306 #endif
307 }
308
309 /* Generic wrapper function to try and Resolve and RunInit oc files */
310 int ocTryLoad( ObjectCode* oc );
311
312 #if defined(OBJFORMAT_ELF)
313 static int ocVerifyImage_ELF ( ObjectCode* oc );
314 static int ocGetNames_ELF ( ObjectCode* oc );
315 static int ocResolve_ELF ( ObjectCode* oc );
316 static int ocRunInit_ELF ( ObjectCode* oc );
317 #if NEED_SYMBOL_EXTRAS
318 static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
319 #endif
320 #elif defined(OBJFORMAT_PEi386)
321 static int ocVerifyImage_PEi386 ( ObjectCode* oc );
322 static int ocGetNames_PEi386 ( ObjectCode* oc );
323 static int ocResolve_PEi386 ( ObjectCode* oc );
324 static int ocRunInit_PEi386 ( ObjectCode* oc );
325 static void *lookupSymbolInDLLs ( unsigned char *lbl );
326 /* See Note [mingw-w64 name decoration scheme] */
327 #ifndef x86_64_HOST_ARCH
328 static void zapTrailingAtSign ( unsigned char *sym );
329 #endif
330
331 #if defined(x86_64_HOST_ARCH)
332 #define USED_IF_x86_64_HOST_ARCH /* Nothing */
333 #else
334 #define USED_IF_x86_64_HOST_ARCH STG_UNUSED
335 #endif
336
337 static char *allocateImageAndTrampolines (
338 pathchar* arch_name, char* member_name,
339 FILE* f,
340 int size,
341 int isThin);
342 #if defined(x86_64_HOST_ARCH)
343 static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
344 static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
345 #define PEi386_IMAGE_OFFSET 4
346 #else
347 #define PEi386_IMAGE_OFFSET 0
348 #endif
349 #elif defined(OBJFORMAT_MACHO)
350 static int ocVerifyImage_MachO ( ObjectCode* oc );
351 static int ocGetNames_MachO ( ObjectCode* oc );
352 static int ocResolve_MachO ( ObjectCode* oc );
353 static int ocRunInit_MachO ( ObjectCode* oc );
354 static int machoGetMisalignment( FILE * );
355 #if NEED_SYMBOL_EXTRAS
356 static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
357 #endif
358 #ifdef powerpc_HOST_ARCH
359 static void machoInitSymbolsWithoutUnderscore( void );
360 #endif
361 #endif
362
363 #if defined(OBJFORMAT_PEi386)
364 /* string utility function */
365 static HsBool endsWithPath(pathchar* base, pathchar* str) {
366 int blen = pathlen(base);
367 int slen = pathlen(str);
368 return (blen >= slen) && (0 == pathcmp(base + blen - slen, str));
369 }
370
371 static int checkAndLoadImportLibrary(
372 pathchar* arch_name,
373 char* member_name,
374 FILE* f);
375
376 static int findAndLoadImportLibrary(
377 ObjectCode* oc
378 );
379
380 static UChar *myindex(
381 int scale,
382 void* base,
383 int index);
384 static UChar *cstring_from_COFF_symbol_name(
385 UChar* name,
386 UChar* strtab);
387 static char *cstring_from_section_name(
388 UChar* name,
389 UChar* strtab);
390
391
392 /* Add ld symbol for PE image base. */
393 #if defined(__GNUC__)
394 #define __ImageBase __MINGW_LSYMBOL(_image_base__)
395 #endif
396
397 /* Get the base of the module. */
398 /* This symbol is defined by ld. */
399 extern IMAGE_DOS_HEADER __ImageBase;
400 #define __image_base (void*)((HINSTANCE)&__ImageBase)
401
402 // MingW-w64 is missing these from the implementation. So we have to look them up
403 typedef DLL_DIRECTORY_COOKIE(WINAPI *LPAddDLLDirectory)(PCWSTR NewDirectory);
404 typedef WINBOOL(WINAPI *LPRemoveDLLDirectory)(DLL_DIRECTORY_COOKIE Cookie);
405 #endif /* OBJFORMAT_PEi386 */
406
407 static void freeProddableBlocks (ObjectCode *oc);
408
409 /* on x86_64 we have a problem with relocating symbol references in
410 * code that was compiled without -fPIC. By default, the small memory
411 * model is used, which assumes that symbol references can fit in a
412 * 32-bit slot. The system dynamic linker makes this work for
413 * references to shared libraries by either (a) allocating a jump
414 * table slot for code references, or (b) moving the symbol at load
415 * time (and copying its contents, if necessary) for data references.
416 *
417 * We unfortunately can't tell whether symbol references are to code
418 * or data. So for now we assume they are code (the vast majority
419 * are), and allocate jump-table slots. Unfortunately this will
420 * SILENTLY generate crashing code for data references. This hack is
421 * enabled by X86_64_ELF_NONPIC_HACK.
422 *
423 * One workaround is to use shared Haskell libraries. This is
424 * coming. Another workaround is to keep the static libraries but
425 * compile them with -fPIC, because that will generate PIC references
426 * to data which can be relocated. The PIC code is still too green to
427 * do this systematically, though.
428 *
429 * See bug #781
430 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
431 *
432 * Naming Scheme for Symbol Macros
433 *
434 * SymI_*: symbol is internal to the RTS. It resides in an object
435 * file/library that is statically.
436 * SymE_*: symbol is external to the RTS library. It might be linked
437 * dynamically.
438 *
439 * Sym*_HasProto : the symbol prototype is imported in an include file
440 * or defined explicitly
441 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
442 * default proto extern void sym(void);
443 */
444 #define X86_64_ELF_NONPIC_HACK 1
445
446 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
447 * small memory model on this architecture (see gcc docs,
448 * -mcmodel=small).
449 *
450 * MAP_32BIT not available on OpenBSD/amd64
451 */
452 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
453 #define TRY_MAP_32BIT MAP_32BIT
454 #else
455 #define TRY_MAP_32BIT 0
456 #endif
457
458 /*
459 Note [The ARM/Thumb Story]
460 ~~~~~~~~~~~~~~~~~~~~~~~~~~
461
462 Support for the ARM architecture is complicated by the fact that ARM has not
463 one but several instruction encodings. The two relevant ones here are the original
464 ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
465 of the instruction set.
466
467 How the CPU decodes a particular instruction is determined by a mode bit. This
468 mode bit is set on jump instructions, the value being determined by the low
469 bit of the target address: An odd address means the target is a procedure
470 encoded in the Thumb encoding whereas an even address means it's a traditional
471 ARM procedure (the actual address jumped to is even regardless of the encoding bit).
472
473 Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
474 is tricky. If the linker needs to link a call by an ARM object into Thumb code
475 (or vice-versa) it will produce a jump island. This, however, is incompatible with
476 GHC's tables-next-to-code. For this reason, it is critical that GHC emit
477 exclusively ARM or Thumb objects for all Haskell code.
478
479 We still do, however, need to worry about foreign code.
480 */
481
482 /*
483 * Due to the small memory model (see above), on x86_64 we have to map
484 * all our non-PIC object files into the low 2Gb of the address space
485 * (why 2Gb and not 4Gb? Because all addresses must be reachable
486 * using a 32-bit signed PC-relative offset). On Linux we can do this
487 * using the MAP_32BIT flag to mmap(), however on other OSs
488 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
489 * can't do this. So on these systems, we have to pick a base address
490 * in the low 2Gb of the address space and try to allocate memory from
491 * there.
492 *
493 * We pick a default address based on the OS, but also make this
494 * configurable via an RTS flag (+RTS -xm)
495 */
496 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
497
498 #if defined(MAP_32BIT)
499 // Try to use MAP_32BIT
500 #define MMAP_32BIT_BASE_DEFAULT 0
501 #else
502 // A guess: 1Gb.
503 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
504 #endif
505
506 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
507 #endif
508
509 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
510 ObjectCode *owner)
511 {
512 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
513 if (!pinfo || owner != pinfo->owner) return;
514 removeStrHashTable(table, key, NULL);
515 stgFree(pinfo);
516 }
517
518 /* -----------------------------------------------------------------------------
519 * Insert symbols into hash tables, checking for duplicates.
520 *
521 * Returns: 0 on failure, nonzero on success
522 */
523 /*
524 Note [weak-symbols-support]
525 -------------------------------------
526 While ghciInsertSymbolTable does implement extensive
527 logic for weak symbol support, weak symbols are not currently
528 fully supported by the RTS. This code is mostly here for COMDAT
529 support which uses the weak symbols support.
530
531 Linking weak symbols defined purely in C code with other C code
532 should also work, probably. Observing weak symbols in Haskell
533 won't.
534
535 Some test have been written for weak symbols but have been disabled
536 mostly because it's unsure how the weak symbols support should look.
537 See Trac #11223
538 */
539 static int ghciInsertSymbolTable(
540 pathchar* obj_name,
541 HashTable *table,
542 const SymbolName* key,
543 SymbolAddr* data,
544 HsBool weak,
545 ObjectCode *owner)
546 {
547 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
548 if (!pinfo) /* new entry */
549 {
550 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
551 pinfo->value = data;
552 pinfo->owner = owner;
553 pinfo->weak = weak;
554 insertStrHashTable(table, key, pinfo);
555 return 1;
556 }
557 else if (weak && data && pinfo->weak && !pinfo->value)
558 {
559 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
560 pinfo->value = data;
561 pinfo->owner = owner;
562 return 1;
563 }
564 else if (weak)
565 {
566 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
567 already know of another copy throw this one away.
568
569 or both weak symbols have a nonzero value. Keep the existing one.
570
571 This also preserves the semantics of linking against
572 the first symbol we find. */
573 }
574 else if (pinfo->weak && !weak) /* weak symbol is in the table */
575 {
576 /* override the weak definition with the non-weak one */
577 pinfo->value = data;
578 pinfo->owner = owner;
579 pinfo->weak = HS_BOOL_FALSE;
580 return 1;
581 }
582 else if ( pinfo->owner
583 && pinfo->owner->status != OBJECT_RESOLVED
584 && pinfo->owner->status != OBJECT_NEEDED)
585 {
586 /* If the other symbol hasn't been loaded or will be loaded and we want to
587 explicitly load the new one, we can just swap it out and load the one
588 that has been requested. If not, just keep the first one encountered.
589
590 Because the `symHash' table consists symbols we've also not loaded but
591 found during the initial scan this is safe to do. If however the existing
592 symbol has been loaded then it means we have a duplicate.
593
594 This is essentially emulating the behavior of a linker wherein it will always
595 link in object files that are .o file arguments, but only take object files
596 from archives as needed. */
597 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
598 pinfo->value = data;
599 pinfo->owner = owner;
600 pinfo->weak = weak;
601 }
602
603 return 1;
604 }
605 else if (pinfo->owner == owner)
606 {
607 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
608 return 1;
609 }
610 else if (owner && owner->status == OBJECT_LOADED)
611 {
612 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
613 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
614 call this function again to trigger the duplicate error. */
615 return 1;
616 }
617
618 pathchar* archiveName = NULL;
619 debugBelch(
620 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
621 " %s\n"
622 "whilst processing object file\n"
623 " %" PATH_FMT "\n"
624 "The symbol was previously defined in\n"
625 " %" PATH_FMT "\n"
626 "This could be caused by:\n"
627 " * Loading two different object files which export the same symbol\n"
628 " * Specifying the same object file twice on the GHCi command line\n"
629 " * An incorrect `package.conf' entry, causing some object to be\n"
630 " loaded twice.\n",
631 (char*)key,
632 obj_name,
633 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
634 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
635 : pinfo->owner->fileName
636 );
637
638 if (archiveName)
639 {
640 stgFree(archiveName);
641 archiveName = NULL;
642 }
643 return 0;
644 }
645
646 /* -----------------------------------------------------------------------------
647 * Looks up symbols into hash tables.
648 *
649 * Returns: 0 on failure and result is not set,
650 * nonzero on success and result set to nonzero pointer
651 */
652 static HsBool ghciLookupSymbolInfo(HashTable *table,
653 const SymbolName* key, RtsSymbolInfo **result)
654 {
655 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
656 if (!pinfo) {
657 *result = NULL;
658 return HS_BOOL_FALSE;
659 }
660 if (pinfo->weak)
661 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
662 /* Once it's looked up, it can no longer be overridden */
663 pinfo->weak = HS_BOOL_FALSE;
664
665 *result = pinfo;
666 return HS_BOOL_TRUE;
667 }
668
669 /* -----------------------------------------------------------------------------
670 * initialize the object linker
671 */
672
673
674 static int linker_init_done = 0 ;
675
676 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
677 static void *dl_prog_handle;
678 static regex_t re_invalid;
679 static regex_t re_realso;
680 #ifdef THREADED_RTS
681 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
682 #endif
683 #elif defined(OBJFORMAT_PEi386)
684 void addDLLHandle(pathchar* dll_name, HINSTANCE instance);
685 #endif
686
687 void initLinker (void)
688 {
689 // default to retaining CAFs for backwards compatibility. Most
690 // users will want initLinker_(0): otherwise unloadObj() will not
691 // be able to unload object files when they contain CAFs.
692 initLinker_(1);
693 }
694
695 void
696 initLinker_ (int retain_cafs)
697 {
698 RtsSymbolVal *sym;
699 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
700 int compileResult;
701 #endif
702
703 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
704
705 /* Make initLinker idempotent, so we can call it
706 before every relevant operation; that means we
707 don't need to initialise the linker separately */
708 if (linker_init_done == 1) {
709 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
710 return;
711 } else {
712 linker_init_done = 1;
713 }
714
715 objects = NULL;
716 unloaded_objects = NULL;
717
718 #if defined(THREADED_RTS)
719 initMutex(&linker_mutex);
720 initMutex(&linker_unloaded_mutex);
721 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
722 initMutex(&dl_mutex);
723 #endif
724 #endif
725 symhash = allocStrHashTable();
726
727 /* populate the symbol table with stuff from the RTS */
728 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
729 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
730 symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
731 barf("ghciInsertSymbolTable failed");
732 }
733 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
734 }
735 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
736 machoInitSymbolsWithoutUnderscore();
737 # endif
738 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
739 referenced from a statically linked module. We need to mimic this, but
740 we cannot use NULL because we use it to mean nonexistent symbols. So we
741 use an arbitrary (hopefully unique) address here.
742 */
743 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
744 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
745 barf("ghciInsertSymbolTable failed");
746 }
747
748 #if defined(OBJFORMAT_PEi386)
749 if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
750 symhash, "__image_base__", __image_base, HS_BOOL_TRUE, NULL)) {
751 barf("ghciInsertSymbolTable failed");
752 }
753 #endif /* OBJFORMAT_PEi386 */
754
755
756 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
757 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
758 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
759 retain_cafs ? newRetainedCAF : newGCdCAF,
760 HS_BOOL_FALSE, NULL)) {
761 barf("ghciInsertSymbolTable failed");
762 }
763
764 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
765 # if defined(RTLD_DEFAULT)
766 dl_prog_handle = RTLD_DEFAULT;
767 # else
768 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
769 # endif /* RTLD_DEFAULT */
770
771 compileResult = regcomp(&re_invalid,
772 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
773 REG_EXTENDED);
774 if (compileResult != 0) {
775 barf("Compiling re_invalid failed");
776 }
777 compileResult = regcomp(&re_realso,
778 "(GROUP|INPUT) *\\( *([^ )]+)",
779 REG_EXTENDED);
780 if (compileResult != 0) {
781 barf("Compiling re_realso failed");
782 }
783 # endif
784
785 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
786 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
787 // User-override for mmap_32bit_base
788 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
789 }
790 #endif
791
792 #if defined(mingw32_HOST_OS)
793 /*
794 * These two libraries cause problems when added to the static link,
795 * but are necessary for resolving symbols in GHCi, hence we load
796 * them manually here.
797 */
798 addDLL(WSTR("msvcrt"));
799 addDLL(WSTR("kernel32"));
800 addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
801 #endif
802
803 if (RTS_LINKER_USE_MMAP)
804 m32_allocator_init();
805
806 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
807 return;
808 }
809
810 void
811 exitLinker( void ) {
812 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
813 if (linker_init_done == 1) {
814 regfree(&re_invalid);
815 regfree(&re_realso);
816 #ifdef THREADED_RTS
817 closeMutex(&dl_mutex);
818 #endif
819 }
820 #endif
821 if (linker_init_done == 1) {
822 freeHashTable(symhash, free);
823 }
824 #ifdef THREADED_RTS
825 closeMutex(&linker_mutex);
826 #endif
827 }
828
829 /* -----------------------------------------------------------------------------
830 * Loading DLL or .so dynamic libraries
831 * -----------------------------------------------------------------------------
832 *
833 * Add a DLL from which symbols may be found. In the ELF case, just
834 * do RTLD_GLOBAL-style add, so no further messing around needs to
835 * happen in order that symbols in the loaded .so are findable --
836 * lookupSymbol() will subsequently see them by dlsym on the program's
837 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
838 *
839 * In the PEi386 case, open the DLLs and put handles to them in a
840 * linked list. When looking for a symbol, try all handles in the
841 * list. This means that we need to load even DLLs that are guaranteed
842 * to be in the ghc.exe image already, just so we can get a handle
843 * to give to loadSymbol, so that we can find the symbols. For such
844 * libraries, the LoadLibrary call should be a no-op except for returning
845 * the handle.
846 *
847 */
848
849 #if defined(OBJFORMAT_PEi386)
850 /* A record for storing handles into DLLs. */
851
852 typedef
853 struct _OpenedDLL {
854 pathchar* name;
855 struct _OpenedDLL* next;
856 HINSTANCE instance;
857 }
858 OpenedDLL;
859
860 /* A list thereof. */
861 static OpenedDLL* opened_dlls = NULL;
862
863 /* A record for storing indirectly linked functions from DLLs. */
864 typedef
865 struct _IndirectAddr {
866 SymbolAddr* addr;
867 struct _IndirectAddr* next;
868 }
869 IndirectAddr;
870
871 /* A list thereof. */
872 static IndirectAddr* indirects = NULL;
873
874 /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
875 void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
876 OpenedDLL* o_dll;
877 o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );
878 o_dll->name = dll_name ? pathdup(dll_name) : NULL;
879 o_dll->instance = instance;
880 o_dll->next = opened_dlls;
881 opened_dlls = o_dll;
882 }
883
884 #endif
885
886 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
887
888 /* Suppose in ghci we load a temporary SO for a module containing
889 f = 1
890 and then modify the module, recompile, and load another temporary
891 SO with
892 f = 2
893 Then as we don't unload the first SO, dlsym will find the
894 f = 1
895 symbol whereas we want the
896 f = 2
897 symbol. We therefore need to keep our own SO handle list, and
898 try SOs in the right order. */
899
900 typedef
901 struct _OpenedSO {
902 struct _OpenedSO* next;
903 void *handle;
904 }
905 OpenedSO;
906
907 /* A list thereof. */
908 static OpenedSO* openedSOs = NULL;
909
910 static const char *
911 internal_dlopen(const char *dll_name)
912 {
913 OpenedSO* o_so;
914 void *hdl;
915 const char *errmsg;
916 char *errmsg_copy;
917
918 // omitted: RTLD_NOW
919 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
920 IF_DEBUG(linker,
921 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
922
923 //-------------- Begin critical section ------------------
924 // This critical section is necessary because dlerror() is not
925 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
926 // Also, the error message returned must be copied to preserve it
927 // (see POSIX also)
928
929 ACQUIRE_LOCK(&dl_mutex);
930 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
931
932 errmsg = NULL;
933 if (hdl == NULL) {
934 /* dlopen failed; return a ptr to the error msg. */
935 errmsg = dlerror();
936 if (errmsg == NULL) errmsg = "addDLL: unknown error";
937 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
938 strcpy(errmsg_copy, errmsg);
939 errmsg = errmsg_copy;
940 } else {
941 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
942 o_so->handle = hdl;
943 o_so->next = openedSOs;
944 openedSOs = o_so;
945 }
946
947 RELEASE_LOCK(&dl_mutex);
948 //--------------- End critical section -------------------
949
950 return errmsg;
951 }
952
953 /*
954 Note [RTLD_LOCAL]
955
956 In GHCi we want to be able to override previous .so's with newly
957 loaded .so's when we recompile something. This further implies that
958 when we look up a symbol in internal_dlsym() we have to iterate
959 through the loaded libraries (in order from most recently loaded to
960 oldest) looking up the symbol in each one until we find it.
961
962 However, this can cause problems for some symbols that are copied
963 by the linker into the executable image at runtime - see #8935 for a
964 lengthy discussion. To solve that problem we need to look up
965 symbols in the main executable *first*, before attempting to look
966 them up in the loaded .so's. But in order to make that work, we
967 have to always call dlopen with RTLD_LOCAL, so that the loaded
968 libraries don't populate the global symbol table.
969 */
970
971 static void *
972 internal_dlsym(const char *symbol) {
973 OpenedSO* o_so;
974 void *v;
975
976 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
977 ACQUIRE_LOCK(&dl_mutex);
978 dlerror();
979 // look in program first
980 v = dlsym(dl_prog_handle, symbol);
981 if (dlerror() == NULL) {
982 RELEASE_LOCK(&dl_mutex);
983 return v;
984 }
985
986 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
987 v = dlsym(o_so->handle, symbol);
988 if (dlerror() == NULL) {
989 RELEASE_LOCK(&dl_mutex);
990 return v;
991 }
992 }
993 RELEASE_LOCK(&dl_mutex);
994 return v;
995 }
996 # endif
997
998 const char *
999 addDLL( pathchar *dll_name )
1000 {
1001 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1002 /* ------------------- ELF DLL loader ------------------- */
1003
1004 #define NMATCH 5
1005 regmatch_t match[NMATCH];
1006 const char *errmsg;
1007 FILE* fp;
1008 size_t match_length;
1009 #define MAXLINE 1000
1010 char line[MAXLINE];
1011 int result;
1012
1013 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
1014 errmsg = internal_dlopen(dll_name);
1015
1016 if (errmsg == NULL) {
1017 return NULL;
1018 }
1019
1020 // GHC Trac ticket #2615
1021 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
1022 // contain linker scripts rather than ELF-format object code. This
1023 // code handles the situation by recognizing the real object code
1024 // file name given in the linker script.
1025 //
1026 // If an "invalid ELF header" error occurs, it is assumed that the
1027 // .so file contains a linker script instead of ELF object code.
1028 // In this case, the code looks for the GROUP ( ... ) linker
1029 // directive. If one is found, the first file name inside the
1030 // parentheses is treated as the name of a dynamic library and the
1031 // code attempts to dlopen that file. If this is also unsuccessful,
1032 // an error message is returned.
1033
1034 // see if the error message is due to an invalid ELF header
1035 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
1036 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
1037 IF_DEBUG(linker, debugBelch("result = %i\n", result));
1038 if (result == 0) {
1039 // success -- try to read the named file as a linker script
1040 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
1041 MAXLINE-1);
1042 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
1043 line[match_length] = '\0'; // make sure string is null-terminated
1044 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
1045 if ((fp = fopen(line, "r")) == NULL) {
1046 return errmsg; // return original error if open fails
1047 }
1048 // try to find a GROUP or INPUT ( ... ) command
1049 while (fgets(line, MAXLINE, fp) != NULL) {
1050 IF_DEBUG(linker, debugBelch("input line = %s", line));
1051 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
1052 // success -- try to dlopen the first named file
1053 IF_DEBUG(linker, debugBelch("match%s\n",""));
1054 line[match[2].rm_eo] = '\0';
1055 stgFree((void*)errmsg); // Free old message before creating new one
1056 errmsg = internal_dlopen(line+match[2].rm_so);
1057 break;
1058 }
1059 // if control reaches here, no GROUP or INPUT ( ... ) directive
1060 // was found and the original error message is returned to the
1061 // caller
1062 }
1063 fclose(fp);
1064 }
1065 return errmsg;
1066
1067 # elif defined(OBJFORMAT_PEi386)
1068 /* ------------------- Win32 DLL loader ------------------- */
1069
1070 pathchar* buf;
1071 OpenedDLL* o_dll;
1072 HINSTANCE instance;
1073
1074 IF_DEBUG(linker, debugBelch("\naddDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
1075
1076 /* See if we've already got it, and ignore if so. */
1077 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
1078 if (0 == pathcmp(o_dll->name, dll_name))
1079 return NULL;
1080 }
1081
1082 /* The file name has no suffix (yet) so that we can try
1083 both foo.dll and foo.drv
1084
1085 The documentation for LoadLibrary says:
1086 If no file name extension is specified in the lpFileName
1087 parameter, the default library extension .dll is
1088 appended. However, the file name string can include a trailing
1089 point character (.) to indicate that the module name has no
1090 extension. */
1091
1092 size_t bufsize = pathlen(dll_name) + 10;
1093 buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
1094
1095 /* These are ordered by probability of success and order we'd like them */
1096 const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
1097 const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
1098
1099 int cFormat;
1100 int cFlag;
1101 int flags_start = 1; // Assume we don't support the new API
1102
1103 /* Detect if newer API are available, if not, skip the first flags entry */
1104 if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
1105 flags_start = 0;
1106 }
1107
1108 /* Iterate through the possible flags and formats */
1109 for (cFlag = flags_start; cFlag < 2; cFlag++)
1110 {
1111 for (cFormat = 0; cFormat < 4; cFormat++)
1112 {
1113 snwprintf(buf, bufsize, formats[cFormat], dll_name);
1114 instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
1115 if (instance == NULL)
1116 {
1117 if (GetLastError() != ERROR_MOD_NOT_FOUND)
1118 {
1119 goto error;
1120 }
1121 }
1122 else
1123 {
1124 break; // We're done. DLL has been loaded.
1125 }
1126 }
1127 }
1128
1129 // Check if we managed to load the DLL
1130 if (instance == NULL) {
1131 goto error;
1132 }
1133
1134 stgFree(buf);
1135
1136 addDLLHandle(dll_name, instance);
1137
1138 return NULL;
1139
1140 error:
1141 stgFree(buf);
1142 sysErrorBelch("addDLL: %" PATH_FMT " (Win32 error %lu)", dll_name, GetLastError());
1143
1144 /* LoadLibrary failed; return a ptr to the error msg. */
1145 return "addDLL: could not load DLL";
1146
1147 # else
1148 barf("addDLL: not implemented on this platform");
1149 # endif
1150 }
1151
1152 /* -----------------------------------------------------------------------------
1153 * Searches the system directories to determine if there is a system DLL that
1154 * satisfies the given name. This prevent GHCi from linking against a static
1155 * library if a DLL is available.
1156 *
1157 * Returns: NULL on failure or no DLL found, else the full path to the DLL
1158 * that can be loaded.
1159 */
1160 pathchar* findSystemLibrary(pathchar* dll_name)
1161 {
1162 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));
1163
1164 #if defined(OBJFORMAT_PEi386)
1165 const unsigned int init_buf_size = 1024;
1166 unsigned int bufsize = init_buf_size;
1167 wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
1168 DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
1169
1170 if (wResult > bufsize) {
1171 result = realloc(result, sizeof(wchar_t) * wResult);
1172 wResult = SearchPathW(NULL, dll_name, NULL, wResult, result, NULL);
1173 }
1174
1175
1176 if (!wResult) {
1177 free(result);
1178 return NULL;
1179 }
1180
1181 return result;
1182 #else
1183 (void)(dll_name); // Function not implemented for other platforms.
1184 return NULL;
1185 #endif
1186 }
1187
1188 /* -----------------------------------------------------------------------------
1189 * Emits a warning determining that the system is missing a required security
1190 * update that we need to get access to the proper APIs
1191 */
1192 void warnMissingKBLibraryPaths( void )
1193 {
1194 static HsBool missing_update_warn = HS_BOOL_FALSE;
1195 if (!missing_update_warn) {
1196 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
1197 missing_update_warn = HS_BOOL_TRUE;
1198 }
1199 }
1200
1201 /* -----------------------------------------------------------------------------
1202 * appends a directory to the process DLL Load path so LoadLibrary can find it
1203 *
1204 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
1205 * restore the search path to what it was before this call.
1206 */
1207 HsPtr addLibrarySearchPath(pathchar* dll_path)
1208 {
1209 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));
1210
1211 #if defined(OBJFORMAT_PEi386)
1212 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1213 LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
1214
1215 HsPtr result = NULL;
1216
1217 const unsigned int init_buf_size = 4096;
1218 int bufsize = init_buf_size;
1219
1220 // Make sure the path is an absolute path
1221 WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
1222 DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
1223 if (!wResult){
1224 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1225 }
1226 else if (wResult > init_buf_size) {
1227 abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
1228 if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
1229 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1230 }
1231 }
1232
1233 if (AddDllDirectory) {
1234 result = AddDllDirectory(abs_path);
1235 }
1236 else
1237 {
1238 warnMissingKBLibraryPaths();
1239 WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
1240 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1241
1242 if (wResult > init_buf_size) {
1243 str = realloc(str, sizeof(WCHAR) * wResult);
1244 bufsize = wResult;
1245 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1246 if (!wResult) {
1247 sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1248 }
1249 }
1250
1251 bufsize = wResult + 2 + pathlen(abs_path);
1252 wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
1253
1254 wcscpy(newPath, abs_path);
1255 wcscat(newPath, L";");
1256 wcscat(newPath, str);
1257 if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
1258 sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1259 }
1260
1261 free(newPath);
1262 free(abs_path);
1263
1264 return str;
1265 }
1266
1267 if (!result) {
1268 sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1269 free(abs_path);
1270 return NULL;
1271 }
1272
1273 free(abs_path);
1274 return result;
1275 #else
1276 (void)(dll_path); // Function not implemented for other platforms.
1277 return NULL;
1278 #endif
1279 }
1280
1281 /* -----------------------------------------------------------------------------
1282 * removes a directory from the process DLL Load path
1283 *
1284 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
1285 */
1286 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
1287 {
1288 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n", dll_path_index));
1289
1290 #if defined(OBJFORMAT_PEi386)
1291 HsBool result = 0;
1292
1293 if (dll_path_index != NULL) {
1294 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1295 LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
1296
1297 if (RemoveDllDirectory) {
1298 result = RemoveDllDirectory(dll_path_index);
1299 // dll_path_index is now invalid, do not use it after this point.
1300 }
1301 else
1302 {
1303 warnMissingKBLibraryPaths();
1304 result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
1305 free(dll_path_index);
1306 }
1307
1308 if (!result) {
1309 sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
1310 return HS_BOOL_FALSE;
1311 }
1312 }
1313
1314 return result == 0 ? HS_BOOL_TRUE : HS_BOOL_FALSE;
1315 #else
1316 (void)(dll_path_index); // Function not implemented for other platforms.
1317 return HS_BOOL_FALSE;
1318 #endif
1319 }
1320
1321 /* -----------------------------------------------------------------------------
1322 * insert a symbol in the hash table
1323 *
1324 * Returns: 0 on failure, nozero on success
1325 */
1326 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
1327 {
1328 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
1329 }
1330
1331 /* -----------------------------------------------------------------------------
1332 * lookup a symbol in the hash table
1333 */
1334 static SymbolAddr* lookupSymbol_ (SymbolName* lbl)
1335 {
1336 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
1337
1338 ASSERT(symhash != NULL);
1339 RtsSymbolInfo *pinfo;
1340
1341 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
1342 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
1343
1344 # if defined(OBJFORMAT_ELF)
1345 return internal_dlsym(lbl);
1346 # elif defined(OBJFORMAT_MACHO)
1347
1348 /* HACK: On OS X, all symbols are prefixed with an underscore.
1349 However, dlsym wants us to omit the leading underscore from the
1350 symbol name -- the dlsym routine puts it back on before searching
1351 for the symbol. For now, we simply strip it off here (and ONLY
1352 here).
1353 */
1354 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
1355 ASSERT(lbl[0] == '_');
1356 return internal_dlsym(lbl + 1);
1357 # elif defined(OBJFORMAT_PEi386)
1358 SymbolAddr* sym;
1359
1360 /* See Note [mingw-w64 name decoration scheme] */
1361 #ifndef x86_64_HOST_ARCH
1362 zapTrailingAtSign ( (unsigned char*)lbl );
1363 #endif
1364 sym = lookupSymbolInDLLs((unsigned char*)lbl);
1365 return sym; // might be NULL if not found
1366
1367 # else
1368 ASSERT(2+2 == 5);
1369 return NULL;
1370 # endif
1371 } else {
1372 SymbolAddr* val = pinfo->value;
1373 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, val));
1374
1375 int r;
1376 ObjectCode* oc = pinfo->owner;
1377
1378 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
1379 See Note [runtime-linker-phases] */
1380 if (oc && oc->status == OBJECT_LOADED) {
1381 oc->status = OBJECT_NEEDED;
1382 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand loading symbol '%s'\n", lbl));
1383 r = ocTryLoad(oc);
1384
1385 if (!r) {
1386 errorBelch("Could not on-demand load symbol '%s'\n", lbl);
1387 return NULL;
1388 }
1389 }
1390
1391 return val;
1392 }
1393 }
1394
1395 SymbolAddr* lookupSymbol( SymbolName* lbl )
1396 {
1397 ACQUIRE_LOCK(&linker_mutex);
1398 SymbolAddr* r = lookupSymbol_(lbl);
1399 RELEASE_LOCK(&linker_mutex);
1400 return r;
1401 }
1402
1403 /* -----------------------------------------------------------------------------
1404 Create a StablePtr for a foreign export. This is normally called by
1405 a C function with __attribute__((constructor)), which is generated
1406 by GHC and linked into the module.
1407
1408 If the object code is being loaded dynamically, then we remember
1409 which StablePtrs were allocated by the constructors and free them
1410 again in unloadObj().
1411 -------------------------------------------------------------------------- */
1412
1413 static ObjectCode *loading_obj = NULL;
1414
1415 StgStablePtr foreignExportStablePtr (StgPtr p)
1416 {
1417 ForeignExportStablePtr *fe_sptr;
1418 StgStablePtr *sptr;
1419
1420 sptr = getStablePtr(p);
1421
1422 if (loading_obj != NULL) {
1423 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
1424 "foreignExportStablePtr");
1425 fe_sptr->stable_ptr = sptr;
1426 fe_sptr->next = loading_obj->stable_ptrs;
1427 loading_obj->stable_ptrs = fe_sptr;
1428 }
1429
1430 return sptr;
1431 }
1432
1433
1434 /* -----------------------------------------------------------------------------
1435 * Debugging aid: look in GHCi's object symbol tables for symbols
1436 * within DELTA bytes of the specified address, and show their names.
1437 */
1438 #ifdef DEBUG
1439 void ghci_enquire ( SymbolAddr* addr );
1440
1441 void ghci_enquire(SymbolAddr* addr)
1442 {
1443 int i;
1444 SymbolName* sym;
1445 RtsSymbolInfo* a;
1446 const int DELTA = 64;
1447 ObjectCode* oc;
1448
1449 for (oc = objects; oc; oc = oc->next) {
1450 for (i = 0; i < oc->n_symbols; i++) {
1451 sym = oc->symbols[i];
1452 if (sym == NULL) continue;
1453 a = NULL;
1454 if (a == NULL) {
1455 ghciLookupSymbolInfo(symhash, sym, &a);
1456 }
1457 if (a == NULL) {
1458 // debugBelch("ghci_enquire: can't find %s\n", sym);
1459 }
1460 else if ( a->value
1461 && (char*)addr-DELTA <= (char*)a->value
1462 && (char*)a->value <= (char*)addr+DELTA) {
1463 debugBelch("%p + %3d == `%s'\n", addr, (int)((char*)a->value - (char*)addr), sym);
1464 }
1465 }
1466 }
1467 }
1468
1469 void ghci_find(SymbolAddr *addr);
1470 void ghci_find(SymbolAddr *addr)
1471 {
1472 ObjectCode *oc;
1473 uint32_t i;
1474
1475 for (oc = objects; oc != NULL; oc = oc->next) {
1476 for (i = 0; i < (uint32_t)oc->n_sections; i++) {
1477 Section *section = &oc->sections[i];
1478 if (addr > section->start &&
1479 (StgWord)addr < (StgWord)section->start+section->size) {
1480 debugBelch("%p is in ", addr);
1481 if (oc->archiveMemberName) {
1482 debugBelch("%s", oc->archiveMemberName);
1483 } else {
1484 debugBelch("%" PATH_FMT, oc->fileName);
1485 }
1486 debugBelch(", section %d, offset %" FMT_Word "\n", i,
1487 (StgWord)addr - (StgWord)section->start);
1488 }
1489 }
1490 }
1491 }
1492 #endif
1493
1494 #if RTS_LINKER_USE_MMAP
1495 //
1496 // Returns NULL on failure.
1497 //
1498 void *
1499 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1500 {
1501 void *map_addr = NULL;
1502 void *result;
1503 size_t size;
1504 static uint32_t fixed = 0;
1505
1506 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1507 size = roundUpToPage(bytes);
1508
1509 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1510 mmap_again:
1511
1512 if (mmap_32bit_base != 0) {
1513 map_addr = mmap_32bit_base;
1514 }
1515 #endif
1516
1517 IF_DEBUG(linker,
1518 debugBelch("mmapForLinker: \tprotection %#0x\n",
1519 PROT_EXEC | PROT_READ | PROT_WRITE));
1520 IF_DEBUG(linker,
1521 debugBelch("mmapForLinker: \tflags %#0x\n",
1522 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1523
1524 result = mmap(map_addr, size,
1525 PROT_EXEC|PROT_READ|PROT_WRITE,
1526 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1527
1528 if (result == MAP_FAILED) {
1529 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1530 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1531 return NULL;
1532 }
1533
1534 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1535 if (mmap_32bit_base != 0) {
1536 if (result == map_addr) {
1537 mmap_32bit_base = (StgWord8*)map_addr + size;
1538 } else {
1539 if ((W_)result > 0x80000000) {
1540 // oops, we were given memory over 2Gb
1541 munmap(result,size);
1542 #if defined(freebsd_HOST_OS) || \
1543 defined(kfreebsdgnu_HOST_OS) || \
1544 defined(dragonfly_HOST_OS)
1545 // Some platforms require MAP_FIXED. This is normally
1546 // a bad idea, because MAP_FIXED will overwrite
1547 // existing mappings.
1548 fixed = MAP_FIXED;
1549 goto mmap_again;
1550 #else
1551 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1552 "asked for %lu bytes at %p. "
1553 "Try specifying an address with +RTS -xm<addr> -RTS",
1554 size, map_addr);
1555 return NULL;
1556 #endif
1557 } else {
1558 // hmm, we were given memory somewhere else, but it's
1559 // still under 2Gb so we can use it. Next time, ask
1560 // for memory right after the place we just got some
1561 mmap_32bit_base = (StgWord8*)result + size;
1562 }
1563 }
1564 } else {
1565 if ((W_)result > 0x80000000) {
1566 // oops, we were given memory over 2Gb
1567 // ... try allocating memory somewhere else?;
1568 debugTrace(DEBUG_linker,
1569 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1570 bytes, result);
1571 munmap(result, size);
1572
1573 // Set a base address and try again... (guess: 1Gb)
1574 mmap_32bit_base = (void*)0x40000000;
1575 goto mmap_again;
1576 }
1577 }
1578 #endif
1579
1580 IF_DEBUG(linker,
1581 debugBelch("mmapForLinker: mapped %" FMT_Word
1582 " bytes starting at %p\n", (W_)size, result));
1583 IF_DEBUG(linker,
1584 debugBelch("mmapForLinker: done\n"));
1585
1586 return result;
1587 }
1588 #endif
1589
1590 /*
1591 * Remove symbols from the symbol table, and free oc->symbols.
1592 * This operation is idempotent.
1593 */
1594 static void removeOcSymbols (ObjectCode *oc)
1595 {
1596 if (oc->symbols == NULL) return;
1597
1598 // Remove all the mappings for the symbols within this object..
1599 int i;
1600 for (i = 0; i < oc->n_symbols; i++) {
1601 if (oc->symbols[i] != NULL) {
1602 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1603 }
1604 }
1605
1606 stgFree(oc->symbols);
1607 oc->symbols = NULL;
1608 }
1609
1610 /*
1611 * Release StablePtrs and free oc->stable_ptrs.
1612 * This operation is idempotent.
1613 */
1614 static void freeOcStablePtrs (ObjectCode *oc)
1615 {
1616 // Release any StablePtrs that were created when this
1617 // object module was initialized.
1618 ForeignExportStablePtr *fe_ptr, *next;
1619
1620 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1621 next = fe_ptr->next;
1622 freeStablePtr(fe_ptr->stable_ptr);
1623 stgFree(fe_ptr);
1624 }
1625 oc->stable_ptrs = NULL;
1626 }
1627
1628 static void
1629 freePreloadObjectFile (ObjectCode *oc)
1630 {
1631 #if defined(mingw32_HOST_OS)
1632
1633 VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
1634
1635 IndirectAddr *ia, *ia_next;
1636 ia = indirects;
1637 while (ia != NULL) {
1638 ia_next = ia->next;
1639 stgFree(ia);
1640 ia = ia_next;
1641 }
1642 indirects = NULL;
1643
1644 #else
1645
1646 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1647 munmap(oc->image, oc->fileSize);
1648 }
1649 else {
1650 stgFree(oc->image);
1651 }
1652
1653 #endif
1654
1655 oc->image = NULL;
1656 oc->fileSize = 0;
1657 }
1658
1659 /*
1660 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1661 * the GC when a previously unloaded ObjectCode has been determined to be
1662 * unused, and when an error occurs during loadObj().
1663 */
1664 void freeObjectCode (ObjectCode *oc)
1665 {
1666 freePreloadObjectFile(oc);
1667
1668 if (oc->symbols != NULL) {
1669 stgFree(oc->symbols);
1670 oc->symbols = NULL;
1671 }
1672
1673 if (oc->extraInfos != NULL) {
1674 freeHashTable(oc->extraInfos, NULL);
1675 oc->extraInfos = NULL;
1676 }
1677
1678 if (oc->sections != NULL) {
1679 int i;
1680 for (i=0; i < oc->n_sections; i++) {
1681 if (oc->sections[i].start != NULL) {
1682 switch(oc->sections[i].alloc){
1683 #if RTS_LINKER_USE_MMAP
1684 case SECTION_MMAP:
1685 munmap(oc->sections[i].mapped_start,
1686 oc->sections[i].mapped_size);
1687 break;
1688 case SECTION_M32:
1689 m32_free(oc->sections[i].start,
1690 oc->sections[i].size);
1691 break;
1692 #endif
1693 case SECTION_MALLOC:
1694 stgFree(oc->sections[i].start);
1695 break;
1696 default:
1697 break;
1698 }
1699 }
1700 }
1701 stgFree(oc->sections);
1702 }
1703
1704 freeProddableBlocks(oc);
1705
1706 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1707 * alongside the image, so we don't need to free. */
1708 #if NEED_SYMBOL_EXTRAS && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
1709 if (RTS_LINKER_USE_MMAP) {
1710 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1711 m32_free(oc->symbol_extras,
1712 sizeof(SymbolExtra) * oc->n_symbol_extras);
1713 }
1714 }
1715 else {
1716 stgFree(oc->symbol_extras);
1717 }
1718 #endif
1719
1720 stgFree(oc->fileName);
1721 stgFree(oc->archiveMemberName);
1722
1723 stgFree(oc);
1724 }
1725
1726 /* -----------------------------------------------------------------------------
1727 * Sets the initial status of a fresh ObjectCode
1728 */
1729 static void setOcInitialStatus(ObjectCode* oc) {
1730 if (oc->archiveMemberName == NULL) {
1731 oc->status = OBJECT_NEEDED;
1732 } else {
1733 oc->status = OBJECT_LOADED;
1734 }
1735 }
1736
1737 static ObjectCode*
1738 mkOc( pathchar *path, char *image, int imageSize,
1739 rtsBool mapped, char *archiveMemberName, int misalignment ) {
1740 ObjectCode* oc;
1741
1742 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1743 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1744
1745 # if defined(OBJFORMAT_ELF)
1746 oc->formatName = "ELF";
1747 # elif defined(OBJFORMAT_PEi386)
1748 oc->formatName = "PEi386";
1749 # elif defined(OBJFORMAT_MACHO)
1750 oc->formatName = "Mach-O";
1751 # else
1752 stgFree(oc);
1753 barf("loadObj: not implemented on this platform");
1754 # endif
1755
1756 oc->image = image;
1757 oc->fileName = pathdup(path);
1758
1759 if (archiveMemberName) {
1760 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1761 strcpy(oc->archiveMemberName, archiveMemberName);
1762 } else {
1763 oc->archiveMemberName = NULL;
1764 }
1765
1766 setOcInitialStatus( oc );
1767
1768 oc->fileSize = imageSize;
1769 oc->symbols = NULL;
1770 oc->n_sections = 0;
1771 oc->sections = NULL;
1772 oc->proddables = NULL;
1773 oc->stable_ptrs = NULL;
1774 #if NEED_SYMBOL_EXTRAS
1775 oc->symbol_extras = NULL;
1776 #endif
1777 oc->imageMapped = mapped;
1778
1779 oc->misalignment = misalignment;
1780 oc->extraInfos = NULL;
1781
1782 /* chain it onto the list of objects */
1783 oc->next = NULL;
1784
1785 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1786 return oc;
1787 }
1788
1789 /* -----------------------------------------------------------------------------
1790 * Check if an object or archive is already loaded.
1791 *
1792 * Returns: 1 if the path is already loaded, 0 otherwise.
1793 */
1794 static HsInt
1795 isAlreadyLoaded( pathchar *path )
1796 {
1797 ObjectCode *o;
1798 for (o = objects; o; o = o->next) {
1799 if (0 == pathcmp(o->fileName, path)) {
1800 return 1; /* already loaded */
1801 }
1802 }
1803 return 0; /* not loaded yet */
1804 }
1805
1806 static HsInt loadArchive_ (pathchar *path)
1807 {
1808 ObjectCode* oc;
1809 char *image;
1810 int memberSize;
1811 FILE *f;
1812 int n;
1813 size_t thisFileNameSize;
1814 char *fileName;
1815 size_t fileNameSize;
1816 int isObject, isGnuIndex, isThin, isImportLib;
1817 char tmp[20];
1818 char *gnuFileIndex;
1819 int gnuFileIndexSize;
1820 #if defined(darwin_HOST_OS)
1821 int i;
1822 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
1823 #if defined(i386_HOST_ARCH)
1824 const uint32_t mycputype = CPU_TYPE_X86;
1825 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
1826 #elif defined(x86_64_HOST_ARCH)
1827 const uint32_t mycputype = CPU_TYPE_X86_64;
1828 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
1829 #elif defined(powerpc_HOST_ARCH)
1830 const uint32_t mycputype = CPU_TYPE_POWERPC;
1831 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1832 #elif defined(powerpc64_HOST_ARCH)
1833 const uint32_t mycputype = CPU_TYPE_POWERPC64;
1834 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1835 #else
1836 #error Unknown Darwin architecture
1837 #endif
1838 #endif
1839 int misalignment = 0;
1840
1841 /* TODO: don't call barf() on error, instead return an error code, freeing
1842 * all resources correctly. This function is pretty complex, so it needs
1843 * to be refactored to make this practical. */
1844
1845 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
1846 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
1847
1848 /* Check that we haven't already loaded this archive.
1849 Ignore requests to load multiple times */
1850 if (isAlreadyLoaded(path)) {
1851 IF_DEBUG(linker,
1852 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1853 return 1; /* success */
1854 }
1855
1856 gnuFileIndex = NULL;
1857 gnuFileIndexSize = 0;
1858
1859 fileNameSize = 32;
1860 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
1861
1862 isThin = 0;
1863 isImportLib = 0;
1864
1865 f = pathopen(path, WSTR("rb"));
1866 if (!f)
1867 barf("loadObj: can't read `%" PATH_FMT "'", path);
1868
1869 /* Check if this is an archive by looking for the magic "!<arch>\n"
1870 * string. Usually, if this fails, we barf and quit. On Darwin however,
1871 * we may have a fat archive, which contains archives for more than
1872 * one architecture. Fat archives start with the magic number 0xcafebabe,
1873 * always stored big endian. If we find a fat_header, we scan through
1874 * the fat_arch structs, searching through for one for our host
1875 * architecture. If a matching struct is found, we read the offset
1876 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
1877 * from the start of the file.
1878 *
1879 * A subtlety is that all of the members of the fat_header and fat_arch
1880 * structs are stored big endian, so we need to call byte order
1881 * conversion functions.
1882 *
1883 * If we find the appropriate architecture in a fat archive, we gobble
1884 * its magic "!<arch>\n" string and continue processing just as if
1885 * we had a single architecture archive.
1886 */
1887
1888 n = fread ( tmp, 1, 8, f );
1889 if (n != 8)
1890 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
1891 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
1892 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
1893 *
1894 * ar thin libraries have the exact same format as normal archives except they
1895 * have a different magic string and they don't copy the object files into the
1896 * archive.
1897 *
1898 * Instead each header entry points to the location of the object file on disk.
1899 * This is useful when a library is only created to satisfy a compile time dependency
1900 * instead of to be distributed. This saves the time required for copying.
1901 *
1902 * Thin archives are always flattened. They always only contain simple headers
1903 * pointing to the object file and so we need not allocate more memory than needed
1904 * to find the object file.
1905 *
1906 */
1907 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
1908 isThin = 1;
1909 }
1910 #if defined(darwin_HOST_OS)
1911 /* Not a standard archive, look for a fat archive magic number: */
1912 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
1913 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
1914 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
1915 nfat_offset = 0;
1916
1917 for (i = 0; i < (int)nfat_arch; i++) {
1918 /* search for the right arch */
1919 n = fread( tmp, 1, 20, f );
1920 if (n != 8)
1921 barf("loadArchive: Failed reading arch from `%s'", path);
1922 cputype = ntohl(*(uint32_t *)tmp);
1923 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
1924
1925 if (cputype == mycputype && cpusubtype == mycpusubtype) {
1926 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
1927 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
1928 break;
1929 }
1930 }
1931
1932 if (nfat_offset == 0) {
1933 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
1934 }
1935 else {
1936 n = fseek( f, nfat_offset, SEEK_SET );
1937 if (n != 0)
1938 barf("loadArchive: Failed to seek to arch in `%s'", path);
1939 n = fread ( tmp, 1, 8, f );
1940 if (n != 8)
1941 barf("loadArchive: Failed reading header from `%s'", path);
1942 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1943 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
1944 }
1945 }
1946 }
1947 else {
1948 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
1949 }
1950 #else
1951 else {
1952 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
1953 }
1954 #endif
1955
1956 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
1957
1958 while (1) {
1959 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
1960 n = fread ( fileName, 1, 16, f );
1961 if (n != 16) {
1962 if (feof(f)) {
1963 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
1964 break;
1965 }
1966 else {
1967 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
1968 }
1969 }
1970
1971 #if defined(darwin_HOST_OS)
1972 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
1973 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
1974 break;
1975 }
1976 #endif
1977
1978 n = fread ( tmp, 1, 12, f );
1979 if (n != 12)
1980 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
1981 n = fread ( tmp, 1, 6, f );
1982 if (n != 6)
1983 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
1984 n = fread ( tmp, 1, 6, f );
1985 if (n != 6)
1986 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
1987 n = fread ( tmp, 1, 8, f );
1988 if (n != 8)
1989 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
1990 n = fread ( tmp, 1, 10, f );
1991 if (n != 10)
1992 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
1993 tmp[10] = '\0';
1994 for (n = 0; isdigit(tmp[n]); n++);
1995 tmp[n] = '\0';
1996 memberSize = atoi(tmp);
1997
1998 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
1999 n = fread ( tmp, 1, 2, f );
2000 if (n != 2)
2001 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
2002 if (strncmp(tmp, "\x60\x0A", 2) != 0)
2003 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
2004 path, ftell(f), tmp[0], tmp[1]);
2005
2006 isGnuIndex = 0;
2007 /* Check for BSD-variant large filenames */
2008 if (0 == strncmp(fileName, "#1/", 3)) {
2009 fileName[16] = '\0';
2010 if (isdigit(fileName[3])) {
2011 for (n = 4; isdigit(fileName[n]); n++);
2012 fileName[n] = '\0';
2013 thisFileNameSize = atoi(fileName + 3);
2014 memberSize -= thisFileNameSize;
2015 if (thisFileNameSize >= fileNameSize) {
2016 /* Double it to avoid potentially continually
2017 increasing it by 1 */
2018 fileNameSize = thisFileNameSize * 2;
2019 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2020 }
2021 n = fread ( fileName, 1, thisFileNameSize, f );
2022 if (n != (int)thisFileNameSize) {
2023 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
2024 path);
2025 }
2026 fileName[thisFileNameSize] = 0;
2027
2028 /* On OS X at least, thisFileNameSize is the size of the
2029 fileName field, not the length of the fileName
2030 itself. */
2031 thisFileNameSize = strlen(fileName);
2032 }
2033 else {
2034 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
2035 }
2036 }
2037 /* Check for GNU file index file */
2038 else if (0 == strncmp(fileName, "//", 2)) {
2039 fileName[0] = '\0';
2040 thisFileNameSize = 0;
2041 isGnuIndex = 1;
2042 }
2043 /* Check for a file in the GNU file index */
2044 else if (fileName[0] == '/') {
2045 if (isdigit(fileName[1])) {
2046 int i;
2047
2048 for (n = 2; isdigit(fileName[n]); n++);
2049 fileName[n] = '\0';
2050 n = atoi(fileName + 1);
2051
2052 if (gnuFileIndex == NULL) {
2053 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
2054 }
2055 if (n < 0 || n > gnuFileIndexSize) {
2056 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
2057 }
2058 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
2059 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
2060 }
2061 for (i = n; gnuFileIndex[i] != '\n'; i++);
2062 thisFileNameSize = i - n - 1;
2063 if (thisFileNameSize >= fileNameSize) {
2064 /* Double it to avoid potentially continually
2065 increasing it by 1 */
2066 fileNameSize = thisFileNameSize * 2;
2067 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2068 }
2069 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
2070 fileName[thisFileNameSize] = '\0';
2071 }
2072 else if (fileName[1] == ' ') {
2073 fileName[0] = '\0';
2074 thisFileNameSize = 0;
2075 }
2076 else {
2077 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
2078 }
2079 }
2080 /* Finally, the case where the filename field actually contains
2081 the filename */
2082 else {
2083 /* GNU ar terminates filenames with a '/', this allowing
2084 spaces in filenames. So first look to see if there is a
2085 terminating '/'. */
2086 for (thisFileNameSize = 0;
2087 thisFileNameSize < 16;
2088 thisFileNameSize++) {
2089 if (fileName[thisFileNameSize] == '/') {
2090 fileName[thisFileNameSize] = '\0';
2091 break;
2092 }
2093 }
2094 /* If we didn't find a '/', then a space teminates the
2095 filename. Note that if we don't find one, then
2096 thisFileNameSize ends up as 16, and we already have the
2097 '\0' at the end. */
2098 if (thisFileNameSize == 16) {
2099 for (thisFileNameSize = 0;
2100 thisFileNameSize < 16;
2101 thisFileNameSize++) {
2102 if (fileName[thisFileNameSize] == ' ') {
2103 fileName[thisFileNameSize] = '\0';
2104 break;
2105 }
2106 }
2107 }
2108 }
2109
2110 IF_DEBUG(linker,
2111 debugBelch("loadArchive: Found member file `%s'\n", fileName));
2112
2113 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
2114 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
2115
2116 #if defined(OBJFORMAT_PEi386)
2117 /*
2118 * Note [MSVC import files (ext .lib)]
2119 * MSVC compilers store the object files in
2120 * the import libraries with extension .dll
2121 * so on Windows we should look for those too.
2122 * The PE COFF format doesn't specify any specific file name
2123 * for sections. So on windows, just try to load it all.
2124 *
2125 * Linker members (e.g. filename / are skipped since they are not needed)
2126 */
2127 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
2128
2129 /*
2130 * Note [GCC import files (ext .dll.a)]
2131 * GCC stores import information in the same binary format
2132 * as the object file normally has. The only difference is that
2133 * all the information are put in .idata sections. The only real
2134 * way to tell if we're dealing with an import lib is by looking
2135 * at the file extension.
2136 */
2137 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
2138 #endif // windows
2139
2140 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
2141 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
2142
2143 if (isObject) {
2144 char *archiveMemberName;
2145
2146 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
2147
2148 #if defined(mingw32_HOST_OS)
2149 // TODO: We would like to use allocateExec here, but allocateExec
2150 // cannot currently allocate blocks large enough.
2151 image = allocateImageAndTrampolines(path, fileName, f, memberSize,
2152 isThin);
2153 #elif defined(darwin_HOST_OS)
2154 if (RTS_LINKER_USE_MMAP)
2155 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
2156 else {
2157 /* See loadObj() */
2158 misalignment = machoGetMisalignment(f);
2159 image = stgMallocBytes(memberSize + misalignment,
2160 "loadArchive(image)");
2161 image += misalignment;
2162 }
2163
2164 #else // not windows or darwin
2165 image = stgMallocBytes(memberSize, "loadArchive(image)");
2166 #endif
2167 if (isThin) {
2168 FILE *member;
2169 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
2170
2171 /* Allocate and setup the dirname of the archive. We'll need
2172 this to locate the thin member */
2173 pathCopy = pathdup(path); // Convert the char* to a pathchar*
2174 dirName = pathdir(pathCopy);
2175
2176 /* Append the relative member name to the dirname. This should be
2177 be the full path to the actual thin member. */
2178 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
2179 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
2180 objFileName = mkPath(fileName);
2181 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
2182 stgFree(objFileName);
2183 stgFree(dirName);
2184
2185 member = pathopen(memberPath, WSTR("rb"));
2186 if (!member)
2187 barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
2188
2189 n = fread ( image, 1, memberSize, member );
2190 if (n != memberSize) {
2191 barf("loadArchive: error whilst reading `%s'", fileName);
2192 }
2193
2194 fclose(member);
2195 stgFree(memberPath);
2196 stgFree(pathCopy);
2197 }
2198 else
2199 {
2200 n = fread ( image, 1, memberSize, f );
2201 if (n != memberSize) {
2202 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2203 }
2204 }
2205
2206 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
2207 "loadArchive(file)");
2208 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
2209 path, (int)thisFileNameSize, fileName);
2210
2211 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
2212 , misalignment);
2213
2214 stgFree(archiveMemberName);
2215
2216 if (0 == loadOc(oc)) {
2217 stgFree(fileName);
2218 fclose(f);
2219 return 0;
2220 } else {
2221 #if defined(OBJFORMAT_PEi386)
2222 if (isImportLib)
2223 {
2224 findAndLoadImportLibrary(oc);
2225 stgFree(oc);
2226 oc = NULL;
2227 break;
2228 } else {
2229 #endif
2230 oc->next = objects;
2231 objects = oc;
2232 #if defined(OBJFORMAT_PEi386)
2233 }
2234 #endif
2235 }
2236 }
2237 else if (isGnuIndex) {
2238 if (gnuFileIndex != NULL) {
2239 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
2240 }
2241 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
2242 #if RTS_LINKER_USE_MMAP
2243 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
2244 #else
2245 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
2246 #endif
2247 n = fread ( gnuFileIndex, 1, memberSize, f );
2248 if (n != memberSize) {
2249 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2250 }
2251 gnuFileIndex[memberSize] = '/';
2252 gnuFileIndexSize = memberSize;
2253 }
2254 else if (isImportLib) {
2255 #if defined(OBJFORMAT_PEi386)
2256 if (checkAndLoadImportLibrary(path, fileName, f)) {
2257 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
2258 }
2259 else {
2260 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
2261 n = fseek(f, memberSize, SEEK_CUR);
2262 if (n != 0)
2263 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2264 memberSize, path);
2265 }
2266 #endif
2267 }
2268 else {
2269 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
2270 if (!isThin || thisFileNameSize == 0) {
2271 n = fseek(f, memberSize, SEEK_CUR);
2272 if (n != 0)
2273 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2274 memberSize, path);
2275 }
2276 }
2277
2278 /* .ar files are 2-byte aligned */
2279 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
2280 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
2281 n = fread ( tmp, 1, 1, f );
2282 if (n != 1) {
2283 if (feof(f)) {
2284 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
2285 break;
2286 }
2287 else {
2288 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
2289 }
2290 }
2291 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
2292 }
2293 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
2294 }
2295
2296 fclose(f);
2297
2298 stgFree(fileName);
2299 if (gnuFileIndex != NULL) {
2300 #if RTS_LINKER_USE_MMAP
2301 munmap(gnuFileIndex, gnuFileIndexSize + 1);
2302 #else
2303 stgFree(gnuFileIndex);
2304 #endif
2305 }
2306
2307 if (RTS_LINKER_USE_MMAP)
2308 m32_allocator_flush();
2309
2310 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
2311 return 1;
2312 }
2313
2314 HsInt loadArchive (pathchar *path)
2315 {
2316 ACQUIRE_LOCK(&linker_mutex);
2317 HsInt r = loadArchive_(path);
2318 RELEASE_LOCK(&linker_mutex);
2319 return r;
2320 }
2321
2322 //
2323 // Load the object file into memory. This will not be its final resting place,
2324 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
2325 // address space, properly aligned.
2326 //
2327 static ObjectCode *
2328 preloadObjectFile (pathchar *path)
2329 {
2330 int fileSize;
2331 struct_stat st;
2332 int r;
2333 void *image;
2334 ObjectCode *oc;
2335 int misalignment = 0;
2336
2337 r = pathstat(path, &st);
2338 if (r == -1) {
2339 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
2340 return NULL;
2341 }
2342
2343 fileSize = st.st_size;
2344
2345 #if RTS_LINKER_USE_MMAP
2346 int fd;
2347
2348 /* On many architectures malloc'd memory isn't executable, so we need to use
2349 * mmap. */
2350
2351 #if defined(openbsd_HOST_OS)
2352 fd = open(path, O_RDONLY, S_IRUSR);
2353 #else
2354 fd = open(path, O_RDONLY);
2355 #endif
2356 if (fd == -1) {
2357 errorBelch("loadObj: can't open %s", path);
2358 return NULL;
2359 }
2360
2361 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
2362 MAP_PRIVATE, fd, 0);
2363 // not 32-bit yet, we'll remap later
2364 close(fd);
2365
2366 #else /* !RTS_LINKER_USE_MMAP */
2367 FILE *f;
2368
2369 /* load the image into memory */
2370 /* coverity[toctou] */
2371 f = pathopen(path, WSTR("rb"));
2372 if (!f) {
2373 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
2374 return NULL;
2375 }
2376
2377 # if defined(mingw32_HOST_OS)
2378
2379 // TODO: We would like to use allocateExec here, but allocateExec
2380 // cannot currently allocate blocks large enough.
2381 image = allocateImageAndTrampolines(path, "itself", f, fileSize,
2382 HS_BOOL_FALSE);
2383 if (image == NULL) {
2384 fclose(f);
2385 return NULL;
2386 }
2387
2388 # elif defined(darwin_HOST_OS)
2389
2390 // In a Mach-O .o file, all sections can and will be misaligned
2391 // if the total size of the headers is not a multiple of the
2392 // desired alignment. This is fine for .o files that only serve
2393 // as input for the static linker, but it's not fine for us,
2394 // as SSE (used by gcc for floating point) and Altivec require
2395 // 16-byte alignment.
2396 // We calculate the correct alignment from the header before
2397 // reading the file, and then we misalign image on purpose so
2398 // that the actual sections end up aligned again.
2399 misalignment = machoGetMisalignment(f);
2400 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
2401 image += misalignment;
2402
2403 # else /* !defined(mingw32_HOST_OS) */
2404
2405 image = stgMallocBytes(fileSize, "loadObj(image)");
2406
2407 #endif
2408
2409 int n;
2410 n = fread ( image, 1, fileSize, f );
2411 fclose(f);
2412 if (n != fileSize) {
2413 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
2414 stgFree(image);
2415 return NULL;
2416 }
2417
2418 #endif /* RTS_LINKER_USE_MMAP */
2419
2420 oc = mkOc(path, image, fileSize, rtsTrue, NULL, misalignment);
2421
2422 return oc;
2423 }
2424
2425 /* -----------------------------------------------------------------------------
2426 * Load an obj (populate the global symbol table, but don't resolve yet)
2427 *
2428 * Returns: 1 if ok, 0 on error.
2429 */
2430 static HsInt loadObj_ (pathchar *path)
2431 {
2432 ObjectCode* oc;
2433 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
2434
2435 /* debugBelch("loadObj %s\n", path ); */
2436
2437 /* Check that we haven't already loaded this object.
2438 Ignore requests to load multiple times */
2439
2440 if (isAlreadyLoaded(path)) {
2441 IF_DEBUG(linker,
2442 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
2443 return 1; /* success */
2444 }
2445
2446 oc = preloadObjectFile(path);
2447 if (oc == NULL) return 0;
2448
2449 if (! loadOc(oc)) {
2450 // failed; free everything we've allocated
2451 removeOcSymbols(oc);
2452 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
2453 freeObjectCode(oc);
2454 return 0;
2455 }
2456
2457 oc->next = objects;
2458 objects = oc;
2459 return 1;
2460 }
2461
2462 HsInt loadObj (pathchar *path)
2463 {
2464 ACQUIRE_LOCK(&linker_mutex);
2465 HsInt r = loadObj_(path);
2466 RELEASE_LOCK(&linker_mutex);
2467 return r;
2468 }
2469
2470 static HsInt loadOc (ObjectCode* oc)
2471 {
2472 int r;
2473
2474 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
2475
2476 /* verify the in-memory image */
2477 # if defined(OBJFORMAT_ELF)
2478 r = ocVerifyImage_ELF ( oc );
2479 # elif defined(OBJFORMAT_PEi386)
2480 r = ocVerifyImage_PEi386 ( oc );
2481 # elif defined(OBJFORMAT_MACHO)
2482 r = ocVerifyImage_MachO ( oc );
2483 # else
2484 barf("loadObj: no verify method");
2485 # endif
2486 if (!r) {
2487 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
2488 return r;
2489 }
2490
2491 #if NEED_SYMBOL_EXTRAS
2492 # if defined(OBJFORMAT_MACHO)
2493 r = ocAllocateSymbolExtras_MachO ( oc );
2494 if (!r) {
2495 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
2496 return r;
2497 }
2498 # elif defined(OBJFORMAT_ELF)
2499 r = ocAllocateSymbolExtras_ELF ( oc );
2500 if (!r) {
2501 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
2502 return r;
2503 }
2504 # elif defined(OBJFORMAT_PEi386)
2505 ocAllocateSymbolExtras_PEi386 ( oc );
2506 # endif
2507 #endif
2508
2509 /* build the symbol list for this image */
2510 # if defined(OBJFORMAT_ELF)
2511 r = ocGetNames_ELF ( oc );
2512 # elif defined(OBJFORMAT_PEi386)
2513 r = ocGetNames_PEi386 ( oc );
2514 # elif defined(OBJFORMAT_MACHO)
2515 r = ocGetNames_MachO ( oc );
2516 # else
2517 barf("loadObj: no getNames method");
2518 # endif
2519 if (!r) {
2520 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
2521 return r;
2522 }
2523
2524 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
2525 setOcInitialStatus( oc );
2526 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
2527
2528 return 1;
2529 }
2530
2531 /* -----------------------------------------------------------------------------
2532 * try to load and initialize an ObjectCode into memory
2533 *
2534 * Returns: 1 if ok, 0 on error.
2535 */
2536 int ocTryLoad (ObjectCode* oc) {
2537 int r;
2538
2539 if (oc->status != OBJECT_NEEDED) {
2540 return 1;
2541 }
2542
2543 /* Check for duplicate symbols by looking into `symhash`.
2544 Duplicate symbols are any symbols which exist
2545 in different ObjectCodes that have both been loaded, or
2546 are to be loaded by this call.
2547
2548 This call is intended to have no side-effects when a non-duplicate
2549 symbol is re-inserted.
2550
2551 We set the Address to NULL since that is not used to distinguish
2552 symbols. Duplicate symbols are distinguished by name and oc.
2553 */
2554 int x;
2555 SymbolName* symbol;
2556 for (x = 0; x < oc->n_symbols; x++) {
2557 symbol = oc->symbols[x];
2558 if ( symbol
2559 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL, isSymbolWeak(oc, symbol), oc)) {
2560 return 0;
2561 }
2562 }
2563
2564 IF_DEBUG(linker, debugBelch("Resolving %" PATH_FMT "\n",
2565 oc->archiveMemberName ?
2566 oc->archiveMemberName : oc->fileName));
2567
2568 # if defined(OBJFORMAT_ELF)
2569 r = ocResolve_ELF ( oc );
2570 # elif defined(OBJFORMAT_PEi386)
2571 r = ocResolve_PEi386 ( oc );
2572 # elif defined(OBJFORMAT_MACHO)
2573 r = ocResolve_MachO ( oc );
2574 # else
2575 barf("ocTryLoad: not implemented on this platform");
2576 # endif
2577 if (!r) { return r; }
2578
2579 // run init/init_array/ctors/mod_init_func
2580
2581 loading_obj = oc; // tells foreignExportStablePtr what to do
2582 #if defined(OBJFORMAT_ELF)
2583 r = ocRunInit_ELF ( oc );
2584 #elif defined(OBJFORMAT_PEi386)
2585 r = ocRunInit_PEi386 ( oc );
2586 #elif defined(OBJFORMAT_MACHO)
2587 r = ocRunInit_MachO ( oc );
2588 #else
2589 barf("ocTryLoad: initializers not implemented on this platform");
2590 #endif
2591 loading_obj = NULL;
2592
2593 if (!r) { return r; }
2594
2595 oc->status = OBJECT_RESOLVED;
2596
2597 return 1;
2598 }
2599
2600 /* -----------------------------------------------------------------------------
2601 * resolve all the currently unlinked objects in memory
2602 *
2603 * Returns: 1 if ok, 0 on error.
2604 */
2605 static HsInt resolveObjs_ (void)
2606 {
2607 ObjectCode *oc;
2608 int r;
2609
2610 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
2611
2612 for (oc = objects; oc; oc = oc->next) {
2613 r = ocTryLoad(oc);
2614 if (!r)
2615 {
2616 return r;
2617 }
2618 }
2619
2620 #ifdef PROFILING
2621 // collect any new cost centres & CCSs that were defined during runInit
2622 initProfiling2();
2623 #endif
2624
2625 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
2626 return 1;
2627 }
2628
2629 HsInt resolveObjs (void)
2630 {
2631 ACQUIRE_LOCK(&linker_mutex);
2632 HsInt r = resolveObjs_();
2633 RELEASE_LOCK(&linker_mutex);
2634 return r;
2635 }
2636
2637 /* -----------------------------------------------------------------------------
2638 * delete an object from the pool
2639 */
2640 static HsInt unloadObj_ (pathchar *path, rtsBool just_purge)
2641 {
2642 ObjectCode *oc, *prev, *next;
2643 HsBool unloadedAnyObj = HS_BOOL_FALSE;
2644
2645 ASSERT(symhash != NULL);
2646 ASSERT(objects != NULL);
2647
2648 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
2649
2650 prev = NULL;
2651 for (oc = objects; oc; oc = next) {
2652 next = oc->next; // oc might be freed
2653
2654 if (!pathcmp(oc->fileName,path)) {
2655
2656 // these are both idempotent, so in just_purge mode we can
2657 // later call unloadObj() to really unload the object.
2658 removeOcSymbols(oc);
2659 freeOcStablePtrs(oc);
2660
2661 if (!just_purge) {
2662 if (prev == NULL) {
2663 objects = oc->next;
2664 } else {
2665 prev->next = oc->next;
2666 }
2667 ACQUIRE_LOCK(&linker_unloaded_mutex);
2668 oc->next = unloaded_objects;
2669 unloaded_objects = oc;
2670 oc->status = OBJECT_UNLOADED;
2671 RELEASE_LOCK(&linker_unloaded_mutex);
2672 // We do not own oc any more; it can be released at any time by
2673 // the GC in checkUnload().
2674 } else {
2675 prev = oc;
2676 }
2677
2678 /* This could be a member of an archive so continue
2679 * unloading other members. */
2680 unloadedAnyObj = HS_BOOL_TRUE;
2681 } else {
2682 prev = oc;
2683 }
2684 }
2685
2686 if (unloadedAnyObj) {
2687 return 1;
2688 }
2689 else {
2690 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
2691 return 0;
2692 }
2693 }
2694
2695 HsInt unloadObj (pathchar *path)
2696 {
2697 ACQUIRE_LOCK(&linker_mutex);
2698 HsInt r = unloadObj_(path, rtsFalse);
2699 RELEASE_LOCK(&linker_mutex);
2700 return r;
2701 }
2702
2703 HsInt purgeObj (pathchar *path)
2704 {
2705 ACQUIRE_LOCK(&linker_mutex);
2706 HsInt r = unloadObj_(path, rtsTrue);
2707 RELEASE_LOCK(&linker_mutex);
2708 return r;
2709 }
2710
2711 /* -----------------------------------------------------------------------------
2712 * Sanity checking. For each ObjectCode, maintain a list of address ranges
2713 * which may be prodded during relocation, and abort if we try and write
2714 * outside any of these.
2715 */
2716 static void
2717 addProddableBlock ( ObjectCode* oc, void* start, int size )
2718 {
2719 ProddableBlock* pb
2720 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
2721
2722 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
2723 ASSERT(size > 0);
2724 pb->start = start;
2725 pb->size = size;
2726 pb->next = oc->proddables;
2727 oc->proddables = pb;
2728 }
2729
2730 static void
2731 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
2732 {
2733 ProddableBlock* pb;
2734
2735 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
2736 char* s = (char*)(pb->start);
2737 char* e = s + pb->size;
2738 char* a = (char*)addr;
2739 if (a >= s && (a+size) <= e) return;
2740 }
2741 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
2742 }
2743
2744 static void freeProddableBlocks (ObjectCode *oc)
2745 {
2746 ProddableBlock *pb, *next;
2747
2748 for (pb = oc->proddables; pb != NULL; pb = next) {
2749 next = pb->next;
2750 stgFree(pb);
2751 }
2752 oc->proddables = NULL;
2753 }
2754
2755 /* -----------------------------------------------------------------------------
2756 * Section management.
2757 */
2758 static void
2759 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
2760 void* start, StgWord size, StgWord mapped_offset,
2761 void* mapped_start, StgWord mapped_size)
2762 {
2763 s->start = start; /* actual start of section in memory */
2764 s->size = size; /* actual size of section in memory */
2765 s->kind = kind;
2766 s->alloc = alloc;
2767 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
2768
2769 s->mapped_start = mapped_start; /* start of mmap() block */
2770 s->mapped_size = mapped_size; /* size of mmap() block */
2771
2772 IF_DEBUG(linker,
2773 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
2774 start, (void*)((StgWord)start + size),
2775 size, kind ));
2776 }
2777
2778
2779 /* --------------------------------------------------------------------------
2780 * Symbol Extras.
2781 * This is about allocating a small chunk of memory for every symbol in the
2782 * object file. We make sure that the SymboLExtras are always "in range" of
2783 * limited-range PC-relative instructions on various platforms by allocating
2784 * them right next to the object code itself.
2785 */
2786
2787 #if NEED_SYMBOL_EXTRAS
2788 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2789
2790 /*
2791 ocAllocateSymbolExtras
2792
2793 Allocate additional space at the end of the object file image to make room
2794 for jump islands (powerpc, x86_64, arm) and GOT entries (x86_64).
2795
2796 PowerPC relative branch instructions have a 24 bit displacement field.
2797 As PPC code is always 4-byte-aligned, this yields a +-32MB range.
2798 If a particular imported symbol is outside this range, we have to redirect
2799 the jump to a short piece of new code that just loads the 32bit absolute
2800 address and jumps there.
2801 On x86_64, PC-relative jumps and PC-relative accesses to the GOT are limited
2802 to 32 bits (+-2GB).
2803
2804 This function just allocates space for one SymbolExtra for every
2805 undefined symbol in the object file. The code for the jump islands is
2806 filled in by makeSymbolExtra below.
2807 */
2808
2809 static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
2810 {
2811 size_t n;
2812
2813 if (RTS_LINKER_USE_MMAP && USE_CONTIGUOUS_MMAP) {
2814 n = roundUpToPage(oc->fileSize);
2815
2816 /* Keep image and symbol_extras contiguous */
2817 void *new = mmapForLinker(n + (sizeof(SymbolExtra) * count),
2818 MAP_ANONYMOUS, -1, 0);
2819 if (new) {
2820 memcpy(new, oc->image, oc->fileSize);
2821 if (oc->imageMapped) {
2822 munmap(oc->image, n);
2823 }
2824 oc->image = new;
2825 oc->imageMapped = rtsTrue;
2826 oc->fileSize = n + (sizeof(SymbolExtra) * count);
2827 oc->symbol_extras = (SymbolExtra *) (oc->image + n);
2828 }
2829 else {
2830 oc->symbol_extras = NULL;
2831 return 0;
2832 }
2833 }
2834 else if( count > 0 ) {
2835 if (RTS_LINKER_USE_MMAP) {
2836 n = roundUpToPage(oc->fileSize);
2837
2838 oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8);
2839 if (oc->symbol_extras == NULL) return 0;
2840 }
2841 else {
2842 // round up to the nearest 4
2843 int aligned = (oc->fileSize + 3) & ~3;
2844 int misalignment = oc->misalignment;
2845
2846 oc->image -= misalignment;
2847 oc->image = stgReallocBytes( oc->image,
2848 misalignment +
2849 aligned + sizeof (SymbolExtra) * count,
2850 "ocAllocateSymbolExtras" );
2851 oc->image += misalignment;
2852
2853 oc->symbol_extras = (SymbolExtra *) (oc->image + aligned);
2854 }
2855 }
2856
2857 if (oc->symbol_extras != NULL) {
2858 memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
2859 }
2860
2861 oc->first_symbol_extra = first;
2862 oc->n_symbol_extras = count;
2863
2864 return 1;
2865 }
2866
2867 #endif
2868 #endif // NEED_SYMBOL_EXTRAS
2869
2870 #if defined(arm_HOST_ARCH)
2871
2872 static void
2873 ocFlushInstructionCache( ObjectCode *oc )
2874 {
2875 int i;
2876 // Object code
2877 for (i=0; i < oc->n_sections; i++) {
2878 Section *s = &oc->sections[i];
2879 // This is a bit too broad but we don't have any way to determine what
2880 // is certainly code
2881 if (s->kind == SECTIONKIND_CODE_OR_RODATA)
2882 __clear_cache(s->start, (void*) ((uintptr_t) s->start + s->size));
2883 }
2884
2885 // Jump islands
2886 // Note the (+1) to ensure that the last symbol extra is covered by the
2887 // flush.
2888 __clear_cache(oc->symbol_extras, &oc->symbol_extras[oc->n_symbol_extras+1]);
2889 }
2890
2891 #endif
2892
2893 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2894 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2895
2896 static SymbolExtra* makeSymbolExtra( ObjectCode* oc,
2897 unsigned long symbolNumber,
2898 unsigned long target )
2899 {
2900 SymbolExtra *extra;
2901
2902 ASSERT( symbolNumber >= oc->first_symbol_extra
2903 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2904
2905 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2906
2907 #ifdef powerpc_HOST_ARCH
2908 // lis r12, hi16(target)
2909 extra->jumpIsland.lis_r12 = 0x3d80;
2910 extra->jumpIsland.hi_addr = target >> 16;
2911
2912 // ori r12, r12, lo16(target)
2913 extra->jumpIsland.ori_r12_r12 = 0x618c;
2914 extra->jumpIsland.lo_addr = target & 0xffff;
2915
2916 // mtctr r12
2917 extra->jumpIsland.mtctr_r12 = 0x7d8903a6;
2918
2919 // bctr
2920 extra->jumpIsland.bctr = 0x4e800420;
2921 #endif
2922 #ifdef x86_64_HOST_ARCH
2923 // jmp *-14(%rip)
2924 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
2925 extra->addr = target;
2926 memcpy(extra->jumpIsland, jmp, 6);
2927 #endif
2928
2929 return extra;
2930 }
2931
2932 #endif
2933 #endif // defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2934
2935 #ifdef arm_HOST_ARCH
2936 static SymbolExtra* makeArmSymbolExtra( ObjectCode* oc,
2937 unsigned long symbolNumber,
2938 unsigned long target,
2939 int fromThumb,
2940 int toThumb )
2941 {
2942 SymbolExtra *extra;
2943
2944 ASSERT( symbolNumber >= oc->first_symbol_extra
2945 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2946
2947 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2948
2949 // Make sure instruction mode bit is set properly
2950 if (toThumb)
2951 target |= 1;
2952 else
2953 target &= ~1;
2954
2955 if (!fromThumb) {
2956 // In ARM encoding:
2957 // movw r12, #0
2958 // movt r12, #0
2959 // bx r12
2960 uint32_t code[] = { 0xe300c000, 0xe340c000, 0xe12fff1c };
2961
2962 // Patch lower half-word into movw
2963 code[0] |= ((target>>12) & 0xf) << 16;
2964 code[0] |= target & 0xfff;
2965 // Patch upper half-word into movt
2966 target >>= 16;
2967 code[1] |= ((target>>12) & 0xf) << 16;
2968 code[1] |= target & 0xfff;
2969
2970 memcpy(extra->jumpIsland, code, 12);
2971
2972 } else {
2973 // In Thumb encoding:
2974 // movw r12, #0
2975 // movt r12, #0
2976 // bx r12
2977 uint16_t code[] = { 0xf240, 0x0c00,
2978 0xf2c0, 0x0c00,
2979 0x4760 };
2980
2981 // Patch lower half-word into movw
2982 code[0] |= (target>>12) & 0xf;
2983 code[0] |= ((target>>11) & 0x1) << 10;
2984 code[1] |= ((target>>8) & 0x7) << 12;
2985 code[1] |= target & 0xff;
2986 // Patch upper half-word into movt
2987 target >>= 16;
2988 code[2] |= (target>>12) & 0xf;
2989 code[2] |= ((target>>11) & 0x1) << 10;
2990 code[3] |= ((target>>8) & 0x7) << 12;
2991 code[3] |= target & 0xff;
2992
2993 memcpy(extra->jumpIsland, code, 10);
2994 }
2995
2996 return extra;
2997 }
2998 #endif // arm_HOST_ARCH
2999
3000 /* --------------------------------------------------------------------------
3001 * PowerPC specifics (instruction cache flushing)
3002 * ------------------------------------------------------------------------*/
3003
3004 #ifdef powerpc_HOST_ARCH
3005 /*
3006 ocFlushInstructionCache
3007
3008 Flush the data & instruction caches.
3009 Because the PPC has split data/instruction caches, we have to
3010 do that whenever we modify code at runtime.
3011 */
3012
3013 static void
3014 ocFlushInstructionCacheFrom(void* begin, size_t length)
3015 {
3016 size_t n = (length + 3) / 4;
3017 unsigned long* p = begin;
3018
3019 while (n--)
3020 {
3021 __asm__ volatile ( "dcbf 0,%0\n\t"
3022 "sync\n\t"
3023 "icbi 0,%0"
3024 :
3025 : "r" (p)
3026 );
3027 p++;
3028 }
3029 __asm__ volatile ( "sync\n\t"
3030 "isync"
3031 );
3032 }
3033
3034 static void
3035 ocFlushInstructionCache( ObjectCode *oc )
3036 {
3037 /* The main object code */
3038 ocFlushInstructionCacheFrom(oc->image + oc->misalignment, oc->fileSize);
3039
3040 /* Jump Islands */
3041 ocFlushInstructionCacheFrom(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
3042 }
3043 #endif /* powerpc_HOST_ARCH */
3044
3045
3046 /* --------------------------------------------------------------------------
3047 * PEi386(+) specifics (Win32 targets)
3048 * ------------------------------------------------------------------------*/
3049
3050 /* The information for this linker comes from
3051 Microsoft Portable Executable
3052 and Common Object File Format Specification
3053 revision 8.3 February 2013
3054
3055 It can be found online at:
3056
3057 https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx
3058
3059 Things move, so if that fails, try searching for it via
3060
3061 http://www.google.com/search?q=PE+COFF+specification
3062
3063 The ultimate reference for the PE format is the Winnt.h
3064 header file that comes with the Platform SDKs; as always,
3065 implementations will drift wrt their documentation.
3066
3067 A good background article on the PE format is Matt Pietrek's
3068 March 1994 article in Microsoft System Journal (MSJ)
3069 (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
3070 Win32 Portable Executable File Format." The info in there
3071 has recently been updated in a two part article in
3072 MSDN magazine, issues Feb and March 2002,
3073 "Inside Windows: An In-Depth Look into the Win32 Portable
3074 Executable File Format"
3075
3076 John Levine's book "Linkers and Loaders" contains useful
3077 info on PE too.
3078
3079 The PE specification doesn't specify how to do the actual
3080 relocations. For this reason, and because both PE and ELF are
3081 based on COFF, the relocations for the PEi386+ code is based on
3082 the ELF relocations for the equivalent relocation type.
3083
3084 The ELF ABI can be found at
3085
3086 http://www.x86-64.org/documentation/abi.pdf
3087
3088 The current code is based on version 0.99.6 - October 2013
3089 */
3090
3091
3092 #if defined(OBJFORMAT_PEi386)
3093
3094 static int verifyCOFFHeader ( COFF_header *hdr, pathchar *filename);
3095
3096 /* We assume file pointer is right at the
3097 beginning of COFF object.
3098 */
3099 static char *
3100 allocateImageAndTrampolines (
3101 pathchar* arch_name, char* member_name,
3102 FILE* f USED_IF_x86_64_HOST_ARCH,
3103 int size,
3104 int isThin USED_IF_x86_64_HOST_ARCH)
3105 {
3106 char* image;
3107 #if defined(x86_64_HOST_ARCH)
3108 if (!isThin)
3109 {
3110 /* PeCoff contains number of symbols right in it's header, so
3111 we can reserve the room for symbolExtras right here. */
3112 COFF_header hdr;
3113 size_t n;
3114
3115 n = fread(&hdr, 1, sizeof_COFF_header, f);
3116 if (n != sizeof(COFF_header)) {
3117 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%S'",
3118 member_name, arch_name);
3119 return NULL;
3120 }
3121 fseek(f, -sizeof_COFF_header, SEEK_CUR);
3122
3123 if (!verifyCOFFHeader(&hdr, arch_name)) {
3124 return 0;
3125 }
3126
3127 /* We get back 8-byte aligned memory (is that guaranteed?), but
3128 the offsets to the sections within the file are all 4 mod 8
3129 (is that guaranteed?). We therefore need to offset the image
3130 by 4, so that all the pointers are 8-byte aligned, so that
3131 pointer tagging works. */
3132 /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
3133 which equals to 4 for 64-bit case and 0 for 32-bit case. */
3134 /* We allocate trampolines area for all symbols right behind
3135 image data, aligned on 8. */
3136 size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
3137 + hdr.NumberOfSymbols * sizeof(SymbolExtra);
3138 }
3139 #endif
3140 image = VirtualAlloc(NULL, size,
3141 MEM_RESERVE | MEM_COMMIT,
3142 PAGE_EXECUTE_READWRITE);
3143
3144 if (image == NULL) {
3145 errorBelch("%" PATH_FMT ": failed to allocate memory for image for %s",
3146 arch_name, member_name);
3147 return NULL;
3148 }
3149
3150 return image + PEi386_IMAGE_OFFSET;
3151 }
3152
3153 static int findAndLoadImportLibrary(ObjectCode* oc)
3154 {
3155 int i;
3156
3157 COFF_header* hdr;
3158 COFF_section* sectab;
3159 COFF_symbol* symtab;
3160 UChar* strtab;
3161
3162 hdr = (COFF_header*)(oc->image);
3163 sectab = (COFF_section*)(
3164 ((UChar*)(oc->image))
3165 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3166 );
3167
3168 symtab = (COFF_symbol*)(
3169 ((UChar*)(oc->image))
3170 + hdr->PointerToSymbolTable
3171 );
3172
3173 strtab = ((UChar*)symtab)
3174 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3175
3176 for (i = 0; i < oc->n_sections; i++)
3177 {
3178 COFF_section* sectab_i
3179 = (COFF_section*)myindex(sizeof_COFF_section, sectab, i);
3180
3181 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3182
3183 // Find the first entry containing a valid .idata$7 section.
3184 if (strcmp(secname, ".idata$7") == 0) {
3185 /* First load the containing DLL if not loaded. */
3186 Section section = oc->sections[i];
3187
3188 pathchar* dirName = pathdir(oc->fileName);
3189 HsPtr token = addLibrarySearchPath(dirName);
3190 stgFree(dirName);
3191 char* dllName = (char*)section.start;
3192
3193 if (strlen(dllName) == 0 || dllName[0] == ' ')
3194 {
3195 continue;
3196 }
3197
3198 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand '%ls' => `%s'\n", oc->fileName, dllName));
3199
3200 pathchar* dll = mkPath(dllName);
3201 removeLibrarySearchPath(token);
3202
3203 const char* result = addDLL(dll);
3204 stgFree(dll);
3205
3206 if (result != NULL) {
3207 errorBelch("Could not load `%s'. Reason: %s\n", (char*)dllName, result);
3208 return 0;
3209 }
3210
3211 break;
3212 }
3213
3214 stgFree(secname);
3215 }
3216
3217 return 1;
3218 }
3219
3220 static int checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f)
3221 {
3222 char* image;
3223 static HsBool load_dll_warn = HS_BOOL_FALSE;
3224
3225 if (load_dll_warn) { return 0; }
3226
3227 /* Based on Import Library specification. PE Spec section 7.1 */
3228
3229 COFF_import_header hdr;
3230 size_t n;
3231
3232 n = fread(&hdr, 1, sizeof_COFF_import_Header, f);
3233 if (n != sizeof(COFF_header)) {
3234 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%" PATH_FMT "'\n",
3235 member_name, arch_name);
3236 return 0;
3237 }
3238
3239 if (hdr.Sig1 != 0x0 || hdr.Sig2 != 0xFFFF) {
3240 fseek(f, -sizeof_COFF_import_Header, SEEK_CUR);
3241 IF_DEBUG(linker, debugBelch("loadArchive: Object `%s` is not an import lib. Skipping...\n", member_name));
3242 return 0;
3243 }
3244
3245 IF_DEBUG(linker, debugBelch("loadArchive: reading %d bytes at %ld\n", hdr.SizeOfData, ftell(f)));
3246
3247 image = malloc(hdr.SizeOfData);
3248 n = fread(image, 1, hdr.SizeOfData, f);
3249 if (n != hdr.SizeOfData) {
3250 errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n",
3251 member_name, arch_name);
3252 }
3253
3254 char* symbol = strtok(image, "\0");
3255 int symLen = strlen(symbol) + 1;
3256 int nameLen = n - symLen;
3257 char* dllName = malloc(sizeof(char) * nameLen);
3258 dllName = strncpy(dllName, image + symLen, nameLen);
3259 pathchar* dll = malloc(sizeof(wchar_t) * nameLen);
3260 mbstowcs(dll, dllName, nameLen);
3261 free(dllName);
3262
3263 IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%ls'\n", symbol, dll));
3264 const char* result = addDLL(dll);
3265
3266 free(image);
3267
3268 if (result != NULL) {
3269 errorBelch("Could not load `%ls'. Reason: %s\n", dll, result);
3270 load_dll_warn = HS_BOOL_TRUE;
3271
3272 free(dll);
3273 fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
3274 return 0;
3275 }
3276
3277 free(dll);
3278 return 1;
3279 }
3280
3281 /* We use myindex to calculate array addresses, rather than
3282 simply doing the normal subscript thing. That's because
3283 some of the above structs have sizes which are not
3284 a whole number of words. GCC rounds their sizes up to a
3285 whole number of words, which means that the address calcs
3286 arising from using normal C indexing or pointer arithmetic
3287 are just plain wrong. Sigh.
3288 */
3289 static UChar *
3290 myindex ( int scale, void* base, int index )
3291 {
3292 return
3293 ((UChar*)base) + scale * index;
3294 }
3295
3296
3297 static void
3298 printName ( UChar* name, UChar* strtab )
3299 {
3300 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3301 UInt32 strtab_offset = * (UInt32*)(name+4);
3302 debugBelch("%s", strtab + strtab_offset );
3303 } else {
3304 int i;
3305 for (i = 0; i < 8; i++) {
3306 if (name[i] == 0) break;
3307 debugBelch("%c", name[i] );
3308 }
3309 }
3310 }
3311
3312
3313 static void
3314 copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
3315 {
3316 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3317 UInt32 strtab_offset = * (UInt32*)(name+4);
3318 strncpy ( (char*)dst, (char*)strtab+strtab_offset, dstSize );
3319 dst[dstSize-1] = 0;
3320 } else {
3321 int i = 0;
3322 while (1) {
3323 if (i >= 8) break;
3324 if (name[i] == 0) break;
3325 dst[i] = name[i];
3326 i++;
3327 }
3328 dst[i] = 0;
3329 }
3330 }
3331
3332
3333 static UChar *
3334 cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
3335 {
3336 UChar* newstr;
3337 /* If the string is longer than 8 bytes, look in the
3338 string table for it -- this will be correctly zero terminated.
3339 */
3340 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3341 UInt32 strtab_offset = * (UInt32*)(name+4);
3342 return ((UChar*)strtab) + strtab_offset;
3343 }
3344 /* Otherwise, if shorter than 8 bytes, return the original,
3345 which by defn is correctly terminated.
3346 */
3347 if (name[7]==0) return name;
3348 /* The annoying case: 8 bytes. Copy into a temporary
3349 (XXX which is never freed ...)
3350 */
3351 newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
3352 ASSERT(newstr);
3353 strncpy((char*)newstr,(char*)name,8);
3354 newstr[8] = 0;
3355 return newstr;
3356 }
3357
3358 /* Getting the name of a section is mildly tricky, so we make a
3359 function for it. Sadly, in one case we have to copy the string
3360 (when it is exactly 8 bytes long there's no trailing '\0'), so for
3361 consistency we *always* copy the string; the caller must free it
3362 */
3363 static char *
3364 cstring_from_section_name (UChar* name, UChar* strtab)
3365 {
3366 char *newstr;
3367
3368 if (name[0]=='/') {
3369 int strtab_offset = strtol((char*)name+1,NULL,10);
3370 int len = strlen(((char*)strtab) + strtab_offset);
3371
3372 newstr = stgMallocBytes(len+1, "cstring_from_section_symbol_name");
3373 strcpy((char*)newstr, (char*)((UChar*)strtab) + strtab_offset);
3374 return newstr;
3375 }
3376 else
3377 {
3378 newstr = stgMallocBytes(9, "cstring_from_section_symbol_name");
3379 ASSERT(newstr);
3380 strncpy((char*)newstr,(char*)name,8);
3381 newstr[8] = 0;
3382 return newstr;
3383 }
3384 }
3385
3386 /* See Note [mingw-w64 name decoration scheme] */
3387 #ifndef x86_64_HOST_ARCH
3388 static void
3389 zapTrailingAtSign ( UChar* sym )
3390 {
3391 # define my_isdigit(c) ((c) >= '0' && (c) <= '9')
3392 int i, j;
3393 if (sym[0] == 0) return;
3394 i = 0;
3395 while (sym[i] != 0) i++;
3396 i--;
3397 j = i;
3398 while (j > 0 && my_isdigit(sym[j])) j--;
3399 if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
3400 # undef my_isdigit
3401 }
3402 #endif
3403
3404 /* See Note [mingw-w64 name decoration scheme] */
3405 #ifndef x86_64_HOST_ARCH
3406 #define STRIP_LEADING_UNDERSCORE 1
3407 #else
3408 #define STRIP_LEADING_UNDERSCORE 0
3409 #endif
3410
3411 /*
3412 Note [mingw-w64 name decoration scheme]
3413
3414 What's going on with name decoration? Well, original code
3415 have some crufty and ad-hocish paths related mostly to very old
3416 mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty
3417 uniform and MS-compatible decoration scheme across its tools and runtime.
3418
3419 The scheme is pretty straightforward: on 32 bit objects symbols are exported
3420 with underscore prepended (and @ + stack size suffix appended for stdcall
3421 functions), on 64 bits no underscore is prepended and no suffix is appended
3422 because we have no stdcall convention on 64 bits.
3423
3424 See #9218
3425 */
3426
3427 static SymbolAddr*
3428 lookupSymbolInDLLs ( UChar *lbl )
3429 {
3430 OpenedDLL* o_dll;
3431 SymbolAddr* sym;
3432
3433 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
3434 /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
3435
3436 sym = GetProcAddress(o_dll->instance, (char*)(lbl+STRIP_LEADING_UNDERSCORE));
3437 if (sym != NULL) {
3438 /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
3439 return sym;
3440 }
3441
3442 /* Ticket #2283.
3443 Long description: http://support.microsoft.com/kb/132044
3444 tl;dr:
3445 If C/C++ compiler sees __declspec(dllimport) ... foo ...
3446 it generates call *__imp_foo, and __imp_foo here has exactly
3447 the same semantics as in __imp_foo = GetProcAddress(..., "foo")
3448 */
3449 if (sym == NULL && strncmp ((const char*)lbl, "__imp_", 6) == 0) {
3450 sym = GetProcAddress(o_dll->instance, (char*)(lbl+6+STRIP_LEADING_UNDERSCORE));
3451 if (sym != NULL) {
3452 IndirectAddr* ret;
3453 ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
3454 ret->addr = sym;
3455 ret->next = indirects;
3456 indirects = ret;
3457 IF_DEBUG(linker,
3458 debugBelch("warning: %s from %S is linked instead of %s\n",
3459 (char*)(lbl+6+STRIP_LEADING_UNDERSCORE), o_dll->name, (char*)lbl));
3460 return (void*) & ret->addr;
3461 }
3462 }
3463
3464 sym = GetProcAddress(o_dll->instance, (char*)lbl);
3465 if (sym != NULL) {
3466 /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
3467 return sym;
3468 }
3469 }
3470 return NULL;
3471 }
3472
3473 static int
3474 verifyCOFFHeader (COFF_header *hdr, pathchar *fileName)
3475 {
3476 #if defined(i386_HOST_ARCH)
3477 if (hdr->Machine != 0x14c) {
3478 errorBelch("%" PATH_FMT ": Not x86 PEi386", fileName);
3479 return 0;
3480 }
3481 #elif defined(x86_64_HOST_ARCH)
3482 if (hdr->Machine != 0x8664) {
3483 errorBelch("%" PATH_FMT ": Not x86_64 PEi386", fileName);
3484 return 0;
3485 }
3486 #else
3487 errorBelch("PEi386 not supported on this arch");
3488 #endif
3489
3490 if (hdr->SizeOfOptionalHeader != 0) {
3491 errorBelch("%" PATH_FMT ": PEi386 with nonempty optional header",
3492 fileName);
3493 return 0;
3494 }
3495 if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
3496 (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
3497 (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
3498 (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
3499 errorBelch("%" PATH_FMT ": Not a PEi386 object file", fileName);
3500 return 0;
3501 }
3502 if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
3503 /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
3504 errorBelch("%" PATH_FMT ": Invalid PEi386 word size or endiannness: %d",
3505 fileName,
3506 (int)(hdr->Characteristics));
3507 return 0;
3508 }
3509 return 1;
3510 }
3511
3512 static int
3513 ocVerifyImage_PEi386 ( ObjectCode* oc )
3514 {
3515 int i;
3516 UInt32 j, noRelocs;
3517 COFF_header* hdr;
3518 COFF_section* sectab;
3519 COFF_symbol* symtab;
3520 UChar* strtab;
3521 /* debugBelch("\nLOADING %s\n", oc->fileName); */
3522 hdr = (COFF_header*)(oc->image);
3523 sectab = (COFF_section*) (
3524 ((UChar*)(oc->image))
3525 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3526 );
3527 symtab = (COFF_symbol*) (
3528 ((UChar*)(oc->image))
3529 + hdr->PointerToSymbolTable
3530 );
3531 strtab = ((UChar*)symtab)
3532 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3533
3534 if (!verifyCOFFHeader(hdr, oc->fileName)) {
3535 return 0;
3536 }
3537
3538 /* If the string table size is way crazy, this might indicate that
3539 there are more than 64k relocations, despite claims to the
3540 contrary. Hence this test. */
3541 /* debugBelch("strtab size %d\n", * (UInt32*)strtab); */
3542 #if 0
3543 if ( (*(UInt32*)strtab) > 600000 ) {
3544 /* Note that 600k has no special significance other than being
3545 big enough to handle the almost-2MB-sized lumps that
3546 constitute HSwin32*.o. */
3547 debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
3548 return 0;
3549 }
3550 #endif
3551
3552 /* .BSS Section is initialized in ocGetNames_PEi386
3553 but we need the Sections array initialized here already. */
3554 Section *sections;
3555 sections = (Section*)stgCallocBytes(
3556 sizeof(Section),
3557 hdr->NumberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */
3558 "ocVerifyImage_PEi386(sections)");
3559 oc->sections = sections;
3560 oc->n_sections = hdr->NumberOfSections + 1;
3561
3562 /* Initialize the Sections */
3563 for (i = 0; i < hdr->NumberOfSections; i++) {
3564 COFF_section* sectab_i
3565 = (COFF_section*)
3566 myindex(sizeof_COFF_section, sectab, i);
3567
3568 /* Calculate the start of the data section */
3569 sections[i].start = oc->image + sectab_i->PointerToRawData;
3570 }
3571
3572 /* No further verification after this point; only debug printing. */
3573 i = 0;
3574 IF_DEBUG(linker, i=1);
3575 if (i == 0) return 1;
3576
3577 debugBelch("sectab offset = %" FMT_SizeT "\n",
3578 ((UChar*)sectab) - ((UChar*)hdr) );
3579 debugBelch("symtab offset = %" FMT_SizeT "\n",
3580 ((UChar*)symtab) - ((UChar*)hdr) );
3581 debugBelch("strtab offset = %" FMT_SizeT "\n",
3582 ((UChar*)strtab) - ((UChar*)hdr) );
3583
3584 debugBelch("\n" );
3585 debugBelch( "Machine: 0x%x\n", (UInt32)(hdr->Machine) );
3586 debugBelch( "# sections: %d\n", (UInt32)(hdr->NumberOfSections) );
3587 debugBelch( "time/date: 0x%x\n", (UInt32)(hdr->TimeDateStamp) );
3588 debugBelch( "symtab offset: %d\n", (UInt32)(hdr->PointerToSymbolTable) );
3589 debugBelch( "# symbols: %d\n", (UInt32)(hdr->NumberOfSymbols) );
3590 debugBelch( "sz of opt hdr: %d\n", (UInt32)(hdr->SizeOfOptionalHeader) );
3591 debugBelch( "characteristics: 0x%x\n", (UInt32)(hdr->Characteristics) );
3592
3593 /* Print the section table. */
3594 debugBelch("\n" );
3595 for (i = 0; i < hdr->NumberOfSections; i++) {
3596 COFF_reloc* reltab;
3597 COFF_section* sectab_i
3598 = (COFF_section*)
3599 myindex ( sizeof_COFF_section, sectab, i );
3600 Section section = sections[i];
3601 debugBelch(
3602 "\n"
3603 "section %d\n"
3604 " name `",
3605 i
3606 );
3607 printName ( sectab_i->Name, strtab );
3608 debugBelch(
3609 "'\n"
3610 " vsize %d\n"
3611 " vaddr %d\n"
3612 " data sz %d\n"
3613 " data off 0x%p\n"
3614 " num rel %d\n"
3615 " off rel %d\n"
3616 " ptr raw 0x%x\n",
3617 sectab_i->VirtualSize,
3618 sectab_i->VirtualAddress,
3619 sectab_i->SizeOfRawData,
3620 section.start,
3621 sectab_i->NumberOfRelocations,
3622 sectab_i->PointerToRelocations,
3623 sectab_i->PointerToRawData
3624 );
3625 reltab = (COFF_reloc*) (
3626 ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
3627 );
3628
3629 if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
3630 /* If the relocation field (a short) has overflowed, the
3631 * real count can be found in the first reloc entry.
3632 *
3633 * See Section 4.1 (last para) of the PE spec (rev6.0).
3634 */
3635 COFF_reloc* rel = (COFF_reloc*)
3636 myindex ( sizeof_COFF_reloc, reltab, 0 );
3637 noRelocs = rel->VirtualAddress;
3638 j = 1;
3639 } else {
3640 noRelocs = sectab_i->NumberOfRelocations;
3641 j = 0;
3642 }
3643
3644 for (; j < noRelocs; j++) {
3645 COFF_symbol* sym;
3646 COFF_reloc* rel = (COFF_reloc*)
3647 myindex ( sizeof_COFF_reloc, reltab, j );
3648 debugBelch(
3649 " type 0x%-4x vaddr 0x%-8x name `",
3650 (UInt32)rel->Type,
3651 rel->VirtualAddress );
3652 sym = (COFF_symbol*)
3653 myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
3654 /* Hmm..mysterious looking offset - what's it for? SOF */
3655 printName ( sym->Name, strtab -10 );
3656 debugBelch("'\n" );
3657 }
3658
3659 debugBelch("\n" );
3660 }
3661 debugBelch("\n" );
3662 debugBelch("string table has size 0x%x\n", * (UInt32*)strtab );
3663 debugBelch("---START of string table---\n");
3664 for (i = 4; i < *(Int32*)strtab; i++) {
3665 if (strtab[i] == 0)
3666 debugBelch("\n"); else
3667 debugBelch("%c", strtab[i] );
3668 }
3669 debugBelch("--- END of string table---\n");
3670
3671 debugBelch("\n" );
3672 i = 0;
3673 while (1) {
3674 COFF_symbol* symtab_i;
3675 if (i >= (Int32)(hdr->NumberOfSymbols)) break;
3676 symtab_i = (COFF_symbol*)
3677 myindex ( sizeof_COFF_symbol, symtab, i );
3678 debugBelch(
3679 "symbol %d\n"
3680 " name `",
3681 i
3682 );
3683 printName ( symtab_i->Name, strtab );
3684 debugBelch(
3685 "'\n"
3686 " value 0x%x\n"
3687 " 1+sec# %d\n"
3688 " type 0x%x\n"
3689 " sclass 0x%x\n"
3690 " nAux %d\n",
3691 symtab_i->Value,
3692 (Int32)(symtab_i->SectionNumber),
3693 (UInt32)symtab_i->Type,
3694 (UInt32)symtab_i->StorageClass,
3695 (UInt32)symtab_i->NumberOfAuxSymbols
3696 );
3697 i += symtab_i->NumberOfAuxSymbols;
3698 i++;
3699 }
3700
3701 debugBelch("\n" );
3702 return 1;
3703 }
3704
3705
3706 static int
3707 ocGetNames_PEi386 ( ObjectCode* oc )
3708 {
3709 COFF_header* hdr;
3710 COFF_section* sectab;
3711 COFF_symbol* symtab;
3712 UChar* strtab;
3713
3714 UChar* sname;
3715 SymbolAddr* addr;
3716 int i;
3717
3718 hdr = (COFF_header*)(oc->image);
3719 sectab = (COFF_section*) (
3720 ((UChar*)(oc->image))
3721 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3722 );
3723 symtab = (COFF_symbol*) (
3724 ((UChar*)(oc->image))
3725 + hdr->PointerToSymbolTable
3726 );
3727 strtab = ((UChar*)(oc->image))
3728 + hdr->PointerToSymbolTable
3729 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3730
3731 /* Allocate space for any (local, anonymous) .bss sections. */
3732
3733 for (i = 0; i < hdr->NumberOfSections; i++) {
3734 UInt32 bss_sz;
3735 UChar* zspace;
3736 COFF_section* sectab_i
3737 = (COFF_section*)
3738 myindex ( sizeof_COFF_section, sectab, i );
3739
3740 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3741
3742 if (0 != strcmp(secname, ".bss")) {
3743 stgFree(secname);
3744 continue;
3745 }
3746
3747 stgFree(secname);
3748
3749 /* sof 10/05: the PE spec text isn't too clear regarding what
3750 * the SizeOfRawData field is supposed to hold for object
3751 * file sections containing just uninitialized data -- for executables,
3752 * it is supposed to be zero; unclear what it's supposed to be
3753 * for object files. However, VirtualSize is guaranteed to be
3754 * zero for object files, which definitely suggests that SizeOfRawData
3755 * will be non-zero (where else would the size of this .bss section be
3756 * stored?) Looking at the COFF_section info for incoming object files,
3757 * this certainly appears to be the case.
3758 *
3759 * => I suspect we've been incorrectly handling .bss sections in (relocatable)
3760 * object files up until now. This turned out to bite us with ghc-6.4.1's use
3761 * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
3762 * variable decls into the .bss section. (The specific function in Q which
3763 * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
3764 */
3765 if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
3766 /* This is a non-empty .bss section.
3767 Allocate zeroed space for it */
3768 bss_sz = sectab_i->VirtualSize;
3769 if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
3770 zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
3771 oc->sections[i].start = zspace;
3772 addProddableBlock(oc, zspace, bss_sz);
3773 /* debugBelch("BSS anon section at 0x%x\n", zspace); */
3774 }
3775
3776 /* Copy section information into the ObjectCode. */
3777
3778 for (i = 0; i < hdr->NumberOfSections; i++) {
3779 UChar* start;
3780 UChar* end;
3781 UInt32 sz;
3782
3783 /* By default consider all section as CODE or DATA, which means we want to load them. */
3784 SectionKind kind
3785 = SECTIONKIND_CODE_OR_RODATA;
3786 COFF_section* sectab_i
3787 = (COFF_section*)
3788 myindex ( sizeof_COFF_section, sectab, i );
3789 Section section = oc->sections[i];
3790
3791 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3792
3793 IF_DEBUG(linker, debugBelch("section name = %s\n", secname ));
3794
3795 /* The PE file section flag indicates whether the section contains code or data. */
3796 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE ||
3797 sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
3798 kind = SECTIONKIND_CODE_OR_RODATA;
3799
3800 /* Check next if it contains any uninitialized data */
3801 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_UNINITIALIZED_DATA)
3802 kind = SECTIONKIND_RWDATA;
3803
3804 /* Finally check if it can be discarded. This will also ignore .debug sections */
3805 if (sectab_i->Characteristics & MYIMAGE_SCN_MEM_DISCARDABLE ||
3806 sectab_i->Characteristics & MYIMAGE_SCN_LNK_REMOVE)
3807 kind = SECTIONKIND_OTHER;
3808
3809 if (0==strcmp(".ctors", (char*)secname))
3810 kind = SECTIONKIND_INIT_ARRAY;
3811
3812 ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->VirtualSize == 0);
3813 sz = sectab_i->SizeOfRawData;
3814 if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;
3815
3816 start = section.start;
3817 end = start + sz - 1;
3818
3819 if (kind != SECTIONKIND_OTHER && end >= start) {
3820 addSection(&oc->sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
3821 addProddableBlock(oc, start, sz);
3822 }
3823
3824 stgFree(secname);
3825 }
3826
3827 /* Copy exported symbols into the ObjectCode. */
3828
3829 oc->n_symbols = hdr->NumberOfSymbols;
3830 oc->symbols = stgCallocBytes(sizeof(SymbolName*), oc->n_symbols,
3831 "ocGetNames_PEi386(oc->symbols)");
3832
3833 /* Work out the size of the global BSS section */
3834 StgWord globalBssSize = 0;
3835 for (i=0; i < (int)hdr->NumberOfSymbols; i++) {
3836 COFF_symbol* symtab_i;
3837 symtab_i = (COFF_symbol*)
3838 myindex ( sizeof_COFF_symbol, symtab, i );
3839 if (symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3840 && symtab_i->Value > 0
3841 && symtab_i->StorageClass != MYIMAGE_SYM_CLASS_SECTION) {
3842 globalBssSize += symtab_i->Value;
3843 }
3844 i += symtab_i->NumberOfAuxSymbols;
3845 }
3846
3847 /* Allocate BSS space */
3848 SymbolAddr* bss = NULL;
3849 if (globalBssSize > 0) {
3850 bss = stgCallocBytes(1, globalBssSize,
3851 "ocGetNames_PEi386(non-anonymous bss)");
3852 addSection(&oc->sections[oc->n_sections-1],
3853 SECTIONKIND_RWDATA, SECTION_MALLOC,
3854 bss, globalBssSize, 0, 0, 0);
3855 IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
3856 addProddableBlock(oc, bss, globalBssSize);
3857 } else {
3858 addSection(&oc->sections[oc->n_sections-1],
3859 SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
3860 }
3861
3862 for (i = 0; i < oc->n_symbols; i++) {
3863 COFF_symbol* symtab_i;
3864 symtab_i = (COFF_symbol*)
3865 myindex ( sizeof_COFF_symbol, symtab, i );
3866
3867 addr = NULL;
3868 HsBool isWeak = HS_BOOL_FALSE;
3869 if ( symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED
3870 && symtab_i->SectionNumber > 0) {
3871 /* This symbol is global and defined, viz, exported */
3872 /* for MYIMAGE_SYMCLASS_EXTERNAL
3873 && !MYIMAGE_SYM_UNDEFINED,
3874 the address of the symbol is:
3875 address of relevant section + offset in section
3876 */
3877 COFF_section* sectabent
3878 = (COFF_section*) myindex ( sizeof_COFF_section,
3879 sectab,
3880 symtab_i->SectionNumber-1 );
3881 if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL
3882 || ( symtab_i->StorageClass == MYIMAGE_SYM_CLASS_STATIC
3883 && sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT)
3884 ) {
3885 addr = (void*)((size_t)oc->sections[symtab_i->SectionNumber-1].start
3886 + symtab_i->Value);
3887 if (sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT) {
3888 isWeak = HS_BOOL_TRUE;
3889 }
3890 }
3891 }
3892 else if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_WEAK_EXTERNAL) {
3893 isWeak = HS_BOOL_TRUE;
3894 }
3895 else if ( symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3896 && symtab_i->Value > 0) {
3897 /* This symbol isn't in any section at all, ie, global bss.
3898 Allocate zeroed space for it from the BSS section */
3899 addr = bss;
3900 bss = (SymbolAddr*)((StgWord)bss + (StgWord)symtab_i->Value);
3901 IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symtab_i->Value));
3902 }
3903
3904 sname = cstring_from_COFF_symbol_name(symtab_i->Name, strtab);
3905 if (addr != NULL || isWeak == HS_BOOL_TRUE) {
3906
3907 /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
3908 IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname);)
3909 ASSERT(i >= 0 && i < oc->n_symbols);
3910 /* cstring_from_COFF_symbol_name always succeeds. */
3911 oc->symbols[i] = (SymbolName*)sname;
3912 if (isWeak == HS_BOOL_TRUE) {
3913 setWeakSymbol(oc, sname);
3914 }
3915