rts/Linker.c: Improve ugly C pre-processor hack
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31
32 #if !defined(mingw32_HOST_OS)
33 #include "posix/Signals.h"
34 #endif
35
36 // get protos for is*()
37 #include <ctype.h>
38
39 #ifdef HAVE_SYS_TYPES_H
40 #include <sys/types.h>
41 #endif
42
43 #include <inttypes.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <stdio.h>
47 #include <assert.h>
48 #include <libgen.h>
49
50 #ifdef HAVE_SYS_STAT_H
51 #include <sys/stat.h>
52 #endif
53
54 #if defined(HAVE_DLFCN_H)
55 #include <dlfcn.h>
56 #endif
57
58 /* PowerPC and ARM have relative branch instructions with only 24 bit
59 * displacements and therefore need jump islands contiguous with each object
60 * code module.
61 */
62 #if defined(powerpc_HOST_ARCH)
63 #define SHORT_REL_BRANCH 1
64 #endif
65 #if defined(arm_HOST_ARCH)
66 #define SHORT_REL_BRANCH 1
67 #endif
68
69 #if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
70 #define USE_CONTIGUOUS_MMAP 1
71 #else
72 #define USE_CONTIGUOUS_MMAP 0
73 #endif
74
75 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
76 # define OBJFORMAT_ELF
77 # include <regex.h> // regex is already used by dlopen() so this is OK
78 // to use here without requiring an additional lib
79 #elif defined (mingw32_HOST_OS)
80 # define OBJFORMAT_PEi386
81 # include <windows.h>
82 # include <shfolder.h> /* SHGetFolderPathW */
83 # include <math.h>
84 # include <wchar.h>
85 #elif defined(darwin_HOST_OS)
86 # define OBJFORMAT_MACHO
87 # include <regex.h>
88 # include <mach/machine.h>
89 # include <mach-o/fat.h>
90 # include <mach-o/loader.h>
91 # include <mach-o/nlist.h>
92 # include <mach-o/reloc.h>
93 #if defined(powerpc_HOST_ARCH)
94 # include <mach-o/ppc/reloc.h>
95 #endif
96 #if defined(x86_64_HOST_ARCH)
97 # include <mach-o/x86_64/reloc.h>
98 #endif
99 #endif
100
101 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
102 #define ALWAYS_PIC
103 #endif
104
105 #if defined(dragonfly_HOST_OS)
106 #include <sys/tls.h>
107 #endif
108
109 /* SymbolInfo tracks a symbol's address, the object code from which
110 it originated, and whether or not it's weak.
111
112 RtsSymbolInfo is used to track the state of the symbols currently
113 loaded or to be loaded by the Linker.
114
115 Where the information in the `ObjectCode` is used to track the
116 original status of the symbol inside the `ObjectCode`.
117
118 A weak symbol that has been used will still be marked as weak
119 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
120 */
121 typedef struct _RtsSymbolInfo {
122 SymbolAddr* value;
123 ObjectCode *owner;
124 HsBool weak;
125 } RtsSymbolInfo;
126
127 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
128 This hashtable will contain information on all symbols
129 that we know of, however the .o they are in may not be loaded.
130
131 Until the ObjectCode the symbol belongs to is actually
132 loaded this symbol may be replaced. So do not rely on
133 addresses of unloaded symbols.
134
135 Note [runtime-linker-phases]
136 --------------------------------------
137 Broadly the behavior of the runtime linker can be
138 split into the following four phases:
139
140 - Indexing (e.g. ocVerifyImage and ocGetNames)
141 - Initialization (e.g. ocResolve and ocRunInit)
142 - Resolve (e.g. resolveObjs())
143 - Lookup (e.g. lookupSymbol)
144
145 This is to enable lazy loading of symbols. Eager loading is problematic
146 as it means that all symbols must be available, even those which we will
147 never use. This is especially painful of Windows, where the number of
148 libraries required to link things like mingwex grows to be quite high.
149
150 We proceed through these stages as follows,
151
152 * During Indexing we verify and open the ObjectCode and
153 perform a quick scan/indexing of the ObjectCode. All the work
154 required to actually load the ObjectCode is done.
155
156 All symbols from the ObjectCode is also inserted into
157 `symhash`, where possible duplicates are handled via the semantics
158 described in `ghciInsertSymbolTable`.
159
160 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
161 depending on whether they are an archive members or not.
162
163 * During initialization we load ObjectCode, perform relocations, execute
164 static constructors etc. This phase may trigger other ObjectCodes to
165 be loaded because of the calls to lookupSymbol.
166
167 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
168 previous status was `OBJECT_LOADED`.
169
170 * During resolve we attempt to resolve all the symbols needed for the
171 initial link. This essentially means, that for any ObjectCode given
172 directly to the command-line we perform lookupSymbols on the required
173 symbols. lookupSymbols may trigger the loading of additional ObjectCode
174 if required.
175
176 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
177 the previous status was `OBJECT_NEEDED`.
178
179 * Lookup symbols is used to lookup any symbols required, both during initial
180 link and during statement and expression compilations in the REPL.
181 Declaration of e.g. an foreign import, will eventually call lookupSymbol
182 which will either fail (symbol unknown) or succeed (and possibly triggered a
183 load).
184
185 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
186
187 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
188 by calling unloadObj and starting over.
189 When a new declaration or statement is performed ultimately lookupSymbol is called
190 without doing a re-link.
191
192 The goal of these different phases is to allow the linker to be able to perform
193 "lazy loading" of ObjectCode. The reason for this is that we want to only link
194 in symbols that are actually required for the link. This reduces:
195
196 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
197 then we don't need to load libB. This means the dependency chain for libraries
198 such as mingw32 and mingwex can be broken down.
199
200 2) The number of duplicate symbols, since now only symbols that are
201 true duplicates will display the error.
202 */
203 static /*Str*/HashTable *symhash;
204
205 /* List of currently loaded objects */
206 ObjectCode *objects = NULL; /* initially empty */
207
208 /* List of objects that have been unloaded via unloadObj(), but are waiting
209 to be actually freed via checkUnload() */
210 ObjectCode *unloaded_objects = NULL; /* initially empty */
211
212 #ifdef THREADED_RTS
213 /* This protects all the Linker's global state except unloaded_objects */
214 Mutex linker_mutex;
215 /*
216 * This protects unloaded_objects. We have a separate mutex for this, because
217 * the GC needs to access unloaded_objects in checkUnload, while the linker only
218 * needs to access unloaded_objects in unloadObj(), so this allows most linker
219 * operations proceed concurrently with the GC.
220 */
221 Mutex linker_unloaded_mutex;
222 #endif
223
224 /* Type of the initializer */
225 typedef void (*init_t) (int argc, char **argv, char **env);
226
227 static HsInt isAlreadyLoaded( pathchar *path );
228 static HsInt loadOc( ObjectCode* oc );
229 static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
230 rtsBool mapped, char *archiveMemberName,
231 int misalignment
232 );
233
234 // Use wchar_t for pathnames on Windows (#5697)
235 #if defined(mingw32_HOST_OS)
236 #define pathcmp wcscmp
237 #define pathlen wcslen
238 #define pathopen _wfopen
239 #define pathstat _wstat
240 #define struct_stat struct _stat
241 #define open wopen
242 #define WSTR(s) L##s
243 #define pathprintf swprintf
244 #define pathsize sizeof(wchar_t)
245 #else
246 #define pathcmp strcmp
247 #define pathlen strlen
248 #define pathopen fopen
249 #define pathstat stat
250 #define struct_stat struct stat
251 #define WSTR(s) s
252 #define pathprintf snprintf
253 #define pathsize sizeof(char)
254 #endif
255
256 static pathchar* pathdup(pathchar *path)
257 {
258 pathchar *ret;
259 #if defined(mingw32_HOST_OS)
260 ret = wcsdup(path);
261 #else
262 /* sigh, strdup() isn't a POSIX function, so do it the long way */
263 ret = stgMallocBytes( strlen(path)+1, "pathdup" );
264 strcpy(ret, path);
265 #endif
266 return ret;
267 }
268
269 static pathchar* pathdir(pathchar *path)
270 {
271 pathchar *ret;
272 #if defined(mingw32_HOST_OS)
273 pathchar *drive, *dirName;
274 size_t memberLen = pathlen(path) + 1;
275 dirName = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
276 ret = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
277 drive = stgMallocBytes(pathsize * _MAX_DRIVE, "pathdir(path)");
278 _wsplitpath_s(path, drive, _MAX_DRIVE, dirName, pathsize * pathlen(path), NULL, 0, NULL, 0);
279 pathprintf(ret, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), drive, dirName);
280 stgFree(drive);
281 stgFree(dirName);
282 #else
283 pathchar* dirName = dirname(path);
284 size_t memberLen = pathlen(dirName);
285 ret = stgMallocBytes(pathsize * (memberLen + 2), "pathdir(path)");
286 strcpy(ret, dirName);
287 ret[memberLen ] = '/';
288 ret[memberLen+1] = '\0';
289 #endif
290 return ret;
291 }
292
293 static pathchar* mkPath(char* path)
294 {
295 #if defined(mingw32_HOST_OS)
296 size_t required = mbstowcs(NULL, path, 0);
297 pathchar *ret = stgMallocBytes(sizeof(pathchar) * (required + 1), "mkPath");
298 if (mbstowcs(ret, path, required) == (size_t)-1)
299 {
300 barf("mkPath failed converting char* to wchar_t*");
301 }
302 ret[required] = '\0';
303 return ret;
304 #else
305 return pathdup(path);
306 #endif
307 }
308
309 /* Generic wrapper function to try and Resolve and RunInit oc files */
310 int ocTryLoad( ObjectCode* oc );
311
312 #if defined(OBJFORMAT_ELF)
313 static int ocVerifyImage_ELF ( ObjectCode* oc );
314 static int ocGetNames_ELF ( ObjectCode* oc );
315 static int ocResolve_ELF ( ObjectCode* oc );
316 static int ocRunInit_ELF ( ObjectCode* oc );
317 #if NEED_SYMBOL_EXTRAS
318 static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
319 #endif
320 #elif defined(OBJFORMAT_PEi386)
321 static int ocVerifyImage_PEi386 ( ObjectCode* oc );
322 static int ocGetNames_PEi386 ( ObjectCode* oc );
323 static int ocResolve_PEi386 ( ObjectCode* oc );
324 static int ocRunInit_PEi386 ( ObjectCode* oc );
325 static void *lookupSymbolInDLLs ( unsigned char *lbl );
326 /* See Note [mingw-w64 name decoration scheme] */
327 #ifndef x86_64_HOST_ARCH
328 static void zapTrailingAtSign ( unsigned char *sym );
329 #endif
330
331 #if defined(x86_64_HOST_ARCH)
332 #define ONLY_USED_x86_64_HOST_ARCH(x) (x)
333 #else
334 #define ONLY_USED_x86_64_HOST_ARCH(x) (x) GNUC3_ATTRIBUTE(__unused__)
335 #endif
336
337 static char *allocateImageAndTrampolines (
338 pathchar* arch_name, char* member_name,
339 FILE* f,
340 int size,
341 int isThin);
342 #if defined(x86_64_HOST_ARCH)
343 static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
344 static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
345 #define PEi386_IMAGE_OFFSET 4
346 #else
347 #define PEi386_IMAGE_OFFSET 0
348 #endif
349 #elif defined(OBJFORMAT_MACHO)
350 static int ocVerifyImage_MachO ( ObjectCode* oc );
351 static int ocGetNames_MachO ( ObjectCode* oc );
352 static int ocResolve_MachO ( ObjectCode* oc );
353 static int ocRunInit_MachO ( ObjectCode* oc );
354 static int machoGetMisalignment( FILE * );
355 #if NEED_SYMBOL_EXTRAS
356 static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
357 #endif
358 #ifdef powerpc_HOST_ARCH
359 static void machoInitSymbolsWithoutUnderscore( void );
360 #endif
361 #endif
362
363 #if defined(OBJFORMAT_PEi386)
364 /* string utility function */
365 static HsBool endsWithPath(pathchar* base, pathchar* str) {
366 int blen = pathlen(base);
367 int slen = pathlen(str);
368 return (blen >= slen) && (0 == pathcmp(base + blen - slen, str));
369 }
370
371 static int checkAndLoadImportLibrary(
372 pathchar* arch_name,
373 char* member_name,
374 FILE* f);
375
376 static int findAndLoadImportLibrary(
377 ObjectCode* oc
378 );
379
380 static UChar *myindex(
381 int scale,
382 void* base,
383 int index);
384 static UChar *cstring_from_COFF_symbol_name(
385 UChar* name,
386 UChar* strtab);
387 static char *cstring_from_section_name(
388 UChar* name,
389 UChar* strtab);
390
391
392 /* Add ld symbol for PE image base. */
393 #if defined(__GNUC__)
394 #define __ImageBase __MINGW_LSYMBOL(_image_base__)
395 #endif
396
397 /* Get the base of the module. */
398 /* This symbol is defined by ld. */
399 extern IMAGE_DOS_HEADER __ImageBase;
400 #define __image_base (void*)((HINSTANCE)&__ImageBase)
401
402 // MingW-w64 is missing these from the implementation. So we have to look them up
403 typedef DLL_DIRECTORY_COOKIE(WINAPI *LPAddDLLDirectory)(PCWSTR NewDirectory);
404 typedef WINBOOL(WINAPI *LPRemoveDLLDirectory)(DLL_DIRECTORY_COOKIE Cookie);
405 #endif /* OBJFORMAT_PEi386 */
406
407 static void freeProddableBlocks (ObjectCode *oc);
408
409 /* on x86_64 we have a problem with relocating symbol references in
410 * code that was compiled without -fPIC. By default, the small memory
411 * model is used, which assumes that symbol references can fit in a
412 * 32-bit slot. The system dynamic linker makes this work for
413 * references to shared libraries by either (a) allocating a jump
414 * table slot for code references, or (b) moving the symbol at load
415 * time (and copying its contents, if necessary) for data references.
416 *
417 * We unfortunately can't tell whether symbol references are to code
418 * or data. So for now we assume they are code (the vast majority
419 * are), and allocate jump-table slots. Unfortunately this will
420 * SILENTLY generate crashing code for data references. This hack is
421 * enabled by X86_64_ELF_NONPIC_HACK.
422 *
423 * One workaround is to use shared Haskell libraries. This is
424 * coming. Another workaround is to keep the static libraries but
425 * compile them with -fPIC, because that will generate PIC references
426 * to data which can be relocated. The PIC code is still too green to
427 * do this systematically, though.
428 *
429 * See bug #781
430 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
431 *
432 * Naming Scheme for Symbol Macros
433 *
434 * SymI_*: symbol is internal to the RTS. It resides in an object
435 * file/library that is statically.
436 * SymE_*: symbol is external to the RTS library. It might be linked
437 * dynamically.
438 *
439 * Sym*_HasProto : the symbol prototype is imported in an include file
440 * or defined explicitly
441 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
442 * default proto extern void sym(void);
443 */
444 #define X86_64_ELF_NONPIC_HACK 1
445
446 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
447 * small memory model on this architecture (see gcc docs,
448 * -mcmodel=small).
449 *
450 * MAP_32BIT not available on OpenBSD/amd64
451 */
452 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
453 #define TRY_MAP_32BIT MAP_32BIT
454 #else
455 #define TRY_MAP_32BIT 0
456 #endif
457
458 /*
459 Note [The ARM/Thumb Story]
460 ~~~~~~~~~~~~~~~~~~~~~~~~~~
461
462 Support for the ARM architecture is complicated by the fact that ARM has not
463 one but several instruction encodings. The two relevant ones here are the original
464 ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
465 of the instruction set.
466
467 How the CPU decodes a particular instruction is determined by a mode bit. This
468 mode bit is set on jump instructions, the value being determined by the low
469 bit of the target address: An odd address means the target is a procedure
470 encoded in the Thumb encoding whereas an even address means it's a traditional
471 ARM procedure (the actual address jumped to is even regardless of the encoding bit).
472
473 Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
474 is tricky. If the linker needs to link a call by an ARM object into Thumb code
475 (or vice-versa) it will produce a jump island. This, however, is incompatible with
476 GHC's tables-next-to-code. For this reason, it is critical that GHC emit
477 exclusively ARM or Thumb objects for all Haskell code.
478
479 We still do, however, need to worry about foreign code.
480 */
481
482 /*
483 * Due to the small memory model (see above), on x86_64 we have to map
484 * all our non-PIC object files into the low 2Gb of the address space
485 * (why 2Gb and not 4Gb? Because all addresses must be reachable
486 * using a 32-bit signed PC-relative offset). On Linux we can do this
487 * using the MAP_32BIT flag to mmap(), however on other OSs
488 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
489 * can't do this. So on these systems, we have to pick a base address
490 * in the low 2Gb of the address space and try to allocate memory from
491 * there.
492 *
493 * We pick a default address based on the OS, but also make this
494 * configurable via an RTS flag (+RTS -xm)
495 */
496 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
497
498 #if defined(MAP_32BIT)
499 // Try to use MAP_32BIT
500 #define MMAP_32BIT_BASE_DEFAULT 0
501 #else
502 // A guess: 1Gb.
503 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
504 #endif
505
506 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
507 #endif
508
509 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
510 ObjectCode *owner)
511 {
512 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
513 if (!pinfo || owner != pinfo->owner) return;
514 removeStrHashTable(table, key, NULL);
515 stgFree(pinfo);
516 }
517
518 /* -----------------------------------------------------------------------------
519 * Insert symbols into hash tables, checking for duplicates.
520 *
521 * Returns: 0 on failure, nonzero on success
522 */
523 /*
524 Note [weak-symbols-support]
525 -------------------------------------
526 While ghciInsertSymbolTable does implement extensive
527 logic for weak symbol support, weak symbols are not currently
528 fully supported by the RTS. This code is mostly here for COMDAT
529 support which uses the weak symbols support.
530
531 Linking weak symbols defined purely in C code with other C code
532 should also work, probably. Observing weak symbols in Haskell
533 won't.
534
535 Some test have been written for weak symbols but have been disabled
536 mostly because it's unsure how the weak symbols support should look.
537 See Trac #11223
538 */
539 static int ghciInsertSymbolTable(
540 pathchar* obj_name,
541 HashTable *table,
542 const SymbolName* key,
543 SymbolAddr* data,
544 HsBool weak,
545 ObjectCode *owner)
546 {
547 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
548 if (!pinfo) /* new entry */
549 {
550 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
551 pinfo->value = data;
552 pinfo->owner = owner;
553 pinfo->weak = weak;
554 insertStrHashTable(table, key, pinfo);
555 return 1;
556 }
557 else if (weak && data && pinfo->weak && !pinfo->value)
558 {
559 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
560 pinfo->value = data;
561 pinfo->owner = owner;
562 return 1;
563 }
564 else if (weak)
565 {
566 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
567 already know of another copy throw this one away.
568
569 or both weak symbols have a nonzero value. Keep the existing one.
570
571 This also preserves the semantics of linking against
572 the first symbol we find. */
573 }
574 else if (pinfo->weak && !weak) /* weak symbol is in the table */
575 {
576 /* override the weak definition with the non-weak one */
577 pinfo->value = data;
578 pinfo->owner = owner;
579 pinfo->weak = HS_BOOL_FALSE;
580 return 1;
581 }
582 else if ( pinfo->owner
583 && pinfo->owner->status != OBJECT_RESOLVED
584 && pinfo->owner->status != OBJECT_NEEDED)
585 {
586 /* If the other symbol hasn't been loaded or will be loaded and we want to
587 explicitly load the new one, we can just swap it out and load the one
588 that has been requested. If not, just keep the first one encountered.
589
590 Because the `symHash' table consists symbols we've also not loaded but
591 found during the initial scan this is safe to do. If however the existing
592 symbol has been loaded then it means we have a duplicate.
593
594 This is essentially emulating the behavior of a linker wherein it will always
595 link in object files that are .o file arguments, but only take object files
596 from archives as needed. */
597 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
598 pinfo->value = data;
599 pinfo->owner = owner;
600 pinfo->weak = weak;
601 }
602
603 return 1;
604 }
605 else if (pinfo->owner == owner)
606 {
607 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
608 return 1;
609 }
610 else if (owner && owner->status == OBJECT_LOADED)
611 {
612 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
613 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
614 call this function again to trigger the duplicate error. */
615 return 1;
616 }
617
618 pathchar* archiveName = NULL;
619 debugBelch(
620 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
621 " %s\n"
622 "whilst processing object file\n"
623 " %" PATH_FMT "\n"
624 "The symbol was previously defined in\n"
625 " %" PATH_FMT "\n"
626 "This could be caused by:\n"
627 " * Loading two different object files which export the same symbol\n"
628 " * Specifying the same object file twice on the GHCi command line\n"
629 " * An incorrect `package.conf' entry, causing some object to be\n"
630 " loaded twice.\n",
631 (char*)key,
632 obj_name,
633 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
634 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
635 : pinfo->owner->fileName
636 );
637
638 if (archiveName)
639 {
640 stgFree(archiveName);
641 archiveName = NULL;
642 }
643 return 0;
644 }
645
646 /* -----------------------------------------------------------------------------
647 * Looks up symbols into hash tables.
648 *
649 * Returns: 0 on failure and result is not set,
650 * nonzero on success and result set to nonzero pointer
651 */
652 static HsBool ghciLookupSymbolInfo(HashTable *table,
653 const SymbolName* key, RtsSymbolInfo **result)
654 {
655 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
656 if (!pinfo) {
657 *result = NULL;
658 return HS_BOOL_FALSE;
659 }
660 if (pinfo->weak)
661 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
662 /* Once it's looked up, it can no longer be overridden */
663 pinfo->weak = HS_BOOL_FALSE;
664
665 *result = pinfo;
666 return HS_BOOL_TRUE;
667 }
668
669 /* -----------------------------------------------------------------------------
670 * initialize the object linker
671 */
672
673
674 static int linker_init_done = 0 ;
675
676 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
677 static void *dl_prog_handle;
678 static regex_t re_invalid;
679 static regex_t re_realso;
680 #ifdef THREADED_RTS
681 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
682 #endif
683 #elif defined(OBJFORMAT_PEi386)
684 void addDLLHandle(pathchar* dll_name, HINSTANCE instance);
685 #endif
686
687 void initLinker (void)
688 {
689 // default to retaining CAFs for backwards compatibility. Most
690 // users will want initLinker_(0): otherwise unloadObj() will not
691 // be able to unload object files when they contain CAFs.
692 initLinker_(1);
693 }
694
695 void
696 initLinker_ (int retain_cafs)
697 {
698 RtsSymbolVal *sym;
699 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
700 int compileResult;
701 #endif
702
703 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
704
705 /* Make initLinker idempotent, so we can call it
706 before every relevant operation; that means we
707 don't need to initialise the linker separately */
708 if (linker_init_done == 1) {
709 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
710 return;
711 } else {
712 linker_init_done = 1;
713 }
714
715 objects = NULL;
716 unloaded_objects = NULL;
717
718 #if defined(THREADED_RTS)
719 initMutex(&linker_mutex);
720 initMutex(&linker_unloaded_mutex);
721 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
722 initMutex(&dl_mutex);
723 #endif
724 #endif
725 symhash = allocStrHashTable();
726
727 /* populate the symbol table with stuff from the RTS */
728 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
729 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
730 symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
731 barf("ghciInsertSymbolTable failed");
732 }
733 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
734 }
735 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
736 machoInitSymbolsWithoutUnderscore();
737 # endif
738 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
739 referenced from a statically linked module. We need to mimic this, but
740 we cannot use NULL because we use it to mean nonexistent symbols. So we
741 use an arbitrary (hopefully unique) address here.
742 */
743 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
744 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
745 barf("ghciInsertSymbolTable failed");
746 }
747
748 #if defined(OBJFORMAT_PEi386)
749 if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
750 symhash, "__image_base__", __image_base, HS_BOOL_TRUE, NULL)) {
751 barf("ghciInsertSymbolTable failed");
752 }
753 #endif /* OBJFORMAT_PEi386 */
754
755
756 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
757 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
758 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
759 retain_cafs ? newRetainedCAF : newGCdCAF,
760 HS_BOOL_FALSE, NULL)) {
761 barf("ghciInsertSymbolTable failed");
762 }
763
764 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
765 # if defined(RTLD_DEFAULT)
766 dl_prog_handle = RTLD_DEFAULT;
767 # else
768 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
769 # endif /* RTLD_DEFAULT */
770
771 compileResult = regcomp(&re_invalid,
772 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
773 REG_EXTENDED);
774 if (compileResult != 0) {
775 barf("Compiling re_invalid failed");
776 }
777 compileResult = regcomp(&re_realso,
778 "(GROUP|INPUT) *\\( *([^ )]+)",
779 REG_EXTENDED);
780 if (compileResult != 0) {
781 barf("Compiling re_realso failed");
782 }
783 # endif
784
785 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
786 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
787 // User-override for mmap_32bit_base
788 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
789 }
790 #endif
791
792 #if defined(mingw32_HOST_OS)
793 /*
794 * These two libraries cause problems when added to the static link,
795 * but are necessary for resolving symbols in GHCi, hence we load
796 * them manually here.
797 */
798 addDLL(WSTR("msvcrt"));
799 addDLL(WSTR("kernel32"));
800 addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
801 #endif
802
803 if (RTS_LINKER_USE_MMAP)
804 m32_allocator_init();
805
806 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
807 return;
808 }
809
810 void
811 exitLinker( void ) {
812 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
813 if (linker_init_done == 1) {
814 regfree(&re_invalid);
815 regfree(&re_realso);
816 #ifdef THREADED_RTS
817 closeMutex(&dl_mutex);
818 #endif
819 }
820 #endif
821 if (linker_init_done == 1) {
822 freeHashTable(symhash, free);
823 }
824 #ifdef THREADED_RTS
825 closeMutex(&linker_mutex);
826 #endif
827 }
828
829 /* -----------------------------------------------------------------------------
830 * Loading DLL or .so dynamic libraries
831 * -----------------------------------------------------------------------------
832 *
833 * Add a DLL from which symbols may be found. In the ELF case, just
834 * do RTLD_GLOBAL-style add, so no further messing around needs to
835 * happen in order that symbols in the loaded .so are findable --
836 * lookupSymbol() will subsequently see them by dlsym on the program's
837 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
838 *
839 * In the PEi386 case, open the DLLs and put handles to them in a
840 * linked list. When looking for a symbol, try all handles in the
841 * list. This means that we need to load even DLLs that are guaranteed
842 * to be in the ghc.exe image already, just so we can get a handle
843 * to give to loadSymbol, so that we can find the symbols. For such
844 * libraries, the LoadLibrary call should be a no-op except for returning
845 * the handle.
846 *
847 */
848
849 #if defined(OBJFORMAT_PEi386)
850 /* A record for storing handles into DLLs. */
851
852 typedef
853 struct _OpenedDLL {
854 pathchar* name;
855 struct _OpenedDLL* next;
856 HINSTANCE instance;
857 }
858 OpenedDLL;
859
860 /* A list thereof. */
861 static OpenedDLL* opened_dlls = NULL;
862
863 /* A record for storing indirectly linked functions from DLLs. */
864 typedef
865 struct _IndirectAddr {
866 SymbolAddr* addr;
867 struct _IndirectAddr* next;
868 }
869 IndirectAddr;
870
871 /* A list thereof. */
872 static IndirectAddr* indirects = NULL;
873
874 /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
875 void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
876 OpenedDLL* o_dll;
877 o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );
878 o_dll->name = dll_name ? pathdup(dll_name) : NULL;
879 o_dll->instance = instance;
880 o_dll->next = opened_dlls;
881 opened_dlls = o_dll;
882 }
883
884 #endif
885
886 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
887
888 /* Suppose in ghci we load a temporary SO for a module containing
889 f = 1
890 and then modify the module, recompile, and load another temporary
891 SO with
892 f = 2
893 Then as we don't unload the first SO, dlsym will find the
894 f = 1
895 symbol whereas we want the
896 f = 2
897 symbol. We therefore need to keep our own SO handle list, and
898 try SOs in the right order. */
899
900 typedef
901 struct _OpenedSO {
902 struct _OpenedSO* next;
903 void *handle;
904 }
905 OpenedSO;
906
907 /* A list thereof. */
908 static OpenedSO* openedSOs = NULL;
909
910 static const char *
911 internal_dlopen(const char *dll_name)
912 {
913 OpenedSO* o_so;
914 void *hdl;
915 const char *errmsg;
916 char *errmsg_copy;
917
918 // omitted: RTLD_NOW
919 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
920 IF_DEBUG(linker,
921 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
922
923 //-------------- Begin critical section ------------------
924 // This critical section is necessary because dlerror() is not
925 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
926 // Also, the error message returned must be copied to preserve it
927 // (see POSIX also)
928
929 ACQUIRE_LOCK(&dl_mutex);
930 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
931
932 errmsg = NULL;
933 if (hdl == NULL) {
934 /* dlopen failed; return a ptr to the error msg. */
935 errmsg = dlerror();
936 if (errmsg == NULL) errmsg = "addDLL: unknown error";
937 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
938 strcpy(errmsg_copy, errmsg);
939 errmsg = errmsg_copy;
940 } else {
941 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
942 o_so->handle = hdl;
943 o_so->next = openedSOs;
944 openedSOs = o_so;
945 }
946
947 RELEASE_LOCK(&dl_mutex);
948 //--------------- End critical section -------------------
949
950 return errmsg;
951 }
952
953 /*
954 Note [RTLD_LOCAL]
955
956 In GHCi we want to be able to override previous .so's with newly
957 loaded .so's when we recompile something. This further implies that
958 when we look up a symbol in internal_dlsym() we have to iterate
959 through the loaded libraries (in order from most recently loaded to
960 oldest) looking up the symbol in each one until we find it.
961
962 However, this can cause problems for some symbols that are copied
963 by the linker into the executable image at runtime - see #8935 for a
964 lengthy discussion. To solve that problem we need to look up
965 symbols in the main executable *first*, before attempting to look
966 them up in the loaded .so's. But in order to make that work, we
967 have to always call dlopen with RTLD_LOCAL, so that the loaded
968 libraries don't populate the global symbol table.
969 */
970
971 static void *
972 internal_dlsym(const char *symbol) {
973 OpenedSO* o_so;
974 void *v;
975
976 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
977 ACQUIRE_LOCK(&dl_mutex);
978 dlerror();
979 // look in program first
980 v = dlsym(dl_prog_handle, symbol);
981 if (dlerror() == NULL) {
982 RELEASE_LOCK(&dl_mutex);
983 return v;
984 }
985
986 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
987 v = dlsym(o_so->handle, symbol);
988 if (dlerror() == NULL) {
989 RELEASE_LOCK(&dl_mutex);
990 return v;
991 }
992 }
993 RELEASE_LOCK(&dl_mutex);
994 return v;
995 }
996 # endif
997
998 const char *
999 addDLL( pathchar *dll_name )
1000 {
1001 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1002 /* ------------------- ELF DLL loader ------------------- */
1003
1004 #define NMATCH 5
1005 regmatch_t match[NMATCH];
1006 const char *errmsg;
1007 FILE* fp;
1008 size_t match_length;
1009 #define MAXLINE 1000
1010 char line[MAXLINE];
1011 int result;
1012
1013 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
1014 errmsg = internal_dlopen(dll_name);
1015
1016 if (errmsg == NULL) {
1017 return NULL;
1018 }
1019
1020 // GHC Trac ticket #2615
1021 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
1022 // contain linker scripts rather than ELF-format object code. This
1023 // code handles the situation by recognizing the real object code
1024 // file name given in the linker script.
1025 //
1026 // If an "invalid ELF header" error occurs, it is assumed that the
1027 // .so file contains a linker script instead of ELF object code.
1028 // In this case, the code looks for the GROUP ( ... ) linker
1029 // directive. If one is found, the first file name inside the
1030 // parentheses is treated as the name of a dynamic library and the
1031 // code attempts to dlopen that file. If this is also unsuccessful,
1032 // an error message is returned.
1033
1034 // see if the error message is due to an invalid ELF header
1035 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
1036 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
1037 IF_DEBUG(linker, debugBelch("result = %i\n", result));
1038 if (result == 0) {
1039 // success -- try to read the named file as a linker script
1040 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
1041 MAXLINE-1);
1042 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
1043 line[match_length] = '\0'; // make sure string is null-terminated
1044 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
1045 if ((fp = fopen(line, "r")) == NULL) {
1046 return errmsg; // return original error if open fails
1047 }
1048 // try to find a GROUP or INPUT ( ... ) command
1049 while (fgets(line, MAXLINE, fp) != NULL) {
1050 IF_DEBUG(linker, debugBelch("input line = %s", line));
1051 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
1052 // success -- try to dlopen the first named file
1053 IF_DEBUG(linker, debugBelch("match%s\n",""));
1054 line[match[2].rm_eo] = '\0';
1055 stgFree((void*)errmsg); // Free old message before creating new one
1056 errmsg = internal_dlopen(line+match[2].rm_so);
1057 break;
1058 }
1059 // if control reaches here, no GROUP or INPUT ( ... ) directive
1060 // was found and the original error message is returned to the
1061 // caller
1062 }
1063 fclose(fp);
1064 }
1065 return errmsg;
1066
1067 # elif defined(OBJFORMAT_PEi386)
1068 /* ------------------- Win32 DLL loader ------------------- */
1069
1070 pathchar* buf;
1071 OpenedDLL* o_dll;
1072 HINSTANCE instance;
1073
1074 IF_DEBUG(linker, debugBelch("\naddDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
1075
1076 /* See if we've already got it, and ignore if so. */
1077 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
1078 if (0 == pathcmp(o_dll->name, dll_name))
1079 return NULL;
1080 }
1081
1082 /* The file name has no suffix (yet) so that we can try
1083 both foo.dll and foo.drv
1084
1085 The documentation for LoadLibrary says:
1086 If no file name extension is specified in the lpFileName
1087 parameter, the default library extension .dll is
1088 appended. However, the file name string can include a trailing
1089 point character (.) to indicate that the module name has no
1090 extension. */
1091
1092 size_t bufsize = pathlen(dll_name) + 10;
1093 buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
1094
1095 /* These are ordered by probability of success and order we'd like them */
1096 const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
1097 const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
1098
1099 int cFormat;
1100 int cFlag;
1101 int flags_start = 1; // Assume we don't support the new API
1102
1103 /* Detect if newer API are available, if not, skip the first flags entry */
1104 if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
1105 flags_start = 0;
1106 }
1107
1108 /* Iterate through the possible flags and formats */
1109 for (cFlag = flags_start; cFlag < 2; cFlag++)
1110 {
1111 for (cFormat = 0; cFormat < 4; cFormat++)
1112 {
1113 snwprintf(buf, bufsize, formats[cFormat], dll_name);
1114 instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
1115 if (instance == NULL)
1116 {
1117 if (GetLastError() != ERROR_MOD_NOT_FOUND)
1118 {
1119 goto error;
1120 }
1121 }
1122 else
1123 {
1124 break; // We're done. DLL has been loaded.
1125 }
1126 }
1127 }
1128
1129 // Check if we managed to load the DLL
1130 if (instance == NULL) {
1131 goto error;
1132 }
1133
1134 stgFree(buf);
1135
1136 addDLLHandle(dll_name, instance);
1137
1138 return NULL;
1139
1140 error:
1141 stgFree(buf);
1142 sysErrorBelch("addDLL: %" PATH_FMT " (Win32 error %lu)", dll_name, GetLastError());
1143
1144 /* LoadLibrary failed; return a ptr to the error msg. */
1145 return "addDLL: could not load DLL";
1146
1147 # else
1148 barf("addDLL: not implemented on this platform");
1149 # endif
1150 }
1151
1152 /* -----------------------------------------------------------------------------
1153 * Searches the system directories to determine if there is a system DLL that
1154 * satisfies the given name. This prevent GHCi from linking against a static
1155 * library if a DLL is available.
1156 *
1157 * Returns: NULL on failure or no DLL found, else the full path to the DLL
1158 * that can be loaded.
1159 */
1160 pathchar* findSystemLibrary(pathchar* dll_name)
1161 {
1162 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));
1163
1164 #if defined(OBJFORMAT_PEi386)
1165 const unsigned int init_buf_size = 1024;
1166 unsigned int bufsize = init_buf_size;
1167 wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
1168 DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
1169
1170 if (wResult > bufsize) {
1171 result = realloc(result, sizeof(wchar_t) * wResult);
1172 wResult = SearchPathW(NULL, dll_name, NULL, wResult, result, NULL);
1173 }
1174
1175
1176 if (!wResult) {
1177 free(result);
1178 return NULL;
1179 }
1180
1181 return result;
1182 #else
1183 (void)(dll_name); // Function not implemented for other platforms.
1184 return NULL;
1185 #endif
1186 }
1187
1188 /* -----------------------------------------------------------------------------
1189 * Emits a warning determining that the system is missing a required security
1190 * update that we need to get access to the proper APIs
1191 */
1192 void warnMissingKBLibraryPaths( void )
1193 {
1194 static HsBool missing_update_warn = HS_BOOL_FALSE;
1195 if (!missing_update_warn) {
1196 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
1197 missing_update_warn = HS_BOOL_TRUE;
1198 }
1199 }
1200
1201 /* -----------------------------------------------------------------------------
1202 * appends a directory to the process DLL Load path so LoadLibrary can find it
1203 *
1204 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
1205 * restore the search path to what it was before this call.
1206 */
1207 HsPtr addLibrarySearchPath(pathchar* dll_path)
1208 {
1209 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));
1210
1211 #if defined(OBJFORMAT_PEi386)
1212 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1213 LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
1214
1215 HsPtr result = NULL;
1216
1217 const unsigned int init_buf_size = 4096;
1218 int bufsize = init_buf_size;
1219
1220 // Make sure the path is an absolute path
1221 WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
1222 DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
1223 if (!wResult){
1224 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1225 }
1226 else if (wResult > init_buf_size) {
1227 abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
1228 if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
1229 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1230 }
1231 }
1232
1233 if (AddDllDirectory) {
1234 result = AddDllDirectory(abs_path);
1235 }
1236 else
1237 {
1238 warnMissingKBLibraryPaths();
1239 WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
1240 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1241
1242 if (wResult > init_buf_size) {
1243 str = realloc(str, sizeof(WCHAR) * wResult);
1244 bufsize = wResult;
1245 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1246 if (!wResult) {
1247 sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1248 }
1249 }
1250
1251 bufsize = wResult + 2 + pathlen(abs_path);
1252 wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
1253
1254 wcscpy(newPath, abs_path);
1255 wcscat(newPath, L";");
1256 wcscat(newPath, str);
1257 if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
1258 sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1259 }
1260
1261 free(newPath);
1262 free(abs_path);
1263
1264 return str;
1265 }
1266
1267 if (!result) {
1268 sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1269 free(abs_path);
1270 return NULL;
1271 }
1272
1273 free(abs_path);
1274 return result;
1275 #else
1276 (void)(dll_path); // Function not implemented for other platforms.
1277 return NULL;
1278 #endif
1279 }
1280
1281 /* -----------------------------------------------------------------------------
1282 * removes a directory from the process DLL Load path
1283 *
1284 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
1285 */
1286 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
1287 {
1288 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n", dll_path_index));
1289
1290 #if defined(OBJFORMAT_PEi386)
1291 HsBool result = 0;
1292
1293 if (dll_path_index != NULL) {
1294 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1295 LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
1296
1297 if (RemoveDllDirectory) {
1298 result = RemoveDllDirectory(dll_path_index);
1299 // dll_path_index is now invalid, do not use it after this point.
1300 }
1301 else
1302 {
1303 warnMissingKBLibraryPaths();
1304 result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
1305 free(dll_path_index);
1306 }
1307
1308 if (!result) {
1309 sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
1310 return HS_BOOL_FALSE;
1311 }
1312 }
1313
1314 return result == 0 ? HS_BOOL_TRUE : HS_BOOL_FALSE;
1315 #else
1316 (void)(dll_path_index); // Function not implemented for other platforms.
1317 return HS_BOOL_FALSE;
1318 #endif
1319 }
1320
1321 /* -----------------------------------------------------------------------------
1322 * insert a symbol in the hash table
1323 *
1324 * Returns: 0 on failure, nozero on success
1325 */
1326 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
1327 {
1328 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
1329 }
1330
1331 /* -----------------------------------------------------------------------------
1332 * lookup a symbol in the hash table
1333 */
1334 static SymbolAddr* lookupSymbol_ (SymbolName* lbl)
1335 {
1336 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
1337
1338 ASSERT(symhash != NULL);
1339 RtsSymbolInfo *pinfo;
1340
1341 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
1342 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
1343
1344 # if defined(OBJFORMAT_ELF)
1345 return internal_dlsym(lbl);
1346 # elif defined(OBJFORMAT_MACHO)
1347
1348 /* HACK: On OS X, all symbols are prefixed with an underscore.
1349 However, dlsym wants us to omit the leading underscore from the
1350 symbol name -- the dlsym routine puts it back on before searching
1351 for the symbol. For now, we simply strip it off here (and ONLY
1352 here).
1353 */
1354 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
1355 ASSERT(lbl[0] == '_');
1356 return internal_dlsym(lbl + 1);
1357 # elif defined(OBJFORMAT_PEi386)
1358 SymbolAddr* sym;
1359
1360 /* See Note [mingw-w64 name decoration scheme] */
1361 #ifndef x86_64_HOST_ARCH
1362 zapTrailingAtSign ( (unsigned char*)lbl );
1363 #endif
1364 sym = lookupSymbolInDLLs((unsigned char*)lbl);
1365 return sym; // might be NULL if not found
1366
1367 # else
1368 ASSERT(2+2 == 5);
1369 return NULL;
1370 # endif
1371 } else {
1372 SymbolAddr* val = pinfo->value;
1373 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, val));
1374
1375 int r;
1376 ObjectCode* oc = pinfo->owner;
1377
1378 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
1379 See Note [runtime-linker-phases] */
1380 if (oc && oc->status == OBJECT_LOADED) {
1381 oc->status = OBJECT_NEEDED;
1382 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand loading symbol '%s'\n", lbl));
1383 r = ocTryLoad(oc);
1384
1385 if (!r) {
1386 errorBelch("Could not on-demand load symbol '%s'\n", lbl);
1387 return NULL;
1388 }
1389 }
1390
1391 return val;
1392 }
1393 }
1394
1395 SymbolAddr* lookupSymbol( SymbolName* lbl )
1396 {
1397 ACQUIRE_LOCK(&linker_mutex);
1398 SymbolAddr* r = lookupSymbol_(lbl);
1399 RELEASE_LOCK(&linker_mutex);
1400 return r;
1401 }
1402
1403 /* -----------------------------------------------------------------------------
1404 Create a StablePtr for a foreign export. This is normally called by
1405 a C function with __attribute__((constructor)), which is generated
1406 by GHC and linked into the module.
1407
1408 If the object code is being loaded dynamically, then we remember
1409 which StablePtrs were allocated by the constructors and free them
1410 again in unloadObj().
1411 -------------------------------------------------------------------------- */
1412
1413 static ObjectCode *loading_obj = NULL;
1414
1415 StgStablePtr foreignExportStablePtr (StgPtr p)
1416 {
1417 ForeignExportStablePtr *fe_sptr;
1418 StgStablePtr *sptr;
1419
1420 sptr = getStablePtr(p);
1421
1422 if (loading_obj != NULL) {
1423 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
1424 "foreignExportStablePtr");
1425 fe_sptr->stable_ptr = sptr;
1426 fe_sptr->next = loading_obj->stable_ptrs;
1427 loading_obj->stable_ptrs = fe_sptr;
1428 }
1429
1430 return sptr;
1431 }
1432
1433
1434 /* -----------------------------------------------------------------------------
1435 * Debugging aid: look in GHCi's object symbol tables for symbols
1436 * within DELTA bytes of the specified address, and show their names.
1437 */
1438 #ifdef DEBUG
1439 void ghci_enquire ( SymbolAddr* addr );
1440
1441 void ghci_enquire(SymbolAddr* addr)
1442 {
1443 int i;
1444 SymbolName* sym;
1445 RtsSymbolInfo* a;
1446 const int DELTA = 64;
1447 ObjectCode* oc;
1448
1449 for (oc = objects; oc; oc = oc->next) {
1450 for (i = 0; i < oc->n_symbols; i++) {
1451 sym = oc->symbols[i];
1452 if (sym == NULL) continue;
1453 a = NULL;
1454 if (a == NULL) {
1455 ghciLookupSymbolInfo(symhash, sym, &a);
1456 }
1457 if (a == NULL) {
1458 // debugBelch("ghci_enquire: can't find %s\n", sym);
1459 }
1460 else if ( a->value
1461 && (char*)addr-DELTA <= (char*)a->value
1462 && (char*)a->value <= (char*)addr+DELTA) {
1463 debugBelch("%p + %3d == `%s'\n", addr, (int)((char*)a->value - (char*)addr), sym);
1464 }
1465 }
1466 }
1467 }
1468 #endif
1469
1470 #if RTS_LINKER_USE_MMAP
1471 //
1472 // Returns NULL on failure.
1473 //
1474 void *
1475 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1476 {
1477 void *map_addr = NULL;
1478 void *result;
1479 size_t size;
1480 static uint32_t fixed = 0;
1481
1482 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1483 size = roundUpToPage(bytes);
1484
1485 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1486 mmap_again:
1487
1488 if (mmap_32bit_base != 0) {
1489 map_addr = mmap_32bit_base;
1490 }
1491 #endif
1492
1493 IF_DEBUG(linker,
1494 debugBelch("mmapForLinker: \tprotection %#0x\n",
1495 PROT_EXEC | PROT_READ | PROT_WRITE));
1496 IF_DEBUG(linker,
1497 debugBelch("mmapForLinker: \tflags %#0x\n",
1498 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1499
1500 result = mmap(map_addr, size,
1501 PROT_EXEC|PROT_READ|PROT_WRITE,
1502 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1503
1504 if (result == MAP_FAILED) {
1505 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1506 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1507 return NULL;
1508 }
1509
1510 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1511 if (mmap_32bit_base != 0) {
1512 if (result == map_addr) {
1513 mmap_32bit_base = (StgWord8*)map_addr + size;
1514 } else {
1515 if ((W_)result > 0x80000000) {
1516 // oops, we were given memory over 2Gb
1517 munmap(result,size);
1518 #if defined(freebsd_HOST_OS) || \
1519 defined(kfreebsdgnu_HOST_OS) || \
1520 defined(dragonfly_HOST_OS)
1521 // Some platforms require MAP_FIXED. This is normally
1522 // a bad idea, because MAP_FIXED will overwrite
1523 // existing mappings.
1524 fixed = MAP_FIXED;
1525 goto mmap_again;
1526 #else
1527 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1528 "asked for %lu bytes at %p. "
1529 "Try specifying an address with +RTS -xm<addr> -RTS",
1530 size, map_addr);
1531 return NULL;
1532 #endif
1533 } else {
1534 // hmm, we were given memory somewhere else, but it's
1535 // still under 2Gb so we can use it. Next time, ask
1536 // for memory right after the place we just got some
1537 mmap_32bit_base = (StgWord8*)result + size;
1538 }
1539 }
1540 } else {
1541 if ((W_)result > 0x80000000) {
1542 // oops, we were given memory over 2Gb
1543 // ... try allocating memory somewhere else?;
1544 debugTrace(DEBUG_linker,
1545 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1546 bytes, result);
1547 munmap(result, size);
1548
1549 // Set a base address and try again... (guess: 1Gb)
1550 mmap_32bit_base = (void*)0x40000000;
1551 goto mmap_again;
1552 }
1553 }
1554 #endif
1555
1556 IF_DEBUG(linker,
1557 debugBelch("mmapForLinker: mapped %" FMT_Word
1558 " bytes starting at %p\n", (W_)size, result));
1559 IF_DEBUG(linker,
1560 debugBelch("mmapForLinker: done\n"));
1561
1562 return result;
1563 }
1564 #endif
1565
1566 /*
1567 * Remove symbols from the symbol table, and free oc->symbols.
1568 * This operation is idempotent.
1569 */
1570 static void removeOcSymbols (ObjectCode *oc)
1571 {
1572 if (oc->symbols == NULL) return;
1573
1574 // Remove all the mappings for the symbols within this object..
1575 int i;
1576 for (i = 0; i < oc->n_symbols; i++) {
1577 if (oc->symbols[i] != NULL) {
1578 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1579 }
1580 }
1581
1582 stgFree(oc->symbols);
1583 oc->symbols = NULL;
1584 }
1585
1586 /*
1587 * Release StablePtrs and free oc->stable_ptrs.
1588 * This operation is idempotent.
1589 */
1590 static void freeOcStablePtrs (ObjectCode *oc)
1591 {
1592 // Release any StablePtrs that were created when this
1593 // object module was initialized.
1594 ForeignExportStablePtr *fe_ptr, *next;
1595
1596 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1597 next = fe_ptr->next;
1598 freeStablePtr(fe_ptr->stable_ptr);
1599 stgFree(fe_ptr);
1600 }
1601 oc->stable_ptrs = NULL;
1602 }
1603
1604 static void
1605 freePreloadObjectFile (ObjectCode *oc)
1606 {
1607 #if defined(mingw32_HOST_OS)
1608
1609 VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
1610
1611 IndirectAddr *ia, *ia_next;
1612 ia = indirects;
1613 while (ia != NULL) {
1614 ia_next = ia->next;
1615 stgFree(ia);
1616 ia = ia_next;
1617 }
1618 indirects = NULL;
1619
1620 #else
1621
1622 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1623 munmap(oc->image, oc->fileSize);
1624 }
1625 else {
1626 stgFree(oc->image);
1627 }
1628
1629 #endif
1630
1631 oc->image = NULL;
1632 oc->fileSize = 0;
1633 }
1634
1635 /*
1636 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1637 * the GC when a previously unloaded ObjectCode has been determined to be
1638 * unused, and when an error occurs during loadObj().
1639 */
1640 void freeObjectCode (ObjectCode *oc)
1641 {
1642 freePreloadObjectFile(oc);
1643
1644 if (oc->symbols != NULL) {
1645 stgFree(oc->symbols);
1646 oc->symbols = NULL;
1647 }
1648
1649 if (oc->extraInfos != NULL) {
1650 freeHashTable(oc->extraInfos, NULL);
1651 oc->extraInfos = NULL;
1652 }
1653
1654 if (oc->sections != NULL) {
1655 int i;
1656 for (i=0; i < oc->n_sections; i++) {
1657 if (oc->sections[i].start != NULL) {
1658 switch(oc->sections[i].alloc){
1659 #if RTS_LINKER_USE_MMAP
1660 case SECTION_MMAP:
1661 munmap(oc->sections[i].mapped_start,
1662 oc->sections[i].mapped_size);
1663 break;
1664 case SECTION_M32:
1665 m32_free(oc->sections[i].start,
1666 oc->sections[i].size);
1667 break;
1668 #endif
1669 case SECTION_MALLOC:
1670 stgFree(oc->sections[i].start);
1671 break;
1672 default:
1673 break;
1674 }
1675 }
1676 }
1677 stgFree(oc->sections);
1678 }
1679
1680 freeProddableBlocks(oc);
1681
1682 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1683 * alongside the image, so we don't need to free. */
1684 #if NEED_SYMBOL_EXTRAS && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
1685 if (RTS_LINKER_USE_MMAP) {
1686 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1687 m32_free(oc->symbol_extras,
1688 sizeof(SymbolExtra) * oc->n_symbol_extras);
1689 }
1690 }
1691 else {
1692 stgFree(oc->symbol_extras);
1693 }
1694 #endif
1695
1696 stgFree(oc->fileName);
1697 stgFree(oc->archiveMemberName);
1698
1699 stgFree(oc);
1700 }
1701
1702 /* -----------------------------------------------------------------------------
1703 * Sets the initial status of a fresh ObjectCode
1704 */
1705 static void setOcInitialStatus(ObjectCode* oc) {
1706 if (oc->isImportLib == HS_BOOL_TRUE) {
1707 oc->status = OBJECT_DONT_RESOLVE;
1708 } else if (oc->archiveMemberName == NULL) {
1709 oc->status = OBJECT_NEEDED;
1710 } else {
1711 oc->status = OBJECT_LOADED;
1712 }
1713 }
1714
1715 static ObjectCode*
1716 mkOc( pathchar *path, char *image, int imageSize,
1717 rtsBool mapped, char *archiveMemberName, int misalignment ) {
1718 ObjectCode* oc;
1719
1720 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1721 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1722
1723 # if defined(OBJFORMAT_ELF)
1724 oc->formatName = "ELF";
1725 # elif defined(OBJFORMAT_PEi386)
1726 oc->formatName = "PEi386";
1727 # elif defined(OBJFORMAT_MACHO)
1728 oc->formatName = "Mach-O";
1729 # else
1730 stgFree(oc);
1731 barf("loadObj: not implemented on this platform");
1732 # endif
1733
1734 oc->image = image;
1735 oc->fileName = pathdup(path);
1736
1737 if (archiveMemberName) {
1738 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1739 strcpy(oc->archiveMemberName, archiveMemberName);
1740 } else {
1741 oc->archiveMemberName = NULL;
1742 }
1743
1744 setOcInitialStatus( oc );
1745
1746 oc->fileSize = imageSize;
1747 oc->symbols = NULL;
1748 oc->n_sections = 0;
1749 oc->sections = NULL;
1750 oc->proddables = NULL;
1751 oc->stable_ptrs = NULL;
1752 #if NEED_SYMBOL_EXTRAS
1753 oc->symbol_extras = NULL;
1754 #endif
1755 oc->imageMapped = mapped;
1756
1757 oc->misalignment = misalignment;
1758 oc->extraInfos = NULL;
1759
1760 /* chain it onto the list of objects */
1761 oc->next = NULL;
1762
1763 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1764 return oc;
1765 }
1766
1767 /* -----------------------------------------------------------------------------
1768 * Check if an object or archive is already loaded.
1769 *
1770 * Returns: 1 if the path is already loaded, 0 otherwise.
1771 */
1772 static HsInt
1773 isAlreadyLoaded( pathchar *path )
1774 {
1775 ObjectCode *o;
1776 for (o = objects; o; o = o->next) {
1777 if (0 == pathcmp(o->fileName, path)) {
1778 return 1; /* already loaded */
1779 }
1780 }
1781 return 0; /* not loaded yet */
1782 }
1783
1784 static HsInt loadArchive_ (pathchar *path)
1785 {
1786 ObjectCode* oc;
1787 char *image;
1788 int memberSize;
1789 FILE *f;
1790 int n;
1791 size_t thisFileNameSize;
1792 char *fileName;
1793 size_t fileNameSize;
1794 int isObject, isGnuIndex, isThin, isImportLib;
1795 char tmp[20];
1796 char *gnuFileIndex;
1797 int gnuFileIndexSize;
1798 #if defined(darwin_HOST_OS)
1799 int i;
1800 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
1801 #if defined(i386_HOST_ARCH)
1802 const uint32_t mycputype = CPU_TYPE_X86;
1803 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
1804 #elif defined(x86_64_HOST_ARCH)
1805 const uint32_t mycputype = CPU_TYPE_X86_64;
1806 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
1807 #elif defined(powerpc_HOST_ARCH)
1808 const uint32_t mycputype = CPU_TYPE_POWERPC;
1809 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1810 #elif defined(powerpc64_HOST_ARCH)
1811 const uint32_t mycputype = CPU_TYPE_POWERPC64;
1812 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1813 #else
1814 #error Unknown Darwin architecture
1815 #endif
1816 #endif
1817 int misalignment = 0;
1818
1819 /* TODO: don't call barf() on error, instead return an error code, freeing
1820 * all resources correctly. This function is pretty complex, so it needs
1821 * to be refactored to make this practical. */
1822
1823 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
1824 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
1825
1826 /* Check that we haven't already loaded this archive.
1827 Ignore requests to load multiple times */
1828 if (isAlreadyLoaded(path)) {
1829 IF_DEBUG(linker,
1830 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1831 return 1; /* success */
1832 }
1833
1834 gnuFileIndex = NULL;
1835 gnuFileIndexSize = 0;
1836
1837 fileNameSize = 32;
1838 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
1839
1840 isThin = 0;
1841 isImportLib = 0;
1842
1843 f = pathopen(path, WSTR("rb"));
1844 if (!f)
1845 barf("loadObj: can't read `%" PATH_FMT "'", path);
1846
1847 /* Check if this is an archive by looking for the magic "!<arch>\n"
1848 * string. Usually, if this fails, we barf and quit. On Darwin however,
1849 * we may have a fat archive, which contains archives for more than
1850 * one architecture. Fat archives start with the magic number 0xcafebabe,
1851 * always stored big endian. If we find a fat_header, we scan through
1852 * the fat_arch structs, searching through for one for our host
1853 * architecture. If a matching struct is found, we read the offset
1854 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
1855 * from the start of the file.
1856 *
1857 * A subtlety is that all of the members of the fat_header and fat_arch
1858 * structs are stored big endian, so we need to call byte order
1859 * conversion functions.
1860 *
1861 * If we find the appropriate architecture in a fat archive, we gobble
1862 * its magic "!<arch>\n" string and continue processing just as if
1863 * we had a single architecture archive.
1864 */
1865
1866 n = fread ( tmp, 1, 8, f );
1867 if (n != 8)
1868 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
1869 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
1870 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
1871 *
1872 * ar thin libraries have the exact same format as normal archives except they
1873 * have a different magic string and they don't copy the object files into the
1874 * archive.
1875 *
1876 * Instead each header entry points to the location of the object file on disk.
1877 * This is useful when a library is only created to satisfy a compile time dependency
1878 * instead of to be distributed. This saves the time required for copying.
1879 *
1880 * Thin archives are always flattened. They always only contain simple headers
1881 * pointing to the object file and so we need not allocate more memory than needed
1882 * to find the object file.
1883 *
1884 */
1885 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
1886 isThin = 1;
1887 }
1888 #if defined(darwin_HOST_OS)
1889 /* Not a standard archive, look for a fat archive magic number: */
1890 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
1891 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
1892 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
1893 nfat_offset = 0;
1894
1895 for (i = 0; i < (int)nfat_arch; i++) {
1896 /* search for the right arch */
1897 n = fread( tmp, 1, 20, f );
1898 if (n != 8)
1899 barf("loadArchive: Failed reading arch from `%s'", path);
1900 cputype = ntohl(*(uint32_t *)tmp);
1901 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
1902
1903 if (cputype == mycputype && cpusubtype == mycpusubtype) {
1904 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
1905 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
1906 break;
1907 }
1908 }
1909
1910 if (nfat_offset == 0) {
1911 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
1912 }
1913 else {
1914 n = fseek( f, nfat_offset, SEEK_SET );
1915 if (n != 0)
1916 barf("loadArchive: Failed to seek to arch in `%s'", path);
1917 n = fread ( tmp, 1, 8, f );
1918 if (n != 8)
1919 barf("loadArchive: Failed reading header from `%s'", path);
1920 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1921 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
1922 }
1923 }
1924 }
1925 else {
1926 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
1927 }
1928 #else
1929 else {
1930 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
1931 }
1932 #endif
1933
1934 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
1935
1936 while (1) {
1937 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
1938 n = fread ( fileName, 1, 16, f );
1939 if (n != 16) {
1940 if (feof(f)) {
1941 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
1942 break;
1943 }
1944 else {
1945 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
1946 }
1947 }
1948
1949 #if defined(darwin_HOST_OS)
1950 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
1951 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
1952 break;
1953 }
1954 #endif
1955
1956 n = fread ( tmp, 1, 12, f );
1957 if (n != 12)
1958 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
1959 n = fread ( tmp, 1, 6, f );
1960 if (n != 6)
1961 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
1962 n = fread ( tmp, 1, 6, f );
1963 if (n != 6)
1964 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
1965 n = fread ( tmp, 1, 8, f );
1966 if (n != 8)
1967 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
1968 n = fread ( tmp, 1, 10, f );
1969 if (n != 10)
1970 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
1971 tmp[10] = '\0';
1972 for (n = 0; isdigit(tmp[n]); n++);
1973 tmp[n] = '\0';
1974 memberSize = atoi(tmp);
1975
1976 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
1977 n = fread ( tmp, 1, 2, f );
1978 if (n != 2)
1979 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
1980 if (strncmp(tmp, "\x60\x0A", 2) != 0)
1981 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
1982 path, ftell(f), tmp[0], tmp[1]);
1983
1984 isGnuIndex = 0;
1985 /* Check for BSD-variant large filenames */
1986 if (0 == strncmp(fileName, "#1/", 3)) {
1987 fileName[16] = '\0';
1988 if (isdigit(fileName[3])) {
1989 for (n = 4; isdigit(fileName[n]); n++);
1990 fileName[n] = '\0';
1991 thisFileNameSize = atoi(fileName + 3);
1992 memberSize -= thisFileNameSize;
1993 if (thisFileNameSize >= fileNameSize) {
1994 /* Double it to avoid potentially continually
1995 increasing it by 1 */
1996 fileNameSize = thisFileNameSize * 2;
1997 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
1998 }
1999 n = fread ( fileName, 1, thisFileNameSize, f );
2000 if (n != (int)thisFileNameSize) {
2001 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
2002 path);
2003 }
2004 fileName[thisFileNameSize] = 0;
2005
2006 /* On OS X at least, thisFileNameSize is the size of the
2007 fileName field, not the length of the fileName
2008 itself. */
2009 thisFileNameSize = strlen(fileName);
2010 }
2011 else {
2012 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
2013 }
2014 }
2015 /* Check for GNU file index file */
2016 else if (0 == strncmp(fileName, "//", 2)) {
2017 fileName[0] = '\0';
2018 thisFileNameSize = 0;
2019 isGnuIndex = 1;
2020 }
2021 /* Check for a file in the GNU file index */
2022 else if (fileName[0] == '/') {
2023 if (isdigit(fileName[1])) {
2024 int i;
2025
2026 for (n = 2; isdigit(fileName[n]); n++);
2027 fileName[n] = '\0';
2028 n = atoi(fileName + 1);
2029
2030 if (gnuFileIndex == NULL) {
2031 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
2032 }
2033 if (n < 0 || n > gnuFileIndexSize) {
2034 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
2035 }
2036 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
2037 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
2038 }
2039 for (i = n; gnuFileIndex[i] != '\n'; i++);
2040 thisFileNameSize = i - n - 1;
2041 if (thisFileNameSize >= fileNameSize) {
2042 /* Double it to avoid potentially continually
2043 increasing it by 1 */
2044 fileNameSize = thisFileNameSize * 2;
2045 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2046 }
2047 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
2048 fileName[thisFileNameSize] = '\0';
2049 }
2050 else if (fileName[1] == ' ') {
2051 fileName[0] = '\0';
2052 thisFileNameSize = 0;
2053 }
2054 else {
2055 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
2056 }
2057 }
2058 /* Finally, the case where the filename field actually contains
2059 the filename */
2060 else {
2061 /* GNU ar terminates filenames with a '/', this allowing
2062 spaces in filenames. So first look to see if there is a
2063 terminating '/'. */
2064 for (thisFileNameSize = 0;
2065 thisFileNameSize < 16;
2066 thisFileNameSize++) {
2067 if (fileName[thisFileNameSize] == '/') {
2068 fileName[thisFileNameSize] = '\0';
2069 break;
2070 }
2071 }
2072 /* If we didn't find a '/', then a space teminates the
2073 filename. Note that if we don't find one, then
2074 thisFileNameSize ends up as 16, and we already have the
2075 '\0' at the end. */
2076 if (thisFileNameSize == 16) {
2077 for (thisFileNameSize = 0;
2078 thisFileNameSize < 16;
2079 thisFileNameSize++) {
2080 if (fileName[thisFileNameSize] == ' ') {
2081 fileName[thisFileNameSize] = '\0';
2082 break;
2083 }
2084 }
2085 }
2086 }
2087
2088 IF_DEBUG(linker,
2089 debugBelch("loadArchive: Found member file `%s'\n", fileName));
2090
2091 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
2092 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
2093
2094 #if defined(OBJFORMAT_PEi386)
2095 /*
2096 * Note [MSVC import files (ext .lib)]
2097 * MSVC compilers store the object files in
2098 * the import libraries with extension .dll
2099 * so on Windows we should look for those too.
2100 * The PE COFF format doesn't specify any specific file name
2101 * for sections. So on windows, just try to load it all.
2102 *
2103 * Linker members (e.g. filename / are skipped since they are not needed)
2104 */
2105 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
2106
2107 /*
2108 * Note [GCC import files (ext .dll.a)]
2109 * GCC stores import information in the same binary format
2110 * as the object file normally has. The only difference is that
2111 * all the information are put in .idata sections. The only real
2112 * way to tell if we're dealing with an import lib is by looking
2113 * at the file extension.
2114 */
2115 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
2116 #endif // windows
2117
2118 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
2119 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
2120
2121 if (isObject) {
2122 char *archiveMemberName;
2123
2124 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
2125
2126 #if defined(mingw32_HOST_OS)
2127 // TODO: We would like to use allocateExec here, but allocateExec
2128 // cannot currently allocate blocks large enough.
2129 image = allocateImageAndTrampolines(path, fileName, f, memberSize,
2130 isThin);
2131 #elif defined(darwin_HOST_OS)
2132 if (RTS_LINKER_USE_MMAP)
2133 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
2134 else {
2135 /* See loadObj() */
2136 misalignment = machoGetMisalignment(f);
2137 image = stgMallocBytes(memberSize + misalignment,
2138 "loadArchive(image)");
2139 image += misalignment;
2140 }
2141
2142 #else // not windows or darwin
2143 image = stgMallocBytes(memberSize, "loadArchive(image)");
2144 #endif
2145 if (isThin) {
2146 FILE *member;
2147 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
2148
2149 /* Allocate and setup the dirname of the archive. We'll need
2150 this to locate the thin member */
2151 pathCopy = pathdup(path); // Convert the char* to a pathchar*
2152 dirName = pathdir(pathCopy);
2153
2154 /* Append the relative member name to the dirname. This should be
2155 be the full path to the actual thin member. */
2156 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
2157 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
2158 objFileName = mkPath(fileName);
2159 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
2160 stgFree(objFileName);
2161 stgFree(dirName);
2162
2163 member = pathopen(memberPath, WSTR("rb"));
2164 if (!member)
2165 barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
2166
2167 n = fread ( image, 1, memberSize, member );
2168 if (n != memberSize) {
2169 barf("loadArchive: error whilst reading `%s'", fileName);
2170 }
2171
2172 fclose(member);
2173 stgFree(memberPath);
2174 stgFree(pathCopy);
2175 }
2176 else
2177 {
2178 n = fread ( image, 1, memberSize, f );
2179 if (n != memberSize) {
2180 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2181 }
2182 }
2183
2184 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
2185 "loadArchive(file)");
2186 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
2187 path, (int)thisFileNameSize, fileName);
2188
2189 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
2190 , misalignment);
2191
2192 stgFree(archiveMemberName);
2193
2194 if (0 == loadOc(oc)) {
2195 stgFree(fileName);
2196 fclose(f);
2197 return 0;
2198 } else {
2199 #if defined(OBJFORMAT_PEi386)
2200 if (isImportLib)
2201 {
2202 findAndLoadImportLibrary(oc);
2203 stgFree(oc);
2204 oc = NULL;
2205 break;
2206 } else {
2207 #endif
2208 oc->next = objects;
2209 objects = oc;
2210 #if defined(OBJFORMAT_PEi386)
2211 }
2212 #endif
2213 }
2214 }
2215 else if (isGnuIndex) {
2216 if (gnuFileIndex != NULL) {
2217 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
2218 }
2219 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
2220 #if RTS_LINKER_USE_MMAP
2221 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
2222 #else
2223 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
2224 #endif
2225 n = fread ( gnuFileIndex, 1, memberSize, f );
2226 if (n != memberSize) {
2227 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2228 }
2229 gnuFileIndex[memberSize] = '/';
2230 gnuFileIndexSize = memberSize;
2231 }
2232 else if (isImportLib) {
2233 #if defined(OBJFORMAT_PEi386)
2234 if (checkAndLoadImportLibrary(path, fileName, f)) {
2235 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
2236 }
2237 else {
2238 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
2239 n = fseek(f, memberSize, SEEK_CUR);
2240 if (n != 0)
2241 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2242 memberSize, path);
2243 }
2244 #endif
2245 }
2246 else {
2247 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
2248 if (!isThin || thisFileNameSize == 0) {
2249 n = fseek(f, memberSize, SEEK_CUR);
2250 if (n != 0)
2251 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2252 memberSize, path);
2253 }
2254 }
2255
2256 /* .ar files are 2-byte aligned */
2257 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
2258 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
2259 n = fread ( tmp, 1, 1, f );
2260 if (n != 1) {
2261 if (feof(f)) {
2262 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
2263 break;
2264 }
2265 else {
2266 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
2267 }
2268 }
2269 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
2270 }
2271 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
2272 }
2273
2274 fclose(f);
2275
2276 stgFree(fileName);
2277 if (gnuFileIndex != NULL) {
2278 #if RTS_LINKER_USE_MMAP
2279 munmap(gnuFileIndex, gnuFileIndexSize + 1);
2280 #else
2281 stgFree(gnuFileIndex);
2282 #endif
2283 }
2284
2285 if (RTS_LINKER_USE_MMAP)
2286 m32_allocator_flush();
2287
2288 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
2289 return 1;
2290 }
2291
2292 HsInt loadArchive (pathchar *path)
2293 {
2294 ACQUIRE_LOCK(&linker_mutex);
2295 HsInt r = loadArchive_(path);
2296 RELEASE_LOCK(&linker_mutex);
2297 return r;
2298 }
2299
2300 //
2301 // Load the object file into memory. This will not be its final resting place,
2302 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
2303 // address space, properly aligned.
2304 //
2305 static ObjectCode *
2306 preloadObjectFile (pathchar *path)
2307 {
2308 int fileSize;
2309 struct_stat st;
2310 int r;
2311 void *image;
2312 ObjectCode *oc;
2313 int misalignment = 0;
2314
2315 r = pathstat(path, &st);
2316 if (r == -1) {
2317 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
2318 return NULL;
2319 }
2320
2321 fileSize = st.st_size;
2322
2323 #if RTS_LINKER_USE_MMAP
2324 int fd;
2325
2326 /* On many architectures malloc'd memory isn't executable, so we need to use
2327 * mmap. */
2328
2329 #if defined(openbsd_HOST_OS)
2330 fd = open(path, O_RDONLY, S_IRUSR);
2331 #else
2332 fd = open(path, O_RDONLY);
2333 #endif
2334 if (fd == -1) {
2335 errorBelch("loadObj: can't open %s", path);
2336 return NULL;
2337 }
2338
2339 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
2340 MAP_PRIVATE, fd, 0);
2341 // not 32-bit yet, we'll remap later
2342 close(fd);
2343
2344 #else /* !RTS_LINKER_USE_MMAP */
2345 FILE *f;
2346
2347 /* load the image into memory */
2348 /* coverity[toctou] */
2349 f = pathopen(path, WSTR("rb"));
2350 if (!f) {
2351 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
2352 return NULL;
2353 }
2354
2355 # if defined(mingw32_HOST_OS)
2356
2357 // TODO: We would like to use allocateExec here, but allocateExec
2358 // cannot currently allocate blocks large enough.
2359 image = allocateImageAndTrampolines(path, "itself", f, fileSize,
2360 HS_BOOL_FALSE);
2361 if (image == NULL) {
2362 fclose(f);
2363 return NULL;
2364 }
2365
2366 # elif defined(darwin_HOST_OS)
2367
2368 // In a Mach-O .o file, all sections can and will be misaligned
2369 // if the total size of the headers is not a multiple of the
2370 // desired alignment. This is fine for .o files that only serve
2371 // as input for the static linker, but it's not fine for us,
2372 // as SSE (used by gcc for floating point) and Altivec require
2373 // 16-byte alignment.
2374 // We calculate the correct alignment from the header before
2375 // reading the file, and then we misalign image on purpose so
2376 // that the actual sections end up aligned again.
2377 misalignment = machoGetMisalignment(f);
2378 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
2379 image += misalignment;
2380
2381 # else /* !defined(mingw32_HOST_OS) */
2382
2383 image = stgMallocBytes(fileSize, "loadObj(image)");
2384
2385 #endif
2386
2387 int n;
2388 n = fread ( image, 1, fileSize, f );
2389 fclose(f);
2390 if (n != fileSize) {
2391 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
2392 stgFree(image);
2393 return NULL;
2394 }
2395
2396 #endif /* RTS_LINKER_USE_MMAP */
2397
2398 oc = mkOc(path, image, fileSize, rtsTrue, NULL, misalignment);
2399
2400 return oc;
2401 }
2402
2403 /* -----------------------------------------------------------------------------
2404 * Load an obj (populate the global symbol table, but don't resolve yet)
2405 *
2406 * Returns: 1 if ok, 0 on error.
2407 */
2408 static HsInt loadObj_ (pathchar *path)
2409 {
2410 ObjectCode* oc;
2411 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
2412
2413 /* debugBelch("loadObj %s\n", path ); */
2414
2415 /* Check that we haven't already loaded this object.
2416 Ignore requests to load multiple times */
2417
2418 if (isAlreadyLoaded(path)) {
2419 IF_DEBUG(linker,
2420 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
2421 return 1; /* success */
2422 }
2423
2424 oc = preloadObjectFile(path);
2425 if (oc == NULL) return 0;
2426
2427 if (! loadOc(oc)) {
2428 // failed; free everything we've allocated
2429 removeOcSymbols(oc);
2430 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
2431 freeObjectCode(oc);
2432 return 0;
2433 }
2434
2435 oc->next = objects;
2436 objects = oc;
2437 return 1;
2438 }
2439
2440 HsInt loadObj (pathchar *path)
2441 {
2442 ACQUIRE_LOCK(&linker_mutex);
2443 HsInt r = loadObj_(path);
2444 RELEASE_LOCK(&linker_mutex);
2445 return r;
2446 }
2447
2448 static HsInt loadOc (ObjectCode* oc)
2449 {
2450 int r;
2451
2452 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
2453
2454 /* verify the in-memory image */
2455 # if defined(OBJFORMAT_ELF)
2456 r = ocVerifyImage_ELF ( oc );
2457 # elif defined(OBJFORMAT_PEi386)
2458 r = ocVerifyImage_PEi386 ( oc );
2459 # elif defined(OBJFORMAT_MACHO)
2460 r = ocVerifyImage_MachO ( oc );
2461 # else
2462 barf("loadObj: no verify method");
2463 # endif
2464 if (!r) {
2465 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
2466 return r;
2467 }
2468
2469 #if NEED_SYMBOL_EXTRAS
2470 # if defined(OBJFORMAT_MACHO)
2471 r = ocAllocateSymbolExtras_MachO ( oc );
2472 if (!r) {
2473 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
2474 return r;
2475 }
2476 # elif defined(OBJFORMAT_ELF)
2477 r = ocAllocateSymbolExtras_ELF ( oc );
2478 if (!r) {
2479 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
2480 return r;
2481 }
2482 # elif defined(OBJFORMAT_PEi386)
2483 ocAllocateSymbolExtras_PEi386 ( oc );
2484 # endif
2485 #endif
2486
2487 /* build the symbol list for this image */
2488 # if defined(OBJFORMAT_ELF)
2489 r = ocGetNames_ELF ( oc );
2490 # elif defined(OBJFORMAT_PEi386)
2491 r = ocGetNames_PEi386 ( oc );
2492 # elif defined(OBJFORMAT_MACHO)
2493 r = ocGetNames_MachO ( oc );
2494 # else
2495 barf("loadObj: no getNames method");
2496 # endif
2497 if (!r) {
2498 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
2499 return r;
2500 }
2501
2502 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
2503 setOcInitialStatus( oc );
2504 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
2505
2506 return 1;
2507 }
2508
2509 /* -----------------------------------------------------------------------------
2510 * try to load and initialize an ObjectCode into memory
2511 *
2512 * Returns: 1 if ok, 0 on error.
2513 */
2514 int ocTryLoad (ObjectCode* oc) {
2515 int r;
2516
2517 if (oc->status != OBJECT_NEEDED) {
2518 return 1;
2519 }
2520
2521 /* Check for duplicate symbols by looking into `symhash`.
2522 Duplicate symbols are any symbols which exist
2523 in different ObjectCodes that have both been loaded, or
2524 are to be loaded by this call.
2525
2526 This call is intended to have no side-effects when a non-duplicate
2527 symbol is re-inserted.
2528
2529 We set the Address to NULL since that is not used to distinguish
2530 symbols. Duplicate symbols are distinguished by name and oc.
2531 */
2532 int x;
2533 SymbolName* symbol;
2534 for (x = 0; x < oc->n_symbols; x++) {
2535 symbol = oc->symbols[x];
2536 if ( symbol
2537 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL, isSymbolWeak(oc, symbol), oc)) {
2538 return 0;
2539 }
2540 }
2541
2542 # if defined(OBJFORMAT_ELF)
2543 r = ocResolve_ELF ( oc );
2544 # elif defined(OBJFORMAT_PEi386)
2545 r = ocResolve_PEi386 ( oc );
2546 # elif defined(OBJFORMAT_MACHO)
2547 r = ocResolve_MachO ( oc );
2548 # else
2549 barf("ocTryLoad: not implemented on this platform");
2550 # endif
2551 if (!r) { return r; }
2552
2553 // run init/init_array/ctors/mod_init_func
2554
2555 loading_obj = oc; // tells foreignExportStablePtr what to do
2556 #if defined(OBJFORMAT_ELF)
2557 r = ocRunInit_ELF ( oc );
2558 #elif defined(OBJFORMAT_PEi386)
2559 r = ocRunInit_PEi386 ( oc );
2560 #elif defined(OBJFORMAT_MACHO)
2561 r = ocRunInit_MachO ( oc );
2562 #else
2563 barf("ocTryLoad: initializers not implemented on this platform");
2564 #endif
2565 loading_obj = NULL;
2566
2567 if (!r) { return r; }
2568
2569 oc->status = OBJECT_RESOLVED;
2570
2571 return 1;
2572 }
2573
2574 /* -----------------------------------------------------------------------------
2575 * resolve all the currently unlinked objects in memory
2576 *
2577 * Returns: 1 if ok, 0 on error.
2578 */
2579 static HsInt resolveObjs_ (void)
2580 {
2581 ObjectCode *oc;
2582 int r;
2583
2584 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
2585
2586 for (oc = objects; oc; oc = oc->next) {
2587 r = ocTryLoad(oc);
2588 if (!r)
2589 {
2590 return r;
2591 }
2592 }
2593
2594 #ifdef PROFILING
2595 // collect any new cost centres & CCSs that were defined during runInit
2596 initProfiling2();
2597 #endif
2598
2599 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
2600 return 1;
2601 }
2602
2603 HsInt resolveObjs (void)
2604 {
2605 ACQUIRE_LOCK(&linker_mutex);
2606 HsInt r = resolveObjs_();
2607 RELEASE_LOCK(&linker_mutex);
2608 return r;
2609 }
2610
2611 /* -----------------------------------------------------------------------------
2612 * delete an object from the pool
2613 */
2614 static HsInt unloadObj_ (pathchar *path, rtsBool just_purge)
2615 {
2616 ObjectCode *oc, *prev, *next;
2617 HsBool unloadedAnyObj = HS_BOOL_FALSE;
2618
2619 ASSERT(symhash != NULL);
2620 ASSERT(objects != NULL);
2621
2622 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
2623
2624 prev = NULL;
2625 for (oc = objects; oc; oc = next) {
2626 next = oc->next; // oc might be freed
2627
2628 if (!pathcmp(oc->fileName,path)) {
2629
2630 // these are both idempotent, so in just_purge mode we can
2631 // later call unloadObj() to really unload the object.
2632 removeOcSymbols(oc);
2633 freeOcStablePtrs(oc);
2634
2635 if (!just_purge) {
2636 if (prev == NULL) {
2637 objects = oc->next;
2638 } else {
2639 prev->next = oc->next;
2640 }
2641 ACQUIRE_LOCK(&linker_unloaded_mutex);
2642 oc->next = unloaded_objects;
2643 unloaded_objects = oc;
2644 oc->status = OBJECT_UNLOADED;
2645 RELEASE_LOCK(&linker_unloaded_mutex);
2646 // We do not own oc any more; it can be released at any time by
2647 // the GC in checkUnload().
2648 } else {
2649 prev = oc;
2650 }
2651
2652 /* This could be a member of an archive so continue
2653 * unloading other members. */
2654 unloadedAnyObj = HS_BOOL_TRUE;
2655 } else {
2656 prev = oc;
2657 }
2658 }
2659
2660 if (unloadedAnyObj) {
2661 return 1;
2662 }
2663 else {
2664 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
2665 return 0;
2666 }
2667 }
2668
2669 HsInt unloadObj (pathchar *path)
2670 {
2671 ACQUIRE_LOCK(&linker_mutex);
2672 HsInt r = unloadObj_(path, rtsFalse);
2673 RELEASE_LOCK(&linker_mutex);
2674 return r;
2675 }
2676
2677 HsInt purgeObj (pathchar *path)
2678 {
2679 ACQUIRE_LOCK(&linker_mutex);
2680 HsInt r = unloadObj_(path, rtsTrue);
2681 RELEASE_LOCK(&linker_mutex);
2682 return r;
2683 }
2684
2685 /* -----------------------------------------------------------------------------
2686 * Sanity checking. For each ObjectCode, maintain a list of address ranges
2687 * which may be prodded during relocation, and abort if we try and write
2688 * outside any of these.
2689 */
2690 static void
2691 addProddableBlock ( ObjectCode* oc, void* start, int size )
2692 {
2693 ProddableBlock* pb
2694 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
2695
2696 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
2697 ASSERT(size > 0);
2698 pb->start = start;
2699 pb->size = size;
2700 pb->next = oc->proddables;
2701 oc->proddables = pb;
2702 }
2703
2704 static void
2705 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
2706 {
2707 ProddableBlock* pb;
2708
2709 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
2710 char* s = (char*)(pb->start);
2711 char* e = s + pb->size;
2712 char* a = (char*)addr;
2713 if (a >= s && (a+size) <= e) return;
2714 }
2715 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
2716 }
2717
2718 static void freeProddableBlocks (ObjectCode *oc)
2719 {
2720 ProddableBlock *pb, *next;
2721
2722 for (pb = oc->proddables; pb != NULL; pb = next) {
2723 next = pb->next;
2724 stgFree(pb);
2725 }
2726 oc->proddables = NULL;
2727 }
2728
2729 /* -----------------------------------------------------------------------------
2730 * Section management.
2731 */
2732 static void
2733 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
2734 void* start, StgWord size, StgWord mapped_offset,
2735 void* mapped_start, StgWord mapped_size)
2736 {
2737 s->start = start; /* actual start of section in memory */
2738 s->size = size; /* actual size of section in memory */
2739 s->kind = kind;
2740 s->alloc = alloc;
2741 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
2742
2743 s->mapped_start = mapped_start; /* start of mmap() block */
2744 s->mapped_size = mapped_size; /* size of mmap() block */
2745
2746 IF_DEBUG(linker,
2747 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
2748 start, (void*)((StgWord)start + size),
2749 size, kind ));
2750 }
2751
2752
2753 /* --------------------------------------------------------------------------
2754 * Symbol Extras.
2755 * This is about allocating a small chunk of memory for every symbol in the
2756 * object file. We make sure that the SymboLExtras are always "in range" of
2757 * limited-range PC-relative instructions on various platforms by allocating
2758 * them right next to the object code itself.
2759 */
2760
2761 #if NEED_SYMBOL_EXTRAS
2762 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2763
2764 /*
2765 ocAllocateSymbolExtras
2766
2767 Allocate additional space at the end of the object file image to make room
2768 for jump islands (powerpc, x86_64, arm) and GOT entries (x86_64).
2769
2770 PowerPC relative branch instructions have a 24 bit displacement field.
2771 As PPC code is always 4-byte-aligned, this yields a +-32MB range.
2772 If a particular imported symbol is outside this range, we have to redirect
2773 the jump to a short piece of new code that just loads the 32bit absolute
2774 address and jumps there.
2775 On x86_64, PC-relative jumps and PC-relative accesses to the GOT are limited
2776 to 32 bits (+-2GB).
2777
2778 This function just allocates space for one SymbolExtra for every
2779 undefined symbol in the object file. The code for the jump islands is
2780 filled in by makeSymbolExtra below.
2781 */
2782
2783 static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
2784 {
2785 size_t n;
2786
2787 if (RTS_LINKER_USE_MMAP && USE_CONTIGUOUS_MMAP) {
2788 n = roundUpToPage(oc->fileSize);
2789
2790 /* Keep image and symbol_extras contiguous */
2791 void *new = mmapForLinker(n + (sizeof(SymbolExtra) * count),
2792 MAP_ANONYMOUS, -1, 0);
2793 if (new) {
2794 memcpy(new, oc->image, oc->fileSize);
2795 if (oc->imageMapped) {
2796 munmap(oc->image, n);
2797 }
2798 oc->image = new;
2799 oc->imageMapped = rtsTrue;
2800 oc->fileSize = n + (sizeof(SymbolExtra) * count);
2801 oc->symbol_extras = (SymbolExtra *) (oc->image + n);
2802 }
2803 else {
2804 oc->symbol_extras = NULL;
2805 return 0;
2806 }
2807 }
2808 else if( count > 0 ) {
2809 if (RTS_LINKER_USE_MMAP) {
2810 n = roundUpToPage(oc->fileSize);
2811
2812 oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8);
2813 if (oc->symbol_extras == NULL) return 0;
2814 }
2815 else {
2816 // round up to the nearest 4
2817 int aligned = (oc->fileSize + 3) & ~3;
2818 int misalignment = oc->misalignment;
2819
2820 oc->image -= misalignment;
2821 oc->image = stgReallocBytes( oc->image,
2822 misalignment +
2823 aligned + sizeof (SymbolExtra) * count,
2824 "ocAllocateSymbolExtras" );
2825 oc->image += misalignment;
2826
2827 oc->symbol_extras = (SymbolExtra *) (oc->image + aligned);
2828 }
2829 }
2830
2831 if (oc->symbol_extras != NULL) {
2832 memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
2833 }
2834
2835 oc->first_symbol_extra = first;
2836 oc->n_symbol_extras = count;
2837
2838 return 1;
2839 }
2840
2841 #endif
2842 #endif // NEED_SYMBOL_EXTRAS
2843
2844 #if defined(arm_HOST_ARCH)
2845
2846 static void
2847 ocFlushInstructionCache( ObjectCode *oc )
2848 {
2849 int i;
2850 // Object code
2851 for (i=0; i < oc->n_sections; i++) {
2852 Section *s = &oc->sections[i];
2853 // This is a bit too broad but we don't have any way to determine what
2854 // is certainly code
2855 if (s->kind == SECTIONKIND_CODE_OR_RODATA)
2856 __clear_cache(s->start, (void*) ((uintptr_t) s->start + s->size));
2857 }
2858
2859 // Jump islands
2860 // Note the (+1) to ensure that the last symbol extra is covered by the
2861 // flush.
2862 __clear_cache(oc->symbol_extras, &oc->symbol_extras[oc->n_symbol_extras+1]);
2863 }
2864
2865 #endif
2866
2867 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2868 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2869
2870 static SymbolExtra* makeSymbolExtra( ObjectCode* oc,
2871 unsigned long symbolNumber,
2872 unsigned long target )
2873 {
2874 SymbolExtra *extra;
2875
2876 ASSERT( symbolNumber >= oc->first_symbol_extra
2877 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2878
2879 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2880
2881 #ifdef powerpc_HOST_ARCH
2882 // lis r12, hi16(target)
2883 extra->jumpIsland.lis_r12 = 0x3d80;
2884 extra->jumpIsland.hi_addr = target >> 16;
2885
2886 // ori r12, r12, lo16(target)
2887 extra->jumpIsland.ori_r12_r12 = 0x618c;
2888 extra->jumpIsland.lo_addr = target & 0xffff;
2889
2890 // mtctr r12
2891 extra->jumpIsland.mtctr_r12 = 0x7d8903a6;
2892
2893 // bctr
2894 extra->jumpIsland.bctr = 0x4e800420;
2895 #endif
2896 #ifdef x86_64_HOST_ARCH
2897 // jmp *-14(%rip)
2898 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
2899 extra->addr = target;
2900 memcpy(extra->jumpIsland, jmp, 6);
2901 #endif
2902
2903 return extra;
2904 }
2905
2906 #endif
2907 #endif // defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2908
2909 #ifdef arm_HOST_ARCH
2910 static SymbolExtra* makeArmSymbolExtra( ObjectCode* oc,
2911 unsigned long symbolNumber,
2912 unsigned long target,
2913 int fromThumb,
2914 int toThumb )
2915 {
2916 SymbolExtra *extra;
2917
2918 ASSERT( symbolNumber >= oc->first_symbol_extra
2919 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2920
2921 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2922
2923 // Make sure instruction mode bit is set properly
2924 if (toThumb)
2925 target |= 1;
2926 else
2927 target &= ~1;
2928
2929 if (!fromThumb) {
2930 // In ARM encoding:
2931 // movw r12, #0
2932 // movt r12, #0
2933 // bx r12
2934 uint32_t code[] = { 0xe300c000, 0xe340c000, 0xe12fff1c };
2935
2936 // Patch lower half-word into movw
2937 code[0] |= ((target>>12) & 0xf) << 16;
2938 code[0] |= target & 0xfff;
2939 // Patch upper half-word into movt
2940 target >>= 16;
2941 code[1] |= ((target>>12) & 0xf) << 16;
2942 code[1] |= target & 0xfff;
2943
2944 memcpy(extra->jumpIsland, code, 12);
2945
2946 } else {
2947 // In Thumb encoding:
2948 // movw r12, #0
2949 // movt r12, #0
2950 // bx r12
2951 uint16_t code[] = { 0xf240, 0x0c00,
2952 0xf2c0, 0x0c00,
2953 0x4760 };
2954
2955 // Patch lower half-word into movw
2956 code[0] |= (target>>12) & 0xf;
2957 code[0] |= ((target>>11) & 0x1) << 10;
2958 code[1] |= ((target>>8) & 0x7) << 12;
2959 code[1] |= target & 0xff;
2960 // Patch upper half-word into movt
2961 target >>= 16;
2962 code[2] |= (target>>12) & 0xf;
2963 code[2] |= ((target>>11) & 0x1) << 10;
2964 code[3] |= ((target>>8) & 0x7) << 12;
2965 code[3] |= target & 0xff;
2966
2967 memcpy(extra->jumpIsland, code, 10);
2968 }
2969
2970 return extra;
2971 }
2972 #endif // arm_HOST_ARCH
2973
2974 /* --------------------------------------------------------------------------
2975 * PowerPC specifics (instruction cache flushing)
2976 * ------------------------------------------------------------------------*/
2977
2978 #ifdef powerpc_HOST_ARCH
2979 /*
2980 ocFlushInstructionCache
2981
2982 Flush the data & instruction caches.
2983 Because the PPC has split data/instruction caches, we have to
2984 do that whenever we modify code at runtime.
2985 */
2986
2987 static void
2988 ocFlushInstructionCacheFrom(void* begin, size_t length)
2989 {
2990 size_t n = (length + 3) / 4;
2991 unsigned long* p = begin;
2992
2993 while (n--)
2994 {
2995 __asm__ volatile ( "dcbf 0,%0\n\t"
2996 "sync\n\t"
2997 "icbi 0,%0"
2998 :
2999 : "r" (p)
3000 );
3001 p++;
3002 }
3003 __asm__ volatile ( "sync\n\t"
3004 "isync"
3005 );
3006 }
3007
3008 static void
3009 ocFlushInstructionCache( ObjectCode *oc )
3010 {
3011 /* The main object code */
3012 ocFlushInstructionCacheFrom(oc->image + oc->misalignment, oc->fileSize);
3013
3014 /* Jump Islands */
3015 ocFlushInstructionCacheFrom(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
3016 }
3017 #endif /* powerpc_HOST_ARCH */
3018
3019
3020 /* --------------------------------------------------------------------------
3021 * PEi386(+) specifics (Win32 targets)
3022 * ------------------------------------------------------------------------*/
3023
3024 /* The information for this linker comes from
3025 Microsoft Portable Executable
3026 and Common Object File Format Specification
3027 revision 8.3 February 2013
3028
3029 It can be found online at:
3030
3031 https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx
3032
3033 Things move, so if that fails, try searching for it via
3034
3035 http://www.google.com/search?q=PE+COFF+specification
3036
3037 The ultimate reference for the PE format is the Winnt.h
3038 header file that comes with the Platform SDKs; as always,
3039 implementations will drift wrt their documentation.
3040
3041 A good background article on the PE format is Matt Pietrek's
3042 March 1994 article in Microsoft System Journal (MSJ)
3043 (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
3044 Win32 Portable Executable File Format." The info in there
3045 has recently been updated in a two part article in
3046 MSDN magazine, issues Feb and March 2002,
3047 "Inside Windows: An In-Depth Look into the Win32 Portable
3048 Executable File Format"
3049
3050 John Levine's book "Linkers and Loaders" contains useful
3051 info on PE too.
3052
3053 The PE specification doesn't specify how to do the actual
3054 relocations. For this reason, and because both PE and ELF are
3055 based on COFF, the relocations for the PEi386+ code is based on
3056 the ELF relocations for the equivalent relocation type.
3057
3058 The ELF ABI can be found at
3059
3060 http://www.x86-64.org/documentation/abi.pdf
3061
3062 The current code is based on version 0.99.6 - October 2013
3063 */
3064
3065
3066 #if defined(OBJFORMAT_PEi386)
3067
3068 static int verifyCOFFHeader ( COFF_header *hdr, pathchar *filename);
3069
3070 /* We assume file pointer is right at the
3071 beginning of COFF object.
3072 */
3073 static char *
3074 allocateImageAndTrampolines (
3075 pathchar* arch_name, char* member_name,
3076 FILE* ONLY_USED_x86_64_HOST_ARCH (f),
3077 int size,
3078 int ONLY_USED_x86_64_HOST_ARCH (isThin))
3079 {
3080 char* image;
3081 #if defined(x86_64_HOST_ARCH)
3082 if (!isThin)
3083 {
3084 /* PeCoff contains number of symbols right in it's header, so
3085 we can reserve the room for symbolExtras right here. */
3086 COFF_header hdr;
3087 size_t n;
3088
3089 n = fread(&hdr, 1, sizeof_COFF_header, f);
3090 if (n != sizeof(COFF_header)) {
3091 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%S'",
3092 member_name, arch_name);
3093 return NULL;
3094 }
3095 fseek(f, -sizeof_COFF_header, SEEK_CUR);
3096
3097 if (!verifyCOFFHeader(&hdr, arch_name)) {
3098 return 0;
3099 }
3100
3101 /* We get back 8-byte aligned memory (is that guaranteed?), but
3102 the offsets to the sections within the file are all 4 mod 8
3103 (is that guaranteed?). We therefore need to offset the image
3104 by 4, so that all the pointers are 8-byte aligned, so that
3105 pointer tagging works. */
3106 /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
3107 which equals to 4 for 64-bit case and 0 for 32-bit case. */
3108 /* We allocate trampolines area for all symbols right behind
3109 image data, aligned on 8. */
3110 size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
3111 + hdr.NumberOfSymbols * sizeof(SymbolExtra);
3112 }
3113 #endif
3114 image = VirtualAlloc(NULL, size,
3115 MEM_RESERVE | MEM_COMMIT,
3116 PAGE_EXECUTE_READWRITE);
3117
3118 if (image == NULL) {
3119 errorBelch("%" PATH_FMT ": failed to allocate memory for image for %s",
3120 arch_name, member_name);
3121 return NULL;
3122 }
3123
3124 return image + PEi386_IMAGE_OFFSET;
3125 }
3126
3127 static int findAndLoadImportLibrary(ObjectCode* oc)
3128 {
3129 int i;
3130
3131 COFF_header* hdr;
3132 COFF_section* sectab;
3133 COFF_symbol* symtab;
3134 UChar* strtab;
3135
3136 hdr = (COFF_header*)(oc->image);
3137 sectab = (COFF_section*)(
3138 ((UChar*)(oc->image))
3139 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3140 );
3141
3142 symtab = (COFF_symbol*)(
3143 ((UChar*)(oc->image))
3144 + hdr->PointerToSymbolTable
3145 );
3146
3147 strtab = ((UChar*)symtab)
3148 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3149
3150 for (i = 0; i < oc->n_sections; i++)
3151 {
3152 COFF_section* sectab_i
3153 = (COFF_section*)myindex(sizeof_COFF_section, sectab, i);
3154
3155 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3156
3157 // Find the first entry containing a valid .idata$7 section.
3158 if (strcmp(secname, ".idata$7") == 0) {
3159 /* First load the containing DLL if not loaded. */
3160 Section section = oc->sections[i];
3161
3162 pathchar* dirName = pathdir(oc->fileName);
3163 HsPtr token = addLibrarySearchPath(dirName);
3164 stgFree(dirName);
3165 char* dllName = (char*)section.start;
3166
3167 if (strlen(dllName) == 0 || dllName[0] == ' ')
3168 {
3169 continue;
3170 }
3171
3172 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand '%ls' => `%s'\n", oc->fileName, dllName));
3173
3174 pathchar* dll = mkPath(dllName);
3175 removeLibrarySearchPath(token);
3176
3177 const char* result = addDLL(dll);
3178 stgFree(dll);
3179
3180 if (result != NULL) {
3181 errorBelch("Could not load `%s'. Reason: %s\n", (char*)dllName, result);
3182 return 0;
3183 }
3184
3185 break;
3186 }
3187
3188 stgFree(secname);
3189 }
3190
3191 return 1;
3192 }
3193
3194 static int checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f)
3195 {
3196 char* image;
3197 static HsBool load_dll_warn = HS_BOOL_FALSE;
3198
3199 if (load_dll_warn) { return 0; }
3200
3201 /* Based on Import Library specification. PE Spec section 7.1 */
3202
3203 COFF_import_header hdr;
3204 size_t n;
3205
3206 n = fread(&hdr, 1, sizeof_COFF_import_Header, f);
3207 if (n != sizeof(COFF_header)) {
3208 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%" PATH_FMT "'\n",
3209 member_name, arch_name);
3210 return 0;
3211 }
3212
3213 if (hdr.Sig1 != 0x0 || hdr.Sig2 != 0xFFFF) {
3214 fseek(f, -sizeof_COFF_import_Header, SEEK_CUR);
3215 IF_DEBUG(linker, debugBelch("loadArchive: Object `%s` is not an import lib. Skipping...\n", member_name));
3216 return 0;
3217 }
3218
3219 IF_DEBUG(linker, debugBelch("loadArchive: reading %d bytes at %ld\n", hdr.SizeOfData, ftell(f)));
3220
3221 image = malloc(hdr.SizeOfData);
3222 n = fread(image, 1, hdr.SizeOfData, f);
3223 if (n != hdr.SizeOfData) {
3224 errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n",
3225 member_name, arch_name);
3226 }
3227
3228 char* symbol = strtok(image, "\0");
3229 int symLen = strlen(symbol) + 1;
3230 int nameLen = n - symLen;
3231 char* dllName = malloc(sizeof(char) * nameLen);
3232 dllName = strncpy(dllName, image + symLen, nameLen);
3233 pathchar* dll = malloc(sizeof(wchar_t) * nameLen);
3234 mbstowcs(dll, dllName, nameLen);
3235 free(dllName);
3236
3237 IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%ls'\n", symbol, dll));
3238 const char* result = addDLL(dll);
3239
3240 free(image);
3241
3242 if (result != NULL) {
3243 errorBelch("Could not load `%ls'. Reason: %s\n", dll, result);
3244 load_dll_warn = HS_BOOL_TRUE;
3245
3246 free(dll);
3247 fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
3248 return 0;
3249 }
3250
3251 free(dll);
3252 return 1;
3253 }
3254
3255 /* We use myindex to calculate array addresses, rather than
3256 simply doing the normal subscript thing. That's because
3257 some of the above structs have sizes which are not
3258 a whole number of words. GCC rounds their sizes up to a
3259 whole number of words, which means that the address calcs
3260 arising from using normal C indexing or pointer arithmetic
3261 are just plain wrong. Sigh.
3262 */
3263 static UChar *
3264 myindex ( int scale, void* base, int index )
3265 {
3266 return
3267 ((UChar*)base) + scale * index;
3268 }
3269
3270
3271 static void
3272 printName ( UChar* name, UChar* strtab )
3273 {
3274 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3275 UInt32 strtab_offset = * (UInt32*)(name+4);
3276 debugBelch("%s", strtab + strtab_offset );
3277 } else {
3278 int i;
3279 for (i = 0; i < 8; i++) {
3280 if (name[i] == 0) break;
3281 debugBelch("%c", name[i] );
3282 }
3283 }
3284 }
3285
3286
3287 static void
3288 copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
3289 {
3290 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3291 UInt32 strtab_offset = * (UInt32*)(name+4);
3292 strncpy ( (char*)dst, (char*)strtab+strtab_offset, dstSize );
3293 dst[dstSize-1] = 0;
3294 } else {
3295 int i = 0;
3296 while (1) {
3297 if (i >= 8) break;
3298 if (name[i] == 0) break;
3299 dst[i] = name[i];
3300 i++;
3301 }
3302 dst[i] = 0;
3303 }
3304 }
3305
3306
3307 static UChar *
3308 cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
3309 {
3310 UChar* newstr;
3311 /* If the string is longer than 8 bytes, look in the
3312 string table for it -- this will be correctly zero terminated.
3313 */
3314 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3315 UInt32 strtab_offset = * (UInt32*)(name+4);
3316 return ((UChar*)strtab) + strtab_offset;
3317 }
3318 /* Otherwise, if shorter than 8 bytes, return the original,
3319 which by defn is correctly terminated.
3320 */
3321 if (name[7]==0) return name;
3322 /* The annoying case: 8 bytes. Copy into a temporary
3323 (XXX which is never freed ...)
3324 */
3325 newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
3326 ASSERT(newstr);
3327 strncpy((char*)newstr,(char*)name,8);
3328 newstr[8] = 0;
3329 return newstr;
3330 }
3331
3332 /* Getting the name of a section is mildly tricky, so we make a
3333 function for it. Sadly, in one case we have to copy the string
3334 (when it is exactly 8 bytes long there's no trailing '\0'), so for
3335 consistency we *always* copy the string; the caller must free it
3336 */
3337 static char *
3338 cstring_from_section_name (UChar* name, UChar* strtab)
3339 {
3340 char *newstr;
3341
3342 if (name[0]=='/') {
3343 int strtab_offset = strtol((char*)name+1,NULL,10);
3344 int len = strlen(((char*)strtab) + strtab_offset);
3345
3346 newstr = stgMallocBytes(len+1, "cstring_from_section_symbol_name");
3347 strcpy((char*)newstr, (char*)((UChar*)strtab) + strtab_offset);
3348 return newstr;
3349 }
3350 else
3351 {
3352 newstr = stgMallocBytes(9, "cstring_from_section_symbol_name");
3353 ASSERT(newstr);
3354 strncpy((char*)newstr,(char*)name,8);
3355 newstr[8] = 0;
3356 return newstr;
3357 }
3358 }
3359
3360 /* See Note [mingw-w64 name decoration scheme] */
3361 #ifndef x86_64_HOST_ARCH
3362 static void
3363 zapTrailingAtSign ( UChar* sym )
3364 {
3365 # define my_isdigit(c) ((c) >= '0' && (c) <= '9')
3366 int i, j;
3367 if (sym[0] == 0) return;
3368 i = 0;
3369 while (sym[i] != 0) i++;
3370 i--;
3371 j = i;
3372 while (j > 0 && my_isdigit(sym[j])) j--;
3373 if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
3374 # undef my_isdigit
3375 }
3376 #endif
3377
3378 /* See Note [mingw-w64 name decoration scheme] */
3379 #ifndef x86_64_HOST_ARCH
3380 #define STRIP_LEADING_UNDERSCORE 1
3381 #else
3382 #define STRIP_LEADING_UNDERSCORE 0
3383 #endif
3384
3385 /*
3386 Note [mingw-w64 name decoration scheme]
3387
3388 What's going on with name decoration? Well, original code
3389 have some crufty and ad-hocish paths related mostly to very old
3390 mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty
3391 uniform and MS-compatible decoration scheme across its tools and runtime.
3392
3393 The scheme is pretty straightforward: on 32 bit objects symbols are exported
3394 with underscore prepended (and @ + stack size suffix appended for stdcall
3395 functions), on 64 bits no underscore is prepended and no suffix is appended
3396 because we have no stdcall convention on 64 bits.
3397
3398 See #9218
3399 */
3400
3401 static SymbolAddr*
3402 lookupSymbolInDLLs ( UChar *lbl )
3403 {
3404 OpenedDLL* o_dll;
3405 SymbolAddr* sym;
3406
3407 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
3408 /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
3409
3410 sym = GetProcAddress(o_dll->instance, (char*)(lbl+STRIP_LEADING_UNDERSCORE));
3411 if (sym != NULL) {
3412 /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
3413 return sym;
3414 }
3415
3416 /* Ticket #2283.
3417 Long description: http://support.microsoft.com/kb/132044
3418 tl;dr:
3419 If C/C++ compiler sees __declspec(dllimport) ... foo ...
3420 it generates call *__imp_foo, and __imp_foo here has exactly
3421 the same semantics as in __imp_foo = GetProcAddress(..., "foo")
3422 */
3423 if (sym == NULL && strncmp ((const char*)lbl, "__imp_", 6) == 0) {
3424 sym = GetProcAddress(o_dll->instance, (char*)(lbl+6+STRIP_LEADING_UNDERSCORE));
3425 if (sym != NULL) {
3426 IndirectAddr* ret;
3427 ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
3428 ret->addr = sym;
3429 ret->next = indirects;
3430 indirects = ret;
3431 IF_DEBUG(linker,
3432 debugBelch("warning: %s from %S is linked instead of %s\n",
3433 (char*)(lbl+6+STRIP_LEADING_UNDERSCORE), o_dll->name, (char*)lbl));
3434 return (void*) & ret->addr;
3435 }
3436 }
3437
3438 sym = GetProcAddress(o_dll->instance, (char*)lbl);
3439 if (sym != NULL) {
3440 /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
3441 return sym;
3442 }
3443 }
3444 return NULL;
3445 }
3446
3447 static int
3448 verifyCOFFHeader (COFF_header *hdr, pathchar *fileName)
3449 {
3450 #if defined(i386_HOST_ARCH)
3451 if (hdr->Machine != 0x14c) {
3452 errorBelch("%" PATH_FMT ": Not x86 PEi386", fileName);
3453 return 0;
3454 }
3455 #elif defined(x86_64_HOST_ARCH)
3456 if (hdr->Machine != 0x8664) {
3457 errorBelch("%" PATH_FMT ": Not x86_64 PEi386", fileName);
3458 return 0;
3459 }
3460 #else
3461 errorBelch("PEi386 not supported on this arch");
3462 #endif
3463
3464 if (hdr->SizeOfOptionalHeader != 0) {
3465 errorBelch("%" PATH_FMT ": PEi386 with nonempty optional header",
3466 fileName);
3467 return 0;
3468 }
3469 if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
3470 (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
3471 (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
3472 (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
3473 errorBelch("%" PATH_FMT ": Not a PEi386 object file", fileName);
3474 return 0;
3475 }
3476 if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
3477 /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
3478 errorBelch("%" PATH_FMT ": Invalid PEi386 word size or endiannness: %d",
3479 fileName,
3480 (int)(hdr->Characteristics));
3481 return 0;
3482 }
3483 return 1;
3484 }
3485
3486 static int
3487 ocVerifyImage_PEi386 ( ObjectCode* oc )
3488 {
3489 int i;
3490 UInt32 j, noRelocs;
3491 COFF_header* hdr;
3492 COFF_section* sectab;
3493 COFF_symbol* symtab;
3494 UChar* strtab;
3495 /* debugBelch("\nLOADING %s\n", oc->fileName); */
3496 hdr = (COFF_header*)(oc->image);
3497 sectab = (COFF_section*) (
3498 ((UChar*)(oc->image))
3499 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3500 );
3501 symtab = (COFF_symbol*) (
3502 ((UChar*)(oc->image))
3503 + hdr->PointerToSymbolTable
3504 );
3505 strtab = ((UChar*)symtab)
3506 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3507
3508 if (!verifyCOFFHeader(hdr, oc->fileName)) {
3509 return 0;
3510 }
3511
3512 /* If the string table size is way crazy, this might indicate that
3513 there are more than 64k relocations, despite claims to the
3514 contrary. Hence this test. */
3515 /* debugBelch("strtab size %d\n", * (UInt32*)strtab); */
3516 #if 0
3517 if ( (*(UInt32*)strtab) > 600000 ) {
3518 /* Note that 600k has no special significance other than being
3519 big enough to handle the almost-2MB-sized lumps that
3520 constitute HSwin32*.o. */
3521 debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
3522 return 0;
3523 }
3524 #endif
3525
3526 /* .BSS Section is initialized in ocGetNames_PEi386
3527 but we need the Sections array initialized here already. */
3528 Section *sections;
3529 sections = (Section*)stgCallocBytes(
3530 sizeof(Section),
3531 hdr->NumberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */
3532 "ocVerifyImage_PEi386(sections)");
3533 oc->sections = sections;
3534 oc->n_sections = hdr->NumberOfSections + 1;
3535
3536 /* Initialize the Sections */
3537 for (i = 0; i < hdr->NumberOfSections; i++) {
3538 COFF_section* sectab_i
3539 = (COFF_section*)
3540 myindex(sizeof_COFF_section, sectab, i);
3541
3542 /* Calculate the start of the data section */
3543 sections[i].start = oc->image + sectab_i->PointerToRawData;
3544 }
3545
3546 /* No further verification after this point; only debug printing. */
3547 i = 0;
3548 IF_DEBUG(linker, i=1);
3549 if (i == 0) return 1;
3550
3551 debugBelch("sectab offset = %" FMT_SizeT "\n",
3552 ((UChar*)sectab) - ((UChar*)hdr) );
3553 debugBelch("symtab offset = %" FMT_SizeT "\n",
3554 ((UChar*)symtab) - ((UChar*)hdr) );
3555 debugBelch("strtab offset = %" FMT_SizeT "\n",
3556 ((UChar*)strtab) - ((UChar*)hdr) );
3557
3558 debugBelch("\n" );
3559 debugBelch( "Machine: 0x%x\n", (UInt32)(hdr->Machine) );
3560 debugBelch( "# sections: %d\n", (UInt32)(hdr->NumberOfSections) );
3561 debugBelch( "time/date: 0x%x\n", (UInt32)(hdr->TimeDateStamp) );
3562 debugBelch( "symtab offset: %d\n", (UInt32)(hdr->PointerToSymbolTable) );
3563 debugBelch( "# symbols: %d\n", (UInt32)(hdr->NumberOfSymbols) );
3564 debugBelch( "sz of opt hdr: %d\n", (UInt32)(hdr->SizeOfOptionalHeader) );
3565 debugBelch( "characteristics: 0x%x\n", (UInt32)(hdr->Characteristics) );
3566
3567 /* Print the section table. */
3568 debugBelch("\n" );
3569 for (i = 0; i < hdr->NumberOfSections; i++) {
3570 COFF_reloc* reltab;
3571 COFF_section* sectab_i
3572 = (COFF_section*)
3573 myindex ( sizeof_COFF_section, sectab, i );
3574 Section section = sections[i];
3575 debugBelch(
3576 "\n"
3577 "section %d\n"
3578 " name `",
3579 i
3580 );
3581 printName ( sectab_i->Name, strtab );
3582 debugBelch(
3583 "'\n"
3584 " vsize %d\n"
3585 " vaddr %d\n"
3586 " data sz %d\n"
3587 " data off 0x%p\n"
3588 " num rel %d\n"
3589 " off rel %d\n"
3590 " ptr raw 0x%x\n",
3591 sectab_i->VirtualSize,
3592 sectab_i->VirtualAddress,
3593 sectab_i->SizeOfRawData,
3594 section.start,
3595 sectab_i->NumberOfRelocations,
3596 sectab_i->PointerToRelocations,
3597 sectab_i->PointerToRawData
3598 );
3599 reltab = (COFF_reloc*) (
3600 ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
3601 );
3602
3603 if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
3604 /* If the relocation field (a short) has overflowed, the
3605 * real count can be found in the first reloc entry.
3606 *
3607 * See Section 4.1 (last para) of the PE spec (rev6.0).
3608 */
3609 COFF_reloc* rel = (COFF_reloc*)
3610 myindex ( sizeof_COFF_reloc, reltab, 0 );
3611 noRelocs = rel->VirtualAddress;
3612 j = 1;
3613 } else {
3614 noRelocs = sectab_i->NumberOfRelocations;
3615 j = 0;
3616 }
3617
3618 for (; j < noRelocs; j++) {
3619 COFF_symbol* sym;
3620 COFF_reloc* rel = (COFF_reloc*)
3621 myindex ( sizeof_COFF_reloc, reltab, j );
3622 debugBelch(
3623 " type 0x%-4x vaddr 0x%-8x name `",
3624 (UInt32)rel->Type,
3625 rel->VirtualAddress );
3626 sym = (COFF_symbol*)
3627 myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
3628 /* Hmm..mysterious looking offset - what's it for? SOF */
3629 printName ( sym->Name, strtab -10 );
3630 debugBelch("'\n" );
3631 }
3632
3633 debugBelch("\n" );
3634 }
3635 debugBelch("\n" );
3636 debugBelch("string table has size 0x%x\n", * (UInt32*)strtab );
3637 debugBelch("---START of string table---\n");
3638 for (i = 4; i < *(Int32*)strtab; i++) {
3639 if (strtab[i] == 0)
3640 debugBelch("\n"); else
3641 debugBelch("%c", strtab[i] );
3642 }
3643 debugBelch("--- END of string table---\n");
3644
3645 debugBelch("\n" );
3646 i = 0;
3647 while (1) {
3648 COFF_symbol* symtab_i;
3649 if (i >= (Int32)(hdr->NumberOfSymbols)) break;
3650 symtab_i = (COFF_symbol*)
3651 myindex ( sizeof_COFF_symbol, symtab, i );
3652 debugBelch(
3653 "symbol %d\n"
3654 " name `",
3655 i
3656 );
3657 printName ( symtab_i->Name, strtab );
3658 debugBelch(
3659 "'\n"
3660 " value 0x%x\n"
3661 " 1+sec# %d\n"
3662 " type 0x%x\n"
3663 " sclass 0x%x\n"
3664 " nAux %d\n",
3665 symtab_i->Value,
3666 (Int32)(symtab_i->SectionNumber),
3667 (UInt32)symtab_i->Type,
3668 (UInt32)symtab_i->StorageClass,
3669 (UInt32)symtab_i->NumberOfAuxSymbols
3670 );
3671 i += symtab_i->NumberOfAuxSymbols;
3672 i++;
3673 }
3674
3675 debugBelch("\n" );
3676 return 1;
3677 }
3678
3679
3680 static int
3681 ocGetNames_PEi386 ( ObjectCode* oc )
3682 {
3683 COFF_header* hdr;
3684 COFF_section* sectab;
3685 COFF_symbol* symtab;
3686 UChar* strtab;
3687
3688 UChar* sname;
3689 SymbolAddr* addr;
3690 int i;
3691
3692 hdr = (COFF_header*)(oc->image);
3693 sectab = (COFF_section*) (
3694 ((UChar*)(oc->image))
3695 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3696 );
3697 symtab = (COFF_symbol*) (
3698 ((UChar*)(oc->image))
3699 + hdr->PointerToSymbolTable
3700 );
3701 strtab = ((UChar*)(oc->image))
3702 + hdr->PointerToSymbolTable
3703 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3704
3705 /* Allocate space for any (local, anonymous) .bss sections. */
3706
3707 for (i = 0; i < hdr->NumberOfSections; i++) {
3708 UInt32 bss_sz;
3709 UChar* zspace;
3710 COFF_section* sectab_i
3711 = (COFF_section*)
3712 myindex ( sizeof_COFF_section, sectab, i );
3713
3714 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3715
3716 if (0 != strcmp(secname, ".bss")) {
3717 stgFree(secname);
3718 continue;
3719 }
3720
3721 stgFree(secname);
3722
3723 /* sof 10/05: the PE spec text isn't too clear regarding what
3724 * the SizeOfRawData field is supposed to hold for object
3725 * file sections containing just uninitialized data -- for executables,
3726 * it is supposed to be zero; unclear what it's supposed to be
3727 * for object files. However, VirtualSize is guaranteed to be
3728 * zero for object files, which definitely suggests that SizeOfRawData
3729 * will be non-zero (where else would the size of this .bss section be
3730 * stored?) Looking at the COFF_section info for incoming object files,
3731 * this certainly appears to be the case.
3732 *
3733 * => I suspect we've been incorrectly handling .bss sections in (relocatable)
3734 * object files up until now. This turned out to bite us with ghc-6.4.1's use
3735 * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
3736 * variable decls into the .bss section. (The specific function in Q which
3737 * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
3738 */
3739 if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
3740 /* This is a non-empty .bss section.
3741 Allocate zeroed space for it */
3742 bss_sz = sectab_i->VirtualSize;
3743 if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
3744 zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
3745 oc->sections[i].start = zspace;
3746 addProddableBlock(oc, zspace, bss_sz);
3747 /* debugBelch("BSS anon section at 0x%x\n", zspace); */
3748 }
3749
3750 /* Copy section information into the ObjectCode. */
3751
3752 for (i = 0; i < hdr->NumberOfSections; i++) {
3753 UChar* start;
3754 UChar* end;
3755 UInt32 sz;
3756
3757 /* By default consider all section as CODE or DATA, which means we want to load them. */
3758 SectionKind kind
3759 = SECTIONKIND_CODE_OR_RODATA;
3760 COFF_section* sectab_i
3761 = (COFF_section*)
3762 myindex ( sizeof_COFF_section, sectab, i );
3763 Section section = oc->sections[i];
3764
3765 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3766
3767 IF_DEBUG(linker, debugBelch("section name = %s\n", secname ));
3768
3769 /* The PE file section flag indicates whether the section contains code or data. */
3770 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE ||
3771 sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
3772 kind = SECTIONKIND_CODE_OR_RODATA;
3773
3774 /* Check next if it contains any uninitialized data */
3775 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_UNINITIALIZED_DATA)
3776 kind = SECTIONKIND_RWDATA;
3777
3778 /* Finally check if it can be discarded. This will also ignore .debug sections */
3779 if (sectab_i->Characteristics & MYIMAGE_SCN_MEM_DISCARDABLE ||
3780 sectab_i->Characteristics & MYIMAGE_SCN_LNK_REMOVE)
3781 kind = SECTIONKIND_OTHER;
3782
3783 if (0==strcmp(".ctors", (char*)secname))
3784 kind = SECTIONKIND_INIT_ARRAY;
3785
3786 ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->VirtualSize == 0);
3787 sz = sectab_i->SizeOfRawData;
3788 if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;
3789
3790 start = section.start;
3791 end = start + sz - 1;
3792
3793 if (kind != SECTIONKIND_OTHER && end >= start) {
3794 addSection(&oc->sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
3795 addProddableBlock(oc, start, sz);
3796 }
3797
3798 stgFree(secname);
3799 }
3800
3801 /* Copy exported symbols into the ObjectCode. */
3802
3803 oc->n_symbols = hdr->NumberOfSymbols;
3804 oc->symbols = stgCallocBytes(sizeof(SymbolName*), oc->n_symbols,
3805 "ocGetNames_PEi386(oc->symbols)");
3806
3807 /* Work out the size of the global BSS section */
3808 StgWord globalBssSize = 0;
3809 for (i=0; i < (int)hdr->NumberOfSymbols; i++) {
3810 COFF_symbol* symtab_i;
3811 symtab_i = (COFF_symbol*)
3812 myindex ( sizeof_COFF_symbol, symtab, i );
3813 if (symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3814 && symtab_i->Value > 0
3815 && symtab_i->StorageClass != MYIMAGE_SYM_CLASS_SECTION) {
3816 globalBssSize += symtab_i->Value;
3817 }
3818 i += symtab_i->NumberOfAuxSymbols;
3819 }
3820
3821 /* Allocate BSS space */
3822 SymbolAddr* bss = NULL;
3823 if (globalBssSize > 0) {
3824 bss = stgCallocBytes(1, globalBssSize,
3825 "ocGetNames_PEi386(non-anonymous bss)");
3826 addSection(&oc->sections[oc->n_sections-1],
3827 SECTIONKIND_RWDATA, SECTION_MALLOC,
3828 bss, globalBssSize, 0, 0, 0);
3829 IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
3830 addProddableBlock(oc, bss, globalBssSize);
3831 } else {
3832 addSection(&oc->sections[oc->n_sections-1],
3833 SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
3834 }
3835
3836 for (i = 0; i < oc->n_symbols; i++) {
3837 COFF_symbol* symtab_i;
3838 symtab_i = (COFF_symbol*)
3839 myindex ( sizeof_COFF_symbol, symtab, i );
3840
3841 addr = NULL;
3842 HsBool isWeak = HS_BOOL_FALSE;
3843 if ( symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED
3844 && symtab_i->SectionNumber > 0) {
3845 /* This symbol is global and defined, viz, exported */
3846 /* for MYIMAGE_SYMCLASS_EXTERNAL
3847 && !MYIMAGE_SYM_UNDEFINED,
3848 the address of the symbol is:
3849 address of relevant section + offset in section
3850 */
3851 COFF_section* sectabent
3852 = (COFF_section*) myindex ( sizeof_COFF_section,
3853 sectab,
3854 symtab_i->SectionNumber-1 );
3855 if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL
3856 || ( symtab_i->StorageClass == MYIMAGE_SYM_CLASS_STATIC
3857 && sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT)
3858 ) {
3859 addr = (void*)((size_t)oc->sections[symtab_i->SectionNumber-1].start
3860 + symtab_i->Value);
3861 if (sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT) {
3862 isWeak = HS_BOOL_TRUE;
3863 }
3864 }
3865 }
3866 else if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_WEAK_EXTERNAL) {
3867 isWeak = HS_BOOL_TRUE;
3868 }
3869 else if ( symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3870 && symtab_i->Value > 0) {
3871 /* This symbol isn't in any section at all, ie, global bss.
3872 Allocate zeroed space for it from the BSS section */
3873 addr = bss;
3874 bss = (SymbolAddr*)((StgWord)bss + (StgWord)symtab_i->Value);
3875 IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symtab_i->Value));
3876 }
3877
3878 sname = cstring_from_COFF_symbol_name(symtab_i->Name, strtab);
3879 if (addr != NULL || isWeak == HS_BOOL_TRUE) {
3880
3881 /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
3882 IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname);)
3883 ASSERT(i >= 0 && i < oc->n_symbols);
3884 /* cstring_from_COFF_symbol_name always succeeds. */
3885 oc->symbols[i] = (SymbolName*)sname;
3886 if (isWeak == HS_BOOL_TRUE) {
3887 setWeakSymbol(oc, sname);
3888 }
3889
3890 if (! ghciInsertSymbolTable(oc->fileName, symhash, (SymbolName*)sname, addr,
3891 isWeak, oc)) {
3892 return 0;
3893 }
3894 } else {
3895 /* We're skipping the symbol, but if we ever load this
3896 object file we'll want to skip it then too. */
3897 oc->symbols[i] = NULL;
3898
3899 # if 0
3900 debugBelch(
3901 "IGNORING symbol %d\n"
3902 " name `",
3903 i
3904 );
3905 printName ( symtab_i->Name, strtab );
3906 debugBelch(
3907 "'\n"
3908 " value 0x%x\n"
3909 " 1+sec# %d\n"
3910 " type 0x%x\n"
3911 " sclass 0x%x\n"
3912 " nAux %d\n",
3913 symtab_i->Value,
3914 (Int32)(symtab_i->SectionNumber),
3915 (UInt32)symtab_i->Type,
3916 (UInt32)symtab_i->StorageClass,
3917 (UInt32)symtab_i->NumberOfAuxSymbols
3918 );