Revert "Linker: some extra debugging / logging"
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31
32 #if !defined(mingw32_HOST_OS)
33 #include "posix/Signals.h"
34 #endif
35
36 // get protos for is*()
37 #include <ctype.h>
38
39 #ifdef HAVE_SYS_TYPES_H
40 #include <sys/types.h>
41 #endif
42
43 #include <inttypes.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <stdio.h>
47 #include <assert.h>
48 #include <libgen.h>
49
50 #ifdef HAVE_SYS_STAT_H
51 #include <sys/stat.h>
52 #endif
53
54 #if defined(HAVE_DLFCN_H)
55 #include <dlfcn.h>
56 #endif
57
58 /* PowerPC and ARM have relative branch instructions with only 24 bit
59 * displacements and therefore need jump islands contiguous with each object
60 * code module.
61 */
62 #if defined(powerpc_HOST_ARCH)
63 #define SHORT_REL_BRANCH 1
64 #endif
65 #if defined(arm_HOST_ARCH)
66 #define SHORT_REL_BRANCH 1
67 #endif
68
69 #if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
70 #define USE_CONTIGUOUS_MMAP 1
71 #else
72 #define USE_CONTIGUOUS_MMAP 0
73 #endif
74
75 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
76 # define OBJFORMAT_ELF
77 # include <regex.h> // regex is already used by dlopen() so this is OK
78 // to use here without requiring an additional lib
79 #elif defined (mingw32_HOST_OS)
80 # define OBJFORMAT_PEi386
81 # include <windows.h>
82 # include <shfolder.h> /* SHGetFolderPathW */
83 # include <math.h>
84 # include <wchar.h>
85 #elif defined(darwin_HOST_OS)
86 # define OBJFORMAT_MACHO
87 # include <regex.h>
88 # include <mach/machine.h>
89 # include <mach-o/fat.h>
90 # include <mach-o/loader.h>
91 # include <mach-o/nlist.h>
92 # include <mach-o/reloc.h>
93 #if defined(powerpc_HOST_ARCH)
94 # include <mach-o/ppc/reloc.h>
95 #endif
96 #if defined(x86_64_HOST_ARCH)
97 # include <mach-o/x86_64/reloc.h>
98 #endif
99 #endif
100
101 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
102 #define ALWAYS_PIC
103 #endif
104
105 #if defined(dragonfly_HOST_OS)
106 #include <sys/tls.h>
107 #endif
108
109 /* SymbolInfo tracks a symbol's address, the object code from which
110 it originated, and whether or not it's weak.
111
112 RtsSymbolInfo is used to track the state of the symbols currently
113 loaded or to be loaded by the Linker.
114
115 Where the information in the `ObjectCode` is used to track the
116 original status of the symbol inside the `ObjectCode`.
117
118 A weak symbol that has been used will still be marked as weak
119 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
120 */
121 typedef struct _RtsSymbolInfo {
122 SymbolAddr* value;
123 ObjectCode *owner;
124 HsBool weak;
125 } RtsSymbolInfo;
126
127 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
128 This hashtable will contain information on all symbols
129 that we know of, however the .o they are in may not be loaded.
130
131 Until the ObjectCode the symbol belongs to is actually
132 loaded this symbol may be replaced. So do not rely on
133 addresses of unloaded symbols.
134
135 Note [runtime-linker-phases]
136 --------------------------------------
137 Broadly the behavior of the runtime linker can be
138 split into the following four phases:
139
140 - Indexing (e.g. ocVerifyImage and ocGetNames)
141 - Initialization (e.g. ocResolve and ocRunInit)
142 - Resolve (e.g. resolveObjs())
143 - Lookup (e.g. lookupSymbol)
144
145 This is to enable lazy loading of symbols. Eager loading is problematic
146 as it means that all symbols must be available, even those which we will
147 never use. This is especially painful of Windows, where the number of
148 libraries required to link things like mingwex grows to be quite high.
149
150 We proceed through these stages as follows,
151
152 * During Indexing we verify and open the ObjectCode and
153 perform a quick scan/indexing of the ObjectCode. All the work
154 required to actually load the ObjectCode is done.
155
156 All symbols from the ObjectCode is also inserted into
157 `symhash`, where possible duplicates are handled via the semantics
158 described in `ghciInsertSymbolTable`.
159
160 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
161 depending on whether they are an archive members or not.
162
163 * During initialization we load ObjectCode, perform relocations, execute
164 static constructors etc. This phase may trigger other ObjectCodes to
165 be loaded because of the calls to lookupSymbol.
166
167 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
168 previous status was `OBJECT_LOADED`.
169
170 * During resolve we attempt to resolve all the symbols needed for the
171 initial link. This essentially means, that for any ObjectCode given
172 directly to the command-line we perform lookupSymbols on the required
173 symbols. lookupSymbols may trigger the loading of additional ObjectCode
174 if required.
175
176 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
177 the previous status was `OBJECT_NEEDED`.
178
179 * Lookup symbols is used to lookup any symbols required, both during initial
180 link and during statement and expression compilations in the REPL.
181 Declaration of e.g. an foreign import, will eventually call lookupSymbol
182 which will either fail (symbol unknown) or succeed (and possibly triggered a
183 load).
184
185 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
186
187 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
188 by calling unloadObj and starting over.
189 When a new declaration or statement is performed ultimately lookupSymbol is called
190 without doing a re-link.
191
192 The goal of these different phases is to allow the linker to be able to perform
193 "lazy loading" of ObjectCode. The reason for this is that we want to only link
194 in symbols that are actually required for the link. This reduces:
195
196 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
197 then we don't need to load libB. This means the dependency chain for libraries
198 such as mingw32 and mingwex can be broken down.
199
200 2) The number of duplicate symbols, since now only symbols that are
201 true duplicates will display the error.
202 */
203 static /*Str*/HashTable *symhash;
204
205 /* List of currently loaded objects */
206 ObjectCode *objects = NULL; /* initially empty */
207
208 /* List of objects that have been unloaded via unloadObj(), but are waiting
209 to be actually freed via checkUnload() */
210 ObjectCode *unloaded_objects = NULL; /* initially empty */
211
212 #ifdef THREADED_RTS
213 /* This protects all the Linker's global state except unloaded_objects */
214 Mutex linker_mutex;
215 /*
216 * This protects unloaded_objects. We have a separate mutex for this, because
217 * the GC needs to access unloaded_objects in checkUnload, while the linker only
218 * needs to access unloaded_objects in unloadObj(), so this allows most linker
219 * operations proceed concurrently with the GC.
220 */
221 Mutex linker_unloaded_mutex;
222 #endif
223
224 /* Type of the initializer */
225 typedef void (*init_t) (int argc, char **argv, char **env);
226
227 static HsInt isAlreadyLoaded( pathchar *path );
228 static HsInt loadOc( ObjectCode* oc );
229 static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
230 rtsBool mapped, char *archiveMemberName,
231 int misalignment
232 );
233
234 // Use wchar_t for pathnames on Windows (#5697)
235 #if defined(mingw32_HOST_OS)
236 #define pathcmp wcscmp
237 #define pathlen wcslen
238 #define pathopen _wfopen
239 #define pathstat _wstat
240 #define struct_stat struct _stat
241 #define open wopen
242 #define WSTR(s) L##s
243 #define pathprintf swprintf
244 #define pathsize sizeof(wchar_t)
245 #else
246 #define pathcmp strcmp
247 #define pathlen strlen
248 #define pathopen fopen
249 #define pathstat stat
250 #define struct_stat struct stat
251 #define WSTR(s) s
252 #define pathprintf snprintf
253 #define pathsize sizeof(char)
254 #endif
255
256 static pathchar* pathdup(pathchar *path)
257 {
258 pathchar *ret;
259 #if defined(mingw32_HOST_OS)
260 ret = wcsdup(path);
261 #else
262 /* sigh, strdup() isn't a POSIX function, so do it the long way */
263 ret = stgMallocBytes( strlen(path)+1, "pathdup" );
264 strcpy(ret, path);
265 #endif
266 return ret;
267 }
268
269 static pathchar* pathdir(pathchar *path)
270 {
271 pathchar *ret;
272 #if defined(mingw32_HOST_OS)
273 pathchar *drive, *dirName;
274 size_t memberLen = pathlen(path) + 1;
275 dirName = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
276 ret = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
277 drive = stgMallocBytes(pathsize * _MAX_DRIVE, "pathdir(path)");
278 _wsplitpath_s(path, drive, _MAX_DRIVE, dirName, pathsize * pathlen(path), NULL, 0, NULL, 0);
279 pathprintf(ret, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), drive, dirName);
280 stgFree(drive);
281 stgFree(dirName);
282 #else
283 pathchar* dirName = dirname(path);
284 size_t memberLen = pathlen(dirName);
285 ret = stgMallocBytes(pathsize * (memberLen + 2), "pathdir(path)");
286 strcpy(ret, dirName);
287 ret[memberLen ] = '/';
288 ret[memberLen+1] = '\0';
289 #endif
290 return ret;
291 }
292
293 static pathchar* mkPath(char* path)
294 {
295 #if defined(mingw32_HOST_OS)
296 size_t required = mbstowcs(NULL, path, 0);
297 pathchar *ret = stgMallocBytes(sizeof(pathchar) * (required + 1), "mkPath");
298 if (mbstowcs(ret, path, required) == (size_t)-1)
299 {
300 barf("mkPath failed converting char* to wchar_t*");
301 }
302 ret[required] = '\0';
303 return ret;
304 #else
305 return pathdup(path);
306 #endif
307 }
308
309 /* Generic wrapper function to try and Resolve and RunInit oc files */
310 int ocTryLoad( ObjectCode* oc );
311
312 #if defined(OBJFORMAT_ELF)
313 static int ocVerifyImage_ELF ( ObjectCode* oc );
314 static int ocGetNames_ELF ( ObjectCode* oc );
315 static int ocResolve_ELF ( ObjectCode* oc );
316 static int ocRunInit_ELF ( ObjectCode* oc );
317 #if NEED_SYMBOL_EXTRAS
318 static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
319 #endif
320 #elif defined(OBJFORMAT_PEi386)
321 static int ocVerifyImage_PEi386 ( ObjectCode* oc );
322 static int ocGetNames_PEi386 ( ObjectCode* oc );
323 static int ocResolve_PEi386 ( ObjectCode* oc );
324 static int ocRunInit_PEi386 ( ObjectCode* oc );
325 static void *lookupSymbolInDLLs ( unsigned char *lbl );
326 /* See Note [mingw-w64 name decoration scheme] */
327 #ifndef x86_64_HOST_ARCH
328 static void zapTrailingAtSign ( unsigned char *sym );
329 #endif
330
331 #if defined(x86_64_HOST_ARCH)
332 #define USED_IF_x86_64_HOST_ARCH /* Nothing */
333 #else
334 #define USED_IF_x86_64_HOST_ARCH STG_UNUSED
335 #endif
336
337 static char *allocateImageAndTrampolines (
338 pathchar* arch_name, char* member_name,
339 FILE* f,
340 int size,
341 int isThin);
342 #if defined(x86_64_HOST_ARCH)
343 static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
344 static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
345 #define PEi386_IMAGE_OFFSET 4
346 #else
347 #define PEi386_IMAGE_OFFSET 0
348 #endif
349 #elif defined(OBJFORMAT_MACHO)
350 static int ocVerifyImage_MachO ( ObjectCode* oc );
351 static int ocGetNames_MachO ( ObjectCode* oc );
352 static int ocResolve_MachO ( ObjectCode* oc );
353 static int ocRunInit_MachO ( ObjectCode* oc );
354 static int machoGetMisalignment( FILE * );
355 #if NEED_SYMBOL_EXTRAS
356 static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
357 #endif
358 #ifdef powerpc_HOST_ARCH
359 static void machoInitSymbolsWithoutUnderscore( void );
360 #endif
361 #endif
362
363 #if defined(OBJFORMAT_PEi386)
364 /* string utility function */
365 static HsBool endsWithPath(pathchar* base, pathchar* str) {
366 int blen = pathlen(base);
367 int slen = pathlen(str);
368 return (blen >= slen) && (0 == pathcmp(base + blen - slen, str));
369 }
370
371 static int checkAndLoadImportLibrary(
372 pathchar* arch_name,
373 char* member_name,
374 FILE* f);
375
376 static int findAndLoadImportLibrary(
377 ObjectCode* oc
378 );
379
380 static UChar *myindex(
381 int scale,
382 void* base,
383 int index);
384 static UChar *cstring_from_COFF_symbol_name(
385 UChar* name,
386 UChar* strtab);
387 static char *cstring_from_section_name(
388 UChar* name,
389 UChar* strtab);
390
391
392 /* Add ld symbol for PE image base. */
393 #if defined(__GNUC__)
394 #define __ImageBase __MINGW_LSYMBOL(_image_base__)
395 #endif
396
397 /* Get the base of the module. */
398 /* This symbol is defined by ld. */
399 extern IMAGE_DOS_HEADER __ImageBase;
400 #define __image_base (void*)((HINSTANCE)&__ImageBase)
401
402 // MingW-w64 is missing these from the implementation. So we have to look them up
403 typedef DLL_DIRECTORY_COOKIE(WINAPI *LPAddDLLDirectory)(PCWSTR NewDirectory);
404 typedef WINBOOL(WINAPI *LPRemoveDLLDirectory)(DLL_DIRECTORY_COOKIE Cookie);
405 #endif /* OBJFORMAT_PEi386 */
406
407 static void freeProddableBlocks (ObjectCode *oc);
408
409 /* on x86_64 we have a problem with relocating symbol references in
410 * code that was compiled without -fPIC. By default, the small memory
411 * model is used, which assumes that symbol references can fit in a
412 * 32-bit slot. The system dynamic linker makes this work for
413 * references to shared libraries by either (a) allocating a jump
414 * table slot for code references, or (b) moving the symbol at load
415 * time (and copying its contents, if necessary) for data references.
416 *
417 * We unfortunately can't tell whether symbol references are to code
418 * or data. So for now we assume they are code (the vast majority
419 * are), and allocate jump-table slots. Unfortunately this will
420 * SILENTLY generate crashing code for data references. This hack is
421 * enabled by X86_64_ELF_NONPIC_HACK.
422 *
423 * One workaround is to use shared Haskell libraries. This is
424 * coming. Another workaround is to keep the static libraries but
425 * compile them with -fPIC, because that will generate PIC references
426 * to data which can be relocated. The PIC code is still too green to
427 * do this systematically, though.
428 *
429 * See bug #781
430 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
431 *
432 * Naming Scheme for Symbol Macros
433 *
434 * SymI_*: symbol is internal to the RTS. It resides in an object
435 * file/library that is statically.
436 * SymE_*: symbol is external to the RTS library. It might be linked
437 * dynamically.
438 *
439 * Sym*_HasProto : the symbol prototype is imported in an include file
440 * or defined explicitly
441 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
442 * default proto extern void sym(void);
443 */
444 #define X86_64_ELF_NONPIC_HACK 1
445
446 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
447 * small memory model on this architecture (see gcc docs,
448 * -mcmodel=small).
449 *
450 * MAP_32BIT not available on OpenBSD/amd64
451 */
452 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
453 #define TRY_MAP_32BIT MAP_32BIT
454 #else
455 #define TRY_MAP_32BIT 0
456 #endif
457
458 /*
459 Note [The ARM/Thumb Story]
460 ~~~~~~~~~~~~~~~~~~~~~~~~~~
461
462 Support for the ARM architecture is complicated by the fact that ARM has not
463 one but several instruction encodings. The two relevant ones here are the original
464 ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
465 of the instruction set.
466
467 How the CPU decodes a particular instruction is determined by a mode bit. This
468 mode bit is set on jump instructions, the value being determined by the low
469 bit of the target address: An odd address means the target is a procedure
470 encoded in the Thumb encoding whereas an even address means it's a traditional
471 ARM procedure (the actual address jumped to is even regardless of the encoding bit).
472
473 Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
474 is tricky. If the linker needs to link a call by an ARM object into Thumb code
475 (or vice-versa) it will produce a jump island. This, however, is incompatible with
476 GHC's tables-next-to-code. For this reason, it is critical that GHC emit
477 exclusively ARM or Thumb objects for all Haskell code.
478
479 We still do, however, need to worry about foreign code.
480 */
481
482 /*
483 * Due to the small memory model (see above), on x86_64 we have to map
484 * all our non-PIC object files into the low 2Gb of the address space
485 * (why 2Gb and not 4Gb? Because all addresses must be reachable
486 * using a 32-bit signed PC-relative offset). On Linux we can do this
487 * using the MAP_32BIT flag to mmap(), however on other OSs
488 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
489 * can't do this. So on these systems, we have to pick a base address
490 * in the low 2Gb of the address space and try to allocate memory from
491 * there.
492 *
493 * We pick a default address based on the OS, but also make this
494 * configurable via an RTS flag (+RTS -xm)
495 */
496 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
497
498 #if defined(MAP_32BIT)
499 // Try to use MAP_32BIT
500 #define MMAP_32BIT_BASE_DEFAULT 0
501 #else
502 // A guess: 1Gb.
503 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
504 #endif
505
506 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
507 #endif
508
509 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
510 ObjectCode *owner)
511 {
512 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
513 if (!pinfo || owner != pinfo->owner) return;
514 removeStrHashTable(table, key, NULL);
515 stgFree(pinfo);
516 }
517
518 /* -----------------------------------------------------------------------------
519 * Insert symbols into hash tables, checking for duplicates.
520 *
521 * Returns: 0 on failure, nonzero on success
522 */
523 /*
524 Note [weak-symbols-support]
525 -------------------------------------
526 While ghciInsertSymbolTable does implement extensive
527 logic for weak symbol support, weak symbols are not currently
528 fully supported by the RTS. This code is mostly here for COMDAT
529 support which uses the weak symbols support.
530
531 Linking weak symbols defined purely in C code with other C code
532 should also work, probably. Observing weak symbols in Haskell
533 won't.
534
535 Some test have been written for weak symbols but have been disabled
536 mostly because it's unsure how the weak symbols support should look.
537 See Trac #11223
538 */
539 static int ghciInsertSymbolTable(
540 pathchar* obj_name,
541 HashTable *table,
542 const SymbolName* key,
543 SymbolAddr* data,
544 HsBool weak,
545 ObjectCode *owner)
546 {
547 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
548 if (!pinfo) /* new entry */
549 {
550 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
551 pinfo->value = data;
552 pinfo->owner = owner;
553 pinfo->weak = weak;
554 insertStrHashTable(table, key, pinfo);
555 return 1;
556 }
557 else if (weak && data && pinfo->weak && !pinfo->value)
558 {
559 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
560 pinfo->value = data;
561 pinfo->owner = owner;
562 return 1;
563 }
564 else if (weak)
565 {
566 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
567 already know of another copy throw this one away.
568
569 or both weak symbols have a nonzero value. Keep the existing one.
570
571 This also preserves the semantics of linking against
572 the first symbol we find. */
573 }
574 else if (pinfo->weak && !weak) /* weak symbol is in the table */
575 {
576 /* override the weak definition with the non-weak one */
577 pinfo->value = data;
578 pinfo->owner = owner;
579 pinfo->weak = HS_BOOL_FALSE;
580 return 1;
581 }
582 else if ( pinfo->owner
583 && pinfo->owner->status != OBJECT_RESOLVED
584 && pinfo->owner->status != OBJECT_NEEDED)
585 {
586 /* If the other symbol hasn't been loaded or will be loaded and we want to
587 explicitly load the new one, we can just swap it out and load the one
588 that has been requested. If not, just keep the first one encountered.
589
590 Because the `symHash' table consists symbols we've also not loaded but
591 found during the initial scan this is safe to do. If however the existing
592 symbol has been loaded then it means we have a duplicate.
593
594 This is essentially emulating the behavior of a linker wherein it will always
595 link in object files that are .o file arguments, but only take object files
596 from archives as needed. */
597 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
598 pinfo->value = data;
599 pinfo->owner = owner;
600 pinfo->weak = weak;
601 }
602
603 return 1;
604 }
605 else if (pinfo->owner == owner)
606 {
607 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
608 return 1;
609 }
610 else if (owner && owner->status == OBJECT_LOADED)
611 {
612 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
613 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
614 call this function again to trigger the duplicate error. */
615 return 1;
616 }
617
618 pathchar* archiveName = NULL;
619 debugBelch(
620 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
621 " %s\n"
622 "whilst processing object file\n"
623 " %" PATH_FMT "\n"
624 "The symbol was previously defined in\n"
625 " %" PATH_FMT "\n"
626 "This could be caused by:\n"
627 " * Loading two different object files which export the same symbol\n"
628 " * Specifying the same object file twice on the GHCi command line\n"
629 " * An incorrect `package.conf' entry, causing some object to be\n"
630 " loaded twice.\n",
631 (char*)key,
632 obj_name,
633 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
634 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
635 : pinfo->owner->fileName
636 );
637
638 if (archiveName)
639 {
640 stgFree(archiveName);
641 archiveName = NULL;
642 }
643 return 0;
644 }
645
646 /* -----------------------------------------------------------------------------
647 * Looks up symbols into hash tables.
648 *
649 * Returns: 0 on failure and result is not set,
650 * nonzero on success and result set to nonzero pointer
651 */
652 static HsBool ghciLookupSymbolInfo(HashTable *table,
653 const SymbolName* key, RtsSymbolInfo **result)
654 {
655 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
656 if (!pinfo) {
657 *result = NULL;
658 return HS_BOOL_FALSE;
659 }
660 if (pinfo->weak)
661 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
662 /* Once it's looked up, it can no longer be overridden */
663 pinfo->weak = HS_BOOL_FALSE;
664
665 *result = pinfo;
666 return HS_BOOL_TRUE;
667 }
668
669 /* -----------------------------------------------------------------------------
670 * initialize the object linker
671 */
672
673
674 static int linker_init_done = 0 ;
675
676 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
677 static void *dl_prog_handle;
678 static regex_t re_invalid;
679 static regex_t re_realso;
680 #ifdef THREADED_RTS
681 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
682 #endif
683 #elif defined(OBJFORMAT_PEi386)
684 void addDLLHandle(pathchar* dll_name, HINSTANCE instance);
685 #endif
686
687 void initLinker (void)
688 {
689 // default to retaining CAFs for backwards compatibility. Most
690 // users will want initLinker_(0): otherwise unloadObj() will not
691 // be able to unload object files when they contain CAFs.
692 initLinker_(1);
693 }
694
695 void
696 initLinker_ (int retain_cafs)
697 {
698 RtsSymbolVal *sym;
699 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
700 int compileResult;
701 #endif
702
703 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
704
705 /* Make initLinker idempotent, so we can call it
706 before every relevant operation; that means we
707 don't need to initialise the linker separately */
708 if (linker_init_done == 1) {
709 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
710 return;
711 } else {
712 linker_init_done = 1;
713 }
714
715 objects = NULL;
716 unloaded_objects = NULL;
717
718 #if defined(THREADED_RTS)
719 initMutex(&linker_mutex);
720 initMutex(&linker_unloaded_mutex);
721 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
722 initMutex(&dl_mutex);
723 #endif
724 #endif
725 symhash = allocStrHashTable();
726
727 /* populate the symbol table with stuff from the RTS */
728 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
729 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
730 symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
731 barf("ghciInsertSymbolTable failed");
732 }
733 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
734 }
735 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
736 machoInitSymbolsWithoutUnderscore();
737 # endif
738 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
739 referenced from a statically linked module. We need to mimic this, but
740 we cannot use NULL because we use it to mean nonexistent symbols. So we
741 use an arbitrary (hopefully unique) address here.
742 */
743 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
744 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
745 barf("ghciInsertSymbolTable failed");
746 }
747
748 #if defined(OBJFORMAT_PEi386)
749 if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
750 symhash, "__image_base__", __image_base, HS_BOOL_TRUE, NULL)) {
751 barf("ghciInsertSymbolTable failed");
752 }
753 #endif /* OBJFORMAT_PEi386 */
754
755
756 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
757 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
758 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
759 retain_cafs ? newRetainedCAF : newGCdCAF,
760 HS_BOOL_FALSE, NULL)) {
761 barf("ghciInsertSymbolTable failed");
762 }
763
764 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
765 # if defined(RTLD_DEFAULT)
766 dl_prog_handle = RTLD_DEFAULT;
767 # else
768 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
769 # endif /* RTLD_DEFAULT */
770
771 compileResult = regcomp(&re_invalid,
772 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
773 REG_EXTENDED);
774 if (compileResult != 0) {
775 barf("Compiling re_invalid failed");
776 }
777 compileResult = regcomp(&re_realso,
778 "(GROUP|INPUT) *\\( *([^ )]+)",
779 REG_EXTENDED);
780 if (compileResult != 0) {
781 barf("Compiling re_realso failed");
782 }
783 # endif
784
785 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
786 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
787 // User-override for mmap_32bit_base
788 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
789 }
790 #endif
791
792 #if defined(mingw32_HOST_OS)
793 /*
794 * These two libraries cause problems when added to the static link,
795 * but are necessary for resolving symbols in GHCi, hence we load
796 * them manually here.
797 */
798 addDLL(WSTR("msvcrt"));
799 addDLL(WSTR("kernel32"));
800 addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
801 #endif
802
803 if (RTS_LINKER_USE_MMAP)
804 m32_allocator_init();
805
806 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
807 return;
808 }
809
810 void
811 exitLinker( void ) {
812 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
813 if (linker_init_done == 1) {
814 regfree(&re_invalid);
815 regfree(&re_realso);
816 #ifdef THREADED_RTS
817 closeMutex(&dl_mutex);
818 #endif
819 }
820 #endif
821 if (linker_init_done == 1) {
822 freeHashTable(symhash, free);
823 }
824 #ifdef THREADED_RTS
825 closeMutex(&linker_mutex);
826 #endif
827 }
828
829 /* -----------------------------------------------------------------------------
830 * Loading DLL or .so dynamic libraries
831 * -----------------------------------------------------------------------------
832 *
833 * Add a DLL from which symbols may be found. In the ELF case, just
834 * do RTLD_GLOBAL-style add, so no further messing around needs to
835 * happen in order that symbols in the loaded .so are findable --
836 * lookupSymbol() will subsequently see them by dlsym on the program's
837 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
838 *
839 * In the PEi386 case, open the DLLs and put handles to them in a
840 * linked list. When looking for a symbol, try all handles in the
841 * list. This means that we need to load even DLLs that are guaranteed
842 * to be in the ghc.exe image already, just so we can get a handle
843 * to give to loadSymbol, so that we can find the symbols. For such
844 * libraries, the LoadLibrary call should be a no-op except for returning
845 * the handle.
846 *
847 */
848
849 #if defined(OBJFORMAT_PEi386)
850 /* A record for storing handles into DLLs. */
851
852 typedef
853 struct _OpenedDLL {
854 pathchar* name;
855 struct _OpenedDLL* next;
856 HINSTANCE instance;
857 }
858 OpenedDLL;
859
860 /* A list thereof. */
861 static OpenedDLL* opened_dlls = NULL;
862
863 /* A record for storing indirectly linked functions from DLLs. */
864 typedef
865 struct _IndirectAddr {
866 SymbolAddr* addr;
867 struct _IndirectAddr* next;
868 }
869 IndirectAddr;
870
871 /* A list thereof. */
872 static IndirectAddr* indirects = NULL;
873
874 /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
875 void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
876 OpenedDLL* o_dll;
877 o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );
878 o_dll->name = dll_name ? pathdup(dll_name) : NULL;
879 o_dll->instance = instance;
880 o_dll->next = opened_dlls;
881 opened_dlls = o_dll;
882 }
883
884 #endif
885
886 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
887
888 /* Suppose in ghci we load a temporary SO for a module containing
889 f = 1
890 and then modify the module, recompile, and load another temporary
891 SO with
892 f = 2
893 Then as we don't unload the first SO, dlsym will find the
894 f = 1
895 symbol whereas we want the
896 f = 2
897 symbol. We therefore need to keep our own SO handle list, and
898 try SOs in the right order. */
899
900 typedef
901 struct _OpenedSO {
902 struct _OpenedSO* next;
903 void *handle;
904 }
905 OpenedSO;
906
907 /* A list thereof. */
908 static OpenedSO* openedSOs = NULL;
909
910 static const char *
911 internal_dlopen(const char *dll_name)
912 {
913 OpenedSO* o_so;
914 void *hdl;
915 const char *errmsg;
916 char *errmsg_copy;
917
918 // omitted: RTLD_NOW
919 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
920 IF_DEBUG(linker,
921 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
922
923 //-------------- Begin critical section ------------------
924 // This critical section is necessary because dlerror() is not
925 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
926 // Also, the error message returned must be copied to preserve it
927 // (see POSIX also)
928
929 ACQUIRE_LOCK(&dl_mutex);
930 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
931
932 errmsg = NULL;
933 if (hdl == NULL) {
934 /* dlopen failed; return a ptr to the error msg. */
935 errmsg = dlerror();
936 if (errmsg == NULL) errmsg = "addDLL: unknown error";
937 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
938 strcpy(errmsg_copy, errmsg);
939 errmsg = errmsg_copy;
940 } else {
941 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
942 o_so->handle = hdl;
943 o_so->next = openedSOs;
944 openedSOs = o_so;
945 }
946
947 RELEASE_LOCK(&dl_mutex);
948 //--------------- End critical section -------------------
949
950 return errmsg;
951 }
952
953 /*
954 Note [RTLD_LOCAL]
955
956 In GHCi we want to be able to override previous .so's with newly
957 loaded .so's when we recompile something. This further implies that
958 when we look up a symbol in internal_dlsym() we have to iterate
959 through the loaded libraries (in order from most recently loaded to
960 oldest) looking up the symbol in each one until we find it.
961
962 However, this can cause problems for some symbols that are copied
963 by the linker into the executable image at runtime - see #8935 for a
964 lengthy discussion. To solve that problem we need to look up
965 symbols in the main executable *first*, before attempting to look
966 them up in the loaded .so's. But in order to make that work, we
967 have to always call dlopen with RTLD_LOCAL, so that the loaded
968 libraries don't populate the global symbol table.
969 */
970
971 static void *
972 internal_dlsym(const char *symbol) {
973 OpenedSO* o_so;
974 void *v;
975
976 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
977 ACQUIRE_LOCK(&dl_mutex);
978 dlerror();
979 // look in program first
980 v = dlsym(dl_prog_handle, symbol);
981 if (dlerror() == NULL) {
982 RELEASE_LOCK(&dl_mutex);
983 return v;
984 }
985
986 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
987 v = dlsym(o_so->handle, symbol);
988 if (dlerror() == NULL) {
989 RELEASE_LOCK(&dl_mutex);
990 return v;
991 }
992 }
993 RELEASE_LOCK(&dl_mutex);
994 return v;
995 }
996 # endif
997
998 const char *
999 addDLL( pathchar *dll_name )
1000 {
1001 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1002 /* ------------------- ELF DLL loader ------------------- */
1003
1004 #define NMATCH 5
1005 regmatch_t match[NMATCH];
1006 const char *errmsg;
1007 FILE* fp;
1008 size_t match_length;
1009 #define MAXLINE 1000
1010 char line[MAXLINE];
1011 int result;
1012
1013 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
1014 errmsg = internal_dlopen(dll_name);
1015
1016 if (errmsg == NULL) {
1017 return NULL;
1018 }
1019
1020 // GHC Trac ticket #2615
1021 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
1022 // contain linker scripts rather than ELF-format object code. This
1023 // code handles the situation by recognizing the real object code
1024 // file name given in the linker script.
1025 //
1026 // If an "invalid ELF header" error occurs, it is assumed that the
1027 // .so file contains a linker script instead of ELF object code.
1028 // In this case, the code looks for the GROUP ( ... ) linker
1029 // directive. If one is found, the first file name inside the
1030 // parentheses is treated as the name of a dynamic library and the
1031 // code attempts to dlopen that file. If this is also unsuccessful,
1032 // an error message is returned.
1033
1034 // see if the error message is due to an invalid ELF header
1035 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
1036 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
1037 IF_DEBUG(linker, debugBelch("result = %i\n", result));
1038 if (result == 0) {
1039 // success -- try to read the named file as a linker script
1040 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
1041 MAXLINE-1);
1042 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
1043 line[match_length] = '\0'; // make sure string is null-terminated
1044 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
1045 if ((fp = fopen(line, "r")) == NULL) {
1046 return errmsg; // return original error if open fails
1047 }
1048 // try to find a GROUP or INPUT ( ... ) command
1049 while (fgets(line, MAXLINE, fp) != NULL) {
1050 IF_DEBUG(linker, debugBelch("input line = %s", line));
1051 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
1052 // success -- try to dlopen the first named file
1053 IF_DEBUG(linker, debugBelch("match%s\n",""));
1054 line[match[2].rm_eo] = '\0';
1055 stgFree((void*)errmsg); // Free old message before creating new one
1056 errmsg = internal_dlopen(line+match[2].rm_so);
1057 break;
1058 }
1059 // if control reaches here, no GROUP or INPUT ( ... ) directive
1060 // was found and the original error message is returned to the
1061 // caller
1062 }
1063 fclose(fp);
1064 }
1065 return errmsg;
1066
1067 # elif defined(OBJFORMAT_PEi386)
1068 /* ------------------- Win32 DLL loader ------------------- */
1069
1070 pathchar* buf;
1071 OpenedDLL* o_dll;
1072 HINSTANCE instance;
1073
1074 IF_DEBUG(linker, debugBelch("\naddDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
1075
1076 /* See if we've already got it, and ignore if so. */
1077 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
1078 if (0 == pathcmp(o_dll->name, dll_name))
1079 return NULL;
1080 }
1081
1082 /* The file name has no suffix (yet) so that we can try
1083 both foo.dll and foo.drv
1084
1085 The documentation for LoadLibrary says:
1086 If no file name extension is specified in the lpFileName
1087 parameter, the default library extension .dll is
1088 appended. However, the file name string can include a trailing
1089 point character (.) to indicate that the module name has no
1090 extension. */
1091
1092 size_t bufsize = pathlen(dll_name) + 10;
1093 buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
1094
1095 /* These are ordered by probability of success and order we'd like them */
1096 const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
1097 const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
1098
1099 int cFormat;
1100 int cFlag;
1101 int flags_start = 1; // Assume we don't support the new API
1102
1103 /* Detect if newer API are available, if not, skip the first flags entry */
1104 if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
1105 flags_start = 0;
1106 }
1107
1108 /* Iterate through the possible flags and formats */
1109 for (cFlag = flags_start; cFlag < 2; cFlag++)
1110 {
1111 for (cFormat = 0; cFormat < 4; cFormat++)
1112 {
1113 snwprintf(buf, bufsize, formats[cFormat], dll_name);
1114 instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
1115 if (instance == NULL)
1116 {
1117 if (GetLastError() != ERROR_MOD_NOT_FOUND)
1118 {
1119 goto error;
1120 }
1121 }
1122 else
1123 {
1124 break; // We're done. DLL has been loaded.
1125 }
1126 }
1127 }
1128
1129 // Check if we managed to load the DLL
1130 if (instance == NULL) {
1131 goto error;
1132 }
1133
1134 stgFree(buf);
1135
1136 addDLLHandle(dll_name, instance);
1137
1138 return NULL;
1139
1140 error:
1141 stgFree(buf);
1142 sysErrorBelch("addDLL: %" PATH_FMT " (Win32 error %lu)", dll_name, GetLastError());
1143
1144 /* LoadLibrary failed; return a ptr to the error msg. */
1145 return "addDLL: could not load DLL";
1146
1147 # else
1148 barf("addDLL: not implemented on this platform");
1149 # endif
1150 }
1151
1152 /* -----------------------------------------------------------------------------
1153 * Searches the system directories to determine if there is a system DLL that
1154 * satisfies the given name. This prevent GHCi from linking against a static
1155 * library if a DLL is available.
1156 *
1157 * Returns: NULL on failure or no DLL found, else the full path to the DLL
1158 * that can be loaded.
1159 */
1160 pathchar* findSystemLibrary(pathchar* dll_name)
1161 {
1162 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));
1163
1164 #if defined(OBJFORMAT_PEi386)
1165 const unsigned int init_buf_size = 1024;
1166 unsigned int bufsize = init_buf_size;
1167 wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
1168 DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
1169
1170 if (wResult > bufsize) {
1171 result = realloc(result, sizeof(wchar_t) * wResult);
1172 wResult = SearchPathW(NULL, dll_name, NULL, wResult, result, NULL);
1173 }
1174
1175
1176 if (!wResult) {
1177 free(result);
1178 return NULL;
1179 }
1180
1181 return result;
1182 #else
1183 (void)(dll_name); // Function not implemented for other platforms.
1184 return NULL;
1185 #endif
1186 }
1187
1188 /* -----------------------------------------------------------------------------
1189 * Emits a warning determining that the system is missing a required security
1190 * update that we need to get access to the proper APIs
1191 */
1192 void warnMissingKBLibraryPaths( void )
1193 {
1194 static HsBool missing_update_warn = HS_BOOL_FALSE;
1195 if (!missing_update_warn) {
1196 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
1197 missing_update_warn = HS_BOOL_TRUE;
1198 }
1199 }
1200
1201 /* -----------------------------------------------------------------------------
1202 * appends a directory to the process DLL Load path so LoadLibrary can find it
1203 *
1204 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
1205 * restore the search path to what it was before this call.
1206 */
1207 HsPtr addLibrarySearchPath(pathchar* dll_path)
1208 {
1209 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));
1210
1211 #if defined(OBJFORMAT_PEi386)
1212 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1213 LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
1214
1215 HsPtr result = NULL;
1216
1217 const unsigned int init_buf_size = 4096;
1218 int bufsize = init_buf_size;
1219
1220 // Make sure the path is an absolute path
1221 WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
1222 DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
1223 if (!wResult){
1224 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1225 }
1226 else if (wResult > init_buf_size) {
1227 abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
1228 if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
1229 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1230 }
1231 }
1232
1233 if (AddDllDirectory) {
1234 result = AddDllDirectory(abs_path);
1235 }
1236 else
1237 {
1238 warnMissingKBLibraryPaths();
1239 WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
1240 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1241
1242 if (wResult > init_buf_size) {
1243 str = realloc(str, sizeof(WCHAR) * wResult);
1244 bufsize = wResult;
1245 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1246 if (!wResult) {
1247 sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1248 }
1249 }
1250
1251 bufsize = wResult + 2 + pathlen(abs_path);
1252 wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
1253
1254 wcscpy(newPath, abs_path);
1255 wcscat(newPath, L";");
1256 wcscat(newPath, str);
1257 if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
1258 sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1259 }
1260
1261 free(newPath);
1262 free(abs_path);
1263
1264 return str;
1265 }
1266
1267 if (!result) {
1268 sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1269 free(abs_path);
1270 return NULL;
1271 }
1272
1273 free(abs_path);
1274 return result;
1275 #else
1276 (void)(dll_path); // Function not implemented for other platforms.
1277 return NULL;
1278 #endif
1279 }
1280
1281 /* -----------------------------------------------------------------------------
1282 * removes a directory from the process DLL Load path
1283 *
1284 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
1285 */
1286 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
1287 {
1288 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n", dll_path_index));
1289
1290 #if defined(OBJFORMAT_PEi386)
1291 HsBool result = 0;
1292
1293 if (dll_path_index != NULL) {
1294 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1295 LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
1296
1297 if (RemoveDllDirectory) {
1298 result = RemoveDllDirectory(dll_path_index);
1299 // dll_path_index is now invalid, do not use it after this point.
1300 }
1301 else
1302 {
1303 warnMissingKBLibraryPaths();
1304 result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
1305 free(dll_path_index);
1306 }
1307
1308 if (!result) {
1309 sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
1310 return HS_BOOL_FALSE;
1311 }
1312 }
1313
1314 return result == 0 ? HS_BOOL_TRUE : HS_BOOL_FALSE;
1315 #else
1316 (void)(dll_path_index); // Function not implemented for other platforms.
1317 return HS_BOOL_FALSE;
1318 #endif
1319 }
1320
1321 /* -----------------------------------------------------------------------------
1322 * insert a symbol in the hash table
1323 *
1324 * Returns: 0 on failure, nozero on success
1325 */
1326 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
1327 {
1328 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
1329 }
1330
1331 /* -----------------------------------------------------------------------------
1332 * lookup a symbol in the hash table
1333 */
1334 static SymbolAddr* lookupSymbol_ (SymbolName* lbl)
1335 {
1336 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
1337
1338 ASSERT(symhash != NULL);
1339 RtsSymbolInfo *pinfo;
1340
1341 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
1342 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
1343
1344 # if defined(OBJFORMAT_ELF)
1345 return internal_dlsym(lbl);
1346 # elif defined(OBJFORMAT_MACHO)
1347
1348 /* HACK: On OS X, all symbols are prefixed with an underscore.
1349 However, dlsym wants us to omit the leading underscore from the
1350 symbol name -- the dlsym routine puts it back on before searching
1351 for the symbol. For now, we simply strip it off here (and ONLY
1352 here).
1353 */
1354 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
1355 ASSERT(lbl[0] == '_');
1356 return internal_dlsym(lbl + 1);
1357 # elif defined(OBJFORMAT_PEi386)
1358 SymbolAddr* sym;
1359
1360 /* See Note [mingw-w64 name decoration scheme] */
1361 #ifndef x86_64_HOST_ARCH
1362 zapTrailingAtSign ( (unsigned char*)lbl );
1363 #endif
1364 sym = lookupSymbolInDLLs((unsigned char*)lbl);
1365 return sym; // might be NULL if not found
1366
1367 # else
1368 ASSERT(2+2 == 5);
1369 return NULL;
1370 # endif
1371 } else {
1372 SymbolAddr* val = pinfo->value;
1373 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, val));
1374
1375 int r;
1376 ObjectCode* oc = pinfo->owner;
1377
1378 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
1379 See Note [runtime-linker-phases] */
1380 if (oc && oc->status == OBJECT_LOADED) {
1381 oc->status = OBJECT_NEEDED;
1382 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand loading symbol '%s'\n", lbl));
1383 r = ocTryLoad(oc);
1384
1385 if (!r) {
1386 errorBelch("Could not on-demand load symbol '%s'\n", lbl);
1387 return NULL;
1388 }
1389 }
1390
1391 return val;
1392 }
1393 }
1394
1395 SymbolAddr* lookupSymbol( SymbolName* lbl )
1396 {
1397 ACQUIRE_LOCK(&linker_mutex);
1398 SymbolAddr* r = lookupSymbol_(lbl);
1399 RELEASE_LOCK(&linker_mutex);
1400 return r;
1401 }
1402
1403 /* -----------------------------------------------------------------------------
1404 Create a StablePtr for a foreign export. This is normally called by
1405 a C function with __attribute__((constructor)), which is generated
1406 by GHC and linked into the module.
1407
1408 If the object code is being loaded dynamically, then we remember
1409 which StablePtrs were allocated by the constructors and free them
1410 again in unloadObj().
1411 -------------------------------------------------------------------------- */
1412
1413 static ObjectCode *loading_obj = NULL;
1414
1415 StgStablePtr foreignExportStablePtr (StgPtr p)
1416 {
1417 ForeignExportStablePtr *fe_sptr;
1418 StgStablePtr *sptr;
1419
1420 sptr = getStablePtr(p);
1421
1422 if (loading_obj != NULL) {
1423 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
1424 "foreignExportStablePtr");
1425 fe_sptr->stable_ptr = sptr;
1426 fe_sptr->next = loading_obj->stable_ptrs;
1427 loading_obj->stable_ptrs = fe_sptr;
1428 }
1429
1430 return sptr;
1431 }
1432
1433
1434 /* -----------------------------------------------------------------------------
1435 * Debugging aid: look in GHCi's object symbol tables for symbols
1436 * within DELTA bytes of the specified address, and show their names.
1437 */
1438 #ifdef DEBUG
1439 void ghci_enquire ( SymbolAddr* addr );
1440
1441 void ghci_enquire(SymbolAddr* addr)
1442 {
1443 int i;
1444 SymbolName* sym;
1445 RtsSymbolInfo* a;
1446 const int DELTA = 64;
1447 ObjectCode* oc;
1448
1449 for (oc = objects; oc; oc = oc->next) {
1450 for (i = 0; i < oc->n_symbols; i++) {
1451 sym = oc->symbols[i];
1452 if (sym == NULL) continue;
1453 a = NULL;
1454 if (a == NULL) {
1455 ghciLookupSymbolInfo(symhash, sym, &a);
1456 }
1457 if (a == NULL) {
1458 // debugBelch("ghci_enquire: can't find %s\n", sym);
1459 }
1460 else if ( a->value
1461 && (char*)addr-DELTA <= (char*)a->value
1462 && (char*)a->value <= (char*)addr+DELTA) {
1463 debugBelch("%p + %3d == `%s'\n", addr, (int)((char*)a->value - (char*)addr), sym);
1464 }
1465 }
1466 }
1467 }
1468 #endif
1469
1470 #if RTS_LINKER_USE_MMAP
1471 //
1472 // Returns NULL on failure.
1473 //
1474 void *
1475 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1476 {
1477 void *map_addr = NULL;
1478 void *result;
1479 size_t size;
1480 static uint32_t fixed = 0;
1481
1482 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1483 size = roundUpToPage(bytes);
1484
1485 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1486 mmap_again:
1487
1488 if (mmap_32bit_base != 0) {
1489 map_addr = mmap_32bit_base;
1490 }
1491 #endif
1492
1493 IF_DEBUG(linker,
1494 debugBelch("mmapForLinker: \tprotection %#0x\n",
1495 PROT_EXEC | PROT_READ | PROT_WRITE));
1496 IF_DEBUG(linker,
1497 debugBelch("mmapForLinker: \tflags %#0x\n",
1498 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1499
1500 result = mmap(map_addr, size,
1501 PROT_EXEC|PROT_READ|PROT_WRITE,
1502 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1503
1504 if (result == MAP_FAILED) {
1505 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1506 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1507 return NULL;
1508 }
1509
1510 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1511 if (mmap_32bit_base != 0) {
1512 if (result == map_addr) {
1513 mmap_32bit_base = (StgWord8*)map_addr + size;
1514 } else {
1515 if ((W_)result > 0x80000000) {
1516 // oops, we were given memory over 2Gb
1517 munmap(result,size);
1518 #if defined(freebsd_HOST_OS) || \
1519 defined(kfreebsdgnu_HOST_OS) || \
1520 defined(dragonfly_HOST_OS)
1521 // Some platforms require MAP_FIXED. This is normally
1522 // a bad idea, because MAP_FIXED will overwrite
1523 // existing mappings.
1524 fixed = MAP_FIXED;
1525 goto mmap_again;
1526 #else
1527 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1528 "asked for %lu bytes at %p. "
1529 "Try specifying an address with +RTS -xm<addr> -RTS",
1530 size, map_addr);
1531 return NULL;
1532 #endif
1533 } else {
1534 // hmm, we were given memory somewhere else, but it's
1535 // still under 2Gb so we can use it. Next time, ask
1536 // for memory right after the place we just got some
1537 mmap_32bit_base = (StgWord8*)result + size;
1538 }
1539 }
1540 } else {
1541 if ((W_)result > 0x80000000) {
1542 // oops, we were given memory over 2Gb
1543 // ... try allocating memory somewhere else?;
1544 debugTrace(DEBUG_linker,
1545 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1546 bytes, result);
1547 munmap(result, size);
1548
1549 // Set a base address and try again... (guess: 1Gb)
1550 mmap_32bit_base = (void*)0x40000000;
1551 goto mmap_again;
1552 }
1553 }
1554 #endif
1555
1556 IF_DEBUG(linker,
1557 debugBelch("mmapForLinker: mapped %" FMT_Word
1558 " bytes starting at %p\n", (W_)size, result));
1559 IF_DEBUG(linker,
1560 debugBelch("mmapForLinker: done\n"));
1561
1562 return result;
1563 }
1564 #endif
1565
1566 /*
1567 * Remove symbols from the symbol table, and free oc->symbols.
1568 * This operation is idempotent.
1569 */
1570 static void removeOcSymbols (ObjectCode *oc)
1571 {
1572 if (oc->symbols == NULL) return;
1573
1574 // Remove all the mappings for the symbols within this object..
1575 int i;
1576 for (i = 0; i < oc->n_symbols; i++) {
1577 if (oc->symbols[i] != NULL) {
1578 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1579 }
1580 }
1581
1582 stgFree(oc->symbols);
1583 oc->symbols = NULL;
1584 }
1585
1586 /*
1587 * Release StablePtrs and free oc->stable_ptrs.
1588 * This operation is idempotent.
1589 */
1590 static void freeOcStablePtrs (ObjectCode *oc)
1591 {
1592 // Release any StablePtrs that were created when this
1593 // object module was initialized.
1594 ForeignExportStablePtr *fe_ptr, *next;
1595
1596 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1597 next = fe_ptr->next;
1598 freeStablePtr(fe_ptr->stable_ptr);
1599 stgFree(fe_ptr);
1600 }
1601 oc->stable_ptrs = NULL;
1602 }
1603
1604 static void
1605 freePreloadObjectFile (ObjectCode *oc)
1606 {
1607 #if defined(mingw32_HOST_OS)
1608
1609 VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
1610
1611 IndirectAddr *ia, *ia_next;
1612 ia = indirects;
1613 while (ia != NULL) {
1614 ia_next = ia->next;
1615 stgFree(ia);
1616 ia = ia_next;
1617 }
1618 indirects = NULL;
1619
1620 #else
1621
1622 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1623 munmap(oc->image, oc->fileSize);
1624 }
1625 else {
1626 stgFree(oc->image);
1627 }
1628
1629 #endif
1630
1631 oc->image = NULL;
1632 oc->fileSize = 0;
1633 }
1634
1635 /*
1636 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1637 * the GC when a previously unloaded ObjectCode has been determined to be
1638 * unused, and when an error occurs during loadObj().
1639 */
1640 void freeObjectCode (ObjectCode *oc)
1641 {
1642 freePreloadObjectFile(oc);
1643
1644 if (oc->symbols != NULL) {
1645 stgFree(oc->symbols);
1646 oc->symbols = NULL;
1647 }
1648
1649 if (oc->extraInfos != NULL) {
1650 freeHashTable(oc->extraInfos, NULL);
1651 oc->extraInfos = NULL;
1652 }
1653
1654 if (oc->sections != NULL) {
1655 int i;
1656 for (i=0; i < oc->n_sections; i++) {
1657 if (oc->sections[i].start != NULL) {
1658 switch(oc->sections[i].alloc){
1659 #if RTS_LINKER_USE_MMAP
1660 case SECTION_MMAP:
1661 munmap(oc->sections[i].mapped_start,
1662 oc->sections[i].mapped_size);
1663 break;
1664 case SECTION_M32:
1665 m32_free(oc->sections[i].start,
1666 oc->sections[i].size);
1667 break;
1668 #endif
1669 case SECTION_MALLOC:
1670 stgFree(oc->sections[i].start);
1671 break;
1672 default:
1673 break;
1674 }
1675 }
1676 }
1677 stgFree(oc->sections);
1678 }
1679
1680 freeProddableBlocks(oc);
1681
1682 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1683 * alongside the image, so we don't need to free. */
1684 #if NEED_SYMBOL_EXTRAS && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
1685 if (RTS_LINKER_USE_MMAP) {
1686 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1687 m32_free(oc->symbol_extras,
1688 sizeof(SymbolExtra) * oc->n_symbol_extras);
1689 }
1690 }
1691 else {
1692 stgFree(oc->symbol_extras);
1693 }
1694 #endif
1695
1696 stgFree(oc->fileName);
1697 stgFree(oc->archiveMemberName);
1698
1699 stgFree(oc);
1700 }
1701
1702 /* -----------------------------------------------------------------------------
1703 * Sets the initial status of a fresh ObjectCode
1704 */
1705 static void setOcInitialStatus(ObjectCode* oc) {
1706 if (oc->archiveMemberName == NULL) {
1707 oc->status = OBJECT_NEEDED;
1708 } else {
1709 oc->status = OBJECT_LOADED;
1710 }
1711 }
1712
1713 static ObjectCode*
1714 mkOc( pathchar *path, char *image, int imageSize,
1715 rtsBool mapped, char *archiveMemberName, int misalignment ) {
1716 ObjectCode* oc;
1717
1718 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1719 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1720
1721 # if defined(OBJFORMAT_ELF)
1722 oc->formatName = "ELF";
1723 # elif defined(OBJFORMAT_PEi386)
1724 oc->formatName = "PEi386";
1725 # elif defined(OBJFORMAT_MACHO)
1726 oc->formatName = "Mach-O";
1727 # else
1728 stgFree(oc);
1729 barf("loadObj: not implemented on this platform");
1730 # endif
1731
1732 oc->image = image;
1733 oc->fileName = pathdup(path);
1734
1735 if (archiveMemberName) {
1736 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1737 strcpy(oc->archiveMemberName, archiveMemberName);
1738 } else {
1739 oc->archiveMemberName = NULL;
1740 }
1741
1742 setOcInitialStatus( oc );
1743
1744 oc->fileSize = imageSize;
1745 oc->symbols = NULL;
1746 oc->n_sections = 0;
1747 oc->sections = NULL;
1748 oc->proddables = NULL;
1749 oc->stable_ptrs = NULL;
1750 #if NEED_SYMBOL_EXTRAS
1751 oc->symbol_extras = NULL;
1752 #endif
1753 oc->imageMapped = mapped;
1754
1755 oc->misalignment = misalignment;
1756 oc->extraInfos = NULL;
1757
1758 /* chain it onto the list of objects */
1759 oc->next = NULL;
1760
1761 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1762 return oc;
1763 }
1764
1765 /* -----------------------------------------------------------------------------
1766 * Check if an object or archive is already loaded.
1767 *
1768 * Returns: 1 if the path is already loaded, 0 otherwise.
1769 */
1770 static HsInt
1771 isAlreadyLoaded( pathchar *path )
1772 {
1773 ObjectCode *o;
1774 for (o = objects; o; o = o->next) {
1775 if (0 == pathcmp(o->fileName, path)) {
1776 return 1; /* already loaded */
1777 }
1778 }
1779 return 0; /* not loaded yet */
1780 }
1781
1782 static HsInt loadArchive_ (pathchar *path)
1783 {
1784 ObjectCode* oc;
1785 char *image;
1786 int memberSize;
1787 FILE *f;
1788 int n;
1789 size_t thisFileNameSize;
1790 char *fileName;
1791 size_t fileNameSize;
1792 int isObject, isGnuIndex, isThin, isImportLib;
1793 char tmp[20];
1794 char *gnuFileIndex;
1795 int gnuFileIndexSize;
1796 #if defined(darwin_HOST_OS)
1797 int i;
1798 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
1799 #if defined(i386_HOST_ARCH)
1800 const uint32_t mycputype = CPU_TYPE_X86;
1801 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
1802 #elif defined(x86_64_HOST_ARCH)
1803 const uint32_t mycputype = CPU_TYPE_X86_64;
1804 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
1805 #elif defined(powerpc_HOST_ARCH)
1806 const uint32_t mycputype = CPU_TYPE_POWERPC;
1807 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1808 #elif defined(powerpc64_HOST_ARCH)
1809 const uint32_t mycputype = CPU_TYPE_POWERPC64;
1810 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1811 #else
1812 #error Unknown Darwin architecture
1813 #endif
1814 #endif
1815 int misalignment = 0;
1816
1817 /* TODO: don't call barf() on error, instead return an error code, freeing
1818 * all resources correctly. This function is pretty complex, so it needs
1819 * to be refactored to make this practical. */
1820
1821 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
1822 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
1823
1824 /* Check that we haven't already loaded this archive.
1825 Ignore requests to load multiple times */
1826 if (isAlreadyLoaded(path)) {
1827 IF_DEBUG(linker,
1828 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1829 return 1; /* success */
1830 }
1831
1832 gnuFileIndex = NULL;
1833 gnuFileIndexSize = 0;
1834
1835 fileNameSize = 32;
1836 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
1837
1838 isThin = 0;
1839 isImportLib = 0;
1840
1841 f = pathopen(path, WSTR("rb"));
1842 if (!f)
1843 barf("loadObj: can't read `%" PATH_FMT "'", path);
1844
1845 /* Check if this is an archive by looking for the magic "!<arch>\n"
1846 * string. Usually, if this fails, we barf and quit. On Darwin however,
1847 * we may have a fat archive, which contains archives for more than
1848 * one architecture. Fat archives start with the magic number 0xcafebabe,
1849 * always stored big endian. If we find a fat_header, we scan through
1850 * the fat_arch structs, searching through for one for our host
1851 * architecture. If a matching struct is found, we read the offset
1852 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
1853 * from the start of the file.
1854 *
1855 * A subtlety is that all of the members of the fat_header and fat_arch
1856 * structs are stored big endian, so we need to call byte order
1857 * conversion functions.
1858 *
1859 * If we find the appropriate architecture in a fat archive, we gobble
1860 * its magic "!<arch>\n" string and continue processing just as if
1861 * we had a single architecture archive.
1862 */
1863
1864 n = fread ( tmp, 1, 8, f );
1865 if (n != 8)
1866 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
1867 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
1868 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
1869 *
1870 * ar thin libraries have the exact same format as normal archives except they
1871 * have a different magic string and they don't copy the object files into the
1872 * archive.
1873 *
1874 * Instead each header entry points to the location of the object file on disk.
1875 * This is useful when a library is only created to satisfy a compile time dependency
1876 * instead of to be distributed. This saves the time required for copying.
1877 *
1878 * Thin archives are always flattened. They always only contain simple headers
1879 * pointing to the object file and so we need not allocate more memory than needed
1880 * to find the object file.
1881 *
1882 */
1883 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
1884 isThin = 1;
1885 }
1886 #if defined(darwin_HOST_OS)
1887 /* Not a standard archive, look for a fat archive magic number: */
1888 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
1889 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
1890 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
1891 nfat_offset = 0;
1892
1893 for (i = 0; i < (int)nfat_arch; i++) {
1894 /* search for the right arch */
1895 n = fread( tmp, 1, 20, f );
1896 if (n != 8)
1897 barf("loadArchive: Failed reading arch from `%s'", path);
1898 cputype = ntohl(*(uint32_t *)tmp);
1899 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
1900
1901 if (cputype == mycputype && cpusubtype == mycpusubtype) {
1902 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
1903 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
1904 break;
1905 }
1906 }
1907
1908 if (nfat_offset == 0) {
1909 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
1910 }
1911 else {
1912 n = fseek( f, nfat_offset, SEEK_SET );
1913 if (n != 0)
1914 barf("loadArchive: Failed to seek to arch in `%s'", path);
1915 n = fread ( tmp, 1, 8, f );
1916 if (n != 8)
1917 barf("loadArchive: Failed reading header from `%s'", path);
1918 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1919 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
1920 }
1921 }
1922 }
1923 else {
1924 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
1925 }
1926 #else
1927 else {
1928 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
1929 }
1930 #endif
1931
1932 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
1933
1934 while (1) {
1935 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
1936 n = fread ( fileName, 1, 16, f );
1937 if (n != 16) {
1938 if (feof(f)) {
1939 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
1940 break;
1941 }
1942 else {
1943 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
1944 }
1945 }
1946
1947 #if defined(darwin_HOST_OS)
1948 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
1949 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
1950 break;
1951 }
1952 #endif
1953
1954 n = fread ( tmp, 1, 12, f );
1955 if (n != 12)
1956 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
1957 n = fread ( tmp, 1, 6, f );
1958 if (n != 6)
1959 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
1960 n = fread ( tmp, 1, 6, f );
1961 if (n != 6)
1962 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
1963 n = fread ( tmp, 1, 8, f );
1964 if (n != 8)
1965 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
1966 n = fread ( tmp, 1, 10, f );
1967 if (n != 10)
1968 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
1969 tmp[10] = '\0';
1970 for (n = 0; isdigit(tmp[n]); n++);
1971 tmp[n] = '\0';
1972 memberSize = atoi(tmp);
1973
1974 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
1975 n = fread ( tmp, 1, 2, f );
1976 if (n != 2)
1977 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
1978 if (strncmp(tmp, "\x60\x0A", 2) != 0)
1979 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
1980 path, ftell(f), tmp[0], tmp[1]);
1981
1982 isGnuIndex = 0;
1983 /* Check for BSD-variant large filenames */
1984 if (0 == strncmp(fileName, "#1/", 3)) {
1985 fileName[16] = '\0';
1986 if (isdigit(fileName[3])) {
1987 for (n = 4; isdigit(fileName[n]); n++);
1988 fileName[n] = '\0';
1989 thisFileNameSize = atoi(fileName + 3);
1990 memberSize -= thisFileNameSize;
1991 if (thisFileNameSize >= fileNameSize) {
1992 /* Double it to avoid potentially continually
1993 increasing it by 1 */
1994 fileNameSize = thisFileNameSize * 2;
1995 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
1996 }
1997 n = fread ( fileName, 1, thisFileNameSize, f );
1998 if (n != (int)thisFileNameSize) {
1999 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
2000 path);
2001 }
2002 fileName[thisFileNameSize] = 0;
2003
2004 /* On OS X at least, thisFileNameSize is the size of the
2005 fileName field, not the length of the fileName
2006 itself. */
2007 thisFileNameSize = strlen(fileName);
2008 }
2009 else {
2010 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
2011 }
2012 }
2013 /* Check for GNU file index file */
2014 else if (0 == strncmp(fileName, "//", 2)) {
2015 fileName[0] = '\0';
2016 thisFileNameSize = 0;
2017 isGnuIndex = 1;
2018 }
2019 /* Check for a file in the GNU file index */
2020 else if (fileName[0] == '/') {
2021 if (isdigit(fileName[1])) {
2022 int i;
2023
2024 for (n = 2; isdigit(fileName[n]); n++);
2025 fileName[n] = '\0';
2026 n = atoi(fileName + 1);
2027
2028 if (gnuFileIndex == NULL) {
2029 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
2030 }
2031 if (n < 0 || n > gnuFileIndexSize) {
2032 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
2033 }
2034 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
2035 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
2036 }
2037 for (i = n; gnuFileIndex[i] != '\n'; i++);
2038 thisFileNameSize = i - n - 1;
2039 if (thisFileNameSize >= fileNameSize) {
2040 /* Double it to avoid potentially continually
2041 increasing it by 1 */
2042 fileNameSize = thisFileNameSize * 2;
2043 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2044 }
2045 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
2046 fileName[thisFileNameSize] = '\0';
2047 }
2048 else if (fileName[1] == ' ') {
2049 fileName[0] = '\0';
2050 thisFileNameSize = 0;
2051 }
2052 else {
2053 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
2054 }
2055 }
2056 /* Finally, the case where the filename field actually contains
2057 the filename */
2058 else {
2059 /* GNU ar terminates filenames with a '/', this allowing
2060 spaces in filenames. So first look to see if there is a
2061 terminating '/'. */
2062 for (thisFileNameSize = 0;
2063 thisFileNameSize < 16;
2064 thisFileNameSize++) {
2065 if (fileName[thisFileNameSize] == '/') {
2066 fileName[thisFileNameSize] = '\0';
2067 break;
2068 }
2069 }
2070 /* If we didn't find a '/', then a space teminates the
2071 filename. Note that if we don't find one, then
2072 thisFileNameSize ends up as 16, and we already have the
2073 '\0' at the end. */
2074 if (thisFileNameSize == 16) {
2075 for (thisFileNameSize = 0;
2076 thisFileNameSize < 16;
2077 thisFileNameSize++) {
2078 if (fileName[thisFileNameSize] == ' ') {
2079 fileName[thisFileNameSize] = '\0';
2080 break;
2081 }
2082 }
2083 }
2084 }
2085
2086 IF_DEBUG(linker,
2087 debugBelch("loadArchive: Found member file `%s'\n", fileName));
2088
2089 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
2090 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
2091
2092 #if defined(OBJFORMAT_PEi386)
2093 /*
2094 * Note [MSVC import files (ext .lib)]
2095 * MSVC compilers store the object files in
2096 * the import libraries with extension .dll
2097 * so on Windows we should look for those too.
2098 * The PE COFF format doesn't specify any specific file name
2099 * for sections. So on windows, just try to load it all.
2100 *
2101 * Linker members (e.g. filename / are skipped since they are not needed)
2102 */
2103 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
2104
2105 /*
2106 * Note [GCC import files (ext .dll.a)]
2107 * GCC stores import information in the same binary format
2108 * as the object file normally has. The only difference is that
2109 * all the information are put in .idata sections. The only real
2110 * way to tell if we're dealing with an import lib is by looking
2111 * at the file extension.
2112 */
2113 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
2114 #endif // windows
2115
2116 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
2117 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
2118
2119 if (isObject) {
2120 char *archiveMemberName;
2121
2122 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
2123
2124 #if defined(mingw32_HOST_OS)
2125 // TODO: We would like to use allocateExec here, but allocateExec
2126 // cannot currently allocate blocks large enough.
2127 image = allocateImageAndTrampolines(path, fileName, f, memberSize,
2128 isThin);
2129 #elif defined(darwin_HOST_OS)
2130 if (RTS_LINKER_USE_MMAP)
2131 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
2132 else {
2133 /* See loadObj() */
2134 misalignment = machoGetMisalignment(f);
2135 image = stgMallocBytes(memberSize + misalignment,
2136 "loadArchive(image)");
2137 image += misalignment;
2138 }
2139
2140 #else // not windows or darwin
2141 image = stgMallocBytes(memberSize, "loadArchive(image)");
2142 #endif
2143 if (isThin) {
2144 FILE *member;
2145 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
2146
2147 /* Allocate and setup the dirname of the archive. We'll need
2148 this to locate the thin member */
2149 pathCopy = pathdup(path); // Convert the char* to a pathchar*
2150 dirName = pathdir(pathCopy);
2151
2152 /* Append the relative member name to the dirname. This should be
2153 be the full path to the actual thin member. */
2154 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
2155 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
2156 objFileName = mkPath(fileName);
2157 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
2158 stgFree(objFileName);
2159 stgFree(dirName);
2160
2161 member = pathopen(memberPath, WSTR("rb"));
2162 if (!member)
2163 barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
2164
2165 n = fread ( image, 1, memberSize, member );
2166 if (n != memberSize) {
2167 barf("loadArchive: error whilst reading `%s'", fileName);
2168 }
2169
2170 fclose(member);
2171 stgFree(memberPath);
2172 stgFree(pathCopy);
2173 }
2174 else
2175 {
2176 n = fread ( image, 1, memberSize, f );
2177 if (n != memberSize) {
2178 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2179 }
2180 }
2181
2182 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
2183 "loadArchive(file)");
2184 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
2185 path, (int)thisFileNameSize, fileName);
2186
2187 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
2188 , misalignment);
2189
2190 stgFree(archiveMemberName);
2191
2192 if (0 == loadOc(oc)) {
2193 stgFree(fileName);
2194 fclose(f);
2195 return 0;
2196 } else {
2197 #if defined(OBJFORMAT_PEi386)
2198 if (isImportLib)
2199 {
2200 findAndLoadImportLibrary(oc);
2201 stgFree(oc);
2202 oc = NULL;
2203 break;
2204 } else {
2205 #endif
2206 oc->next = objects;
2207 objects = oc;
2208 #if defined(OBJFORMAT_PEi386)
2209 }
2210 #endif
2211 }
2212 }
2213 else if (isGnuIndex) {
2214 if (gnuFileIndex != NULL) {
2215 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
2216 }
2217 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
2218 #if RTS_LINKER_USE_MMAP
2219 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
2220 #else
2221 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
2222 #endif
2223 n = fread ( gnuFileIndex, 1, memberSize, f );
2224 if (n != memberSize) {
2225 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2226 }
2227 gnuFileIndex[memberSize] = '/';
2228 gnuFileIndexSize = memberSize;
2229 }
2230 else if (isImportLib) {
2231 #if defined(OBJFORMAT_PEi386)
2232 if (checkAndLoadImportLibrary(path, fileName, f)) {
2233 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
2234 }
2235 else {
2236 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
2237 n = fseek(f, memberSize, SEEK_CUR);
2238 if (n != 0)
2239 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2240 memberSize, path);
2241 }
2242 #endif
2243 }
2244 else {
2245 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
2246 if (!isThin || thisFileNameSize == 0) {
2247 n = fseek(f, memberSize, SEEK_CUR);
2248 if (n != 0)
2249 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2250 memberSize, path);
2251 }
2252 }
2253
2254 /* .ar files are 2-byte aligned */
2255 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
2256 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
2257 n = fread ( tmp, 1, 1, f );
2258 if (n != 1) {
2259 if (feof(f)) {
2260 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
2261 break;
2262 }
2263 else {
2264 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
2265 }
2266 }
2267 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
2268 }
2269 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
2270 }
2271
2272 fclose(f);
2273
2274 stgFree(fileName);
2275 if (gnuFileIndex != NULL) {
2276 #if RTS_LINKER_USE_MMAP
2277 munmap(gnuFileIndex, gnuFileIndexSize + 1);
2278 #else
2279 stgFree(gnuFileIndex);
2280 #endif
2281 }
2282
2283 if (RTS_LINKER_USE_MMAP)
2284 m32_allocator_flush();
2285
2286 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
2287 return 1;
2288 }
2289
2290 HsInt loadArchive (pathchar *path)
2291 {
2292 ACQUIRE_LOCK(&linker_mutex);
2293 HsInt r = loadArchive_(path);
2294 RELEASE_LOCK(&linker_mutex);
2295 return r;
2296 }
2297
2298 //
2299 // Load the object file into memory. This will not be its final resting place,
2300 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
2301 // address space, properly aligned.
2302 //
2303 static ObjectCode *
2304 preloadObjectFile (pathchar *path)
2305 {
2306 int fileSize;
2307 struct_stat st;
2308 int r;
2309 void *image;
2310 ObjectCode *oc;
2311 int misalignment = 0;
2312
2313 r = pathstat(path, &st);
2314 if (r == -1) {
2315 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
2316 return NULL;
2317 }
2318
2319 fileSize = st.st_size;
2320
2321 #if RTS_LINKER_USE_MMAP
2322 int fd;
2323
2324 /* On many architectures malloc'd memory isn't executable, so we need to use
2325 * mmap. */
2326
2327 #if defined(openbsd_HOST_OS)
2328 fd = open(path, O_RDONLY, S_IRUSR);
2329 #else
2330 fd = open(path, O_RDONLY);
2331 #endif
2332 if (fd == -1) {
2333 errorBelch("loadObj: can't open %s", path);
2334 return NULL;
2335 }
2336
2337 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
2338 MAP_PRIVATE, fd, 0);
2339 // not 32-bit yet, we'll remap later
2340 close(fd);
2341
2342 #else /* !RTS_LINKER_USE_MMAP */
2343 FILE *f;
2344
2345 /* load the image into memory */
2346 /* coverity[toctou] */
2347 f = pathopen(path, WSTR("rb"));
2348 if (!f) {
2349 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
2350 return NULL;
2351 }
2352
2353 # if defined(mingw32_HOST_OS)
2354
2355 // TODO: We would like to use allocateExec here, but allocateExec
2356 // cannot currently allocate blocks large enough.
2357 image = allocateImageAndTrampolines(path, "itself", f, fileSize,
2358 HS_BOOL_FALSE);
2359 if (image == NULL) {
2360 fclose(f);
2361 return NULL;
2362 }
2363
2364 # elif defined(darwin_HOST_OS)
2365
2366 // In a Mach-O .o file, all sections can and will be misaligned
2367 // if the total size of the headers is not a multiple of the
2368 // desired alignment. This is fine for .o files that only serve
2369 // as input for the static linker, but it's not fine for us,
2370 // as SSE (used by gcc for floating point) and Altivec require
2371 // 16-byte alignment.
2372 // We calculate the correct alignment from the header before
2373 // reading the file, and then we misalign image on purpose so
2374 // that the actual sections end up aligned again.
2375 misalignment = machoGetMisalignment(f);
2376 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
2377 image += misalignment;
2378
2379 # else /* !defined(mingw32_HOST_OS) */
2380
2381 image = stgMallocBytes(fileSize, "loadObj(image)");
2382
2383 #endif
2384
2385 int n;
2386 n = fread ( image, 1, fileSize, f );
2387 fclose(f);
2388 if (n != fileSize) {
2389 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
2390 stgFree(image);
2391 return NULL;
2392 }
2393
2394 #endif /* RTS_LINKER_USE_MMAP */
2395
2396 oc = mkOc(path, image, fileSize, rtsTrue, NULL, misalignment);
2397
2398 return oc;
2399 }
2400
2401 /* -----------------------------------------------------------------------------
2402 * Load an obj (populate the global symbol table, but don't resolve yet)
2403 *
2404 * Returns: 1 if ok, 0 on error.
2405 */
2406 static HsInt loadObj_ (pathchar *path)
2407 {
2408 ObjectCode* oc;
2409 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
2410
2411 /* debugBelch("loadObj %s\n", path ); */
2412
2413 /* Check that we haven't already loaded this object.
2414 Ignore requests to load multiple times */
2415
2416 if (isAlreadyLoaded(path)) {
2417 IF_DEBUG(linker,
2418 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
2419 return 1; /* success */
2420 }
2421
2422 oc = preloadObjectFile(path);
2423 if (oc == NULL) return 0;
2424
2425 if (! loadOc(oc)) {
2426 // failed; free everything we've allocated
2427 removeOcSymbols(oc);
2428 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
2429 freeObjectCode(oc);
2430 return 0;
2431 }
2432
2433 oc->next = objects;
2434 objects = oc;
2435 return 1;
2436 }
2437
2438 HsInt loadObj (pathchar *path)
2439 {
2440 ACQUIRE_LOCK(&linker_mutex);
2441 HsInt r = loadObj_(path);
2442 RELEASE_LOCK(&linker_mutex);
2443 return r;
2444 }
2445
2446 static HsInt loadOc (ObjectCode* oc)
2447 {
2448 int r;
2449
2450 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
2451
2452 /* verify the in-memory image */
2453 # if defined(OBJFORMAT_ELF)
2454 r = ocVerifyImage_ELF ( oc );
2455 # elif defined(OBJFORMAT_PEi386)
2456 r = ocVerifyImage_PEi386 ( oc );
2457 # elif defined(OBJFORMAT_MACHO)
2458 r = ocVerifyImage_MachO ( oc );
2459 # else
2460 barf("loadObj: no verify method");
2461 # endif
2462 if (!r) {
2463 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
2464 return r;
2465 }
2466
2467 #if NEED_SYMBOL_EXTRAS
2468 # if defined(OBJFORMAT_MACHO)
2469 r = ocAllocateSymbolExtras_MachO ( oc );
2470 if (!r) {
2471 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
2472 return r;
2473 }
2474 # elif defined(OBJFORMAT_ELF)
2475 r = ocAllocateSymbolExtras_ELF ( oc );
2476 if (!r) {
2477 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
2478 return r;
2479 }
2480 # elif defined(OBJFORMAT_PEi386)
2481 ocAllocateSymbolExtras_PEi386 ( oc );
2482 # endif
2483 #endif
2484
2485 /* build the symbol list for this image */
2486 # if defined(OBJFORMAT_ELF)
2487 r = ocGetNames_ELF ( oc );
2488 # elif defined(OBJFORMAT_PEi386)
2489 r = ocGetNames_PEi386 ( oc );
2490 # elif defined(OBJFORMAT_MACHO)
2491 r = ocGetNames_MachO ( oc );
2492 # else
2493 barf("loadObj: no getNames method");
2494 # endif
2495 if (!r) {
2496 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
2497 return r;
2498 }
2499
2500 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
2501 setOcInitialStatus( oc );
2502 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
2503
2504 return 1;
2505 }
2506
2507 /* -----------------------------------------------------------------------------
2508 * try to load and initialize an ObjectCode into memory
2509 *
2510 * Returns: 1 if ok, 0 on error.
2511 */
2512 int ocTryLoad (ObjectCode* oc) {
2513 int r;
2514
2515 if (oc->status != OBJECT_NEEDED) {
2516 return 1;
2517 }
2518
2519 /* Check for duplicate symbols by looking into `symhash`.
2520 Duplicate symbols are any symbols which exist
2521 in different ObjectCodes that have both been loaded, or
2522 are to be loaded by this call.
2523
2524 This call is intended to have no side-effects when a non-duplicate
2525 symbol is re-inserted.
2526
2527 We set the Address to NULL since that is not used to distinguish
2528 symbols. Duplicate symbols are distinguished by name and oc.
2529 */
2530 int x;
2531 SymbolName* symbol;
2532 for (x = 0; x < oc->n_symbols; x++) {
2533 symbol = oc->symbols[x];
2534 if ( symbol
2535 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL, isSymbolWeak(oc, symbol), oc)) {
2536 return 0;
2537 }
2538 }
2539
2540 # if defined(OBJFORMAT_ELF)
2541 r = ocResolve_ELF ( oc );
2542 # elif defined(OBJFORMAT_PEi386)
2543 r = ocResolve_PEi386 ( oc );
2544 # elif defined(OBJFORMAT_MACHO)
2545 r = ocResolve_MachO ( oc );
2546 # else
2547 barf("ocTryLoad: not implemented on this platform");
2548 # endif
2549 if (!r) { return r; }
2550
2551 // run init/init_array/ctors/mod_init_func
2552
2553 loading_obj = oc; // tells foreignExportStablePtr what to do
2554 #if defined(OBJFORMAT_ELF)
2555 r = ocRunInit_ELF ( oc );
2556 #elif defined(OBJFORMAT_PEi386)
2557 r = ocRunInit_PEi386 ( oc );
2558 #elif defined(OBJFORMAT_MACHO)
2559 r = ocRunInit_MachO ( oc );
2560 #else
2561 barf("ocTryLoad: initializers not implemented on this platform");
2562 #endif
2563 loading_obj = NULL;
2564
2565 if (!r) { return r; }
2566
2567 oc->status = OBJECT_RESOLVED;
2568
2569 return 1;
2570 }
2571
2572 /* -----------------------------------------------------------------------------
2573 * resolve all the currently unlinked objects in memory
2574 *
2575 * Returns: 1 if ok, 0 on error.
2576 */
2577 static HsInt resolveObjs_ (void)
2578 {
2579 ObjectCode *oc;
2580 int r;
2581
2582 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
2583
2584 for (oc = objects; oc; oc = oc->next) {
2585 r = ocTryLoad(oc);
2586 if (!r)
2587 {
2588 return r;
2589 }
2590 }
2591
2592 #ifdef PROFILING
2593 // collect any new cost centres & CCSs that were defined during runInit
2594 initProfiling2();
2595 #endif
2596
2597 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
2598 return 1;
2599 }
2600
2601 HsInt resolveObjs (void)
2602 {
2603 ACQUIRE_LOCK(&linker_mutex);
2604 HsInt r = resolveObjs_();
2605 RELEASE_LOCK(&linker_mutex);
2606 return r;
2607 }
2608
2609 /* -----------------------------------------------------------------------------
2610 * delete an object from the pool
2611 */
2612 static HsInt unloadObj_ (pathchar *path, rtsBool just_purge)
2613 {
2614 ObjectCode *oc, *prev, *next;
2615 HsBool unloadedAnyObj = HS_BOOL_FALSE;
2616
2617 ASSERT(symhash != NULL);
2618 ASSERT(objects != NULL);
2619
2620 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
2621
2622 prev = NULL;
2623 for (oc = objects; oc; oc = next) {
2624 next = oc->next; // oc might be freed
2625
2626 if (!pathcmp(oc->fileName,path)) {
2627
2628 // these are both idempotent, so in just_purge mode we can
2629 // later call unloadObj() to really unload the object.
2630 removeOcSymbols(oc);
2631 freeOcStablePtrs(oc);
2632
2633 if (!just_purge) {
2634 if (prev == NULL) {
2635 objects = oc->next;
2636 } else {
2637 prev->next = oc->next;
2638 }
2639 ACQUIRE_LOCK(&linker_unloaded_mutex);
2640 oc->next = unloaded_objects;
2641 unloaded_objects = oc;
2642 oc->status = OBJECT_UNLOADED;
2643 RELEASE_LOCK(&linker_unloaded_mutex);
2644 // We do not own oc any more; it can be released at any time by
2645 // the GC in checkUnload().
2646 } else {
2647 prev = oc;
2648 }
2649
2650 /* This could be a member of an archive so continue
2651 * unloading other members. */
2652 unloadedAnyObj = HS_BOOL_TRUE;
2653 } else {
2654 prev = oc;
2655 }
2656 }
2657
2658 if (unloadedAnyObj) {
2659 return 1;
2660 }
2661 else {
2662 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
2663 return 0;
2664 }
2665 }
2666
2667 HsInt unloadObj (pathchar *path)
2668 {
2669 ACQUIRE_LOCK(&linker_mutex);
2670 HsInt r = unloadObj_(path, rtsFalse);
2671 RELEASE_LOCK(&linker_mutex);
2672 return r;
2673 }
2674
2675 HsInt purgeObj (pathchar *path)
2676 {
2677 ACQUIRE_LOCK(&linker_mutex);
2678 HsInt r = unloadObj_(path, rtsTrue);
2679 RELEASE_LOCK(&linker_mutex);
2680 return r;
2681 }
2682
2683 /* -----------------------------------------------------------------------------
2684 * Sanity checking. For each ObjectCode, maintain a list of address ranges
2685 * which may be prodded during relocation, and abort if we try and write
2686 * outside any of these.
2687 */
2688 static void
2689 addProddableBlock ( ObjectCode* oc, void* start, int size )
2690 {
2691 ProddableBlock* pb
2692 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
2693
2694 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
2695 ASSERT(size > 0);
2696 pb->start = start;
2697 pb->size = size;
2698 pb->next = oc->proddables;
2699 oc->proddables = pb;
2700 }
2701
2702 static void
2703 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
2704 {
2705 ProddableBlock* pb;
2706
2707 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
2708 char* s = (char*)(pb->start);
2709 char* e = s + pb->size;
2710 char* a = (char*)addr;
2711 if (a >= s && (a+size) <= e) return;
2712 }
2713 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
2714 }
2715
2716 static void freeProddableBlocks (ObjectCode *oc)
2717 {
2718 ProddableBlock *pb, *next;
2719
2720 for (pb = oc->proddables; pb != NULL; pb = next) {
2721 next = pb->next;
2722 stgFree(pb);
2723 }
2724 oc->proddables = NULL;
2725 }
2726
2727 /* -----------------------------------------------------------------------------
2728 * Section management.
2729 */
2730 static void
2731 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
2732 void* start, StgWord size, StgWord mapped_offset,
2733 void* mapped_start, StgWord mapped_size)
2734 {
2735 s->start = start; /* actual start of section in memory */
2736 s->size = size; /* actual size of section in memory */
2737 s->kind = kind;
2738 s->alloc = alloc;
2739 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
2740
2741 s->mapped_start = mapped_start; /* start of mmap() block */
2742 s->mapped_size = mapped_size; /* size of mmap() block */
2743
2744 IF_DEBUG(linker,
2745 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
2746 start, (void*)((StgWord)start + size),
2747 size, kind ));
2748 }
2749
2750
2751 /* --------------------------------------------------------------------------
2752 * Symbol Extras.
2753 * This is about allocating a small chunk of memory for every symbol in the
2754 * object file. We make sure that the SymboLExtras are always "in range" of
2755 * limited-range PC-relative instructions on various platforms by allocating
2756 * them right next to the object code itself.
2757 */
2758
2759 #if NEED_SYMBOL_EXTRAS
2760 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2761
2762 /*
2763 ocAllocateSymbolExtras
2764
2765 Allocate additional space at the end of the object file image to make room
2766 for jump islands (powerpc, x86_64, arm) and GOT entries (x86_64).
2767
2768 PowerPC relative branch instructions have a 24 bit displacement field.
2769 As PPC code is always 4-byte-aligned, this yields a +-32MB range.
2770 If a particular imported symbol is outside this range, we have to redirect
2771 the jump to a short piece of new code that just loads the 32bit absolute
2772 address and jumps there.
2773 On x86_64, PC-relative jumps and PC-relative accesses to the GOT are limited
2774 to 32 bits (+-2GB).
2775
2776 This function just allocates space for one SymbolExtra for every
2777 undefined symbol in the object file. The code for the jump islands is
2778 filled in by makeSymbolExtra below.
2779 */
2780
2781 static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
2782 {
2783 size_t n;
2784
2785 if (RTS_LINKER_USE_MMAP && USE_CONTIGUOUS_MMAP) {
2786 n = roundUpToPage(oc->fileSize);
2787
2788 /* Keep image and symbol_extras contiguous */
2789 void *new = mmapForLinker(n + (sizeof(SymbolExtra) * count),
2790 MAP_ANONYMOUS, -1, 0);
2791 if (new) {
2792 memcpy(new, oc->image, oc->fileSize);
2793 if (oc->imageMapped) {
2794 munmap(oc->image, n);
2795 }
2796 oc->image = new;
2797 oc->imageMapped = rtsTrue;
2798 oc->fileSize = n + (sizeof(SymbolExtra) * count);
2799 oc->symbol_extras = (SymbolExtra *) (oc->image + n);
2800 }
2801 else {
2802 oc->symbol_extras = NULL;
2803 return 0;
2804 }
2805 }
2806 else if( count > 0 ) {
2807 if (RTS_LINKER_USE_MMAP) {
2808 n = roundUpToPage(oc->fileSize);
2809
2810 oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8);
2811 if (oc->symbol_extras == NULL) return 0;
2812 }
2813 else {
2814 // round up to the nearest 4
2815 int aligned = (oc->fileSize + 3) & ~3;
2816 int misalignment = oc->misalignment;
2817
2818 oc->image -= misalignment;
2819 oc->image = stgReallocBytes( oc->image,
2820 misalignment +
2821 aligned + sizeof (SymbolExtra) * count,
2822 "ocAllocateSymbolExtras" );
2823 oc->image += misalignment;
2824
2825 oc->symbol_extras = (SymbolExtra *) (oc->image + aligned);
2826 }
2827 }
2828
2829 if (oc->symbol_extras != NULL) {
2830 memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
2831 }
2832
2833 oc->first_symbol_extra = first;
2834 oc->n_symbol_extras = count;
2835
2836 return 1;
2837 }
2838
2839 #endif
2840 #endif // NEED_SYMBOL_EXTRAS
2841
2842 #if defined(arm_HOST_ARCH)
2843
2844 static void
2845 ocFlushInstructionCache( ObjectCode *oc )
2846 {
2847 int i;
2848 // Object code
2849 for (i=0; i < oc->n_sections; i++) {
2850 Section *s = &oc->sections[i];
2851 // This is a bit too broad but we don't have any way to determine what
2852 // is certainly code
2853 if (s->kind == SECTIONKIND_CODE_OR_RODATA)
2854 __clear_cache(s->start, (void*) ((uintptr_t) s->start + s->size));
2855 }
2856
2857 // Jump islands
2858 // Note the (+1) to ensure that the last symbol extra is covered by the
2859 // flush.
2860 __clear_cache(oc->symbol_extras, &oc->symbol_extras[oc->n_symbol_extras+1]);
2861 }
2862
2863 #endif
2864
2865 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2866 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2867
2868 static SymbolExtra* makeSymbolExtra( ObjectCode* oc,
2869 unsigned long symbolNumber,
2870 unsigned long target )
2871 {
2872 SymbolExtra *extra;
2873
2874 ASSERT( symbolNumber >= oc->first_symbol_extra
2875 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2876
2877 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2878
2879 #ifdef powerpc_HOST_ARCH
2880 // lis r12, hi16(target)
2881 extra->jumpIsland.lis_r12 = 0x3d80;
2882 extra->jumpIsland.hi_addr = target >> 16;
2883
2884 // ori r12, r12, lo16(target)
2885 extra->jumpIsland.ori_r12_r12 = 0x618c;
2886 extra->jumpIsland.lo_addr = target & 0xffff;
2887
2888 // mtctr r12
2889 extra->jumpIsland.mtctr_r12 = 0x7d8903a6;
2890
2891 // bctr
2892 extra->jumpIsland.bctr = 0x4e800420;
2893 #endif
2894 #ifdef x86_64_HOST_ARCH
2895 // jmp *-14(%rip)
2896 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
2897 extra->addr = target;
2898 memcpy(extra->jumpIsland, jmp, 6);
2899 #endif
2900
2901 return extra;
2902 }
2903
2904 #endif
2905 #endif // defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2906
2907 #ifdef arm_HOST_ARCH
2908 static SymbolExtra* makeArmSymbolExtra( ObjectCode* oc,
2909 unsigned long symbolNumber,
2910 unsigned long target,
2911 int fromThumb,
2912 int toThumb )
2913 {
2914 SymbolExtra *extra;
2915
2916 ASSERT( symbolNumber >= oc->first_symbol_extra
2917 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2918
2919 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2920
2921 // Make sure instruction mode bit is set properly
2922 if (toThumb)
2923 target |= 1;
2924 else
2925 target &= ~1;
2926
2927 if (!fromThumb) {
2928 // In ARM encoding:
2929 // movw r12, #0
2930 // movt r12, #0
2931 // bx r12
2932 uint32_t code[] = { 0xe300c000, 0xe340c000, 0xe12fff1c };
2933
2934 // Patch lower half-word into movw
2935 code[0] |= ((target>>12) & 0xf) << 16;
2936 code[0] |= target & 0xfff;
2937 // Patch upper half-word into movt
2938 target >>= 16;
2939 code[1] |= ((target>>12) & 0xf) << 16;
2940 code[1] |= target & 0xfff;
2941
2942 memcpy(extra->jumpIsland, code, 12);
2943
2944 } else {
2945 // In Thumb encoding:
2946 // movw r12, #0
2947 // movt r12, #0
2948 // bx r12
2949 uint16_t code[] = { 0xf240, 0x0c00,
2950 0xf2c0, 0x0c00,
2951 0x4760 };
2952
2953 // Patch lower half-word into movw
2954 code[0] |= (target>>12) & 0xf;
2955 code[0] |= ((target>>11) & 0x1) << 10;
2956 code[1] |= ((target>>8) & 0x7) << 12;
2957 code[1] |= target & 0xff;
2958 // Patch upper half-word into movt
2959 target >>= 16;
2960 code[2] |= (target>>12) & 0xf;
2961 code[2] |= ((target>>11) & 0x1) << 10;
2962 code[3] |= ((target>>8) & 0x7) << 12;
2963 code[3] |= target & 0xff;
2964
2965 memcpy(extra->jumpIsland, code, 10);
2966 }
2967
2968 return extra;
2969 }
2970 #endif // arm_HOST_ARCH
2971
2972 /* --------------------------------------------------------------------------
2973 * PowerPC specifics (instruction cache flushing)
2974 * ------------------------------------------------------------------------*/
2975
2976 #ifdef powerpc_HOST_ARCH
2977 /*
2978 ocFlushInstructionCache
2979
2980 Flush the data & instruction caches.
2981 Because the PPC has split data/instruction caches, we have to
2982 do that whenever we modify code at runtime.
2983 */
2984
2985 static void
2986 ocFlushInstructionCacheFrom(void* begin, size_t length)
2987 {
2988 size_t n = (length + 3) / 4;
2989 unsigned long* p = begin;
2990
2991 while (n--)
2992 {
2993 __asm__ volatile ( "dcbf 0,%0\n\t"
2994 "sync\n\t"
2995 "icbi 0,%0"
2996 :
2997 : "r" (p)
2998 );
2999 p++;
3000 }
3001 __asm__ volatile ( "sync\n\t"
3002 "isync"
3003 );
3004 }
3005
3006 static void
3007 ocFlushInstructionCache( ObjectCode *oc )
3008 {
3009 /* The main object code */
3010 ocFlushInstructionCacheFrom(oc->image + oc->misalignment, oc->fileSize);
3011
3012 /* Jump Islands */
3013 ocFlushInstructionCacheFrom(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
3014 }
3015 #endif /* powerpc_HOST_ARCH */
3016
3017
3018 /* --------------------------------------------------------------------------
3019 * PEi386(+) specifics (Win32 targets)
3020 * ------------------------------------------------------------------------*/
3021
3022 /* The information for this linker comes from
3023 Microsoft Portable Executable
3024 and Common Object File Format Specification
3025 revision 8.3 February 2013
3026
3027 It can be found online at:
3028
3029 https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx
3030
3031 Things move, so if that fails, try searching for it via
3032
3033 http://www.google.com/search?q=PE+COFF+specification
3034
3035 The ultimate reference for the PE format is the Winnt.h
3036 header file that comes with the Platform SDKs; as always,
3037 implementations will drift wrt their documentation.
3038
3039 A good background article on the PE format is Matt Pietrek's
3040 March 1994 article in Microsoft System Journal (MSJ)
3041 (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
3042 Win32 Portable Executable File Format." The info in there
3043 has recently been updated in a two part article in
3044 MSDN magazine, issues Feb and March 2002,
3045 "Inside Windows: An In-Depth Look into the Win32 Portable
3046 Executable File Format"
3047
3048 John Levine's book "Linkers and Loaders" contains useful
3049 info on PE too.
3050
3051 The PE specification doesn't specify how to do the actual
3052 relocations. For this reason, and because both PE and ELF are
3053 based on COFF, the relocations for the PEi386+ code is based on
3054 the ELF relocations for the equivalent relocation type.
3055
3056 The ELF ABI can be found at
3057
3058 http://www.x86-64.org/documentation/abi.pdf
3059
3060 The current code is based on version 0.99.6 - October 2013
3061 */
3062
3063
3064 #if defined(OBJFORMAT_PEi386)
3065
3066 static int verifyCOFFHeader ( COFF_header *hdr, pathchar *filename);
3067
3068 /* We assume file pointer is right at the
3069 beginning of COFF object.
3070 */
3071 static char *
3072 allocateImageAndTrampolines (
3073 pathchar* arch_name, char* member_name,
3074 FILE* f USED_IF_x86_64_HOST_ARCH,
3075 int size,
3076 int isThin USED_IF_x86_64_HOST_ARCH)
3077 {
3078 char* image;
3079 #if defined(x86_64_HOST_ARCH)
3080 if (!isThin)
3081 {
3082 /* PeCoff contains number of symbols right in it's header, so
3083 we can reserve the room for symbolExtras right here. */
3084 COFF_header hdr;
3085 size_t n;
3086
3087 n = fread(&hdr, 1, sizeof_COFF_header, f);
3088 if (n != sizeof(COFF_header)) {
3089 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%S'",
3090 member_name, arch_name);
3091 return NULL;
3092 }
3093 fseek(f, -sizeof_COFF_header, SEEK_CUR);
3094
3095 if (!verifyCOFFHeader(&hdr, arch_name)) {
3096 return 0;
3097 }
3098
3099 /* We get back 8-byte aligned memory (is that guaranteed?), but
3100 the offsets to the sections within the file are all 4 mod 8
3101 (is that guaranteed?). We therefore need to offset the image
3102 by 4, so that all the pointers are 8-byte aligned, so that
3103 pointer tagging works. */
3104 /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
3105 which equals to 4 for 64-bit case and 0 for 32-bit case. */
3106 /* We allocate trampolines area for all symbols right behind
3107 image data, aligned on 8. */
3108 size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
3109 + hdr.NumberOfSymbols * sizeof(SymbolExtra);
3110 }
3111 #endif
3112 image = VirtualAlloc(NULL, size,
3113 MEM_RESERVE | MEM_COMMIT,
3114 PAGE_EXECUTE_READWRITE);
3115
3116 if (image == NULL) {
3117 errorBelch("%" PATH_FMT ": failed to allocate memory for image for %s",
3118 arch_name, member_name);
3119 return NULL;
3120 }
3121
3122 return image + PEi386_IMAGE_OFFSET;
3123 }
3124
3125 static int findAndLoadImportLibrary(ObjectCode* oc)
3126 {
3127 int i;
3128
3129 COFF_header* hdr;
3130 COFF_section* sectab;
3131 COFF_symbol* symtab;
3132 UChar* strtab;
3133
3134 hdr = (COFF_header*)(oc->image);
3135 sectab = (COFF_section*)(
3136 ((UChar*)(oc->image))
3137 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3138 );
3139
3140 symtab = (COFF_symbol*)(
3141 ((UChar*)(oc->image))
3142 + hdr->PointerToSymbolTable
3143 );
3144
3145 strtab = ((UChar*)symtab)
3146 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3147
3148 for (i = 0; i < oc->n_sections; i++)
3149 {
3150 COFF_section* sectab_i
3151 = (COFF_section*)myindex(sizeof_COFF_section, sectab, i);
3152
3153 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3154
3155 // Find the first entry containing a valid .idata$7 section.
3156 if (strcmp(secname, ".idata$7") == 0) {
3157 /* First load the containing DLL if not loaded. */
3158 Section section = oc->sections[i];
3159
3160 pathchar* dirName = pathdir(oc->fileName);
3161 HsPtr token = addLibrarySearchPath(dirName);
3162 stgFree(dirName);
3163 char* dllName = (char*)section.start;
3164
3165 if (strlen(dllName) == 0 || dllName[0] == ' ')
3166 {
3167 continue;
3168 }
3169
3170 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand '%ls' => `%s'\n", oc->fileName, dllName));
3171
3172 pathchar* dll = mkPath(dllName);
3173 removeLibrarySearchPath(token);
3174
3175 const char* result = addDLL(dll);
3176 stgFree(dll);
3177
3178 if (result != NULL) {
3179 errorBelch("Could not load `%s'. Reason: %s\n", (char*)dllName, result);
3180 return 0;
3181 }
3182
3183 break;
3184 }
3185
3186 stgFree(secname);
3187 }
3188
3189 return 1;
3190 }
3191
3192 static int checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f)
3193 {
3194 char* image;
3195 static HsBool load_dll_warn = HS_BOOL_FALSE;
3196
3197 if (load_dll_warn) { return 0; }
3198
3199 /* Based on Import Library specification. PE Spec section 7.1 */
3200
3201 COFF_import_header hdr;
3202 size_t n;
3203
3204 n = fread(&hdr, 1, sizeof_COFF_import_Header, f);
3205 if (n != sizeof(COFF_header)) {
3206 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%" PATH_FMT "'\n",
3207 member_name, arch_name);
3208 return 0;
3209 }
3210
3211 if (hdr.Sig1 != 0x0 || hdr.Sig2 != 0xFFFF) {
3212 fseek(f, -sizeof_COFF_import_Header, SEEK_CUR);
3213 IF_DEBUG(linker, debugBelch("loadArchive: Object `%s` is not an import lib. Skipping...\n", member_name));
3214 return 0;
3215 }
3216
3217 IF_DEBUG(linker, debugBelch("loadArchive: reading %d bytes at %ld\n", hdr.SizeOfData, ftell(f)));
3218
3219 image = malloc(hdr.SizeOfData);
3220 n = fread(image, 1, hdr.SizeOfData, f);
3221 if (n != hdr.SizeOfData) {
3222 errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n",
3223 member_name, arch_name);
3224 }
3225
3226 char* symbol = strtok(image, "\0");
3227 int symLen = strlen(symbol) + 1;
3228 int nameLen = n - symLen;
3229 char* dllName = malloc(sizeof(char) * nameLen);
3230 dllName = strncpy(dllName, image + symLen, nameLen);
3231 pathchar* dll = malloc(sizeof(wchar_t) * nameLen);
3232 mbstowcs(dll, dllName, nameLen);
3233 free(dllName);
3234
3235 IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%ls'\n", symbol, dll));
3236 const char* result = addDLL(dll);
3237
3238 free(image);
3239
3240 if (result != NULL) {
3241 errorBelch("Could not load `%ls'. Reason: %s\n", dll, result);
3242 load_dll_warn = HS_BOOL_TRUE;
3243
3244 free(dll);
3245 fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
3246 return 0;
3247 }
3248
3249 free(dll);
3250 return 1;
3251 }
3252
3253 /* We use myindex to calculate array addresses, rather than
3254 simply doing the normal subscript thing. That's because
3255 some of the above structs have sizes which are not
3256 a whole number of words. GCC rounds their sizes up to a
3257 whole number of words, which means that the address calcs
3258 arising from using normal C indexing or pointer arithmetic
3259 are just plain wrong. Sigh.
3260 */
3261 static UChar *
3262 myindex ( int scale, void* base, int index )
3263 {
3264 return
3265 ((UChar*)base) + scale * index;
3266 }
3267
3268
3269 static void
3270 printName ( UChar* name, UChar* strtab )
3271 {
3272 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3273 UInt32 strtab_offset = * (UInt32*)(name+4);
3274 debugBelch("%s", strtab + strtab_offset );
3275 } else {
3276 int i;
3277 for (i = 0; i < 8; i++) {
3278 if (name[i] == 0) break;
3279 debugBelch("%c", name[i] );
3280 }
3281 }
3282 }
3283
3284
3285 static void
3286 copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
3287 {
3288 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3289 UInt32 strtab_offset = * (UInt32*)(name+4);
3290 strncpy ( (char*)dst, (char*)strtab+strtab_offset, dstSize );
3291 dst[dstSize-1] = 0;
3292 } else {
3293 int i = 0;
3294 while (1) {
3295 if (i >= 8) break;
3296 if (name[i] == 0) break;
3297 dst[i] = name[i];
3298 i++;
3299 }
3300 dst[i] = 0;
3301 }
3302 }
3303
3304
3305 static UChar *
3306 cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
3307 {
3308 UChar* newstr;
3309 /* If the string is longer than 8 bytes, look in the
3310 string table for it -- this will be correctly zero terminated.
3311 */
3312 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3313 UInt32 strtab_offset = * (UInt32*)(name+4);
3314 return ((UChar*)strtab) + strtab_offset;
3315 }
3316 /* Otherwise, if shorter than 8 bytes, return the original,
3317 which by defn is correctly terminated.
3318 */
3319 if (name[7]==0) return name;
3320 /* The annoying case: 8 bytes. Copy into a temporary
3321 (XXX which is never freed ...)
3322 */
3323 newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
3324 ASSERT(newstr);
3325 strncpy((char*)newstr,(char*)name,8);
3326 newstr[8] = 0;
3327 return newstr;
3328 }
3329
3330 /* Getting the name of a section is mildly tricky, so we make a
3331 function for it. Sadly, in one case we have to copy the string
3332 (when it is exactly 8 bytes long there's no trailing '\0'), so for
3333 consistency we *always* copy the string; the caller must free it
3334 */
3335 static char *
3336 cstring_from_section_name (UChar* name, UChar* strtab)
3337 {
3338 char *newstr;
3339
3340 if (name[0]=='/') {
3341 int strtab_offset = strtol((char*)name+1,NULL,10);
3342 int len = strlen(((char*)strtab) + strtab_offset);
3343
3344 newstr = stgMallocBytes(len+1, "cstring_from_section_symbol_name");
3345 strcpy((char*)newstr, (char*)((UChar*)strtab) + strtab_offset);
3346 return newstr;
3347 }
3348 else
3349 {
3350 newstr = stgMallocBytes(9, "cstring_from_section_symbol_name");
3351 ASSERT(newstr);
3352 strncpy((char*)newstr,(char*)name,8);
3353 newstr[8] = 0;
3354 return newstr;
3355 }
3356 }
3357
3358 /* See Note [mingw-w64 name decoration scheme] */
3359 #ifndef x86_64_HOST_ARCH
3360 static void
3361 zapTrailingAtSign ( UChar* sym )
3362 {
3363 # define my_isdigit(c) ((c) >= '0' && (c) <= '9')
3364 int i, j;
3365 if (sym[0] == 0) return;
3366 i = 0;
3367 while (sym[i] != 0) i++;
3368 i--;
3369 j = i;
3370 while (j > 0 && my_isdigit(sym[j])) j--;
3371 if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
3372 # undef my_isdigit
3373 }
3374 #endif
3375
3376 /* See Note [mingw-w64 name decoration scheme] */
3377 #ifndef x86_64_HOST_ARCH
3378 #define STRIP_LEADING_UNDERSCORE 1
3379 #else
3380 #define STRIP_LEADING_UNDERSCORE 0
3381 #endif
3382
3383 /*
3384 Note [mingw-w64 name decoration scheme]
3385
3386 What's going on with name decoration? Well, original code
3387 have some crufty and ad-hocish paths related mostly to very old
3388 mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty
3389 uniform and MS-compatible decoration scheme across its tools and runtime.
3390
3391 The scheme is pretty straightforward: on 32 bit objects symbols are exported
3392 with underscore prepended (and @ + stack size suffix appended for stdcall
3393 functions), on 64 bits no underscore is prepended and no suffix is appended
3394 because we have no stdcall convention on 64 bits.
3395
3396 See #9218
3397 */
3398
3399 static SymbolAddr*
3400 lookupSymbolInDLLs ( UChar *lbl )
3401 {
3402 OpenedDLL* o_dll;
3403 SymbolAddr* sym;
3404
3405 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
3406 /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
3407
3408 sym = GetProcAddress(o_dll->instance, (char*)(lbl+STRIP_LEADING_UNDERSCORE));
3409 if (sym != NULL) {
3410 /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
3411 return sym;
3412 }
3413
3414 /* Ticket #2283.
3415 Long description: http://support.microsoft.com/kb/132044
3416 tl;dr:
3417 If C/C++ compiler sees __declspec(dllimport) ... foo ...
3418 it generates call *__imp_foo, and __imp_foo here has exactly
3419 the same semantics as in __imp_foo = GetProcAddress(..., "foo")
3420 */
3421 if (sym == NULL && strncmp ((const char*)lbl, "__imp_", 6) == 0) {
3422 sym = GetProcAddress(o_dll->instance, (char*)(lbl+6+STRIP_LEADING_UNDERSCORE));
3423 if (sym != NULL) {
3424 IndirectAddr* ret;
3425 ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
3426 ret->addr = sym;
3427 ret->next = indirects;
3428 indirects = ret;
3429 IF_DEBUG(linker,
3430 debugBelch("warning: %s from %S is linked instead of %s\n",
3431 (char*)(lbl+6+STRIP_LEADING_UNDERSCORE), o_dll->name, (char*)lbl));
3432 return (void*) & ret->addr;
3433 }
3434 }
3435
3436 sym = GetProcAddress(o_dll->instance, (char*)lbl);
3437 if (sym != NULL) {
3438 /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
3439 return sym;
3440 }
3441 }
3442 return NULL;
3443 }
3444
3445 static int
3446 verifyCOFFHeader (COFF_header *hdr, pathchar *fileName)
3447 {
3448 #if defined(i386_HOST_ARCH)
3449 if (hdr->Machine != 0x14c) {
3450 errorBelch("%" PATH_FMT ": Not x86 PEi386", fileName);
3451 return 0;
3452 }
3453 #elif defined(x86_64_HOST_ARCH)
3454 if (hdr->Machine != 0x8664) {
3455 errorBelch("%" PATH_FMT ": Not x86_64 PEi386", fileName);
3456 return 0;
3457 }
3458 #else
3459 errorBelch("PEi386 not supported on this arch");
3460 #endif
3461
3462 if (hdr->SizeOfOptionalHeader != 0) {
3463 errorBelch("%" PATH_FMT ": PEi386 with nonempty optional header",
3464 fileName);
3465 return 0;
3466 }
3467 if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
3468 (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
3469 (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
3470 (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
3471 errorBelch("%" PATH_FMT ": Not a PEi386 object file", fileName);
3472 return 0;
3473 }
3474 if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
3475 /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
3476 errorBelch("%" PATH_FMT ": Invalid PEi386 word size or endiannness: %d",
3477 fileName,
3478 (int)(hdr->Characteristics));
3479 return 0;
3480 }
3481 return 1;
3482 }
3483
3484 static int
3485 ocVerifyImage_PEi386 ( ObjectCode* oc )
3486 {
3487 int i;
3488 UInt32 j, noRelocs;
3489 COFF_header* hdr;
3490 COFF_section* sectab;
3491 COFF_symbol* symtab;
3492 UChar* strtab;
3493 /* debugBelch("\nLOADING %s\n", oc->fileName); */
3494 hdr = (COFF_header*)(oc->image);
3495 sectab = (COFF_section*) (
3496 ((UChar*)(oc->image))
3497 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3498 );
3499 symtab = (COFF_symbol*) (
3500 ((UChar*)(oc->image))
3501 + hdr->PointerToSymbolTable
3502 );
3503 strtab = ((UChar*)symtab)
3504 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3505
3506 if (!verifyCOFFHeader(hdr, oc->fileName)) {
3507 return 0;
3508 }
3509
3510 /* If the string table size is way crazy, this might indicate that
3511 there are more than 64k relocations, despite claims to the
3512 contrary. Hence this test. */
3513 /* debugBelch("strtab size %d\n", * (UInt32*)strtab); */
3514 #if 0
3515 if ( (*(UInt32*)strtab) > 600000 ) {
3516 /* Note that 600k has no special significance other than being
3517 big enough to handle the almost-2MB-sized lumps that
3518 constitute HSwin32*.o. */
3519 debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
3520 return 0;
3521 }
3522 #endif
3523
3524 /* .BSS Section is initialized in ocGetNames_PEi386
3525 but we need the Sections array initialized here already. */
3526 Section *sections;
3527 sections = (Section*)stgCallocBytes(
3528 sizeof(Section),
3529 hdr->NumberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */
3530 "ocVerifyImage_PEi386(sections)");
3531 oc->sections = sections;
3532 oc->n_sections = hdr->NumberOfSections + 1;
3533
3534 /* Initialize the Sections */
3535 for (i = 0; i < hdr->NumberOfSections; i++) {
3536 COFF_section* sectab_i
3537 = (COFF_section*)
3538 myindex(sizeof_COFF_section, sectab, i);
3539
3540 /* Calculate the start of the data section */
3541 sections[i].start = oc->image + sectab_i->PointerToRawData;
3542 }
3543
3544 /* No further verification after this point; only debug printing. */
3545 i = 0;
3546 IF_DEBUG(linker, i=1);
3547 if (i == 0) return 1;
3548
3549 debugBelch("sectab offset = %" FMT_SizeT "\n",
3550 ((UChar*)sectab) - ((UChar*)hdr) );
3551 debugBelch("symtab offset = %" FMT_SizeT "\n",
3552 ((UChar*)symtab) - ((UChar*)hdr) );
3553 debugBelch("strtab offset = %" FMT_SizeT "\n",
3554 ((UChar*)strtab) - ((UChar*)hdr) );
3555
3556 debugBelch("\n" );
3557 debugBelch( "Machine: 0x%x\n", (UInt32)(hdr->Machine) );
3558 debugBelch( "# sections: %d\n", (UInt32)(hdr->NumberOfSections) );
3559 debugBelch( "time/date: 0x%x\n", (UInt32)(hdr->TimeDateStamp) );
3560 debugBelch( "symtab offset: %d\n", (UInt32)(hdr->PointerToSymbolTable) );
3561 debugBelch( "# symbols: %d\n", (UInt32)(hdr->NumberOfSymbols) );
3562 debugBelch( "sz of opt hdr: %d\n", (UInt32)(hdr->SizeOfOptionalHeader) );
3563 debugBelch( "characteristics: 0x%x\n", (UInt32)(hdr->Characteristics) );
3564
3565 /* Print the section table. */
3566 debugBelch("\n" );
3567 for (i = 0; i < hdr->NumberOfSections; i++) {
3568 COFF_reloc* reltab;
3569 COFF_section* sectab_i
3570 = (COFF_section*)
3571 myindex ( sizeof_COFF_section, sectab, i );
3572 Section section = sections[i];
3573 debugBelch(
3574 "\n"
3575 "section %d\n"
3576 " name `",
3577 i
3578 );
3579 printName ( sectab_i->Name, strtab );
3580 debugBelch(
3581 "'\n"
3582 " vsize %d\n"
3583 " vaddr %d\n"
3584 " data sz %d\n"
3585 " data off 0x%p\n"
3586 " num rel %d\n"
3587 " off rel %d\n"
3588 " ptr raw 0x%x\n",
3589 sectab_i->VirtualSize,
3590 sectab_i->VirtualAddress,
3591 sectab_i->SizeOfRawData,
3592 section.start,
3593 sectab_i->NumberOfRelocations,
3594 sectab_i->PointerToRelocations,
3595 sectab_i->PointerToRawData
3596 );
3597 reltab = (COFF_reloc*) (
3598 ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
3599 );
3600
3601 if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
3602 /* If the relocation field (a short) has overflowed, the
3603 * real count can be found in the first reloc entry.
3604 *
3605 * See Section 4.1 (last para) of the PE spec (rev6.0).
3606 */
3607 COFF_reloc* rel = (COFF_reloc*)
3608 myindex ( sizeof_COFF_reloc, reltab, 0 );
3609 noRelocs = rel->VirtualAddress;
3610 j = 1;
3611 } else {
3612 noRelocs = sectab_i->NumberOfRelocations;
3613 j = 0;
3614 }
3615
3616 for (; j < noRelocs; j++) {
3617 COFF_symbol* sym;
3618 COFF_reloc* rel = (COFF_reloc*)
3619 myindex ( sizeof_COFF_reloc, reltab, j );
3620 debugBelch(
3621 " type 0x%-4x vaddr 0x%-8x name `",
3622 (UInt32)rel->Type,
3623 rel->VirtualAddress );
3624 sym = (COFF_symbol*)
3625 myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
3626 /* Hmm..mysterious looking offset - what's it for? SOF */
3627 printName ( sym->Name, strtab -10 );
3628 debugBelch("'\n" );
3629 }
3630
3631 debugBelch("\n" );
3632 }
3633 debugBelch("\n" );
3634 debugBelch("string table has size 0x%x\n", * (UInt32*)strtab );
3635 debugBelch("---START of string table---\n");
3636 for (i = 4; i < *(Int32*)strtab; i++) {
3637 if (strtab[i] == 0)
3638 debugBelch("\n"); else
3639 debugBelch("%c", strtab[i] );
3640 }
3641 debugBelch("--- END of string table---\n");
3642
3643 debugBelch("\n" );
3644 i = 0;
3645 while (1) {
3646 COFF_symbol* symtab_i;
3647 if (i >= (Int32)(hdr->NumberOfSymbols)) break;
3648 symtab_i = (COFF_symbol*)
3649 myindex ( sizeof_COFF_symbol, symtab, i );
3650 debugBelch(
3651 "symbol %d\n"
3652 " name `",
3653 i
3654 );
3655 printName ( symtab_i->Name, strtab );
3656 debugBelch(
3657 "'\n"
3658 " value 0x%x\n"
3659 " 1+sec# %d\n"
3660 " type 0x%x\n"
3661 " sclass 0x%x\n"
3662 " nAux %d\n",
3663 symtab_i->Value,
3664 (Int32)(symtab_i->SectionNumber),
3665 (UInt32)symtab_i->Type,
3666 (UInt32)symtab_i->StorageClass,
3667 (UInt32)symtab_i->NumberOfAuxSymbols
3668 );
3669 i += symtab_i->NumberOfAuxSymbols;
3670 i++;
3671 }
3672
3673 debugBelch("\n" );
3674 return 1;
3675 }
3676
3677
3678 static int
3679 ocGetNames_PEi386 ( ObjectCode* oc )
3680 {
3681 COFF_header* hdr;
3682 COFF_section* sectab;
3683 COFF_symbol* symtab;
3684 UChar* strtab;
3685
3686 UChar* sname;
3687 SymbolAddr* addr;
3688 int i;
3689
3690 hdr = (COFF_header*)(oc->image);
3691 sectab = (COFF_section*) (
3692 ((UChar*)(oc->image))
3693 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3694 );
3695 symtab = (COFF_symbol*) (
3696 ((UChar*)(oc->image))
3697 + hdr->PointerToSymbolTable
3698 );
3699 strtab = ((UChar*)(oc->image))
3700 + hdr->PointerToSymbolTable
3701 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3702
3703 /* Allocate space for any (local, anonymous) .bss sections. */
3704
3705 for (i = 0; i < hdr->NumberOfSections; i++) {
3706 UInt32 bss_sz;
3707 UChar* zspace;
3708 COFF_section* sectab_i
3709 = (COFF_section*)
3710 myindex ( sizeof_COFF_section, sectab, i );
3711
3712 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3713
3714 if (0 != strcmp(secname, ".bss")) {
3715 stgFree(secname);
3716 continue;
3717 }
3718
3719 stgFree(secname);
3720
3721 /* sof 10/05: the PE spec text isn't too clear regarding what
3722 * the SizeOfRawData field is supposed to hold for object
3723 * file sections containing just uninitialized data -- for executables,
3724 * it is supposed to be zero; unclear what it's supposed to be
3725 * for object files. However, VirtualSize is guaranteed to be
3726 * zero for object files, which definitely suggests that SizeOfRawData
3727 * will be non-zero (where else would the size of this .bss section be
3728 * stored?) Looking at the COFF_section info for incoming object files,
3729 * this certainly appears to be the case.
3730 *
3731 * => I suspect we've been incorrectly handling .bss sections in (relocatable)
3732 * object files up until now. This turned out to bite us with ghc-6.4.1's use
3733 * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
3734 * variable decls into the .bss section. (The specific function in Q which
3735 * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
3736 */
3737 if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
3738 /* This is a non-empty .bss section.
3739 Allocate zeroed space for it */
3740 bss_sz = sectab_i->VirtualSize;
3741 if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
3742 zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
3743 oc->sections[i].start = zspace;
3744 addProddableBlock(oc, zspace, bss_sz);
3745 /* debugBelch("BSS anon section at 0x%x\n", zspace); */
3746 }
3747
3748 /* Copy section information into the ObjectCode. */
3749
3750 for (i = 0; i < hdr->NumberOfSections; i++) {
3751 UChar* start;
3752 UChar* end;
3753 UInt32 sz;
3754
3755 /* By default consider all section as CODE or DATA, which means we want to load them. */
3756 SectionKind kind
3757 = SECTIONKIND_CODE_OR_RODATA;
3758 COFF_section* sectab_i
3759 = (COFF_section*)
3760 myindex ( sizeof_COFF_section, sectab, i );
3761 Section section = oc->sections[i];
3762
3763 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3764
3765 IF_DEBUG(linker, debugBelch("section name = %s\n", secname ));
3766
3767 /* The PE file section flag indicates whether the section contains code or data. */
3768 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE ||
3769 sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
3770 kind = SECTIONKIND_CODE_OR_RODATA;
3771
3772 /* Check next if it contains any uninitialized data */
3773 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_UNINITIALIZED_DATA)
3774 kind = SECTIONKIND_RWDATA;
3775
3776 /* Finally check if it can be discarded. This will also ignore .debug sections */
3777 if (sectab_i->Characteristics & MYIMAGE_SCN_MEM_DISCARDABLE ||
3778 sectab_i->Characteristics & MYIMAGE_SCN_LNK_REMOVE)
3779 kind = SECTIONKIND_OTHER;
3780
3781 if (0==strcmp(".ctors", (char*)secname))
3782 kind = SECTIONKIND_INIT_ARRAY;
3783
3784 ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->VirtualSize == 0);
3785 sz = sectab_i->SizeOfRawData;
3786 if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;
3787
3788 start = section.start;
3789 end = start + sz - 1;
3790
3791 if (kind != SECTIONKIND_OTHER && end >= start) {
3792 addSection(&oc->sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
3793 addProddableBlock(oc, start, sz);
3794 }
3795
3796 stgFree(secname);
3797 }
3798
3799 /* Copy exported symbols into the ObjectCode. */
3800
3801 oc->n_symbols = hdr->NumberOfSymbols;
3802 oc->symbols = stgCallocBytes(sizeof(SymbolName*), oc->n_symbols,
3803 "ocGetNames_PEi386(oc->symbols)");
3804
3805 /* Work out the size of the global BSS section */
3806 StgWord globalBssSize = 0;
3807 for (i=0; i < (int)hdr->NumberOfSymbols; i++) {
3808 COFF_symbol* symtab_i;
3809 symtab_i = (COFF_symbol*)
3810 myindex ( sizeof_COFF_symbol, symtab, i );
3811 if (symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3812 && symtab_i->Value > 0
3813 && symtab_i->StorageClass != MYIMAGE_SYM_CLASS_SECTION) {
3814 globalBssSize += symtab_i->Value;
3815 }
3816 i += symtab_i->NumberOfAuxSymbols;
3817 }
3818
3819 /* Allocate BSS space */
3820 SymbolAddr* bss = NULL;
3821 if (globalBssSize > 0) {
3822 bss = stgCallocBytes(1, globalBssSize,
3823 "ocGetNames_PEi386(non-anonymous bss)");
3824 addSection(&oc->sections[oc->n_sections-1],
3825 SECTIONKIND_RWDATA, SECTION_MALLOC,
3826 bss, globalBssSize, 0, 0, 0);
3827 IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
3828 addProddableBlock(oc, bss, globalBssSize);
3829 } else {
3830 addSection(&oc->sections[oc->n_sections-1],
3831 SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
3832 }
3833
3834 for (i = 0; i < oc->n_symbols; i++) {
3835 COFF_symbol* symtab_i;
3836 symtab_i = (COFF_symbol*)
3837 myindex ( sizeof_COFF_symbol, symtab, i );
3838
3839 addr = NULL;
3840 HsBool isWeak = HS_BOOL_FALSE;
3841 if ( symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED
3842 && symtab_i->SectionNumber > 0) {
3843 /* This symbol is global and defined, viz, exported */
3844 /* for MYIMAGE_SYMCLASS_EXTERNAL
3845 && !MYIMAGE_SYM_UNDEFINED,
3846 the address of the symbol is:
3847 address of relevant section + offset in section
3848 */
3849 COFF_section* sectabent
3850 = (COFF_section*) myindex ( sizeof_COFF_section,
3851 sectab,
3852 symtab_i->SectionNumber-1 );
3853 if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL
3854 || ( symtab_i->StorageClass == MYIMAGE_SYM_CLASS_STATIC
3855 && sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT)
3856 ) {
3857 addr = (void*)((size_t)oc->sections[symtab_i->SectionNumber-1].start
3858 + symtab_i->Value);
3859 if (sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT) {
3860 isWeak = HS_BOOL_TRUE;
3861 }
3862 }
3863 }
3864 else if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_WEAK_EXTERNAL) {
3865 isWeak = HS_BOOL_TRUE;
3866 }
3867 else if ( symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3868 && symtab_i->Value > 0) {
3869 /* This symbol isn't in any section at all, ie, global bss.
3870 Allocate zeroed space for it from the BSS section */
3871 addr = bss;
3872 bss = (SymbolAddr*)((StgWord)bss + (StgWord)symtab_i->Value);
3873 IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symtab_i->Value));
3874 }
3875
3876 sname = cstring_from_COFF_symbol_name(symtab_i->Name, strtab);
3877 if (addr != NULL || isWeak == HS_BOOL_TRUE) {
3878
3879 /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
3880 IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname);)
3881 ASSERT(i >= 0 && i < oc->n_symbols);
3882 /* cstring_from_COFF_symbol_name always succeeds. */
3883 oc->symbols[i] = (SymbolName*)sname;
3884 if (isWeak == HS_BOOL_TRUE) {
3885 setWeakSymbol(oc, sname);
3886 }
3887
3888 if (! ghciInsertSymbolTable(oc->fileName, symhash, (SymbolName*)sname, addr,
3889 isWeak, oc)) {
3890 return 0;
3891 }
3892 } else {
3893 /* We're skipping the symbol, but if we ever load this
3894 object file we'll want to skip it then too. */
3895 oc->symbols[i] = NULL;
3896
3897 # if 0
3898 debugBelch(
3899 "IGNORING symbol %d\n"
3900 " name `",
3901 i
3902 );
3903 printName ( symtab_i->Name, strtab );
3904 debugBelch(
3905 "'\n"
3906 " value 0x%x\n"
3907 " 1+sec# %d\n"
3908 " type 0x%x\n"
3909 " sclass 0x%x\n"
3910 " nAux %d\n",
3911 symtab_i->Value,
3912 (Int32)(symtab_i->SectionNumber),
3913 (UInt32)symtab_i->Type,
3914 (UInt32)symtab_i->StorageClass,
3915 (UInt32)symtab_i->NumberOfAuxSymbols
3916 );
3917 # endif
3918 }
3919
3920 i