Added support for deprecated POSIX functions on Windows.
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31
32 #if !defined(mingw32_HOST_OS)
33 #include "posix/Signals.h"
34 #endif
35
36 // get protos for is*()
37 #include <ctype.h>
38
39 #ifdef HAVE_SYS_TYPES_H
40 #include <sys/types.h>
41 #endif
42
43 #include <inttypes.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <stdio.h>
47 #include <assert.h>
48 #include <libgen.h>
49
50 #ifdef HAVE_SYS_STAT_H
51 #include <sys/stat.h>
52 #endif
53
54 #if defined(HAVE_DLFCN_H)
55 #include <dlfcn.h>
56 #endif
57
58 /* PowerPC and ARM have relative branch instructions with only 24 bit
59 * displacements and therefore need jump islands contiguous with each object
60 * code module.
61 */
62 #if defined(powerpc_HOST_ARCH)
63 #define SHORT_REL_BRANCH 1
64 #endif
65 #if defined(arm_HOST_ARCH)
66 #define SHORT_REL_BRANCH 1
67 #endif
68
69 #if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
70 #define USE_CONTIGUOUS_MMAP 1
71 #else
72 #define USE_CONTIGUOUS_MMAP 0
73 #endif
74
75 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
76 # define OBJFORMAT_ELF
77 # include <regex.h> // regex is already used by dlopen() so this is OK
78 // to use here without requiring an additional lib
79 #elif defined (mingw32_HOST_OS)
80 # define OBJFORMAT_PEi386
81 # include <windows.h>
82 # include <shfolder.h> /* SHGetFolderPathW */
83 # include <math.h>
84 # include <wchar.h>
85 #elif defined(darwin_HOST_OS)
86 # define OBJFORMAT_MACHO
87 # include <regex.h>
88 # include <mach/machine.h>
89 # include <mach-o/fat.h>
90 # include <mach-o/loader.h>
91 # include <mach-o/nlist.h>
92 # include <mach-o/reloc.h>
93 #if defined(powerpc_HOST_ARCH)
94 # include <mach-o/ppc/reloc.h>
95 #endif
96 #if defined(x86_64_HOST_ARCH)
97 # include <mach-o/x86_64/reloc.h>
98 #endif
99 #endif
100
101 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
102 #define ALWAYS_PIC
103 #endif
104
105 #if defined(dragonfly_HOST_OS)
106 #include <sys/tls.h>
107 #endif
108
109 /* SymbolInfo tracks a symbol's address, the object code from which
110 it originated, and whether or not it's weak.
111
112 RtsSymbolInfo is used to track the state of the symbols currently
113 loaded or to be loaded by the Linker.
114
115 Where the information in the `ObjectCode` is used to track the
116 original status of the symbol inside the `ObjectCode`.
117
118 A weak symbol that has been used will still be marked as weak
119 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
120 */
121 typedef struct _RtsSymbolInfo {
122 SymbolAddr* value;
123 ObjectCode *owner;
124 HsBool weak;
125 } RtsSymbolInfo;
126
127 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
128 This hashtable will contain information on all symbols
129 that we know of, however the .o they are in may not be loaded.
130
131 Until the ObjectCode the symbol belongs to is actually
132 loaded this symbol may be replaced. So do not rely on
133 addresses of unloaded symbols.
134
135 Note [runtime-linker-phases]
136 --------------------------------------
137 Broadly the behavior of the runtime linker can be
138 split into the following four phases:
139
140 - Indexing (e.g. ocVerifyImage and ocGetNames)
141 - Initialization (e.g. ocResolve and ocRunInit)
142 - Resolve (e.g. resolveObjs())
143 - Lookup (e.g. lookupSymbol)
144
145 This is to enable lazy loading of symbols. Eager loading is problematic
146 as it means that all symbols must be available, even those which we will
147 never use. This is especially painful of Windows, where the number of
148 libraries required to link things like mingwex grows to be quite high.
149
150 We proceed through these stages as follows,
151
152 * During Indexing we verify and open the ObjectCode and
153 perform a quick scan/indexing of the ObjectCode. All the work
154 required to actually load the ObjectCode is done.
155
156 All symbols from the ObjectCode is also inserted into
157 `symhash`, where possible duplicates are handled via the semantics
158 described in `ghciInsertSymbolTable`.
159
160 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
161 depending on whether they are an archive members or not.
162
163 * During initialization we load ObjectCode, perform relocations, execute
164 static constructors etc. This phase may trigger other ObjectCodes to
165 be loaded because of the calls to lookupSymbol.
166
167 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
168 previous status was `OBJECT_LOADED`.
169
170 * During resolve we attempt to resolve all the symbols needed for the
171 initial link. This essentially means, that for any ObjectCode given
172 directly to the command-line we perform lookupSymbols on the required
173 symbols. lookupSymbols may trigger the loading of additional ObjectCode
174 if required.
175
176 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
177 the previous status was `OBJECT_NEEDED`.
178
179 * Lookup symbols is used to lookup any symbols required, both during initial
180 link and during statement and expression compilations in the REPL.
181 Declaration of e.g. an foreign import, will eventually call lookupSymbol
182 which will either fail (symbol unknown) or succeed (and possibly triggered a
183 load).
184
185 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
186
187 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
188 by calling unloadObj and starting over.
189 When a new declaration or statement is performed ultimately lookupSymbol is called
190 without doing a re-link.
191
192 The goal of these different phases is to allow the linker to be able to perform
193 "lazy loading" of ObjectCode. The reason for this is that we want to only link
194 in symbols that are actually required for the link. This reduces:
195
196 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
197 then we don't need to load libB. This means the dependency chain for libraries
198 such as mingw32 and mingwex can be broken down.
199
200 2) The number of duplicate symbols, since now only symbols that are
201 true duplicates will display the error.
202 */
203 static /*Str*/HashTable *symhash;
204
205 /* List of currently loaded objects */
206 ObjectCode *objects = NULL; /* initially empty */
207
208 /* List of objects that have been unloaded via unloadObj(), but are waiting
209 to be actually freed via checkUnload() */
210 ObjectCode *unloaded_objects = NULL; /* initially empty */
211
212 #ifdef THREADED_RTS
213 /* This protects all the Linker's global state except unloaded_objects */
214 Mutex linker_mutex;
215 /*
216 * This protects unloaded_objects. We have a separate mutex for this, because
217 * the GC needs to access unloaded_objects in checkUnload, while the linker only
218 * needs to access unloaded_objects in unloadObj(), so this allows most linker
219 * operations proceed concurrently with the GC.
220 */
221 Mutex linker_unloaded_mutex;
222 #endif
223
224 /* Type of the initializer */
225 typedef void (*init_t) (int argc, char **argv, char **env);
226
227 static HsInt isAlreadyLoaded( pathchar *path );
228 static HsInt loadOc( ObjectCode* oc );
229 static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
230 rtsBool mapped, char *archiveMemberName,
231 int misalignment
232 );
233
234 // Use wchar_t for pathnames on Windows (#5697)
235 #if defined(mingw32_HOST_OS)
236 #define pathcmp wcscmp
237 #define pathlen wcslen
238 #define pathopen _wfopen
239 #define pathstat _wstat
240 #define struct_stat struct _stat
241 #define open wopen
242 #define WSTR(s) L##s
243 #define pathprintf swprintf
244 #define pathsize sizeof(wchar_t)
245 #else
246 #define pathcmp strcmp
247 #define pathlen strlen
248 #define pathopen fopen
249 #define pathstat stat
250 #define struct_stat struct stat
251 #define WSTR(s) s
252 #define pathprintf snprintf
253 #define pathsize sizeof(char)
254 #endif
255
256 static pathchar* pathdup(pathchar *path)
257 {
258 pathchar *ret;
259 #if defined(mingw32_HOST_OS)
260 ret = wcsdup(path);
261 #else
262 /* sigh, strdup() isn't a POSIX function, so do it the long way */
263 ret = stgMallocBytes( strlen(path)+1, "pathdup" );
264 strcpy(ret, path);
265 #endif
266 return ret;
267 }
268
269 static pathchar* pathdir(pathchar *path)
270 {
271 pathchar *ret;
272 #if defined(mingw32_HOST_OS)
273 pathchar *drive, *dirName;
274 size_t memberLen = pathlen(path) + 1;
275 dirName = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
276 ret = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
277 drive = stgMallocBytes(pathsize * _MAX_DRIVE, "pathdir(path)");
278 _wsplitpath_s(path, drive, _MAX_DRIVE, dirName, pathsize * pathlen(path), NULL, 0, NULL, 0);
279 pathprintf(ret, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), drive, dirName);
280 stgFree(drive);
281 stgFree(dirName);
282 #else
283 pathchar* dirName = dirname(path);
284 size_t memberLen = pathlen(dirName);
285 ret = stgMallocBytes(pathsize * (memberLen + 2), "pathdir(path)");
286 strcpy(ret, dirName);
287 ret[memberLen ] = '/';
288 ret[memberLen+1] = '\0';
289 #endif
290 return ret;
291 }
292
293 static pathchar* mkPath(char* path)
294 {
295 #if defined(mingw32_HOST_OS)
296 size_t required = mbstowcs(NULL, path, 0);
297 pathchar *ret = stgMallocBytes(sizeof(pathchar) * (required + 1), "mkPath");
298 if (mbstowcs(ret, path, required) == (size_t)-1)
299 {
300 barf("mkPath failed converting char* to wchar_t*");
301 }
302 ret[required] = '\0';
303 return ret;
304 #else
305 return pathdup(path);
306 #endif
307 }
308
309 /* Generic wrapper function to try and Resolve and RunInit oc files */
310 int ocTryLoad( ObjectCode* oc );
311
312 #if defined(OBJFORMAT_ELF)
313 static int ocVerifyImage_ELF ( ObjectCode* oc );
314 static int ocGetNames_ELF ( ObjectCode* oc );
315 static int ocResolve_ELF ( ObjectCode* oc );
316 static int ocRunInit_ELF ( ObjectCode* oc );
317 #if NEED_SYMBOL_EXTRAS
318 static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
319 #endif
320 #elif defined(OBJFORMAT_PEi386)
321 static int ocVerifyImage_PEi386 ( ObjectCode* oc );
322 static int ocGetNames_PEi386 ( ObjectCode* oc );
323 static int ocResolve_PEi386 ( ObjectCode* oc );
324 static int ocRunInit_PEi386 ( ObjectCode* oc );
325 static void *lookupSymbolInDLLs ( unsigned char *lbl );
326 /* See Note [mingw-w64 name decoration scheme] */
327 #ifndef x86_64_HOST_ARCH
328 static void zapTrailingAtSign ( unsigned char *sym );
329 #endif
330
331 #if defined(x86_64_HOST_ARCH)
332 #define USED_IF_x86_64_HOST_ARCH /* Nothing */
333 #else
334 #define USED_IF_x86_64_HOST_ARCH STG_UNUSED
335 #endif
336
337 static char *allocateImageAndTrampolines (
338 pathchar* arch_name, char* member_name,
339 FILE* f,
340 int size,
341 int isThin);
342 #if defined(x86_64_HOST_ARCH)
343 static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
344 static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
345 #define PEi386_IMAGE_OFFSET 4
346 #else
347 #define PEi386_IMAGE_OFFSET 0
348 #endif
349 #elif defined(OBJFORMAT_MACHO)
350 static int ocVerifyImage_MachO ( ObjectCode* oc );
351 static int ocGetNames_MachO ( ObjectCode* oc );
352 static int ocResolve_MachO ( ObjectCode* oc );
353 static int ocRunInit_MachO ( ObjectCode* oc );
354 static int machoGetMisalignment( FILE * );
355 #if NEED_SYMBOL_EXTRAS
356 static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
357 #endif
358 #ifdef powerpc_HOST_ARCH
359 static void machoInitSymbolsWithoutUnderscore( void );
360 #endif
361 #endif
362
363 #if defined(OBJFORMAT_PEi386)
364 /* string utility function */
365 static HsBool endsWithPath(pathchar* base, pathchar* str) {
366 int blen = pathlen(base);
367 int slen = pathlen(str);
368 return (blen >= slen) && (0 == pathcmp(base + blen - slen, str));
369 }
370
371 static int checkAndLoadImportLibrary(
372 pathchar* arch_name,
373 char* member_name,
374 FILE* f);
375
376 static int findAndLoadImportLibrary(
377 ObjectCode* oc
378 );
379
380 static UChar *myindex(
381 int scale,
382 void* base,
383 int index);
384 static UChar *cstring_from_COFF_symbol_name(
385 UChar* name,
386 UChar* strtab);
387 static char *cstring_from_section_name(
388 UChar* name,
389 UChar* strtab);
390
391
392 /* Add ld symbol for PE image base. */
393 #if defined(__GNUC__)
394 #define __ImageBase __MINGW_LSYMBOL(_image_base__)
395 #endif
396
397 /* Get the base of the module. */
398 /* This symbol is defined by ld. */
399 extern IMAGE_DOS_HEADER __ImageBase;
400 #define __image_base (void*)((HINSTANCE)&__ImageBase)
401
402 // MingW-w64 is missing these from the implementation. So we have to look them up
403 typedef DLL_DIRECTORY_COOKIE(WINAPI *LPAddDLLDirectory)(PCWSTR NewDirectory);
404 typedef WINBOOL(WINAPI *LPRemoveDLLDirectory)(DLL_DIRECTORY_COOKIE Cookie);
405 #endif /* OBJFORMAT_PEi386 */
406
407 static void freeProddableBlocks (ObjectCode *oc);
408
409 /* on x86_64 we have a problem with relocating symbol references in
410 * code that was compiled without -fPIC. By default, the small memory
411 * model is used, which assumes that symbol references can fit in a
412 * 32-bit slot. The system dynamic linker makes this work for
413 * references to shared libraries by either (a) allocating a jump
414 * table slot for code references, or (b) moving the symbol at load
415 * time (and copying its contents, if necessary) for data references.
416 *
417 * We unfortunately can't tell whether symbol references are to code
418 * or data. So for now we assume they are code (the vast majority
419 * are), and allocate jump-table slots. Unfortunately this will
420 * SILENTLY generate crashing code for data references. This hack is
421 * enabled by X86_64_ELF_NONPIC_HACK.
422 *
423 * One workaround is to use shared Haskell libraries. This is
424 * coming. Another workaround is to keep the static libraries but
425 * compile them with -fPIC, because that will generate PIC references
426 * to data which can be relocated. The PIC code is still too green to
427 * do this systematically, though.
428 *
429 * See bug #781
430 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
431 *
432 * Naming Scheme for Symbol Macros
433 *
434 * SymI_*: symbol is internal to the RTS. It resides in an object
435 * file/library that is statically.
436 * SymE_*: symbol is external to the RTS library. It might be linked
437 * dynamically.
438 *
439 * Sym*_HasProto : the symbol prototype is imported in an include file
440 * or defined explicitly
441 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
442 * default proto extern void sym(void);
443 */
444 #define X86_64_ELF_NONPIC_HACK 1
445
446 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
447 * small memory model on this architecture (see gcc docs,
448 * -mcmodel=small).
449 *
450 * MAP_32BIT not available on OpenBSD/amd64
451 */
452 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
453 #define TRY_MAP_32BIT MAP_32BIT
454 #else
455 #define TRY_MAP_32BIT 0
456 #endif
457
458 /*
459 Note [The ARM/Thumb Story]
460 ~~~~~~~~~~~~~~~~~~~~~~~~~~
461
462 Support for the ARM architecture is complicated by the fact that ARM has not
463 one but several instruction encodings. The two relevant ones here are the original
464 ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
465 of the instruction set.
466
467 How the CPU decodes a particular instruction is determined by a mode bit. This
468 mode bit is set on jump instructions, the value being determined by the low
469 bit of the target address: An odd address means the target is a procedure
470 encoded in the Thumb encoding whereas an even address means it's a traditional
471 ARM procedure (the actual address jumped to is even regardless of the encoding bit).
472
473 Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
474 is tricky. If the linker needs to link a call by an ARM object into Thumb code
475 (or vice-versa) it will produce a jump island. This, however, is incompatible with
476 GHC's tables-next-to-code. For this reason, it is critical that GHC emit
477 exclusively ARM or Thumb objects for all Haskell code.
478
479 We still do, however, need to worry about foreign code.
480 */
481
482 /*
483 * Due to the small memory model (see above), on x86_64 we have to map
484 * all our non-PIC object files into the low 2Gb of the address space
485 * (why 2Gb and not 4Gb? Because all addresses must be reachable
486 * using a 32-bit signed PC-relative offset). On Linux we can do this
487 * using the MAP_32BIT flag to mmap(), however on other OSs
488 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
489 * can't do this. So on these systems, we have to pick a base address
490 * in the low 2Gb of the address space and try to allocate memory from
491 * there.
492 *
493 * We pick a default address based on the OS, but also make this
494 * configurable via an RTS flag (+RTS -xm)
495 */
496 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
497
498 #if defined(MAP_32BIT)
499 // Try to use MAP_32BIT
500 #define MMAP_32BIT_BASE_DEFAULT 0
501 #else
502 // A guess: 1Gb.
503 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
504 #endif
505
506 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
507 #endif
508
509 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
510 ObjectCode *owner)
511 {
512 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
513 if (!pinfo || owner != pinfo->owner) return;
514 removeStrHashTable(table, key, NULL);
515 stgFree(pinfo);
516 }
517
518 /* -----------------------------------------------------------------------------
519 * Insert symbols into hash tables, checking for duplicates.
520 *
521 * Returns: 0 on failure, nonzero on success
522 */
523 /*
524 Note [weak-symbols-support]
525 -------------------------------------
526 While ghciInsertSymbolTable does implement extensive
527 logic for weak symbol support, weak symbols are not currently
528 fully supported by the RTS. This code is mostly here for COMDAT
529 support which uses the weak symbols support.
530
531 Linking weak symbols defined purely in C code with other C code
532 should also work, probably. Observing weak symbols in Haskell
533 won't.
534
535 Some test have been written for weak symbols but have been disabled
536 mostly because it's unsure how the weak symbols support should look.
537 See Trac #11223
538 */
539 static int ghciInsertSymbolTable(
540 pathchar* obj_name,
541 HashTable *table,
542 const SymbolName* key,
543 SymbolAddr* data,
544 HsBool weak,
545 ObjectCode *owner)
546 {
547 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
548 if (!pinfo) /* new entry */
549 {
550 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
551 pinfo->value = data;
552 pinfo->owner = owner;
553 pinfo->weak = weak;
554 insertStrHashTable(table, key, pinfo);
555 return 1;
556 }
557 else if (weak && data && pinfo->weak && !pinfo->value)
558 {
559 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
560 pinfo->value = data;
561 pinfo->owner = owner;
562 return 1;
563 }
564 else if (weak)
565 {
566 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
567 already know of another copy throw this one away.
568
569 or both weak symbols have a nonzero value. Keep the existing one.
570
571 This also preserves the semantics of linking against
572 the first symbol we find. */
573 }
574 else if (pinfo->weak && !weak) /* weak symbol is in the table */
575 {
576 /* override the weak definition with the non-weak one */
577 pinfo->value = data;
578 pinfo->owner = owner;
579 pinfo->weak = HS_BOOL_FALSE;
580 return 1;
581 }
582 else if ( pinfo->owner
583 && pinfo->owner->status != OBJECT_RESOLVED
584 && pinfo->owner->status != OBJECT_NEEDED)
585 {
586 /* If the other symbol hasn't been loaded or will be loaded and we want to
587 explicitly load the new one, we can just swap it out and load the one
588 that has been requested. If not, just keep the first one encountered.
589
590 Because the `symHash' table consists symbols we've also not loaded but
591 found during the initial scan this is safe to do. If however the existing
592 symbol has been loaded then it means we have a duplicate.
593
594 This is essentially emulating the behavior of a linker wherein it will always
595 link in object files that are .o file arguments, but only take object files
596 from archives as needed. */
597 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
598 pinfo->value = data;
599 pinfo->owner = owner;
600 pinfo->weak = weak;
601 }
602
603 return 1;
604 }
605 else if (pinfo->owner == owner)
606 {
607 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
608 return 1;
609 }
610 else if (owner && owner->status == OBJECT_LOADED)
611 {
612 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
613 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
614 call this function again to trigger the duplicate error. */
615 return 1;
616 }
617
618 pathchar* archiveName = NULL;
619 debugBelch(
620 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
621 " %s\n"
622 "whilst processing object file\n"
623 " %" PATH_FMT "\n"
624 "The symbol was previously defined in\n"
625 " %" PATH_FMT "\n"
626 "This could be caused by:\n"
627 " * Loading two different object files which export the same symbol\n"
628 " * Specifying the same object file twice on the GHCi command line\n"
629 " * An incorrect `package.conf' entry, causing some object to be\n"
630 " loaded twice.\n",
631 (char*)key,
632 obj_name,
633 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
634 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
635 : pinfo->owner->fileName
636 );
637
638 if (archiveName)
639 {
640 stgFree(archiveName);
641 archiveName = NULL;
642 }
643 return 0;
644 }
645
646 /* -----------------------------------------------------------------------------
647 * Looks up symbols into hash tables.
648 *
649 * Returns: 0 on failure and result is not set,
650 * nonzero on success and result set to nonzero pointer
651 */
652 static HsBool ghciLookupSymbolInfo(HashTable *table,
653 const SymbolName* key, RtsSymbolInfo **result)
654 {
655 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
656 if (!pinfo) {
657 *result = NULL;
658 return HS_BOOL_FALSE;
659 }
660 if (pinfo->weak)
661 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
662 /* Once it's looked up, it can no longer be overridden */
663 pinfo->weak = HS_BOOL_FALSE;
664
665 *result = pinfo;
666 return HS_BOOL_TRUE;
667 }
668
669 /* -----------------------------------------------------------------------------
670 * initialize the object linker
671 */
672
673
674 static int linker_init_done = 0 ;
675
676 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
677 static void *dl_prog_handle;
678 static regex_t re_invalid;
679 static regex_t re_realso;
680 #ifdef THREADED_RTS
681 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
682 #endif
683 #elif defined(OBJFORMAT_PEi386)
684 void addDLLHandle(pathchar* dll_name, HINSTANCE instance);
685 #endif
686
687 void initLinker (void)
688 {
689 // default to retaining CAFs for backwards compatibility. Most
690 // users will want initLinker_(0): otherwise unloadObj() will not
691 // be able to unload object files when they contain CAFs.
692 initLinker_(1);
693 }
694
695 void
696 initLinker_ (int retain_cafs)
697 {
698 RtsSymbolVal *sym;
699 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
700 int compileResult;
701 #endif
702
703 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
704
705 /* Make initLinker idempotent, so we can call it
706 before every relevant operation; that means we
707 don't need to initialise the linker separately */
708 if (linker_init_done == 1) {
709 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
710 return;
711 } else {
712 linker_init_done = 1;
713 }
714
715 objects = NULL;
716 unloaded_objects = NULL;
717
718 #if defined(THREADED_RTS)
719 initMutex(&linker_mutex);
720 initMutex(&linker_unloaded_mutex);
721 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
722 initMutex(&dl_mutex);
723 #endif
724 #endif
725
726 symhash = allocStrHashTable();
727
728 /* populate the symbol table with stuff from the RTS */
729 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
730 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
731 symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
732 barf("ghciInsertSymbolTable failed");
733 }
734 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
735 }
736 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
737 machoInitSymbolsWithoutUnderscore();
738 # endif
739 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
740 referenced from a statically linked module. We need to mimic this, but
741 we cannot use NULL because we use it to mean nonexistent symbols. So we
742 use an arbitrary (hopefully unique) address here.
743 */
744 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
745 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
746 barf("ghciInsertSymbolTable failed");
747 }
748
749 #if defined(OBJFORMAT_PEi386)
750 if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
751 symhash, "__image_base__", __image_base, HS_BOOL_TRUE, NULL)) {
752 barf("ghciInsertSymbolTable failed");
753 }
754 #endif /* OBJFORMAT_PEi386 */
755
756
757 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
758 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
759 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
760 retain_cafs ? newRetainedCAF : newGCdCAF,
761 HS_BOOL_FALSE, NULL)) {
762 barf("ghciInsertSymbolTable failed");
763 }
764
765 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
766 # if defined(RTLD_DEFAULT)
767 dl_prog_handle = RTLD_DEFAULT;
768 # else
769 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
770 # endif /* RTLD_DEFAULT */
771
772 compileResult = regcomp(&re_invalid,
773 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
774 REG_EXTENDED);
775 if (compileResult != 0) {
776 barf("Compiling re_invalid failed");
777 }
778 compileResult = regcomp(&re_realso,
779 "(GROUP|INPUT) *\\( *([^ )]+)",
780 REG_EXTENDED);
781 if (compileResult != 0) {
782 barf("Compiling re_realso failed");
783 }
784 # endif
785
786 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
787 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
788 // User-override for mmap_32bit_base
789 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
790 }
791 #endif
792
793 #if defined(mingw32_HOST_OS)
794 /*
795 * These two libraries cause problems when added to the static link,
796 * but are necessary for resolving symbols in GHCi, hence we load
797 * them manually here.
798 */
799 addDLL(WSTR("msvcrt"));
800 addDLL(WSTR("kernel32"));
801 addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
802 #endif
803
804 if (RTS_LINKER_USE_MMAP)
805 m32_allocator_init();
806
807 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
808 return;
809 }
810
811 void
812 exitLinker( void ) {
813 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
814 if (linker_init_done == 1) {
815 regfree(&re_invalid);
816 regfree(&re_realso);
817 #ifdef THREADED_RTS
818 closeMutex(&dl_mutex);
819 #endif
820 }
821 #endif
822 if (linker_init_done == 1) {
823 freeHashTable(symhash, free);
824 }
825 #ifdef THREADED_RTS
826 closeMutex(&linker_mutex);
827 #endif
828 }
829
830 /* -----------------------------------------------------------------------------
831 * Loading DLL or .so dynamic libraries
832 * -----------------------------------------------------------------------------
833 *
834 * Add a DLL from which symbols may be found. In the ELF case, just
835 * do RTLD_GLOBAL-style add, so no further messing around needs to
836 * happen in order that symbols in the loaded .so are findable --
837 * lookupSymbol() will subsequently see them by dlsym on the program's
838 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
839 *
840 * In the PEi386 case, open the DLLs and put handles to them in a
841 * linked list. When looking for a symbol, try all handles in the
842 * list. This means that we need to load even DLLs that are guaranteed
843 * to be in the ghc.exe image already, just so we can get a handle
844 * to give to loadSymbol, so that we can find the symbols. For such
845 * libraries, the LoadLibrary call should be a no-op except for returning
846 * the handle.
847 *
848 */
849
850 #if defined(OBJFORMAT_PEi386)
851 /* A record for storing handles into DLLs. */
852
853 typedef
854 struct _OpenedDLL {
855 pathchar* name;
856 struct _OpenedDLL* next;
857 HINSTANCE instance;
858 }
859 OpenedDLL;
860
861 /* A list thereof. */
862 static OpenedDLL* opened_dlls = NULL;
863
864 /* A record for storing indirectly linked functions from DLLs. */
865 typedef
866 struct _IndirectAddr {
867 SymbolAddr* addr;
868 struct _IndirectAddr* next;
869 }
870 IndirectAddr;
871
872 /* A list thereof. */
873 static IndirectAddr* indirects = NULL;
874
875 /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
876 void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
877 OpenedDLL* o_dll;
878 o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );
879 o_dll->name = dll_name ? pathdup(dll_name) : NULL;
880 o_dll->instance = instance;
881 o_dll->next = opened_dlls;
882 opened_dlls = o_dll;
883 }
884
885 #endif
886
887 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
888
889 /* Suppose in ghci we load a temporary SO for a module containing
890 f = 1
891 and then modify the module, recompile, and load another temporary
892 SO with
893 f = 2
894 Then as we don't unload the first SO, dlsym will find the
895 f = 1
896 symbol whereas we want the
897 f = 2
898 symbol. We therefore need to keep our own SO handle list, and
899 try SOs in the right order. */
900
901 typedef
902 struct _OpenedSO {
903 struct _OpenedSO* next;
904 void *handle;
905 }
906 OpenedSO;
907
908 /* A list thereof. */
909 static OpenedSO* openedSOs = NULL;
910
911 static const char *
912 internal_dlopen(const char *dll_name)
913 {
914 OpenedSO* o_so;
915 void *hdl;
916 const char *errmsg;
917 char *errmsg_copy;
918
919 // omitted: RTLD_NOW
920 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
921 IF_DEBUG(linker,
922 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
923
924 //-------------- Begin critical section ------------------
925 // This critical section is necessary because dlerror() is not
926 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
927 // Also, the error message returned must be copied to preserve it
928 // (see POSIX also)
929
930 ACQUIRE_LOCK(&dl_mutex);
931 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
932
933 errmsg = NULL;
934 if (hdl == NULL) {
935 /* dlopen failed; return a ptr to the error msg. */
936 errmsg = dlerror();
937 if (errmsg == NULL) errmsg = "addDLL: unknown error";
938 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
939 strcpy(errmsg_copy, errmsg);
940 errmsg = errmsg_copy;
941 } else {
942 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
943 o_so->handle = hdl;
944 o_so->next = openedSOs;
945 openedSOs = o_so;
946 }
947
948 RELEASE_LOCK(&dl_mutex);
949 //--------------- End critical section -------------------
950
951 return errmsg;
952 }
953
954 /*
955 Note [RTLD_LOCAL]
956
957 In GHCi we want to be able to override previous .so's with newly
958 loaded .so's when we recompile something. This further implies that
959 when we look up a symbol in internal_dlsym() we have to iterate
960 through the loaded libraries (in order from most recently loaded to
961 oldest) looking up the symbol in each one until we find it.
962
963 However, this can cause problems for some symbols that are copied
964 by the linker into the executable image at runtime - see #8935 for a
965 lengthy discussion. To solve that problem we need to look up
966 symbols in the main executable *first*, before attempting to look
967 them up in the loaded .so's. But in order to make that work, we
968 have to always call dlopen with RTLD_LOCAL, so that the loaded
969 libraries don't populate the global symbol table.
970 */
971
972 static void *
973 internal_dlsym(const char *symbol) {
974 OpenedSO* o_so;
975 void *v;
976
977 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
978 ACQUIRE_LOCK(&dl_mutex);
979 dlerror();
980 // look in program first
981 v = dlsym(dl_prog_handle, symbol);
982 if (dlerror() == NULL) {
983 RELEASE_LOCK(&dl_mutex);
984 return v;
985 }
986
987 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
988 v = dlsym(o_so->handle, symbol);
989 if (dlerror() == NULL) {
990 RELEASE_LOCK(&dl_mutex);
991 return v;
992 }
993 }
994 RELEASE_LOCK(&dl_mutex);
995 return v;
996 }
997 # endif
998
999 const char *
1000 addDLL( pathchar *dll_name )
1001 {
1002 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1003 /* ------------------- ELF DLL loader ------------------- */
1004
1005 #define NMATCH 5
1006 regmatch_t match[NMATCH];
1007 const char *errmsg;
1008 FILE* fp;
1009 size_t match_length;
1010 #define MAXLINE 1000
1011 char line[MAXLINE];
1012 int result;
1013
1014 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
1015 errmsg = internal_dlopen(dll_name);
1016
1017 if (errmsg == NULL) {
1018 return NULL;
1019 }
1020
1021 // GHC Trac ticket #2615
1022 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
1023 // contain linker scripts rather than ELF-format object code. This
1024 // code handles the situation by recognizing the real object code
1025 // file name given in the linker script.
1026 //
1027 // If an "invalid ELF header" error occurs, it is assumed that the
1028 // .so file contains a linker script instead of ELF object code.
1029 // In this case, the code looks for the GROUP ( ... ) linker
1030 // directive. If one is found, the first file name inside the
1031 // parentheses is treated as the name of a dynamic library and the
1032 // code attempts to dlopen that file. If this is also unsuccessful,
1033 // an error message is returned.
1034
1035 // see if the error message is due to an invalid ELF header
1036 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
1037 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
1038 IF_DEBUG(linker, debugBelch("result = %i\n", result));
1039 if (result == 0) {
1040 // success -- try to read the named file as a linker script
1041 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
1042 MAXLINE-1);
1043 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
1044 line[match_length] = '\0'; // make sure string is null-terminated
1045 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
1046 if ((fp = fopen(line, "r")) == NULL) {
1047 return errmsg; // return original error if open fails
1048 }
1049 // try to find a GROUP or INPUT ( ... ) command
1050 while (fgets(line, MAXLINE, fp) != NULL) {
1051 IF_DEBUG(linker, debugBelch("input line = %s", line));
1052 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
1053 // success -- try to dlopen the first named file
1054 IF_DEBUG(linker, debugBelch("match%s\n",""));
1055 line[match[2].rm_eo] = '\0';
1056 stgFree((void*)errmsg); // Free old message before creating new one
1057 errmsg = internal_dlopen(line+match[2].rm_so);
1058 break;
1059 }
1060 // if control reaches here, no GROUP or INPUT ( ... ) directive
1061 // was found and the original error message is returned to the
1062 // caller
1063 }
1064 fclose(fp);
1065 }
1066 return errmsg;
1067
1068 # elif defined(OBJFORMAT_PEi386)
1069 /* ------------------- Win32 DLL loader ------------------- */
1070
1071 pathchar* buf;
1072 OpenedDLL* o_dll;
1073 HINSTANCE instance;
1074
1075 IF_DEBUG(linker, debugBelch("\naddDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
1076
1077 /* See if we've already got it, and ignore if so. */
1078 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
1079 if (0 == pathcmp(o_dll->name, dll_name))
1080 return NULL;
1081 }
1082
1083 /* The file name has no suffix (yet) so that we can try
1084 both foo.dll and foo.drv
1085
1086 The documentation for LoadLibrary says:
1087 If no file name extension is specified in the lpFileName
1088 parameter, the default library extension .dll is
1089 appended. However, the file name string can include a trailing
1090 point character (.) to indicate that the module name has no
1091 extension. */
1092
1093 size_t bufsize = pathlen(dll_name) + 10;
1094 buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
1095
1096 /* These are ordered by probability of success and order we'd like them */
1097 const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
1098 const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
1099
1100 int cFormat;
1101 int cFlag;
1102 int flags_start = 1; // Assume we don't support the new API
1103
1104 /* Detect if newer API are available, if not, skip the first flags entry */
1105 if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
1106 flags_start = 0;
1107 }
1108
1109 /* Iterate through the possible flags and formats */
1110 for (cFlag = flags_start; cFlag < 2; cFlag++)
1111 {
1112 for (cFormat = 0; cFormat < 4; cFormat++)
1113 {
1114 snwprintf(buf, bufsize, formats[cFormat], dll_name);
1115 instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
1116 if (instance == NULL)
1117 {
1118 if (GetLastError() != ERROR_MOD_NOT_FOUND)
1119 {
1120 goto error;
1121 }
1122 }
1123 else
1124 {
1125 break; // We're done. DLL has been loaded.
1126 }
1127 }
1128 }
1129
1130 // Check if we managed to load the DLL
1131 if (instance == NULL) {
1132 goto error;
1133 }
1134
1135 stgFree(buf);
1136
1137 addDLLHandle(dll_name, instance);
1138
1139 return NULL;
1140
1141 error:
1142 stgFree(buf);
1143 sysErrorBelch("addDLL: %" PATH_FMT " (Win32 error %lu)", dll_name, GetLastError());
1144
1145 /* LoadLibrary failed; return a ptr to the error msg. */
1146 return "addDLL: could not load DLL";
1147
1148 # else
1149 barf("addDLL: not implemented on this platform");
1150 # endif
1151 }
1152
1153 /* -----------------------------------------------------------------------------
1154 * Searches the system directories to determine if there is a system DLL that
1155 * satisfies the given name. This prevent GHCi from linking against a static
1156 * library if a DLL is available.
1157 *
1158 * Returns: NULL on failure or no DLL found, else the full path to the DLL
1159 * that can be loaded.
1160 */
1161 pathchar* findSystemLibrary(pathchar* dll_name)
1162 {
1163 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));
1164
1165 #if defined(OBJFORMAT_PEi386)
1166 const unsigned int init_buf_size = 1024;
1167 unsigned int bufsize = init_buf_size;
1168 wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
1169 DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
1170
1171 if (wResult > bufsize) {
1172 result = realloc(result, sizeof(wchar_t) * wResult);
1173 wResult = SearchPathW(NULL, dll_name, NULL, wResult, result, NULL);
1174 }
1175
1176
1177 if (!wResult) {
1178 free(result);
1179 return NULL;
1180 }
1181
1182 return result;
1183 #else
1184 (void)(dll_name); // Function not implemented for other platforms.
1185 return NULL;
1186 #endif
1187 }
1188
1189 /* -----------------------------------------------------------------------------
1190 * Emits a warning determining that the system is missing a required security
1191 * update that we need to get access to the proper APIs
1192 */
1193 void warnMissingKBLibraryPaths( void )
1194 {
1195 static HsBool missing_update_warn = HS_BOOL_FALSE;
1196 if (!missing_update_warn) {
1197 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
1198 missing_update_warn = HS_BOOL_TRUE;
1199 }
1200 }
1201
1202 /* -----------------------------------------------------------------------------
1203 * appends a directory to the process DLL Load path so LoadLibrary can find it
1204 *
1205 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
1206 * restore the search path to what it was before this call.
1207 */
1208 HsPtr addLibrarySearchPath(pathchar* dll_path)
1209 {
1210 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));
1211
1212 #if defined(OBJFORMAT_PEi386)
1213 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1214 LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
1215
1216 HsPtr result = NULL;
1217
1218 const unsigned int init_buf_size = 4096;
1219 int bufsize = init_buf_size;
1220
1221 // Make sure the path is an absolute path
1222 WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
1223 DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
1224 if (!wResult){
1225 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1226 }
1227 else if (wResult > init_buf_size) {
1228 abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
1229 if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
1230 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1231 }
1232 }
1233
1234 if (AddDllDirectory) {
1235 result = AddDllDirectory(abs_path);
1236 }
1237 else
1238 {
1239 warnMissingKBLibraryPaths();
1240 WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
1241 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1242
1243 if (wResult > init_buf_size) {
1244 str = realloc(str, sizeof(WCHAR) * wResult);
1245 bufsize = wResult;
1246 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1247 if (!wResult) {
1248 sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1249 }
1250 }
1251
1252 bufsize = wResult + 2 + pathlen(abs_path);
1253 wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
1254
1255 wcscpy(newPath, abs_path);
1256 wcscat(newPath, L";");
1257 wcscat(newPath, str);
1258 if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
1259 sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1260 }
1261
1262 free(newPath);
1263 free(abs_path);
1264
1265 return str;
1266 }
1267
1268 if (!result) {
1269 sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1270 free(abs_path);
1271 return NULL;
1272 }
1273
1274 free(abs_path);
1275 return result;
1276 #else
1277 (void)(dll_path); // Function not implemented for other platforms.
1278 return NULL;
1279 #endif
1280 }
1281
1282 /* -----------------------------------------------------------------------------
1283 * removes a directory from the process DLL Load path
1284 *
1285 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
1286 */
1287 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
1288 {
1289 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n", dll_path_index));
1290
1291 #if defined(OBJFORMAT_PEi386)
1292 HsBool result = 0;
1293
1294 if (dll_path_index != NULL) {
1295 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1296 LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
1297
1298 if (RemoveDllDirectory) {
1299 result = RemoveDllDirectory(dll_path_index);
1300 // dll_path_index is now invalid, do not use it after this point.
1301 }
1302 else
1303 {
1304 warnMissingKBLibraryPaths();
1305 result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
1306 free(dll_path_index);
1307 }
1308
1309 if (!result) {
1310 sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
1311 return HS_BOOL_FALSE;
1312 }
1313 }
1314
1315 return result == 0 ? HS_BOOL_TRUE : HS_BOOL_FALSE;
1316 #else
1317 (void)(dll_path_index); // Function not implemented for other platforms.
1318 return HS_BOOL_FALSE;
1319 #endif
1320 }
1321
1322 /* -----------------------------------------------------------------------------
1323 * insert a symbol in the hash table
1324 *
1325 * Returns: 0 on failure, nozero on success
1326 */
1327 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
1328 {
1329 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
1330 }
1331
1332 /* -----------------------------------------------------------------------------
1333 * lookup a symbol in the hash table
1334 */
1335 static SymbolAddr* lookupSymbol_ (SymbolName* lbl)
1336 {
1337 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
1338
1339 ASSERT(symhash != NULL);
1340 RtsSymbolInfo *pinfo;
1341
1342 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
1343 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
1344
1345 # if defined(OBJFORMAT_ELF)
1346 return internal_dlsym(lbl);
1347 # elif defined(OBJFORMAT_MACHO)
1348
1349 /* HACK: On OS X, all symbols are prefixed with an underscore.
1350 However, dlsym wants us to omit the leading underscore from the
1351 symbol name -- the dlsym routine puts it back on before searching
1352 for the symbol. For now, we simply strip it off here (and ONLY
1353 here).
1354 */
1355 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
1356 ASSERT(lbl[0] == '_');
1357 return internal_dlsym(lbl + 1);
1358 # elif defined(OBJFORMAT_PEi386)
1359 SymbolAddr* sym;
1360
1361 /* See Note [mingw-w64 name decoration scheme] */
1362 #ifndef x86_64_HOST_ARCH
1363 zapTrailingAtSign ( (unsigned char*)lbl );
1364 #endif
1365 sym = lookupSymbolInDLLs((unsigned char*)lbl);
1366 return sym; // might be NULL if not found
1367
1368 # else
1369 ASSERT(2+2 == 5);
1370 return NULL;
1371 # endif
1372 } else {
1373 #if defined(mingw32_HOST_OS)
1374 // If Windows, perform initialization of uninitialized
1375 // Symbols from the C runtime which was loaded above.
1376 // We do this on lookup to prevent the hit when
1377 // The symbol isn't being used.
1378 if (pinfo->value == (void*)0xBAADF00D)
1379 {
1380 char symBuffer[50];
1381 sprintf(symBuffer, "_%s", lbl);
1382 pinfo->value = GetProcAddress(GetModuleHandle("msvcrt"), symBuffer);
1383 }
1384 #endif
1385 SymbolAddr* val = pinfo->value;
1386 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, val));
1387
1388 int r;
1389 ObjectCode* oc = pinfo->owner;
1390
1391 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
1392 See Note [runtime-linker-phases] */
1393 if (oc && oc->status == OBJECT_LOADED) {
1394 oc->status = OBJECT_NEEDED;
1395 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand loading symbol '%s'\n", lbl));
1396 r = ocTryLoad(oc);
1397
1398 if (!r) {
1399 errorBelch("Could not on-demand load symbol '%s'\n", lbl);
1400 return NULL;
1401 }
1402 }
1403
1404 return val;
1405 }
1406 }
1407
1408 SymbolAddr* lookupSymbol( SymbolName* lbl )
1409 {
1410 ACQUIRE_LOCK(&linker_mutex);
1411 SymbolAddr* r = lookupSymbol_(lbl);
1412 RELEASE_LOCK(&linker_mutex);
1413 return r;
1414 }
1415
1416 /* -----------------------------------------------------------------------------
1417 Create a StablePtr for a foreign export. This is normally called by
1418 a C function with __attribute__((constructor)), which is generated
1419 by GHC and linked into the module.
1420
1421 If the object code is being loaded dynamically, then we remember
1422 which StablePtrs were allocated by the constructors and free them
1423 again in unloadObj().
1424 -------------------------------------------------------------------------- */
1425
1426 static ObjectCode *loading_obj = NULL;
1427
1428 StgStablePtr foreignExportStablePtr (StgPtr p)
1429 {
1430 ForeignExportStablePtr *fe_sptr;
1431 StgStablePtr *sptr;
1432
1433 sptr = getStablePtr(p);
1434
1435 if (loading_obj != NULL) {
1436 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
1437 "foreignExportStablePtr");
1438 fe_sptr->stable_ptr = sptr;
1439 fe_sptr->next = loading_obj->stable_ptrs;
1440 loading_obj->stable_ptrs = fe_sptr;
1441 }
1442
1443 return sptr;
1444 }
1445
1446
1447 /* -----------------------------------------------------------------------------
1448 * Debugging aid: look in GHCi's object symbol tables for symbols
1449 * within DELTA bytes of the specified address, and show their names.
1450 */
1451 #ifdef DEBUG
1452 void ghci_enquire ( SymbolAddr* addr );
1453
1454 void ghci_enquire(SymbolAddr* addr)
1455 {
1456 int i;
1457 SymbolName* sym;
1458 RtsSymbolInfo* a;
1459 const int DELTA = 64;
1460 ObjectCode* oc;
1461
1462 for (oc = objects; oc; oc = oc->next) {
1463 for (i = 0; i < oc->n_symbols; i++) {
1464 sym = oc->symbols[i];
1465 if (sym == NULL) continue;
1466 a = NULL;
1467 if (a == NULL) {
1468 ghciLookupSymbolInfo(symhash, sym, &a);
1469 }
1470 if (a == NULL) {
1471 // debugBelch("ghci_enquire: can't find %s\n", sym);
1472 }
1473 else if ( a->value
1474 && (char*)addr-DELTA <= (char*)a->value
1475 && (char*)a->value <= (char*)addr+DELTA) {
1476 debugBelch("%p + %3d == `%s'\n", addr, (int)((char*)a->value - (char*)addr), sym);
1477 }
1478 }
1479 }
1480 }
1481 #endif
1482
1483 #if RTS_LINKER_USE_MMAP
1484 //
1485 // Returns NULL on failure.
1486 //
1487 void *
1488 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1489 {
1490 void *map_addr = NULL;
1491 void *result;
1492 size_t size;
1493 static uint32_t fixed = 0;
1494
1495 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1496 size = roundUpToPage(bytes);
1497
1498 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1499 mmap_again:
1500
1501 if (mmap_32bit_base != 0) {
1502 map_addr = mmap_32bit_base;
1503 }
1504 #endif
1505
1506 IF_DEBUG(linker,
1507 debugBelch("mmapForLinker: \tprotection %#0x\n",
1508 PROT_EXEC | PROT_READ | PROT_WRITE));
1509 IF_DEBUG(linker,
1510 debugBelch("mmapForLinker: \tflags %#0x\n",
1511 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1512
1513 result = mmap(map_addr, size,
1514 PROT_EXEC|PROT_READ|PROT_WRITE,
1515 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1516
1517 if (result == MAP_FAILED) {
1518 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1519 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1520 return NULL;
1521 }
1522
1523 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1524 if (mmap_32bit_base != 0) {
1525 if (result == map_addr) {
1526 mmap_32bit_base = (StgWord8*)map_addr + size;
1527 } else {
1528 if ((W_)result > 0x80000000) {
1529 // oops, we were given memory over 2Gb
1530 munmap(result,size);
1531 #if defined(freebsd_HOST_OS) || \
1532 defined(kfreebsdgnu_HOST_OS) || \
1533 defined(dragonfly_HOST_OS)
1534 // Some platforms require MAP_FIXED. This is normally
1535 // a bad idea, because MAP_FIXED will overwrite
1536 // existing mappings.
1537 fixed = MAP_FIXED;
1538 goto mmap_again;
1539 #else
1540 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1541 "asked for %lu bytes at %p. "
1542 "Try specifying an address with +RTS -xm<addr> -RTS",
1543 size, map_addr);
1544 return NULL;
1545 #endif
1546 } else {
1547 // hmm, we were given memory somewhere else, but it's
1548 // still under 2Gb so we can use it. Next time, ask
1549 // for memory right after the place we just got some
1550 mmap_32bit_base = (StgWord8*)result + size;
1551 }
1552 }
1553 } else {
1554 if ((W_)result > 0x80000000) {
1555 // oops, we were given memory over 2Gb
1556 // ... try allocating memory somewhere else?;
1557 debugTrace(DEBUG_linker,
1558 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1559 bytes, result);
1560 munmap(result, size);
1561
1562 // Set a base address and try again... (guess: 1Gb)
1563 mmap_32bit_base = (void*)0x40000000;
1564 goto mmap_again;
1565 }
1566 }
1567 #endif
1568
1569 IF_DEBUG(linker,
1570 debugBelch("mmapForLinker: mapped %" FMT_Word
1571 " bytes starting at %p\n", (W_)size, result));
1572 IF_DEBUG(linker,
1573 debugBelch("mmapForLinker: done\n"));
1574
1575 return result;
1576 }
1577 #endif
1578
1579 /*
1580 * Remove symbols from the symbol table, and free oc->symbols.
1581 * This operation is idempotent.
1582 */
1583 static void removeOcSymbols (ObjectCode *oc)
1584 {
1585 if (oc->symbols == NULL) return;
1586
1587 // Remove all the mappings for the symbols within this object..
1588 int i;
1589 for (i = 0; i < oc->n_symbols; i++) {
1590 if (oc->symbols[i] != NULL) {
1591 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1592 }
1593 }
1594
1595 stgFree(oc->symbols);
1596 oc->symbols = NULL;
1597 }
1598
1599 /*
1600 * Release StablePtrs and free oc->stable_ptrs.
1601 * This operation is idempotent.
1602 */
1603 static void freeOcStablePtrs (ObjectCode *oc)
1604 {
1605 // Release any StablePtrs that were created when this
1606 // object module was initialized.
1607 ForeignExportStablePtr *fe_ptr, *next;
1608
1609 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1610 next = fe_ptr->next;
1611 freeStablePtr(fe_ptr->stable_ptr);
1612 stgFree(fe_ptr);
1613 }
1614 oc->stable_ptrs = NULL;
1615 }
1616
1617 static void
1618 freePreloadObjectFile (ObjectCode *oc)
1619 {
1620 #if defined(mingw32_HOST_OS)
1621
1622 VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
1623
1624 IndirectAddr *ia, *ia_next;
1625 ia = indirects;
1626 while (ia != NULL) {
1627 ia_next = ia->next;
1628 stgFree(ia);
1629 ia = ia_next;
1630 }
1631 indirects = NULL;
1632
1633 #else
1634
1635 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1636 munmap(oc->image, oc->fileSize);
1637 }
1638 else {
1639 stgFree(oc->image);
1640 }
1641
1642 #endif
1643
1644 oc->image = NULL;
1645 oc->fileSize = 0;
1646 }
1647
1648 /*
1649 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1650 * the GC when a previously unloaded ObjectCode has been determined to be
1651 * unused, and when an error occurs during loadObj().
1652 */
1653 void freeObjectCode (ObjectCode *oc)
1654 {
1655 freePreloadObjectFile(oc);
1656
1657 if (oc->symbols != NULL) {
1658 stgFree(oc->symbols);
1659 oc->symbols = NULL;
1660 }
1661
1662 if (oc->extraInfos != NULL) {
1663 freeHashTable(oc->extraInfos, NULL);
1664 oc->extraInfos = NULL;
1665 }
1666
1667 if (oc->sections != NULL) {
1668 int i;
1669 for (i=0; i < oc->n_sections; i++) {
1670 if (oc->sections[i].start != NULL) {
1671 switch(oc->sections[i].alloc){
1672 #if RTS_LINKER_USE_MMAP
1673 case SECTION_MMAP:
1674 munmap(oc->sections[i].mapped_start,
1675 oc->sections[i].mapped_size);
1676 break;
1677 case SECTION_M32:
1678 m32_free(oc->sections[i].start,
1679 oc->sections[i].size);
1680 break;
1681 #endif
1682 case SECTION_MALLOC:
1683 stgFree(oc->sections[i].start);
1684 break;
1685 default:
1686 break;
1687 }
1688 }
1689 }
1690 stgFree(oc->sections);
1691 }
1692
1693 freeProddableBlocks(oc);
1694
1695 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1696 * alongside the image, so we don't need to free. */
1697 #if NEED_SYMBOL_EXTRAS && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
1698 if (RTS_LINKER_USE_MMAP) {
1699 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1700 m32_free(oc->symbol_extras,
1701 sizeof(SymbolExtra) * oc->n_symbol_extras);
1702 }
1703 }
1704 else {
1705 stgFree(oc->symbol_extras);
1706 }
1707 #endif
1708
1709 stgFree(oc->fileName);
1710 stgFree(oc->archiveMemberName);
1711
1712 stgFree(oc);
1713 }
1714
1715 /* -----------------------------------------------------------------------------
1716 * Sets the initial status of a fresh ObjectCode
1717 */
1718 static void setOcInitialStatus(ObjectCode* oc) {
1719 if (oc->archiveMemberName == NULL) {
1720 oc->status = OBJECT_NEEDED;
1721 } else {
1722 oc->status = OBJECT_LOADED;
1723 }
1724 }
1725
1726 static ObjectCode*
1727 mkOc( pathchar *path, char *image, int imageSize,
1728 rtsBool mapped, char *archiveMemberName, int misalignment ) {
1729 ObjectCode* oc;
1730
1731 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1732 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1733
1734 # if defined(OBJFORMAT_ELF)
1735 oc->formatName = "ELF";
1736 # elif defined(OBJFORMAT_PEi386)
1737 oc->formatName = "PEi386";
1738 # elif defined(OBJFORMAT_MACHO)
1739 oc->formatName = "Mach-O";
1740 # else
1741 stgFree(oc);
1742 barf("loadObj: not implemented on this platform");
1743 # endif
1744
1745 oc->image = image;
1746 oc->fileName = pathdup(path);
1747
1748 if (archiveMemberName) {
1749 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1750 strcpy(oc->archiveMemberName, archiveMemberName);
1751 } else {
1752 oc->archiveMemberName = NULL;
1753 }
1754
1755 setOcInitialStatus( oc );
1756
1757 oc->fileSize = imageSize;
1758 oc->symbols = NULL;
1759 oc->n_sections = 0;
1760 oc->sections = NULL;
1761 oc->proddables = NULL;
1762 oc->stable_ptrs = NULL;
1763 #if NEED_SYMBOL_EXTRAS
1764 oc->symbol_extras = NULL;
1765 #endif
1766 oc->imageMapped = mapped;
1767
1768 oc->misalignment = misalignment;
1769 oc->extraInfos = NULL;
1770
1771 /* chain it onto the list of objects */
1772 oc->next = NULL;
1773
1774 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1775 return oc;
1776 }
1777
1778 /* -----------------------------------------------------------------------------
1779 * Check if an object or archive is already loaded.
1780 *
1781 * Returns: 1 if the path is already loaded, 0 otherwise.
1782 */
1783 static HsInt
1784 isAlreadyLoaded( pathchar *path )
1785 {
1786 ObjectCode *o;
1787 for (o = objects; o; o = o->next) {
1788 if (0 == pathcmp(o->fileName, path)) {
1789 return 1; /* already loaded */
1790 }
1791 }
1792 return 0; /* not loaded yet */
1793 }
1794
1795 static HsInt loadArchive_ (pathchar *path)
1796 {
1797 ObjectCode* oc;
1798 char *image;
1799 int memberSize;
1800 FILE *f;
1801 int n;
1802 size_t thisFileNameSize;
1803 char *fileName;
1804 size_t fileNameSize;
1805 int isObject, isGnuIndex, isThin, isImportLib;
1806 char tmp[20];
1807 char *gnuFileIndex;
1808 int gnuFileIndexSize;
1809 #if defined(darwin_HOST_OS)
1810 int i;
1811 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
1812 #if defined(i386_HOST_ARCH)
1813 const uint32_t mycputype = CPU_TYPE_X86;
1814 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
1815 #elif defined(x86_64_HOST_ARCH)
1816 const uint32_t mycputype = CPU_TYPE_X86_64;
1817 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
1818 #elif defined(powerpc_HOST_ARCH)
1819 const uint32_t mycputype = CPU_TYPE_POWERPC;
1820 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1821 #elif defined(powerpc64_HOST_ARCH)
1822 const uint32_t mycputype = CPU_TYPE_POWERPC64;
1823 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1824 #else
1825 #error Unknown Darwin architecture
1826 #endif
1827 #endif
1828 int misalignment = 0;
1829
1830 /* TODO: don't call barf() on error, instead return an error code, freeing
1831 * all resources correctly. This function is pretty complex, so it needs
1832 * to be refactored to make this practical. */
1833
1834 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
1835 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
1836
1837 /* Check that we haven't already loaded this archive.
1838 Ignore requests to load multiple times */
1839 if (isAlreadyLoaded(path)) {
1840 IF_DEBUG(linker,
1841 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1842 return 1; /* success */
1843 }
1844
1845 gnuFileIndex = NULL;
1846 gnuFileIndexSize = 0;
1847
1848 fileNameSize = 32;
1849 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
1850
1851 isThin = 0;
1852 isImportLib = 0;
1853
1854 f = pathopen(path, WSTR("rb"));
1855 if (!f)
1856 barf("loadObj: can't read `%" PATH_FMT "'", path);
1857
1858 /* Check if this is an archive by looking for the magic "!<arch>\n"
1859 * string. Usually, if this fails, we barf and quit. On Darwin however,
1860 * we may have a fat archive, which contains archives for more than
1861 * one architecture. Fat archives start with the magic number 0xcafebabe,
1862 * always stored big endian. If we find a fat_header, we scan through
1863 * the fat_arch structs, searching through for one for our host
1864 * architecture. If a matching struct is found, we read the offset
1865 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
1866 * from the start of the file.
1867 *
1868 * A subtlety is that all of the members of the fat_header and fat_arch
1869 * structs are stored big endian, so we need to call byte order
1870 * conversion functions.
1871 *
1872 * If we find the appropriate architecture in a fat archive, we gobble
1873 * its magic "!<arch>\n" string and continue processing just as if
1874 * we had a single architecture archive.
1875 */
1876
1877 n = fread ( tmp, 1, 8, f );
1878 if (n != 8)
1879 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
1880 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
1881 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
1882 *
1883 * ar thin libraries have the exact same format as normal archives except they
1884 * have a different magic string and they don't copy the object files into the
1885 * archive.
1886 *
1887 * Instead each header entry points to the location of the object file on disk.
1888 * This is useful when a library is only created to satisfy a compile time dependency
1889 * instead of to be distributed. This saves the time required for copying.
1890 *
1891 * Thin archives are always flattened. They always only contain simple headers
1892 * pointing to the object file and so we need not allocate more memory than needed
1893 * to find the object file.
1894 *
1895 */
1896 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
1897 isThin = 1;
1898 }
1899 #if defined(darwin_HOST_OS)
1900 /* Not a standard archive, look for a fat archive magic number: */
1901 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
1902 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
1903 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
1904 nfat_offset = 0;
1905
1906 for (i = 0; i < (int)nfat_arch; i++) {
1907 /* search for the right arch */
1908 n = fread( tmp, 1, 20, f );
1909 if (n != 8)
1910 barf("loadArchive: Failed reading arch from `%s'", path);
1911 cputype = ntohl(*(uint32_t *)tmp);
1912 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
1913
1914 if (cputype == mycputype && cpusubtype == mycpusubtype) {
1915 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
1916 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
1917 break;
1918 }
1919 }
1920
1921 if (nfat_offset == 0) {
1922 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
1923 }
1924 else {
1925 n = fseek( f, nfat_offset, SEEK_SET );
1926 if (n != 0)
1927 barf("loadArchive: Failed to seek to arch in `%s'", path);
1928 n = fread ( tmp, 1, 8, f );
1929 if (n != 8)
1930 barf("loadArchive: Failed reading header from `%s'", path);
1931 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1932 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
1933 }
1934 }
1935 }
1936 else {
1937 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
1938 }
1939 #else
1940 else {
1941 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
1942 }
1943 #endif
1944
1945 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
1946
1947 while (1) {
1948 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
1949 n = fread ( fileName, 1, 16, f );
1950 if (n != 16) {
1951 if (feof(f)) {
1952 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
1953 break;
1954 }
1955 else {
1956 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
1957 }
1958 }
1959
1960 #if defined(darwin_HOST_OS)
1961 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
1962 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
1963 break;
1964 }
1965 #endif
1966
1967 n = fread ( tmp, 1, 12, f );
1968 if (n != 12)
1969 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
1970 n = fread ( tmp, 1, 6, f );
1971 if (n != 6)
1972 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
1973 n = fread ( tmp, 1, 6, f );
1974 if (n != 6)
1975 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
1976 n = fread ( tmp, 1, 8, f );
1977 if (n != 8)
1978 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
1979 n = fread ( tmp, 1, 10, f );
1980 if (n != 10)
1981 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
1982 tmp[10] = '\0';
1983 for (n = 0; isdigit(tmp[n]); n++);
1984 tmp[n] = '\0';
1985 memberSize = atoi(tmp);
1986
1987 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
1988 n = fread ( tmp, 1, 2, f );
1989 if (n != 2)
1990 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
1991 if (strncmp(tmp, "\x60\x0A", 2) != 0)
1992 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
1993 path, ftell(f), tmp[0], tmp[1]);
1994
1995 isGnuIndex = 0;
1996 /* Check for BSD-variant large filenames */
1997 if (0 == strncmp(fileName, "#1/", 3)) {
1998 fileName[16] = '\0';
1999 if (isdigit(fileName[3])) {
2000 for (n = 4; isdigit(fileName[n]); n++);
2001 fileName[n] = '\0';
2002 thisFileNameSize = atoi(fileName + 3);
2003 memberSize -= thisFileNameSize;
2004 if (thisFileNameSize >= fileNameSize) {
2005 /* Double it to avoid potentially continually
2006 increasing it by 1 */
2007 fileNameSize = thisFileNameSize * 2;
2008 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2009 }
2010 n = fread ( fileName, 1, thisFileNameSize, f );
2011 if (n != (int)thisFileNameSize) {
2012 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
2013 path);
2014 }
2015 fileName[thisFileNameSize] = 0;
2016
2017 /* On OS X at least, thisFileNameSize is the size of the
2018 fileName field, not the length of the fileName
2019 itself. */
2020 thisFileNameSize = strlen(fileName);
2021 }
2022 else {
2023 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
2024 }
2025 }
2026 /* Check for GNU file index file */
2027 else if (0 == strncmp(fileName, "//", 2)) {
2028 fileName[0] = '\0';
2029 thisFileNameSize = 0;
2030 isGnuIndex = 1;
2031 }
2032 /* Check for a file in the GNU file index */
2033 else if (fileName[0] == '/') {
2034 if (isdigit(fileName[1])) {
2035 int i;
2036
2037 for (n = 2; isdigit(fileName[n]); n++);
2038 fileName[n] = '\0';
2039 n = atoi(fileName + 1);
2040
2041 if (gnuFileIndex == NULL) {
2042 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
2043 }
2044 if (n < 0 || n > gnuFileIndexSize) {
2045 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
2046 }
2047 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
2048 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
2049 }
2050 for (i = n; gnuFileIndex[i] != '\n'; i++);
2051 thisFileNameSize = i - n - 1;
2052 if (thisFileNameSize >= fileNameSize) {
2053 /* Double it to avoid potentially continually
2054 increasing it by 1 */
2055 fileNameSize = thisFileNameSize * 2;
2056 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2057 }
2058 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
2059 fileName[thisFileNameSize] = '\0';
2060 }
2061 else if (fileName[1] == ' ') {
2062 fileName[0] = '\0';
2063 thisFileNameSize = 0;
2064 }
2065 else {
2066 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
2067 }
2068 }
2069 /* Finally, the case where the filename field actually contains
2070 the filename */
2071 else {
2072 /* GNU ar terminates filenames with a '/', this allowing
2073 spaces in filenames. So first look to see if there is a
2074 terminating '/'. */
2075 for (thisFileNameSize = 0;
2076 thisFileNameSize < 16;
2077 thisFileNameSize++) {
2078 if (fileName[thisFileNameSize] == '/') {
2079 fileName[thisFileNameSize] = '\0';
2080 break;
2081 }
2082 }
2083 /* If we didn't find a '/', then a space teminates the
2084 filename. Note that if we don't find one, then
2085 thisFileNameSize ends up as 16, and we already have the
2086 '\0' at the end. */
2087 if (thisFileNameSize == 16) {
2088 for (thisFileNameSize = 0;
2089 thisFileNameSize < 16;
2090 thisFileNameSize++) {
2091 if (fileName[thisFileNameSize] == ' ') {
2092 fileName[thisFileNameSize] = '\0';
2093 break;
2094 }
2095 }
2096 }
2097 }
2098
2099 IF_DEBUG(linker,
2100 debugBelch("loadArchive: Found member file `%s'\n", fileName));
2101
2102 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
2103 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
2104
2105 #if defined(OBJFORMAT_PEi386)
2106 /*
2107 * Note [MSVC import files (ext .lib)]
2108 * MSVC compilers store the object files in
2109 * the import libraries with extension .dll
2110 * so on Windows we should look for those too.
2111 * The PE COFF format doesn't specify any specific file name
2112 * for sections. So on windows, just try to load it all.
2113 *
2114 * Linker members (e.g. filename / are skipped since they are not needed)
2115 */
2116 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
2117
2118 /*
2119 * Note [GCC import files (ext .dll.a)]
2120 * GCC stores import information in the same binary format
2121 * as the object file normally has. The only difference is that
2122 * all the information are put in .idata sections. The only real
2123 * way to tell if we're dealing with an import lib is by looking
2124 * at the file extension.
2125 */
2126 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
2127 #endif // windows
2128
2129 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
2130 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
2131
2132 if (isObject) {
2133 char *archiveMemberName;
2134
2135 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
2136
2137 #if defined(mingw32_HOST_OS)
2138 // TODO: We would like to use allocateExec here, but allocateExec
2139 // cannot currently allocate blocks large enough.
2140 image = allocateImageAndTrampolines(path, fileName, f, memberSize,
2141 isThin);
2142 #elif defined(darwin_HOST_OS)
2143 if (RTS_LINKER_USE_MMAP)
2144 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
2145 else {
2146 /* See loadObj() */
2147 misalignment = machoGetMisalignment(f);
2148 image = stgMallocBytes(memberSize + misalignment,
2149 "loadArchive(image)");
2150 image += misalignment;
2151 }
2152
2153 #else // not windows or darwin
2154 image = stgMallocBytes(memberSize, "loadArchive(image)");
2155 #endif
2156 if (isThin) {
2157 FILE *member;
2158 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
2159
2160 /* Allocate and setup the dirname of the archive. We'll need
2161 this to locate the thin member */
2162 pathCopy = pathdup(path); // Convert the char* to a pathchar*
2163 dirName = pathdir(pathCopy);
2164
2165 /* Append the relative member name to the dirname. This should be
2166 be the full path to the actual thin member. */
2167 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
2168 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
2169 objFileName = mkPath(fileName);
2170 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
2171 stgFree(objFileName);
2172 stgFree(dirName);
2173
2174 member = pathopen(memberPath, WSTR("rb"));
2175 if (!member)
2176 barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
2177
2178 n = fread ( image, 1, memberSize, member );
2179 if (n != memberSize) {
2180 barf("loadArchive: error whilst reading `%s'", fileName);
2181 }
2182
2183 fclose(member);
2184 stgFree(memberPath);
2185 stgFree(pathCopy);
2186 }
2187 else
2188 {
2189 n = fread ( image, 1, memberSize, f );
2190 if (n != memberSize) {
2191 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2192 }
2193 }
2194
2195 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
2196 "loadArchive(file)");
2197 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
2198 path, (int)thisFileNameSize, fileName);
2199
2200 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
2201 , misalignment);
2202
2203 stgFree(archiveMemberName);
2204
2205 if (0 == loadOc(oc)) {
2206 stgFree(fileName);
2207 fclose(f);
2208 return 0;
2209 } else {
2210 #if defined(OBJFORMAT_PEi386)
2211 if (isImportLib)
2212 {
2213 findAndLoadImportLibrary(oc);
2214 stgFree(oc);
2215 oc = NULL;
2216 break;
2217 } else {
2218 #endif
2219 oc->next = objects;
2220 objects = oc;
2221 #if defined(OBJFORMAT_PEi386)
2222 }
2223 #endif
2224 }
2225 }
2226 else if (isGnuIndex) {
2227 if (gnuFileIndex != NULL) {
2228 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
2229 }
2230 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
2231 #if RTS_LINKER_USE_MMAP
2232 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
2233 #else
2234 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
2235 #endif
2236 n = fread ( gnuFileIndex, 1, memberSize, f );
2237 if (n != memberSize) {
2238 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2239 }
2240 gnuFileIndex[memberSize] = '/';
2241 gnuFileIndexSize = memberSize;
2242 }
2243 else if (isImportLib) {
2244 #if defined(OBJFORMAT_PEi386)
2245 if (checkAndLoadImportLibrary(path, fileName, f)) {
2246 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
2247 }
2248 else {
2249 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
2250 n = fseek(f, memberSize, SEEK_CUR);
2251 if (n != 0)
2252 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2253 memberSize, path);
2254 }
2255 #endif
2256 }
2257 else {
2258 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
2259 if (!isThin || thisFileNameSize == 0) {
2260 n = fseek(f, memberSize, SEEK_CUR);
2261 if (n != 0)
2262 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2263 memberSize, path);
2264 }
2265 }
2266
2267 /* .ar files are 2-byte aligned */
2268 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
2269 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
2270 n = fread ( tmp, 1, 1, f );
2271 if (n != 1) {
2272 if (feof(f)) {
2273 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
2274 break;
2275 }
2276 else {
2277 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
2278 }
2279 }
2280 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
2281 }
2282 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
2283 }
2284
2285 fclose(f);
2286
2287 stgFree(fileName);
2288 if (gnuFileIndex != NULL) {
2289 #if RTS_LINKER_USE_MMAP
2290 munmap(gnuFileIndex, gnuFileIndexSize + 1);
2291 #else
2292 stgFree(gnuFileIndex);
2293 #endif
2294 }
2295
2296 if (RTS_LINKER_USE_MMAP)
2297 m32_allocator_flush();
2298
2299 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
2300 return 1;
2301 }
2302
2303 HsInt loadArchive (pathchar *path)
2304 {
2305 ACQUIRE_LOCK(&linker_mutex);
2306 HsInt r = loadArchive_(path);
2307 RELEASE_LOCK(&linker_mutex);
2308 return r;
2309 }
2310
2311 //
2312 // Load the object file into memory. This will not be its final resting place,
2313 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
2314 // address space, properly aligned.
2315 //
2316 static ObjectCode *
2317 preloadObjectFile (pathchar *path)
2318 {
2319 int fileSize;
2320 struct_stat st;
2321 int r;
2322 void *image;
2323 ObjectCode *oc;
2324 int misalignment = 0;
2325
2326 r = pathstat(path, &st);
2327 if (r == -1) {
2328 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
2329 return NULL;
2330 }
2331
2332 fileSize = st.st_size;
2333
2334 #if RTS_LINKER_USE_MMAP
2335 int fd;
2336
2337 /* On many architectures malloc'd memory isn't executable, so we need to use
2338 * mmap. */
2339
2340 #if defined(openbsd_HOST_OS)
2341 fd = open(path, O_RDONLY, S_IRUSR);
2342 #else
2343 fd = open(path, O_RDONLY);
2344 #endif
2345 if (fd == -1) {
2346 errorBelch("loadObj: can't open %s", path);
2347 return NULL;
2348 }
2349
2350 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
2351 MAP_PRIVATE, fd, 0);
2352 // not 32-bit yet, we'll remap later
2353 close(fd);
2354
2355 #else /* !RTS_LINKER_USE_MMAP */
2356 FILE *f;
2357
2358 /* load the image into memory */
2359 /* coverity[toctou] */
2360 f = pathopen(path, WSTR("rb"));
2361 if (!f) {
2362 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
2363 return NULL;
2364 }
2365
2366 # if defined(mingw32_HOST_OS)
2367
2368 // TODO: We would like to use allocateExec here, but allocateExec
2369 // cannot currently allocate blocks large enough.
2370 image = allocateImageAndTrampolines(path, "itself", f, fileSize,
2371 HS_BOOL_FALSE);
2372 if (image == NULL) {
2373 fclose(f);
2374 return NULL;
2375 }
2376
2377 # elif defined(darwin_HOST_OS)
2378
2379 // In a Mach-O .o file, all sections can and will be misaligned
2380 // if the total size of the headers is not a multiple of the
2381 // desired alignment. This is fine for .o files that only serve
2382 // as input for the static linker, but it's not fine for us,
2383 // as SSE (used by gcc for floating point) and Altivec require
2384 // 16-byte alignment.
2385 // We calculate the correct alignment from the header before
2386 // reading the file, and then we misalign image on purpose so
2387 // that the actual sections end up aligned again.
2388 misalignment = machoGetMisalignment(f);
2389 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
2390 image += misalignment;
2391
2392 # else /* !defined(mingw32_HOST_OS) */
2393
2394 image = stgMallocBytes(fileSize, "loadObj(image)");
2395
2396 #endif
2397
2398 int n;
2399 n = fread ( image, 1, fileSize, f );
2400 fclose(f);
2401 if (n != fileSize) {
2402 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
2403 stgFree(image);
2404 return NULL;
2405 }
2406
2407 #endif /* RTS_LINKER_USE_MMAP */
2408
2409 oc = mkOc(path, image, fileSize, rtsTrue, NULL, misalignment);
2410
2411 return oc;
2412 }
2413
2414 /* -----------------------------------------------------------------------------
2415 * Load an obj (populate the global symbol table, but don't resolve yet)
2416 *
2417 * Returns: 1 if ok, 0 on error.
2418 */
2419 static HsInt loadObj_ (pathchar *path)
2420 {
2421 ObjectCode* oc;
2422 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
2423
2424 /* debugBelch("loadObj %s\n", path ); */
2425
2426 /* Check that we haven't already loaded this object.
2427 Ignore requests to load multiple times */
2428
2429 if (isAlreadyLoaded(path)) {
2430 IF_DEBUG(linker,
2431 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
2432 return 1; /* success */
2433 }
2434
2435 oc = preloadObjectFile(path);
2436 if (oc == NULL) return 0;
2437
2438 if (! loadOc(oc)) {
2439 // failed; free everything we've allocated
2440 removeOcSymbols(oc);
2441 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
2442 freeObjectCode(oc);
2443 return 0;
2444 }
2445
2446 oc->next = objects;
2447 objects = oc;
2448 return 1;
2449 }
2450
2451 HsInt loadObj (pathchar *path)
2452 {
2453 ACQUIRE_LOCK(&linker_mutex);
2454 HsInt r = loadObj_(path);
2455 RELEASE_LOCK(&linker_mutex);
2456 return r;
2457 }
2458
2459 static HsInt loadOc (ObjectCode* oc)
2460 {
2461 int r;
2462
2463 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
2464
2465 /* verify the in-memory image */
2466 # if defined(OBJFORMAT_ELF)
2467 r = ocVerifyImage_ELF ( oc );
2468 # elif defined(OBJFORMAT_PEi386)
2469 r = ocVerifyImage_PEi386 ( oc );
2470 # elif defined(OBJFORMAT_MACHO)
2471 r = ocVerifyImage_MachO ( oc );
2472 # else
2473 barf("loadObj: no verify method");
2474 # endif
2475 if (!r) {
2476 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
2477 return r;
2478 }
2479
2480 #if NEED_SYMBOL_EXTRAS
2481 # if defined(OBJFORMAT_MACHO)
2482 r = ocAllocateSymbolExtras_MachO ( oc );
2483 if (!r) {
2484 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
2485 return r;
2486 }
2487 # elif defined(OBJFORMAT_ELF)
2488 r = ocAllocateSymbolExtras_ELF ( oc );
2489 if (!r) {
2490 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
2491 return r;
2492 }
2493 # elif defined(OBJFORMAT_PEi386)
2494 ocAllocateSymbolExtras_PEi386 ( oc );
2495 # endif
2496 #endif
2497
2498 /* build the symbol list for this image */
2499 # if defined(OBJFORMAT_ELF)
2500 r = ocGetNames_ELF ( oc );
2501 # elif defined(OBJFORMAT_PEi386)
2502 r = ocGetNames_PEi386 ( oc );
2503 # elif defined(OBJFORMAT_MACHO)
2504 r = ocGetNames_MachO ( oc );
2505 # else
2506 barf("loadObj: no getNames method");
2507 # endif
2508 if (!r) {
2509 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
2510 return r;
2511 }
2512
2513 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
2514 setOcInitialStatus( oc );
2515 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
2516
2517 return 1;
2518 }
2519
2520 /* -----------------------------------------------------------------------------
2521 * try to load and initialize an ObjectCode into memory
2522 *
2523 * Returns: 1 if ok, 0 on error.
2524 */
2525 int ocTryLoad (ObjectCode* oc) {
2526 int r;
2527
2528 if (oc->status != OBJECT_NEEDED) {
2529 return 1;
2530 }
2531
2532 /* Check for duplicate symbols by looking into `symhash`.
2533 Duplicate symbols are any symbols which exist
2534 in different ObjectCodes that have both been loaded, or
2535 are to be loaded by this call.
2536
2537 This call is intended to have no side-effects when a non-duplicate
2538 symbol is re-inserted.
2539
2540 We set the Address to NULL since that is not used to distinguish
2541 symbols. Duplicate symbols are distinguished by name and oc.
2542 */
2543 int x;
2544 SymbolName* symbol;
2545 for (x = 0; x < oc->n_symbols; x++) {
2546 symbol = oc->symbols[x];
2547 if ( symbol
2548 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL, isSymbolWeak(oc, symbol), oc)) {
2549 return 0;
2550 }
2551 }
2552
2553 # if defined(OBJFORMAT_ELF)
2554 r = ocResolve_ELF ( oc );
2555 # elif defined(OBJFORMAT_PEi386)
2556 r = ocResolve_PEi386 ( oc );
2557 # elif defined(OBJFORMAT_MACHO)
2558 r = ocResolve_MachO ( oc );
2559 # else
2560 barf("ocTryLoad: not implemented on this platform");
2561 # endif
2562 if (!r) { return r; }
2563
2564 // run init/init_array/ctors/mod_init_func
2565
2566 loading_obj = oc; // tells foreignExportStablePtr what to do
2567 #if defined(OBJFORMAT_ELF)
2568 r = ocRunInit_ELF ( oc );
2569 #elif defined(OBJFORMAT_PEi386)
2570 r = ocRunInit_PEi386 ( oc );
2571 #elif defined(OBJFORMAT_MACHO)
2572 r = ocRunInit_MachO ( oc );
2573 #else
2574 barf("ocTryLoad: initializers not implemented on this platform");
2575 #endif
2576 loading_obj = NULL;
2577
2578 if (!r) { return r; }
2579
2580 oc->status = OBJECT_RESOLVED;
2581
2582 return 1;
2583 }
2584
2585 /* -----------------------------------------------------------------------------
2586 * resolve all the currently unlinked objects in memory
2587 *
2588 * Returns: 1 if ok, 0 on error.
2589 */
2590 static HsInt resolveObjs_ (void)
2591 {
2592 ObjectCode *oc;
2593 int r;
2594
2595 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
2596
2597 for (oc = objects; oc; oc = oc->next) {
2598 r = ocTryLoad(oc);
2599 if (!r)
2600 {
2601 return r;
2602 }
2603 }
2604
2605 #ifdef PROFILING
2606 // collect any new cost centres & CCSs that were defined during runInit
2607 initProfiling2();
2608 #endif
2609
2610 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
2611 return 1;
2612 }
2613
2614 HsInt resolveObjs (void)
2615 {
2616 ACQUIRE_LOCK(&linker_mutex);
2617 HsInt r = resolveObjs_();
2618 RELEASE_LOCK(&linker_mutex);
2619 return r;
2620 }
2621
2622 /* -----------------------------------------------------------------------------
2623 * delete an object from the pool
2624 */
2625 static HsInt unloadObj_ (pathchar *path, rtsBool just_purge)
2626 {
2627 ObjectCode *oc, *prev, *next;
2628 HsBool unloadedAnyObj = HS_BOOL_FALSE;
2629
2630 ASSERT(symhash != NULL);
2631 ASSERT(objects != NULL);
2632
2633 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
2634
2635 prev = NULL;
2636 for (oc = objects; oc; oc = next) {
2637 next = oc->next; // oc might be freed
2638
2639 if (!pathcmp(oc->fileName,path)) {
2640
2641 // these are both idempotent, so in just_purge mode we can
2642 // later call unloadObj() to really unload the object.
2643 removeOcSymbols(oc);
2644 freeOcStablePtrs(oc);
2645
2646 if (!just_purge) {
2647 if (prev == NULL) {
2648 objects = oc->next;
2649 } else {
2650 prev->next = oc->next;
2651 }
2652 ACQUIRE_LOCK(&linker_unloaded_mutex);
2653 oc->next = unloaded_objects;
2654 unloaded_objects = oc;
2655 oc->status = OBJECT_UNLOADED;
2656 RELEASE_LOCK(&linker_unloaded_mutex);
2657 // We do not own oc any more; it can be released at any time by
2658 // the GC in checkUnload().
2659 } else {
2660 prev = oc;
2661 }
2662
2663 /* This could be a member of an archive so continue
2664 * unloading other members. */
2665 unloadedAnyObj = HS_BOOL_TRUE;
2666 } else {
2667 prev = oc;
2668 }
2669 }
2670
2671 if (unloadedAnyObj) {
2672 return 1;
2673 }
2674 else {
2675 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
2676 return 0;
2677 }
2678 }
2679
2680 HsInt unloadObj (pathchar *path)
2681 {
2682 ACQUIRE_LOCK(&linker_mutex);
2683 HsInt r = unloadObj_(path, rtsFalse);
2684 RELEASE_LOCK(&linker_mutex);
2685 return r;
2686 }
2687
2688 HsInt purgeObj (pathchar *path)
2689 {
2690 ACQUIRE_LOCK(&linker_mutex);
2691 HsInt r = unloadObj_(path, rtsTrue);
2692 RELEASE_LOCK(&linker_mutex);
2693 return r;
2694 }
2695
2696 /* -----------------------------------------------------------------------------
2697 * Sanity checking. For each ObjectCode, maintain a list of address ranges
2698 * which may be prodded during relocation, and abort if we try and write
2699 * outside any of these.
2700 */
2701 static void
2702 addProddableBlock ( ObjectCode* oc, void* start, int size )
2703 {
2704 ProddableBlock* pb
2705 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
2706
2707 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
2708 ASSERT(size > 0);
2709 pb->start = start;
2710 pb->size = size;
2711 pb->next = oc->proddables;
2712 oc->proddables = pb;
2713 }
2714
2715 static void
2716 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
2717 {
2718 ProddableBlock* pb;
2719
2720 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
2721 char* s = (char*)(pb->start);
2722 char* e = s + pb->size;
2723 char* a = (char*)addr;
2724 if (a >= s && (a+size) <= e) return;
2725 }
2726 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
2727 }
2728
2729 static void freeProddableBlocks (ObjectCode *oc)
2730 {
2731 ProddableBlock *pb, *next;
2732
2733 for (pb = oc->proddables; pb != NULL; pb = next) {
2734 next = pb->next;
2735 stgFree(pb);
2736 }
2737 oc->proddables = NULL;
2738 }
2739
2740 /* -----------------------------------------------------------------------------
2741 * Section management.
2742 */
2743 static void
2744 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
2745 void* start, StgWord size, StgWord mapped_offset,
2746 void* mapped_start, StgWord mapped_size)
2747 {
2748 s->start = start; /* actual start of section in memory */
2749 s->size = size; /* actual size of section in memory */
2750 s->kind = kind;
2751 s->alloc = alloc;
2752 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
2753
2754 s->mapped_start = mapped_start; /* start of mmap() block */
2755 s->mapped_size = mapped_size; /* size of mmap() block */
2756
2757 IF_DEBUG(linker,
2758 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
2759 start, (void*)((StgWord)start + size),
2760 size, kind ));
2761 }
2762
2763
2764 /* --------------------------------------------------------------------------
2765 * Symbol Extras.
2766 * This is about allocating a small chunk of memory for every symbol in the
2767 * object file. We make sure that the SymboLExtras are always "in range" of
2768 * limited-range PC-relative instructions on various platforms by allocating
2769 * them right next to the object code itself.
2770 */
2771
2772 #if NEED_SYMBOL_EXTRAS
2773 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2774
2775 /*
2776 ocAllocateSymbolExtras
2777
2778 Allocate additional space at the end of the object file image to make room
2779 for jump islands (powerpc, x86_64, arm) and GOT entries (x86_64).
2780
2781 PowerPC relative branch instructions have a 24 bit displacement field.
2782 As PPC code is always 4-byte-aligned, this yields a +-32MB range.
2783 If a particular imported symbol is outside this range, we have to redirect
2784 the jump to a short piece of new code that just loads the 32bit absolute
2785 address and jumps there.
2786 On x86_64, PC-relative jumps and PC-relative accesses to the GOT are limited
2787 to 32 bits (+-2GB).
2788
2789 This function just allocates space for one SymbolExtra for every
2790 undefined symbol in the object file. The code for the jump islands is
2791 filled in by makeSymbolExtra below.
2792 */
2793
2794 static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
2795 {
2796 size_t n;
2797
2798 if (RTS_LINKER_USE_MMAP && USE_CONTIGUOUS_MMAP) {
2799 n = roundUpToPage(oc->fileSize);
2800
2801 /* Keep image and symbol_extras contiguous */
2802 void *new = mmapForLinker(n + (sizeof(SymbolExtra) * count),
2803 MAP_ANONYMOUS, -1, 0);
2804 if (new) {
2805 memcpy(new, oc->image, oc->fileSize);
2806 if (oc->imageMapped) {
2807 munmap(oc->image, n);
2808 }
2809 oc->image = new;
2810 oc->imageMapped = rtsTrue;
2811 oc->fileSize = n + (sizeof(SymbolExtra) * count);
2812 oc->symbol_extras = (SymbolExtra *) (oc->image + n);
2813 }
2814 else {
2815 oc->symbol_extras = NULL;
2816 return 0;
2817 }
2818 }
2819 else if( count > 0 ) {
2820 if (RTS_LINKER_USE_MMAP) {
2821 n = roundUpToPage(oc->fileSize);
2822
2823 oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8);
2824 if (oc->symbol_extras == NULL) return 0;
2825 }
2826 else {
2827 // round up to the nearest 4
2828 int aligned = (oc->fileSize + 3) & ~3;
2829 int misalignment = oc->misalignment;
2830
2831 oc->image -= misalignment;
2832 oc->image = stgReallocBytes( oc->image,
2833 misalignment +
2834 aligned + sizeof (SymbolExtra) * count,
2835 "ocAllocateSymbolExtras" );
2836 oc->image += misalignment;
2837
2838 oc->symbol_extras = (SymbolExtra *) (oc->image + aligned);
2839 }
2840 }
2841
2842 if (oc->symbol_extras != NULL) {
2843 memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
2844 }
2845
2846 oc->first_symbol_extra = first;
2847 oc->n_symbol_extras = count;
2848
2849 return 1;
2850 }
2851
2852 #endif
2853 #endif // NEED_SYMBOL_EXTRAS
2854
2855 #if defined(arm_HOST_ARCH)
2856
2857 static void
2858 ocFlushInstructionCache( ObjectCode *oc )
2859 {
2860 int i;
2861 // Object code
2862 for (i=0; i < oc->n_sections; i++) {
2863 Section *s = &oc->sections[i];
2864 // This is a bit too broad but we don't have any way to determine what
2865 // is certainly code
2866 if (s->kind == SECTIONKIND_CODE_OR_RODATA)
2867 __clear_cache(s->start, (void*) ((uintptr_t) s->start + s->size));
2868 }
2869
2870 // Jump islands
2871 // Note the (+1) to ensure that the last symbol extra is covered by the
2872 // flush.
2873 __clear_cache(oc->symbol_extras, &oc->symbol_extras[oc->n_symbol_extras+1]);
2874 }
2875
2876 #endif
2877
2878 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2879 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2880
2881 static SymbolExtra* makeSymbolExtra( ObjectCode* oc,
2882 unsigned long symbolNumber,
2883 unsigned long target )
2884 {
2885 SymbolExtra *extra;
2886
2887 ASSERT( symbolNumber >= oc->first_symbol_extra
2888 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2889
2890 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2891
2892 #ifdef powerpc_HOST_ARCH
2893 // lis r12, hi16(target)
2894 extra->jumpIsland.lis_r12 = 0x3d80;
2895 extra->jumpIsland.hi_addr = target >> 16;
2896
2897 // ori r12, r12, lo16(target)
2898 extra->jumpIsland.ori_r12_r12 = 0x618c;
2899 extra->jumpIsland.lo_addr = target & 0xffff;
2900
2901 // mtctr r12
2902 extra->jumpIsland.mtctr_r12 = 0x7d8903a6;
2903
2904 // bctr
2905 extra->jumpIsland.bctr = 0x4e800420;
2906 #endif
2907 #ifdef x86_64_HOST_ARCH
2908 // jmp *-14(%rip)
2909 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
2910 extra->addr = target;
2911 memcpy(extra->jumpIsland, jmp, 6);
2912 #endif
2913
2914 return extra;
2915 }
2916
2917 #endif
2918 #endif // defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2919
2920 #ifdef arm_HOST_ARCH
2921 static SymbolExtra* makeArmSymbolExtra( ObjectCode* oc,
2922 unsigned long symbolNumber,
2923 unsigned long target,
2924 int fromThumb,
2925 int toThumb )
2926 {
2927 SymbolExtra *extra;
2928
2929 ASSERT( symbolNumber >= oc->first_symbol_extra
2930 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2931
2932 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2933
2934 // Make sure instruction mode bit is set properly
2935 if (toThumb)
2936 target |= 1;
2937 else
2938 target &= ~1;
2939
2940 if (!fromThumb) {
2941 // In ARM encoding:
2942 // movw r12, #0
2943 // movt r12, #0
2944 // bx r12
2945 uint32_t code[] = { 0xe300c000, 0xe340c000, 0xe12fff1c };
2946
2947 // Patch lower half-word into movw
2948 code[0] |= ((target>>12) & 0xf) << 16;
2949 code[0] |= target & 0xfff;
2950 // Patch upper half-word into movt
2951 target >>= 16;
2952 code[1] |= ((target>>12) & 0xf) << 16;
2953 code[1] |= target & 0xfff;
2954
2955 memcpy(extra->jumpIsland, code, 12);
2956
2957 } else {
2958 // In Thumb encoding:
2959 // movw r12, #0
2960 // movt r12, #0
2961 // bx r12
2962 uint16_t code[] = { 0xf240, 0x0c00,
2963 0xf2c0, 0x0c00,
2964 0x4760 };
2965
2966 // Patch lower half-word into movw
2967 code[0] |= (target>>12) & 0xf;
2968 code[0] |= ((target>>11) & 0x1) << 10;
2969 code[1] |= ((target>>8) & 0x7) << 12;
2970 code[1] |= target & 0xff;
2971 // Patch upper half-word into movt
2972 target >>= 16;
2973 code[2] |= (target>>12) & 0xf;
2974 code[2] |= ((target>>11) & 0x1) << 10;
2975 code[3] |= ((target>>8) & 0x7) << 12;
2976 code[3] |= target & 0xff;
2977
2978 memcpy(extra->jumpIsland, code, 10);
2979 }
2980
2981 return extra;
2982 }
2983 #endif // arm_HOST_ARCH
2984
2985 /* --------------------------------------------------------------------------
2986 * PowerPC specifics (instruction cache flushing)
2987 * ------------------------------------------------------------------------*/
2988
2989 #ifdef powerpc_HOST_ARCH
2990 /*
2991 ocFlushInstructionCache
2992
2993 Flush the data & instruction caches.
2994 Because the PPC has split data/instruction caches, we have to
2995 do that whenever we modify code at runtime.
2996 */
2997
2998 static void
2999 ocFlushInstructionCacheFrom(void* begin, size_t length)
3000 {
3001 size_t n = (length + 3) / 4;
3002 unsigned long* p = begin;
3003
3004 while (n--)
3005 {
3006 __asm__ volatile ( "dcbf 0,%0\n\t"
3007 "sync\n\t"
3008 "icbi 0,%0"
3009 :
3010 : "r" (p)
3011 );
3012 p++;
3013 }
3014 __asm__ volatile ( "sync\n\t"
3015 "isync"
3016 );
3017 }
3018
3019 static void
3020 ocFlushInstructionCache( ObjectCode *oc )
3021 {
3022 /* The main object code */
3023 ocFlushInstructionCacheFrom(oc->image + oc->misalignment, oc->fileSize);
3024
3025 /* Jump Islands */
3026 ocFlushInstructionCacheFrom(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
3027 }
3028 #endif /* powerpc_HOST_ARCH */
3029
3030
3031 /* --------------------------------------------------------------------------
3032 * PEi386(+) specifics (Win32 targets)
3033 * ------------------------------------------------------------------------*/
3034
3035 /* The information for this linker comes from
3036 Microsoft Portable Executable
3037 and Common Object File Format Specification
3038 revision 8.3 February 2013
3039
3040 It can be found online at:
3041
3042 https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx
3043
3044 Things move, so if that fails, try searching for it via
3045
3046 http://www.google.com/search?q=PE+COFF+specification
3047
3048 The ultimate reference for the PE format is the Winnt.h
3049 header file that comes with the Platform SDKs; as always,
3050 implementations will drift wrt their documentation.
3051
3052 A good background article on the PE format is Matt Pietrek's
3053 March 1994 article in Microsoft System Journal (MSJ)
3054 (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
3055 Win32 Portable Executable File Format." The info in there
3056 has recently been updated in a two part article in
3057 MSDN magazine, issues Feb and March 2002,
3058 "Inside Windows: An In-Depth Look into the Win32 Portable
3059 Executable File Format"
3060
3061 John Levine's book "Linkers and Loaders" contains useful
3062 info on PE too.
3063
3064 The PE specification doesn't specify how to do the actual
3065 relocations. For this reason, and because both PE and ELF are
3066 based on COFF, the relocations for the PEi386+ code is based on
3067 the ELF relocations for the equivalent relocation type.
3068
3069 The ELF ABI can be found at
3070
3071 http://www.x86-64.org/documentation/abi.pdf
3072
3073 The current code is based on version 0.99.6 - October 2013
3074 */
3075
3076
3077 #if defined(OBJFORMAT_PEi386)
3078
3079 static int verifyCOFFHeader ( COFF_header *hdr, pathchar *filename);
3080
3081 /* We assume file pointer is right at the
3082 beginning of COFF object.
3083 */
3084 static char *
3085 allocateImageAndTrampolines (
3086 pathchar* arch_name, char* member_name,
3087 FILE* f USED_IF_x86_64_HOST_ARCH,
3088 int size,
3089 int isThin USED_IF_x86_64_HOST_ARCH)
3090 {
3091 char* image;
3092 #if defined(x86_64_HOST_ARCH)
3093 if (!isThin)
3094 {
3095 /* PeCoff contains number of symbols right in it's header, so
3096 we can reserve the room for symbolExtras right here. */
3097 COFF_header hdr;
3098 size_t n;
3099
3100 n = fread(&hdr, 1, sizeof_COFF_header, f);
3101 if (n != sizeof(COFF_header)) {
3102 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%S'",
3103 member_name, arch_name);
3104 return NULL;
3105 }
3106 fseek(f, -sizeof_COFF_header, SEEK_CUR);
3107
3108 if (!verifyCOFFHeader(&hdr, arch_name)) {
3109 return 0;
3110 }
3111
3112 /* We get back 8-byte aligned memory (is that guaranteed?), but
3113 the offsets to the sections within the file are all 4 mod 8
3114 (is that guaranteed?). We therefore need to offset the image
3115 by 4, so that all the pointers are 8-byte aligned, so that
3116 pointer tagging works. */
3117 /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
3118 which equals to 4 for 64-bit case and 0 for 32-bit case. */
3119 /* We allocate trampolines area for all symbols right behind
3120 image data, aligned on 8. */
3121 size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
3122 + hdr.NumberOfSymbols * sizeof(SymbolExtra);
3123 }
3124 #endif
3125 image = VirtualAlloc(NULL, size,
3126 MEM_RESERVE | MEM_COMMIT,
3127 PAGE_EXECUTE_READWRITE);
3128
3129 if (image == NULL) {
3130 errorBelch("%" PATH_FMT ": failed to allocate memory for image for %s",
3131 arch_name, member_name);
3132 return NULL;
3133 }
3134
3135 return image + PEi386_IMAGE_OFFSET;
3136 }
3137
3138 static int findAndLoadImportLibrary(ObjectCode* oc)
3139 {
3140 int i;
3141
3142 COFF_header* hdr;
3143 COFF_section* sectab;
3144 COFF_symbol* symtab;
3145 UChar* strtab;
3146
3147 hdr = (COFF_header*)(oc->image);
3148 sectab = (COFF_section*)(
3149 ((UChar*)(oc->image))
3150 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3151 );
3152
3153 symtab = (COFF_symbol*)(
3154 ((UChar*)(oc->image))
3155 + hdr->PointerToSymbolTable
3156 );
3157
3158 strtab = ((UChar*)symtab)
3159 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3160
3161 for (i = 0; i < oc->n_sections; i++)
3162 {
3163 COFF_section* sectab_i
3164 = (COFF_section*)myindex(sizeof_COFF_section, sectab, i);
3165
3166 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3167
3168 // Find the first entry containing a valid .idata$7 section.
3169 if (strcmp(secname, ".idata$7") == 0) {
3170 /* First load the containing DLL if not loaded. */
3171 Section section = oc->sections[i];
3172
3173 pathchar* dirName = pathdir(oc->fileName);
3174 HsPtr token = addLibrarySearchPath(dirName);
3175 stgFree(dirName);
3176 char* dllName = (char*)section.start;
3177
3178 if (strlen(dllName) == 0 || dllName[0] == ' ')
3179 {
3180 continue;
3181 }
3182
3183 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand '%ls' => `%s'\n", oc->fileName, dllName));
3184
3185 pathchar* dll = mkPath(dllName);
3186 removeLibrarySearchPath(token);
3187
3188 const char* result = addDLL(dll);
3189 stgFree(dll);
3190
3191 if (result != NULL) {
3192 errorBelch("Could not load `%s'. Reason: %s\n", (char*)dllName, result);
3193 return 0;
3194 }
3195
3196 break;
3197 }
3198
3199 stgFree(secname);
3200 }
3201
3202 return 1;
3203 }
3204
3205 static int checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f)
3206 {
3207 char* image;
3208 static HsBool load_dll_warn = HS_BOOL_FALSE;
3209
3210 if (load_dll_warn) { return 0; }
3211
3212 /* Based on Import Library specification. PE Spec section 7.1 */
3213
3214 COFF_import_header hdr;
3215 size_t n;
3216
3217 n = fread(&hdr, 1, sizeof_COFF_import_Header, f);
3218 if (n != sizeof(COFF_header)) {
3219 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%" PATH_FMT "'\n",
3220 member_name, arch_name);
3221 return 0;
3222 }
3223
3224 if (hdr.Sig1 != 0x0 || hdr.Sig2 != 0xFFFF) {
3225 fseek(f, -sizeof_COFF_import_Header, SEEK_CUR);
3226 IF_DEBUG(linker, debugBelch("loadArchive: Object `%s` is not an import lib. Skipping...\n", member_name));
3227 return 0;
3228 }
3229
3230 IF_DEBUG(linker, debugBelch("loadArchive: reading %d bytes at %ld\n", hdr.SizeOfData, ftell(f)));
3231
3232 image = malloc(hdr.SizeOfData);
3233 n = fread(image, 1, hdr.SizeOfData, f);
3234 if (n != hdr.SizeOfData) {
3235 errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n",
3236 member_name, arch_name);
3237 }
3238
3239 char* symbol = strtok(image, "\0");
3240 int symLen = strlen(symbol) + 1;
3241 int nameLen = n - symLen;
3242 char* dllName = malloc(sizeof(char) * nameLen);
3243 dllName = strncpy(dllName, image + symLen, nameLen);
3244 pathchar* dll = malloc(sizeof(wchar_t) * nameLen);
3245 mbstowcs(dll, dllName, nameLen);
3246 free(dllName);
3247
3248 IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%ls'\n", symbol, dll));
3249 const char* result = addDLL(dll);
3250
3251 free(image);
3252
3253 if (result != NULL) {
3254 errorBelch("Could not load `%ls'. Reason: %s\n", dll, result);
3255 load_dll_warn = HS_BOOL_TRUE;
3256
3257 free(dll);
3258 fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
3259 return 0;
3260 }
3261
3262 free(dll);
3263 return 1;
3264 }
3265
3266 /* We use myindex to calculate array addresses, rather than
3267 simply doing the normal subscript thing. That's because
3268 some of the above structs have sizes which are not
3269 a whole number of words. GCC rounds their sizes up to a
3270 whole number of words, which means that the address calcs
3271 arising from using normal C indexing or pointer arithmetic
3272 are just plain wrong. Sigh.
3273 */
3274 static UChar *
3275 myindex ( int scale, void* base, int index )
3276 {
3277 return
3278 ((UChar*)base) + scale * index;
3279 }
3280
3281
3282 static void
3283 printName ( UChar* name, UChar* strtab )
3284 {
3285 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3286 UInt32 strtab_offset = * (UInt32*)(name+4);
3287 debugBelch("%s", strtab + strtab_offset );
3288 } else {
3289 int i;
3290 for (i = 0; i < 8; i++) {
3291 if (name[i] == 0) break;
3292 debugBelch("%c", name[i] );
3293 }
3294 }
3295 }
3296
3297
3298 static void
3299 copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
3300 {
3301 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3302 UInt32 strtab_offset = * (UInt32*)(name+4);
3303 strncpy ( (char*)dst, (char*)strtab+strtab_offset, dstSize );
3304 dst[dstSize-1] = 0;
3305 } else {
3306 int i = 0;
3307 while (1) {
3308 if (i >= 8) break;
3309 if (name[i] == 0) break;
3310 dst[i] = name[i];
3311 i++;
3312 }
3313 dst[i] = 0;
3314 }
3315 }
3316
3317
3318 static UChar *
3319 cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
3320 {
3321 UChar* newstr;
3322 /* If the string is longer than 8 bytes, look in the
3323 string table for it -- this will be correctly zero terminated.
3324 */
3325 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3326 UInt32 strtab_offset = * (UInt32*)(name+4);
3327 return ((UChar*)strtab) + strtab_offset;
3328 }
3329 /* Otherwise, if shorter than 8 bytes, return the original,
3330 which by defn is correctly terminated.
3331 */
3332 if (name[7]==0) return name;
3333 /* The annoying case: 8 bytes. Copy into a temporary
3334 (XXX which is never freed ...)
3335 */
3336 newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
3337 ASSERT(newstr);
3338 strncpy((char*)newstr,(char*)name,8);
3339 newstr[8] = 0;
3340 return newstr;
3341 }
3342
3343 /* Getting the name of a section is mildly tricky, so we make a
3344 function for it. Sadly, in one case we have to copy the string
3345 (when it is exactly 8 bytes long there's no trailing '\0'), so for
3346 consistency we *always* copy the string; the caller must free it
3347 */
3348 static char *
3349 cstring_from_section_name (UChar* name, UChar* strtab)
3350 {
3351 char *newstr;
3352
3353 if (name[0]=='/') {
3354 int strtab_offset = strtol((char*)name+1,NULL,10);
3355 int len = strlen(((char*)strtab) + strtab_offset);
3356
3357 newstr = stgMallocBytes(len+1, "cstring_from_section_symbol_name");
3358 strcpy((char*)newstr, (char*)((UChar*)strtab) + strtab_offset);
3359 return newstr;
3360 }
3361 else
3362 {
3363 newstr = stgMallocBytes(9, "cstring_from_section_symbol_name");
3364 ASSERT(newstr);
3365 strncpy((char*)newstr,(char*)name,8);
3366 newstr[8] = 0;
3367 return newstr;
3368 }
3369 }
3370
3371 /* See Note [mingw-w64 name decoration scheme] */
3372 #ifndef x86_64_HOST_ARCH
3373 static void
3374 zapTrailingAtSign ( UChar* sym )
3375 {
3376 # define my_isdigit(c) ((c) >= '0' && (c) <= '9')
3377 int i, j;
3378 if (sym[0] == 0) return;
3379 i = 0;
3380 while (sym[i] != 0) i++;
3381 i--;
3382 j = i;
3383 while (j > 0 && my_isdigit(sym[j])) j--;
3384 if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
3385 # undef my_isdigit
3386 }
3387 #endif
3388
3389 /* See Note [mingw-w64 name decoration scheme] */
3390 #ifndef x86_64_HOST_ARCH
3391 #define STRIP_LEADING_UNDERSCORE 1
3392 #else
3393 #define STRIP_LEADING_UNDERSCORE 0
3394 #endif
3395
3396 /*
3397 Note [mingw-w64 name decoration scheme]
3398
3399 What's going on with name decoration? Well, original code
3400 have some crufty and ad-hocish paths related mostly to very old
3401 mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty
3402 uniform and MS-compatible decoration scheme across its tools and runtime.
3403
3404 The scheme is pretty straightforward: on 32 bit objects symbols are exported
3405 with underscore prepended (and @ + stack size suffix appended for stdcall
3406 functions), on 64 bits no underscore is prepended and no suffix is appended
3407 because we have no stdcall convention on 64 bits.
3408
3409 See #9218
3410 */
3411
3412 static SymbolAddr*
3413 lookupSymbolInDLLs ( UChar *lbl )
3414 {
3415 OpenedDLL* o_dll;
3416 SymbolAddr* sym;
3417
3418 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
3419 /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
3420
3421 sym = GetProcAddress(o_dll->instance, (char*)(lbl+STRIP_LEADING_UNDERSCORE));
3422 if (sym != NULL) {
3423 /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
3424 return sym;
3425 }
3426
3427 /* Ticket #2283.
3428 Long description: http://support.microsoft.com/kb/132044
3429 tl;dr:
3430 If C/C++ compiler sees __declspec(dllimport) ... foo ...
3431 it generates call *__imp_foo, and __imp_foo here has exactly
3432 the same semantics as in __imp_foo = GetProcAddress(..., "foo")
3433 */
3434 if (sym == NULL && strncmp ((const char*)lbl, "__imp_", 6) == 0) {
3435 sym = GetProcAddress(o_dll->instance, (char*)(lbl+6+STRIP_LEADING_UNDERSCORE));
3436 if (sym != NULL) {
3437 IndirectAddr* ret;
3438 ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
3439 ret->addr = sym;
3440 ret->next = indirects;
3441 indirects = ret;
3442 IF_DEBUG(linker,
3443 debugBelch("warning: %s from %S is linked instead of %s\n",
3444 (char*)(lbl+6+STRIP_LEADING_UNDERSCORE), o_dll->name, (char*)lbl));
3445 return (void*) & ret->addr;
3446 }
3447 }
3448
3449 sym = GetProcAddress(o_dll->instance, (char*)lbl);
3450 if (sym != NULL) {
3451 /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
3452 return sym;
3453 }
3454 }
3455 return NULL;
3456 }
3457
3458 static int
3459 verifyCOFFHeader (COFF_header *hdr, pathchar *fileName)
3460 {
3461 #if defined(i386_HOST_ARCH)
3462 if (hdr->Machine != 0x14c) {
3463 errorBelch("%" PATH_FMT ": Not x86 PEi386", fileName);
3464 return 0;
3465 }
3466 #elif defined(x86_64_HOST_ARCH)
3467 if (hdr->Machine != 0x8664) {
3468 errorBelch("%" PATH_FMT ": Not x86_64 PEi386", fileName);
3469 return 0;
3470 }
3471 #else
3472 errorBelch("PEi386 not supported on this arch");
3473 #endif
3474
3475 if (hdr->SizeOfOptionalHeader != 0) {
3476 errorBelch("%" PATH_FMT ": PEi386 with nonempty optional header",
3477 fileName);
3478 return 0;
3479 }
3480 if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
3481 (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
3482 (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
3483 (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
3484 errorBelch("%" PATH_FMT ": Not a PEi386 object file", fileName);
3485 return 0;
3486 }
3487 if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
3488 /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
3489 errorBelch("%" PATH_FMT ": Invalid PEi386 word size or endiannness: %d",
3490 fileName,
3491 (int)(hdr->Characteristics));
3492 return 0;
3493 }
3494 return 1;
3495 }
3496
3497 static int
3498 ocVerifyImage_PEi386 ( ObjectCode* oc )
3499 {
3500 int i;
3501 UInt32 j, noRelocs;
3502 COFF_header* hdr;
3503 COFF_section* sectab;
3504 COFF_symbol* symtab;
3505 UChar* strtab;
3506 /* debugBelch("\nLOADING %s\n", oc->fileName); */
3507 hdr = (COFF_header*)(oc->image);
3508 sectab = (COFF_section*) (
3509 ((UChar*)(oc->image))
3510 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3511 );
3512 symtab = (COFF_symbol*) (
3513 ((UChar*)(oc->image))
3514 + hdr->PointerToSymbolTable
3515 );
3516 strtab = ((UChar*)symtab)
3517 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3518
3519 if (!verifyCOFFHeader(hdr, oc->fileName)) {
3520 return 0;
3521 }
3522
3523 /* If the string table size is way crazy, this might indicate that
3524 there are more than 64k relocations, despite claims to the
3525 contrary. Hence this test. */
3526 /* debugBelch("strtab size %d\n", * (UInt32*)strtab); */
3527 #if 0
3528 if ( (*(UInt32*)strtab) > 600000 ) {
3529 /* Note that 600k has no special significance other than being
3530 big enough to handle the almost-2MB-sized lumps that
3531 constitute HSwin32*.o. */
3532 debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
3533 return 0;
3534 }
3535 #endif
3536
3537 /* .BSS Section is initialized in ocGetNames_PEi386
3538 but we need the Sections array initialized here already. */
3539 Section *sections;
3540 sections = (Section*)stgCallocBytes(
3541 sizeof(Section),
3542 hdr->NumberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */
3543 "ocVerifyImage_PEi386(sections)");
3544 oc->sections = sections;
3545 oc->n_sections = hdr->NumberOfSections + 1;
3546
3547 /* Initialize the Sections */
3548 for (i = 0; i < hdr->NumberOfSections; i++) {
3549 COFF_section* sectab_i
3550 = (COFF_section*)
3551 myindex(sizeof_COFF_section, sectab, i);
3552
3553 /* Calculate the start of the data section */
3554 sections[i].start = oc->image + sectab_i->PointerToRawData;
3555 }
3556
3557 /* No further verification after this point; only debug printing. */
3558 i = 0;
3559 IF_DEBUG(linker, i=1);
3560 if (i == 0) return 1;
3561
3562 debugBelch("sectab offset = %" FMT_SizeT "\n",
3563 ((UChar*)sectab) - ((UChar*)hdr) );
3564 debugBelch("symtab offset = %" FMT_SizeT "\n",
3565 ((UChar*)symtab) - ((UChar*)hdr) );
3566 debugBelch("strtab offset = %" FMT_SizeT "\n",
3567 ((UChar*)strtab) - ((UChar*)hdr) );
3568
3569 debugBelch("\n" );
3570 debugBelch( "Machine: 0x%x\n", (UInt32)(hdr->Machine) );
3571 debugBelch( "# sections: %d\n", (UInt32)(hdr->NumberOfSections) );
3572 debugBelch( "time/date: 0x%x\n", (UInt32)(hdr->TimeDateStamp) );
3573 debugBelch( "symtab offset: %d\n", (UInt32)(hdr->PointerToSymbolTable) );
3574 debugBelch( "# symbols: %d\n", (UInt32)(hdr->NumberOfSymbols) );
3575 debugBelch( "sz of opt hdr: %d\n", (UInt32)(hdr->SizeOfOptionalHeader) );
3576 debugBelch( "characteristics: 0x%x\n", (UInt32)(hdr->Characteristics) );
3577
3578 /* Print the section table. */
3579 debugBelch("\n" );
3580 for (i = 0; i < hdr->NumberOfSections; i++) {
3581 COFF_reloc* reltab;
3582 COFF_section* sectab_i
3583 = (COFF_section*)
3584 myindex ( sizeof_COFF_section, sectab, i );
3585 Section section = sections[i];
3586 debugBelch(
3587 "\n"
3588 "section %d\n"
3589 " name `",
3590 i
3591 );
3592 printName ( sectab_i->Name, strtab );
3593 debugBelch(
3594 "'\n"
3595 " vsize %d\n"
3596 " vaddr %d\n"
3597 " data sz %d\n"
3598 " data off 0x%p\n"
3599 " num rel %d\n"
3600 " off rel %d\n"
3601 " ptr raw 0x%x\n",
3602 sectab_i->VirtualSize,
3603 sectab_i->VirtualAddress,
3604 sectab_i->SizeOfRawData,
3605 section.start,
3606 sectab_i->NumberOfRelocations,
3607 sectab_i->PointerToRelocations,
3608 sectab_i->PointerToRawData
3609 );
3610 reltab = (COFF_reloc*) (
3611 ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
3612 );
3613
3614 if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
3615 /* If the relocation field (a short) has overflowed, the
3616 * real count can be found in the first reloc entry.
3617 *
3618 * See Section 4.1 (last para) of the PE spec (rev6.0).
3619 */
3620 COFF_reloc* rel = (COFF_reloc*)
3621 myindex ( sizeof_COFF_reloc, reltab, 0 );
3622 noRelocs = rel->VirtualAddress;
3623 j = 1;
3624 } else {
3625 noRelocs = sectab_i->NumberOfRelocations;
3626 j = 0;
3627 }
3628
3629 for (; j < noRelocs; j++) {
3630 COFF_symbol* sym;
3631 COFF_reloc* rel = (COFF_reloc*)
3632 myindex ( sizeof_COFF_reloc, reltab, j );
3633 debugBelch(
3634 " type 0x%-4x vaddr 0x%-8x name `",
3635 (UInt32)rel->Type,
3636 rel->VirtualAddress );
3637 sym = (COFF_symbol*)
3638 myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
3639 /* Hmm..mysterious looking offset - what's it for? SOF */
3640 printName ( sym->Name, strtab -10 );
3641 debugBelch("'\n" );
3642 }
3643
3644 debugBelch("\n" );
3645 }
3646 debugBelch("\n" );
3647 debugBelch("string table has size 0x%x\n", * (UInt32*)strtab );
3648 debugBelch("---START of string table---\n");
3649 for (i = 4; i < *(Int32*)strtab; i++) {
3650 if (strtab[i] == 0)
3651 debugBelch("\n"); else
3652 debugBelch("%c", strtab[i] );
3653 }
3654 debugBelch("--- END of string table---\n");
3655
3656 debugBelch("\n" );
3657 i = 0;
3658 while (1) {
3659 COFF_symbol* symtab_i;
3660 if (i >= (Int32)(hdr->NumberOfSymbols)) break;
3661 symtab_i = (COFF_symbol*)
3662 myindex ( sizeof_COFF_symbol, symtab, i );
3663 debugBelch(
3664 "symbol %d\n"
3665 " name `",
3666 i
3667 );
3668 printName ( symtab_i->Name, strtab );
3669 debugBelch(
3670 "'\n"
3671 " value 0x%x\n"
3672 " 1+sec# %d\n"
3673 " type 0x%x\n"
3674 " sclass 0x%x\n"
3675 " nAux %d\n",
3676 symtab_i->Value,
3677 (Int32)(symtab_i->SectionNumber),
3678 (UInt32)symtab_i->Type,
3679 (UInt32)symtab_i->StorageClass,
3680 (UInt32)symtab_i->NumberOfAuxSymbols
3681 );
3682 i += symtab_i->NumberOfAuxSymbols;
3683 i++;
3684 }
3685
3686 debugBelch("\n" );
3687 return 1;
3688 }
3689
3690
3691 static int
3692 ocGetNames_PEi386 ( ObjectCode* oc )
3693 {
3694 COFF_header* hdr;
3695 COFF_section* sectab;
3696 COFF_symbol* symtab;
3697 UChar* strtab;
3698
3699 UChar* sname;
3700 SymbolAddr* addr;
3701 int i;
3702
3703 hdr = (COFF_header*)(oc->image);
3704 sectab = (COFF_section*) (
3705 ((UChar*)(oc->image))
3706 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3707 );
3708 symtab = (COFF_symbol*) (
3709 ((UChar*)(oc->image))
3710 + hdr->PointerToSymbolTable
3711 );
3712 strtab = ((UChar*)(oc->image))
3713 + hdr->PointerToSymbolTable
3714 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3715
3716 /* Allocate space for any (local, anonymous) .bss sections. */
3717
3718 for (i = 0; i < hdr->NumberOfSections; i++) {
3719 UInt32 bss_sz;
3720 UChar* zspace;
3721 COFF_section* sectab_i
3722 = (COFF_section*)
3723 myindex ( sizeof_COFF_section, sectab, i );
3724
3725 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3726
3727 if (0 != strcmp(secname, ".bss")) {
3728 stgFree(secname);
3729 continue;
3730 }
3731
3732 stgFree(secname);
3733
3734 /* sof 10/05: the PE spec text isn't too clear regarding what
3735 * the SizeOfRawData field is supposed to hold for object
3736 * file sections containing just uninitialized data -- for executables,
3737 * it is supposed to be zero; unclear what it's supposed to be
3738 * for object files. However, VirtualSize is guaranteed to be
3739 * zero for object files, which definitely suggests that SizeOfRawData
3740 * will be non-zero (where else would the size of this .bss section be
3741 * stored?) Looking at the COFF_section info for incoming object files,
3742 * this certainly appears to be the case.
3743 *
3744 * => I suspect we've been incorrectly handling .bss sections in (relocatable)
3745 * object files up until now. This turned out to bite us with ghc-6.4.1's use
3746 * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
3747 * variable decls into the .bss section. (The specific function in Q which
3748 * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
3749 */
3750 if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
3751 /* This is a non-empty .bss section.
3752 Allocate zeroed space for it */
3753 bss_sz = sectab_i->VirtualSize;
3754 if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
3755 zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
3756 oc->sections[i].start = zspace;
3757 addProddableBlock(oc, zspace, bss_sz);
3758 /* debugBelch("BSS anon section at 0x%x\n", zspace); */
3759 }
3760
3761 /* Copy section information into the ObjectCode. */
3762
3763 for (i = 0; i < hdr->NumberOfSections; i++) {
3764 UChar* start;
3765 UChar* end;
3766 UInt32 sz;
3767
3768 /* By default consider all section as CODE or DATA, which means we want to load them. */
3769 SectionKind kind
3770 = SECTIONKIND_CODE_OR_RODATA;
3771 COFF_section* sectab_i
3772 = (COFF_section*)
3773 myindex ( sizeof_COFF_section, sectab, i );
3774 Section section = oc->sections[i];
3775
3776 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3777
3778 IF_DEBUG(linker, debugBelch("section name = %s\n", secname ));
3779
3780 /* The PE file section flag indicates whether the section contains code or data. */
3781 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE ||
3782 sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
3783 kind = SECTIONKIND_CODE_OR_RODATA;
3784
3785 /* Check next if it contains any uninitialized data */
3786 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_UNINITIALIZED_DATA)
3787 kind = SECTIONKIND_RWDATA;
3788
3789 /* Finally check if it can be discarded. This will also ignore .debug sections */
3790 if (sectab_i->Characteristics & MYIMAGE_SCN_MEM_DISCARDABLE ||
3791 sectab_i->Characteristics & MYIMAGE_SCN_LNK_REMOVE)
3792 kind = SECTIONKIND_OTHER;
3793
3794 if (0==strcmp(".ctors", (char*)secname))
3795 kind = SECTIONKIND_INIT_ARRAY;
3796
3797 ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->VirtualSize == 0);
3798 sz = sectab_i->SizeOfRawData;
3799 if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;
3800
3801 start = section.start;
3802 end = start + sz - 1;
3803
3804 if (kind != SECTIONKIND_OTHER && end >= start) {
3805 addSection(&oc->sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
3806 addProddableBlock(oc, start, sz);
3807 }
3808
3809 stgFree(secname);
3810 }
3811
3812 /* Copy exported symbols into the ObjectCode. */
3813
3814 oc->n_symbols = hdr->NumberOfSymbols;
3815 oc->symbols = stgCallocBytes(sizeof(SymbolName*), oc->n_symbols,
3816 "ocGetNames_PEi386(oc->symbols)");
3817
3818 /* Work out the size of the global BSS section */
3819 StgWord globalBssSize = 0;
3820 for (i=0; i < (int)hdr->NumberOfSymbols; i++) {
3821 COFF_symbol* symtab_i;
3822 symtab_i = (COFF_symbol*)
3823 myindex ( sizeof_COFF_symbol, symtab, i );
3824 if (symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3825 && symtab_i->Value > 0
3826 && symtab_i->StorageClass != MYIMAGE_SYM_CLASS_SECTION) {
3827 globalBssSize += symtab_i->Value;
3828 }
3829 i += symtab_i->NumberOfAuxSymbols;
3830 }
3831
3832 /* Allocate BSS space */
3833 SymbolAddr* bss = NULL;
3834 if (globalBssSize > 0) {
3835 bss = stgCallocBytes(1, globalBssSize,
3836 "ocGetNames_PEi386(non-anonymous bss)");
3837 addSection(&oc->sections[oc->n_sections-1],
3838 SECTIONKIND_RWDATA, SECTION_MALLOC,
3839 bss, globalBssSize, 0, 0, 0);
3840 IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
3841 addProddableBlock(oc, bss, globalBssSize);
3842 } else {
3843 addSection(&oc->sections[oc->n_sections-1],
3844 SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
3845 }
3846
3847 for (i = 0; i < oc->n_symbols; i++) {
3848 COFF_symbol* symtab_i;
3849 symtab_i = (COFF_symbol*)
3850 myindex ( sizeof_COFF_symbol, symtab, i );
3851
3852 addr = NULL;
3853 HsBool isWeak = HS_BOOL_FALSE;
3854 if ( symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED
3855 && symtab_i->SectionNumber > 0) {
3856 /* This symbol is global and defined, viz, exported */
3857 /* for MYIMAGE_SYMCLASS_EXTERNAL
3858 && !MYIMAGE_SYM_UNDEFINED,
3859 the address of the symbol is:
3860 address of relevant section + offset in section
3861 */
3862 COFF_section* sectabent
3863 = (COFF_section*) myindex ( sizeof_COFF_section,
3864 sectab,
3865 symtab_i->SectionNumber-1 );
3866 if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL
3867 || ( symtab_i->StorageClass == MYIMAGE_SYM_CLASS_STATIC
3868 && sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT)
3869 ) {
3870 addr = (void*)((size_t)oc->sections[symtab_i->SectionNumber-1].start
3871 + symtab_i->Value);
3872 if (sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT) {
3873 isWeak = HS_BOOL_TRUE;
3874 }
3875 }
3876 }
3877 else if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_WEAK_EXTERNAL) {
3878 isWeak = HS_BOOL_TRUE;
3879 }
3880 else if ( symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3881 && symtab_i->Value > 0) {
3882 /* This symbol isn't in any section at all, ie, global bss.
3883 Allocate zeroed space for it from the BSS section */
3884 addr = bss;
3885 bss = (SymbolAddr*)((StgWord)bss + (StgWord)symtab_i->Value);
3886 IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symtab_i->Value));
3887 }
3888
3889 sname = cstring_from_COFF_symbol_name(symtab_i->Name, strtab);
3890 if (addr != NULL || isWeak == HS_BOOL_TRUE) {
3891
3892 /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
3893 IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname);)
3894 ASSERT(i >= 0 && i < oc->n_symbols);
3895 /* cstring_from_COFF_symbol_name always succeeds. */
3896 oc->symbols[i] = (SymbolName*)sname;
3897 if (isWeak == HS_BOOL_TRUE) {
3898 setWeakSymbol(oc, sname);
3899 }
3900
3901 if (! ghciInsertSymbolTable(oc->fileName, symhash, (SymbolName*)sname, addr,
3902 isWeak, oc)) {
3903 return 0;
3904 }
3905 } else {
3906 /* We're skipping the symbol, but if we ever load this
3907 object file we'll want to skip it then too. */
3908 oc->symbols[i] = NULL;
3909
3910 # if 0
3911 debugBelch(
3912 "IGNORING symbol %d\n"
3913 " name `",
3914 i
3915 );
3916 printName ( symtab_i->Name, strtab );
3917 debugBelch(
3918 "'\n"
3919 " value 0x%x\n"