Add thin library support to Windows too
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31
32 #if !defined(mingw32_HOST_OS)
33 #include "posix/Signals.h"
34 #endif
35
36 // get protos for is*()
37 #include <ctype.h>
38
39 #ifdef HAVE_SYS_TYPES_H
40 #include <sys/types.h>
41 #endif
42
43 #include <inttypes.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <stdio.h>
47 #include <assert.h>
48 #include <libgen.h>
49
50 #ifdef HAVE_SYS_STAT_H
51 #include <sys/stat.h>
52 #endif
53
54 #if defined(HAVE_DLFCN_H)
55 #include <dlfcn.h>
56 #endif
57
58 /* PowerPC and ARM have relative branch instructions with only 24 bit
59 * displacements and therefore need jump islands contiguous with each object
60 * code module.
61 */
62 #if defined(powerpc_HOST_ARCH)
63 #define SHORT_REL_BRANCH 1
64 #endif
65 #if defined(arm_HOST_ARCH)
66 #define SHORT_REL_BRANCH 1
67 #endif
68
69 #if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
70 #define USE_CONTIGUOUS_MMAP 1
71 #else
72 #define USE_CONTIGUOUS_MMAP 0
73 #endif
74
75 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
76 # define OBJFORMAT_ELF
77 # include <regex.h> // regex is already used by dlopen() so this is OK
78 // to use here without requiring an additional lib
79 #elif defined (mingw32_HOST_OS)
80 # define OBJFORMAT_PEi386
81 # include <windows.h>
82 # include <shfolder.h> /* SHGetFolderPathW */
83 # include <math.h>
84 # include <wchar.h>
85 #elif defined(darwin_HOST_OS)
86 # define OBJFORMAT_MACHO
87 # include <regex.h>
88 # include <mach/machine.h>
89 # include <mach-o/fat.h>
90 # include <mach-o/loader.h>
91 # include <mach-o/nlist.h>
92 # include <mach-o/reloc.h>
93 #if defined(powerpc_HOST_ARCH)
94 # include <mach-o/ppc/reloc.h>
95 #endif
96 #if defined(x86_64_HOST_ARCH)
97 # include <mach-o/x86_64/reloc.h>
98 #endif
99 #endif
100
101 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
102 #define ALWAYS_PIC
103 #endif
104
105 #if defined(dragonfly_HOST_OS)
106 #include <sys/tls.h>
107 #endif
108
109 /* SymbolInfo tracks a symbol's address, the object code from which
110 it originated, and whether or not it's weak.
111
112 RtsSymbolInfo is used to track the state of the symbols currently
113 loaded or to be loaded by the Linker.
114
115 Where the information in the `ObjectCode` is used to track the
116 original status of the symbol inside the `ObjectCode`.
117
118 A weak symbol that has been used will still be marked as weak
119 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
120 */
121 typedef struct _RtsSymbolInfo {
122 SymbolAddr* value;
123 ObjectCode *owner;
124 HsBool weak;
125 } RtsSymbolInfo;
126
127 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
128 This hashtable will contain information on all symbols
129 that we know of, however the .o they are in may not be loaded.
130
131 Until the ObjectCode the symbol belongs to is actually
132 loaded this symbol may be replaced. So do not rely on
133 addresses of unloaded symbols.
134
135 Note [runtime-linker-phases]
136 --------------------------------------
137 Broadly the behavior of the runtime linker can be
138 split into the following four phases:
139
140 - Indexing (e.g. ocVerifyImage and ocGetNames)
141 - Initialization (e.g. ocResolve and ocRunInit)
142 - Resolve (e.g. resolveObjs())
143 - Lookup (e.g. lookupSymbol)
144
145 This is to enable lazy loading of symbols. Eager loading is problematic
146 as it means that all symbols must be available, even those which we will
147 never use. This is especially painful of Windows, where the number of
148 libraries required to link things like mingwex grows to be quite high.
149
150 We proceed through these stages as follows,
151
152 * During Indexing we verify and open the ObjectCode and
153 perform a quick scan/indexing of the ObjectCode. All the work
154 required to actually load the ObjectCode is done.
155
156 All symbols from the ObjectCode is also inserted into
157 `symhash`, where possible duplicates are handled via the semantics
158 described in `ghciInsertSymbolTable`.
159
160 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
161 depending on whether they are an archive members or not.
162
163 * During initialization we load ObjectCode, perform relocations, execute
164 static constructors etc. This phase may trigger other ObjectCodes to
165 be loaded because of the calls to lookupSymbol.
166
167 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
168 previous status was `OBJECT_LOADED`.
169
170 * During resolve we attempt to resolve all the symbols needed for the
171 initial link. This essentially means, that for any ObjectCode given
172 directly to the command-line we perform lookupSymbols on the required
173 symbols. lookupSymbols may trigger the loading of additional ObjectCode
174 if required.
175
176 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
177 the previous status was `OBJECT_NEEDED`.
178
179 * Lookup symbols is used to lookup any symbols required, both during initial
180 link and during statement and expression compilations in the REPL.
181 Declaration of e.g. an foreign import, will eventually call lookupSymbol
182 which will either fail (symbol unknown) or succeed (and possibly triggered a
183 load).
184
185 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
186
187 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
188 by calling unloadObj and starting over.
189 When a new declaration or statement is performed ultimately lookupSymbol is called
190 without doing a re-link.
191
192 The goal of these different phases is to allow the linker to be able to perform
193 "lazy loading" of ObjectCode. The reason for this is that we want to only link
194 in symbols that are actually required for the link. This reduces:
195
196 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
197 then we don't need to load libB. This means the dependency chain for libraries
198 such as mingw32 and mingwex can be broken down.
199
200 2) The number of duplicate symbols, since now only symbols that are
201 true duplicates will display the error.
202 */
203 static /*Str*/HashTable *symhash;
204
205 /* List of currently loaded objects */
206 ObjectCode *objects = NULL; /* initially empty */
207
208 /* List of objects that have been unloaded via unloadObj(), but are waiting
209 to be actually freed via checkUnload() */
210 ObjectCode *unloaded_objects = NULL; /* initially empty */
211
212 #ifdef THREADED_RTS
213 /* This protects all the Linker's global state except unloaded_objects */
214 Mutex linker_mutex;
215 /*
216 * This protects unloaded_objects. We have a separate mutex for this, because
217 * the GC needs to access unloaded_objects in checkUnload, while the linker only
218 * needs to access unloaded_objects in unloadObj(), so this allows most linker
219 * operations proceed concurrently with the GC.
220 */
221 Mutex linker_unloaded_mutex;
222 #endif
223
224 /* Type of the initializer */
225 typedef void (*init_t) (int argc, char **argv, char **env);
226
227 static HsInt isAlreadyLoaded( pathchar *path );
228 static HsInt loadOc( ObjectCode* oc );
229 static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
230 rtsBool mapped, char *archiveMemberName,
231 int misalignment
232 );
233
234 // Use wchar_t for pathnames on Windows (#5697)
235 #if defined(mingw32_HOST_OS)
236 #define pathcmp wcscmp
237 #define pathlen wcslen
238 #define pathopen _wfopen
239 #define pathstat _wstat
240 #define struct_stat struct _stat
241 #define open wopen
242 #define WSTR(s) L##s
243 #define pathprintf swprintf
244 #define pathsize sizeof(wchar_t)
245 #else
246 #define pathcmp strcmp
247 #define pathlen strlen
248 #define pathopen fopen
249 #define pathstat stat
250 #define struct_stat struct stat
251 #define WSTR(s) s
252 #define pathprintf snprintf
253 #define pathsize sizeof(char)
254 #endif
255
256 static pathchar* pathdup(pathchar *path)
257 {
258 pathchar *ret;
259 #if defined(mingw32_HOST_OS)
260 ret = wcsdup(path);
261 #else
262 /* sigh, strdup() isn't a POSIX function, so do it the long way */
263 ret = stgMallocBytes( strlen(path)+1, "pathdup" );
264 strcpy(ret, path);
265 #endif
266 return ret;
267 }
268
269 static pathchar* pathdir(pathchar *path)
270 {
271 pathchar *ret;
272 #if defined(mingw32_HOST_OS)
273 pathchar *drive, *dirName;
274 size_t memberLen = pathlen(path) + 1;
275 dirName = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
276 ret = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
277 drive = stgMallocBytes(pathsize * _MAX_DRIVE, "pathdir(path)");
278 _wsplitpath_s(path, drive, _MAX_DRIVE, dirName, pathsize * pathlen(path), NULL, 0, NULL, 0);
279 pathprintf(ret, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), drive, dirName);
280 stgFree(drive);
281 stgFree(dirName);
282 #else
283 pathchar* dirName = dirname(path);
284 size_t memberLen = pathlen(dirName);
285 ret = stgMallocBytes(pathsize * (memberLen + 2), "pathdir(path)");
286 strcpy(ret, dirName);
287 ret[memberLen ] = '/';
288 ret[memberLen+1] = '\0';
289 #endif
290 return ret;
291 }
292
293 static pathchar* mkPath(char* path)
294 {
295 #if defined(mingw32_HOST_OS)
296 size_t required = mbstowcs(NULL, path, 0);
297 pathchar *ret = stgMallocBytes(sizeof(pathchar) * (required + 1), "mkPath");
298 if (mbstowcs(ret, path, required) == (size_t)-1)
299 {
300 barf("mkPath failed converting char* to wchar_t*");
301 }
302 ret[required] = '\0';
303 return ret;
304 #else
305 return pathdup(path);
306 #endif
307 }
308
309 /* Generic wrapper function to try and Resolve and RunInit oc files */
310 int ocTryLoad( ObjectCode* oc );
311
312 #if defined(OBJFORMAT_ELF)
313 static int ocVerifyImage_ELF ( ObjectCode* oc );
314 static int ocGetNames_ELF ( ObjectCode* oc );
315 static int ocResolve_ELF ( ObjectCode* oc );
316 static int ocRunInit_ELF ( ObjectCode* oc );
317 #if NEED_SYMBOL_EXTRAS
318 static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
319 #endif
320 #elif defined(OBJFORMAT_PEi386)
321 static int ocVerifyImage_PEi386 ( ObjectCode* oc );
322 static int ocGetNames_PEi386 ( ObjectCode* oc );
323 static int ocResolve_PEi386 ( ObjectCode* oc );
324 static int ocRunInit_PEi386 ( ObjectCode* oc );
325 static void *lookupSymbolInDLLs ( unsigned char *lbl );
326 /* See Note [mingw-w64 name decoration scheme] */
327 #ifndef x86_64_HOST_ARCH
328 static void zapTrailingAtSign ( unsigned char *sym );
329 #endif
330 static char *allocateImageAndTrampolines (
331 pathchar* arch_name, char* member_name,
332 #if defined(x86_64_HOST_ARCH)
333 FILE* f,
334 #endif
335 int size,
336 int isThin);
337 #if defined(x86_64_HOST_ARCH)
338 static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
339 static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
340 #define PEi386_IMAGE_OFFSET 4
341 #else
342 #define PEi386_IMAGE_OFFSET 0
343 #endif
344 #elif defined(OBJFORMAT_MACHO)
345 static int ocVerifyImage_MachO ( ObjectCode* oc );
346 static int ocGetNames_MachO ( ObjectCode* oc );
347 static int ocResolve_MachO ( ObjectCode* oc );
348 static int ocRunInit_MachO ( ObjectCode* oc );
349 static int machoGetMisalignment( FILE * );
350 #if NEED_SYMBOL_EXTRAS
351 static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
352 #endif
353 #ifdef powerpc_HOST_ARCH
354 static void machoInitSymbolsWithoutUnderscore( void );
355 #endif
356 #endif
357
358 #if defined(OBJFORMAT_PEi386)
359 /* string utility function */
360 static HsBool endsWithPath(pathchar* base, pathchar* str) {
361 int blen = pathlen(base);
362 int slen = pathlen(str);
363 return (blen >= slen) && (0 == pathcmp(base + blen - slen, str));
364 }
365
366 static int checkAndLoadImportLibrary(
367 pathchar* arch_name,
368 char* member_name,
369 FILE* f);
370
371 static int findAndLoadImportLibrary(
372 ObjectCode* oc
373 );
374
375 static UChar *myindex(
376 int scale,
377 void* base,
378 int index);
379 static UChar *cstring_from_COFF_symbol_name(
380 UChar* name,
381 UChar* strtab);
382 static char *cstring_from_section_name(
383 UChar* name,
384 UChar* strtab);
385
386
387 /* Add ld symbol for PE image base. */
388 #if defined(__GNUC__)
389 #define __ImageBase __MINGW_LSYMBOL(_image_base__)
390 #endif
391
392 /* Get the base of the module. */
393 /* This symbol is defined by ld. */
394 extern IMAGE_DOS_HEADER __ImageBase;
395 #define __image_base (void*)((HINSTANCE)&__ImageBase)
396
397 // MingW-w64 is missing these from the implementation. So we have to look them up
398 typedef DLL_DIRECTORY_COOKIE(WINAPI *LPAddDLLDirectory)(PCWSTR NewDirectory);
399 typedef WINBOOL(WINAPI *LPRemoveDLLDirectory)(DLL_DIRECTORY_COOKIE Cookie);
400 #endif /* OBJFORMAT_PEi386 */
401
402 static void freeProddableBlocks (ObjectCode *oc);
403
404 /* on x86_64 we have a problem with relocating symbol references in
405 * code that was compiled without -fPIC. By default, the small memory
406 * model is used, which assumes that symbol references can fit in a
407 * 32-bit slot. The system dynamic linker makes this work for
408 * references to shared libraries by either (a) allocating a jump
409 * table slot for code references, or (b) moving the symbol at load
410 * time (and copying its contents, if necessary) for data references.
411 *
412 * We unfortunately can't tell whether symbol references are to code
413 * or data. So for now we assume they are code (the vast majority
414 * are), and allocate jump-table slots. Unfortunately this will
415 * SILENTLY generate crashing code for data references. This hack is
416 * enabled by X86_64_ELF_NONPIC_HACK.
417 *
418 * One workaround is to use shared Haskell libraries. This is
419 * coming. Another workaround is to keep the static libraries but
420 * compile them with -fPIC, because that will generate PIC references
421 * to data which can be relocated. The PIC code is still too green to
422 * do this systematically, though.
423 *
424 * See bug #781
425 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
426 *
427 * Naming Scheme for Symbol Macros
428 *
429 * SymI_*: symbol is internal to the RTS. It resides in an object
430 * file/library that is statically.
431 * SymE_*: symbol is external to the RTS library. It might be linked
432 * dynamically.
433 *
434 * Sym*_HasProto : the symbol prototype is imported in an include file
435 * or defined explicitly
436 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
437 * default proto extern void sym(void);
438 */
439 #define X86_64_ELF_NONPIC_HACK 1
440
441 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
442 * small memory model on this architecture (see gcc docs,
443 * -mcmodel=small).
444 *
445 * MAP_32BIT not available on OpenBSD/amd64
446 */
447 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
448 #define TRY_MAP_32BIT MAP_32BIT
449 #else
450 #define TRY_MAP_32BIT 0
451 #endif
452
453 /*
454 Note [The ARM/Thumb Story]
455 ~~~~~~~~~~~~~~~~~~~~~~~~~~
456
457 Support for the ARM architecture is complicated by the fact that ARM has not
458 one but several instruction encodings. The two relevant ones here are the original
459 ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
460 of the instruction set.
461
462 How the CPU decodes a particular instruction is determined by a mode bit. This
463 mode bit is set on jump instructions, the value being determined by the low
464 bit of the target address: An odd address means the target is a procedure
465 encoded in the Thumb encoding whereas an even address means it's a traditional
466 ARM procedure (the actual address jumped to is even regardless of the encoding bit).
467
468 Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
469 is tricky. If the linker needs to link a call by an ARM object into Thumb code
470 (or vice-versa) it will produce a jump island. This, however, is incompatible with
471 GHC's tables-next-to-code. For this reason, it is critical that GHC emit
472 exclusively ARM or Thumb objects for all Haskell code.
473
474 We still do, however, need to worry about foreign code.
475 */
476
477 /*
478 * Due to the small memory model (see above), on x86_64 we have to map
479 * all our non-PIC object files into the low 2Gb of the address space
480 * (why 2Gb and not 4Gb? Because all addresses must be reachable
481 * using a 32-bit signed PC-relative offset). On Linux we can do this
482 * using the MAP_32BIT flag to mmap(), however on other OSs
483 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
484 * can't do this. So on these systems, we have to pick a base address
485 * in the low 2Gb of the address space and try to allocate memory from
486 * there.
487 *
488 * We pick a default address based on the OS, but also make this
489 * configurable via an RTS flag (+RTS -xm)
490 */
491 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
492
493 #if defined(MAP_32BIT)
494 // Try to use MAP_32BIT
495 #define MMAP_32BIT_BASE_DEFAULT 0
496 #else
497 // A guess: 1Gb.
498 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
499 #endif
500
501 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
502 #endif
503
504 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
505 ObjectCode *owner)
506 {
507 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
508 if (!pinfo || owner != pinfo->owner) return;
509 removeStrHashTable(table, key, NULL);
510 stgFree(pinfo);
511 }
512
513 /* -----------------------------------------------------------------------------
514 * Insert symbols into hash tables, checking for duplicates.
515 *
516 * Returns: 0 on failure, nonzero on success
517 */
518 /*
519 Note [weak-symbols-support]
520 -------------------------------------
521 While ghciInsertSymbolTable does implement extensive
522 logic for weak symbol support, weak symbols are not currently
523 fully supported by the RTS. This code is mostly here for COMDAT
524 support which uses the weak symbols support.
525
526 Linking weak symbols defined purely in C code with other C code
527 should also work, probably. Observing weak symbols in Haskell
528 won't.
529
530 Some test have been written for weak symbols but have been disabled
531 mostly because it's unsure how the weak symbols support should look.
532 See Trac #11223
533 */
534 static int ghciInsertSymbolTable(
535 pathchar* obj_name,
536 HashTable *table,
537 const SymbolName* key,
538 SymbolAddr* data,
539 HsBool weak,
540 ObjectCode *owner)
541 {
542 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
543 if (!pinfo) /* new entry */
544 {
545 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
546 pinfo->value = data;
547 pinfo->owner = owner;
548 pinfo->weak = weak;
549 insertStrHashTable(table, key, pinfo);
550 return 1;
551 }
552 else if (weak && data && pinfo->weak && !pinfo->value)
553 {
554 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
555 pinfo->value = data;
556 pinfo->owner = owner;
557 return 1;
558 }
559 else if (weak)
560 {
561 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
562 already know of another copy throw this one away.
563
564 or both weak symbols have a nonzero value. Keep the existing one.
565
566 This also preserves the semantics of linking against
567 the first symbol we find. */
568 }
569 else if (pinfo->weak && !weak) /* weak symbol is in the table */
570 {
571 /* override the weak definition with the non-weak one */
572 pinfo->value = data;
573 pinfo->owner = owner;
574 pinfo->weak = HS_BOOL_FALSE;
575 return 1;
576 }
577 else if ( pinfo->owner
578 && pinfo->owner->status != OBJECT_RESOLVED
579 && pinfo->owner->status != OBJECT_NEEDED)
580 {
581 /* If the other symbol hasn't been loaded or will be loaded and we want to
582 explicitly load the new one, we can just swap it out and load the one
583 that has been requested. If not, just keep the first one encountered.
584
585 Because the `symHash' table consists symbols we've also not loaded but
586 found during the initial scan this is safe to do. If however the existing
587 symbol has been loaded then it means we have a duplicate.
588
589 This is essentially emulating the behavior of a linker wherein it will always
590 link in object files that are .o file arguments, but only take object files
591 from archives as needed. */
592 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
593 pinfo->value = data;
594 pinfo->owner = owner;
595 pinfo->weak = weak;
596 }
597
598 return 1;
599 }
600 else if (pinfo->owner == owner)
601 {
602 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
603 return 1;
604 }
605 else if (owner && owner->status == OBJECT_LOADED)
606 {
607 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
608 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
609 call this function again to trigger the duplicate error. */
610 return 1;
611 }
612
613 pathchar* archiveName = NULL;
614 debugBelch(
615 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
616 " %s\n"
617 "whilst processing object file\n"
618 " %" PATH_FMT "\n"
619 "The symbol was previously defined in\n"
620 " %" PATH_FMT "\n"
621 "This could be caused by:\n"
622 " * Loading two different object files which export the same symbol\n"
623 " * Specifying the same object file twice on the GHCi command line\n"
624 " * An incorrect `package.conf' entry, causing some object to be\n"
625 " loaded twice.\n",
626 (char*)key,
627 obj_name,
628 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
629 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
630 : pinfo->owner->fileName
631 );
632
633 if (archiveName)
634 {
635 stgFree(archiveName);
636 archiveName = NULL;
637 }
638 return 0;
639 }
640
641 /* -----------------------------------------------------------------------------
642 * Looks up symbols into hash tables.
643 *
644 * Returns: 0 on failure and result is not set,
645 * nonzero on success and result set to nonzero pointer
646 */
647 static HsBool ghciLookupSymbolInfo(HashTable *table,
648 const SymbolName* key, RtsSymbolInfo **result)
649 {
650 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
651 if (!pinfo) {
652 *result = NULL;
653 return HS_BOOL_FALSE;
654 }
655 if (pinfo->weak)
656 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
657 /* Once it's looked up, it can no longer be overridden */
658 pinfo->weak = HS_BOOL_FALSE;
659
660 *result = pinfo;
661 return HS_BOOL_TRUE;
662 }
663
664 /* -----------------------------------------------------------------------------
665 * initialize the object linker
666 */
667
668
669 static int linker_init_done = 0 ;
670
671 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
672 static void *dl_prog_handle;
673 static regex_t re_invalid;
674 static regex_t re_realso;
675 #ifdef THREADED_RTS
676 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
677 #endif
678 #elif defined(OBJFORMAT_PEi386)
679 void addDLLHandle(pathchar* dll_name, HINSTANCE instance);
680 #endif
681
682 void initLinker (void)
683 {
684 // default to retaining CAFs for backwards compatibility. Most
685 // users will want initLinker_(0): otherwise unloadObj() will not
686 // be able to unload object files when they contain CAFs.
687 initLinker_(1);
688 }
689
690 void
691 initLinker_ (int retain_cafs)
692 {
693 RtsSymbolVal *sym;
694 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
695 int compileResult;
696 #endif
697
698 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
699
700 /* Make initLinker idempotent, so we can call it
701 before every relevant operation; that means we
702 don't need to initialise the linker separately */
703 if (linker_init_done == 1) {
704 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
705 return;
706 } else {
707 linker_init_done = 1;
708 }
709
710 objects = NULL;
711 unloaded_objects = NULL;
712
713 #if defined(THREADED_RTS)
714 initMutex(&linker_mutex);
715 initMutex(&linker_unloaded_mutex);
716 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
717 initMutex(&dl_mutex);
718 #endif
719 #endif
720 symhash = allocStrHashTable();
721
722 /* populate the symbol table with stuff from the RTS */
723 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
724 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
725 symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
726 barf("ghciInsertSymbolTable failed");
727 }
728 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
729 }
730 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
731 machoInitSymbolsWithoutUnderscore();
732 # endif
733 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
734 referenced from a statically linked module. We need to mimic this, but
735 we cannot use NULL because we use it to mean nonexistent symbols. So we
736 use an arbitrary (hopefully unique) address here.
737 */
738 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
739 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
740 barf("ghciInsertSymbolTable failed");
741 }
742
743 #if defined(OBJFORMAT_PEi386)
744 if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
745 symhash, "__image_base__", __image_base, HS_BOOL_TRUE, NULL)) {
746 barf("ghciInsertSymbolTable failed");
747 }
748 #endif /* OBJFORMAT_PEi386 */
749
750
751 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
752 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
753 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
754 retain_cafs ? newRetainedCAF : newGCdCAF,
755 HS_BOOL_FALSE, NULL)) {
756 barf("ghciInsertSymbolTable failed");
757 }
758
759 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
760 # if defined(RTLD_DEFAULT)
761 dl_prog_handle = RTLD_DEFAULT;
762 # else
763 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
764 # endif /* RTLD_DEFAULT */
765
766 compileResult = regcomp(&re_invalid,
767 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
768 REG_EXTENDED);
769 if (compileResult != 0) {
770 barf("Compiling re_invalid failed");
771 }
772 compileResult = regcomp(&re_realso,
773 "(GROUP|INPUT) *\\( *([^ )]+)",
774 REG_EXTENDED);
775 if (compileResult != 0) {
776 barf("Compiling re_realso failed");
777 }
778 # endif
779
780 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
781 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
782 // User-override for mmap_32bit_base
783 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
784 }
785 #endif
786
787 #if defined(mingw32_HOST_OS)
788 /*
789 * These two libraries cause problems when added to the static link,
790 * but are necessary for resolving symbols in GHCi, hence we load
791 * them manually here.
792 */
793 addDLL(WSTR("msvcrt"));
794 addDLL(WSTR("kernel32"));
795 addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
796 #endif
797
798 if (RTS_LINKER_USE_MMAP)
799 m32_allocator_init();
800
801 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
802 return;
803 }
804
805 void
806 exitLinker( void ) {
807 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
808 if (linker_init_done == 1) {
809 regfree(&re_invalid);
810 regfree(&re_realso);
811 #ifdef THREADED_RTS
812 closeMutex(&dl_mutex);
813 #endif
814 }
815 #endif
816 if (linker_init_done == 1) {
817 freeHashTable(symhash, free);
818 }
819 #ifdef THREADED_RTS
820 closeMutex(&linker_mutex);
821 #endif
822 }
823
824 /* -----------------------------------------------------------------------------
825 * Loading DLL or .so dynamic libraries
826 * -----------------------------------------------------------------------------
827 *
828 * Add a DLL from which symbols may be found. In the ELF case, just
829 * do RTLD_GLOBAL-style add, so no further messing around needs to
830 * happen in order that symbols in the loaded .so are findable --
831 * lookupSymbol() will subsequently see them by dlsym on the program's
832 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
833 *
834 * In the PEi386 case, open the DLLs and put handles to them in a
835 * linked list. When looking for a symbol, try all handles in the
836 * list. This means that we need to load even DLLs that are guaranteed
837 * to be in the ghc.exe image already, just so we can get a handle
838 * to give to loadSymbol, so that we can find the symbols. For such
839 * libraries, the LoadLibrary call should be a no-op except for returning
840 * the handle.
841 *
842 */
843
844 #if defined(OBJFORMAT_PEi386)
845 /* A record for storing handles into DLLs. */
846
847 typedef
848 struct _OpenedDLL {
849 pathchar* name;
850 struct _OpenedDLL* next;
851 HINSTANCE instance;
852 }
853 OpenedDLL;
854
855 /* A list thereof. */
856 static OpenedDLL* opened_dlls = NULL;
857
858 /* A record for storing indirectly linked functions from DLLs. */
859 typedef
860 struct _IndirectAddr {
861 SymbolAddr* addr;
862 struct _IndirectAddr* next;
863 }
864 IndirectAddr;
865
866 /* A list thereof. */
867 static IndirectAddr* indirects = NULL;
868
869 /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
870 void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
871 OpenedDLL* o_dll;
872 o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );
873 o_dll->name = dll_name ? pathdup(dll_name) : NULL;
874 o_dll->instance = instance;
875 o_dll->next = opened_dlls;
876 opened_dlls = o_dll;
877 }
878
879 #endif
880
881 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
882
883 /* Suppose in ghci we load a temporary SO for a module containing
884 f = 1
885 and then modify the module, recompile, and load another temporary
886 SO with
887 f = 2
888 Then as we don't unload the first SO, dlsym will find the
889 f = 1
890 symbol whereas we want the
891 f = 2
892 symbol. We therefore need to keep our own SO handle list, and
893 try SOs in the right order. */
894
895 typedef
896 struct _OpenedSO {
897 struct _OpenedSO* next;
898 void *handle;
899 }
900 OpenedSO;
901
902 /* A list thereof. */
903 static OpenedSO* openedSOs = NULL;
904
905 static const char *
906 internal_dlopen(const char *dll_name)
907 {
908 OpenedSO* o_so;
909 void *hdl;
910 const char *errmsg;
911 char *errmsg_copy;
912
913 // omitted: RTLD_NOW
914 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
915 IF_DEBUG(linker,
916 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
917
918 //-------------- Begin critical section ------------------
919 // This critical section is necessary because dlerror() is not
920 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
921 // Also, the error message returned must be copied to preserve it
922 // (see POSIX also)
923
924 ACQUIRE_LOCK(&dl_mutex);
925 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
926
927 errmsg = NULL;
928 if (hdl == NULL) {
929 /* dlopen failed; return a ptr to the error msg. */
930 errmsg = dlerror();
931 if (errmsg == NULL) errmsg = "addDLL: unknown error";
932 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
933 strcpy(errmsg_copy, errmsg);
934 errmsg = errmsg_copy;
935 } else {
936 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
937 o_so->handle = hdl;
938 o_so->next = openedSOs;
939 openedSOs = o_so;
940 }
941
942 RELEASE_LOCK(&dl_mutex);
943 //--------------- End critical section -------------------
944
945 return errmsg;
946 }
947
948 /*
949 Note [RTLD_LOCAL]
950
951 In GHCi we want to be able to override previous .so's with newly
952 loaded .so's when we recompile something. This further implies that
953 when we look up a symbol in internal_dlsym() we have to iterate
954 through the loaded libraries (in order from most recently loaded to
955 oldest) looking up the symbol in each one until we find it.
956
957 However, this can cause problems for some symbols that are copied
958 by the linker into the executable image at runtime - see #8935 for a
959 lengthy discussion. To solve that problem we need to look up
960 symbols in the main executable *first*, before attempting to look
961 them up in the loaded .so's. But in order to make that work, we
962 have to always call dlopen with RTLD_LOCAL, so that the loaded
963 libraries don't populate the global symbol table.
964 */
965
966 static void *
967 internal_dlsym(const char *symbol) {
968 OpenedSO* o_so;
969 void *v;
970
971 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
972 ACQUIRE_LOCK(&dl_mutex);
973 dlerror();
974 // look in program first
975 v = dlsym(dl_prog_handle, symbol);
976 if (dlerror() == NULL) {
977 RELEASE_LOCK(&dl_mutex);
978 return v;
979 }
980
981 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
982 v = dlsym(o_so->handle, symbol);
983 if (dlerror() == NULL) {
984 RELEASE_LOCK(&dl_mutex);
985 return v;
986 }
987 }
988 RELEASE_LOCK(&dl_mutex);
989 return v;
990 }
991 # endif
992
993 const char *
994 addDLL( pathchar *dll_name )
995 {
996 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
997 /* ------------------- ELF DLL loader ------------------- */
998
999 #define NMATCH 5
1000 regmatch_t match[NMATCH];
1001 const char *errmsg;
1002 FILE* fp;
1003 size_t match_length;
1004 #define MAXLINE 1000
1005 char line[MAXLINE];
1006 int result;
1007
1008 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
1009 errmsg = internal_dlopen(dll_name);
1010
1011 if (errmsg == NULL) {
1012 return NULL;
1013 }
1014
1015 // GHC Trac ticket #2615
1016 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
1017 // contain linker scripts rather than ELF-format object code. This
1018 // code handles the situation by recognizing the real object code
1019 // file name given in the linker script.
1020 //
1021 // If an "invalid ELF header" error occurs, it is assumed that the
1022 // .so file contains a linker script instead of ELF object code.
1023 // In this case, the code looks for the GROUP ( ... ) linker
1024 // directive. If one is found, the first file name inside the
1025 // parentheses is treated as the name of a dynamic library and the
1026 // code attempts to dlopen that file. If this is also unsuccessful,
1027 // an error message is returned.
1028
1029 // see if the error message is due to an invalid ELF header
1030 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
1031 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
1032 IF_DEBUG(linker, debugBelch("result = %i\n", result));
1033 if (result == 0) {
1034 // success -- try to read the named file as a linker script
1035 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
1036 MAXLINE-1);
1037 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
1038 line[match_length] = '\0'; // make sure string is null-terminated
1039 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
1040 if ((fp = fopen(line, "r")) == NULL) {
1041 return errmsg; // return original error if open fails
1042 }
1043 // try to find a GROUP or INPUT ( ... ) command
1044 while (fgets(line, MAXLINE, fp) != NULL) {
1045 IF_DEBUG(linker, debugBelch("input line = %s", line));
1046 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
1047 // success -- try to dlopen the first named file
1048 IF_DEBUG(linker, debugBelch("match%s\n",""));
1049 line[match[2].rm_eo] = '\0';
1050 stgFree((void*)errmsg); // Free old message before creating new one
1051 errmsg = internal_dlopen(line+match[2].rm_so);
1052 break;
1053 }
1054 // if control reaches here, no GROUP or INPUT ( ... ) directive
1055 // was found and the original error message is returned to the
1056 // caller
1057 }
1058 fclose(fp);
1059 }
1060 return errmsg;
1061
1062 # elif defined(OBJFORMAT_PEi386)
1063 /* ------------------- Win32 DLL loader ------------------- */
1064
1065 pathchar* buf;
1066 OpenedDLL* o_dll;
1067 HINSTANCE instance;
1068
1069 IF_DEBUG(linker, debugBelch("\naddDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
1070
1071 /* See if we've already got it, and ignore if so. */
1072 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
1073 if (0 == pathcmp(o_dll->name, dll_name))
1074 return NULL;
1075 }
1076
1077 /* The file name has no suffix (yet) so that we can try
1078 both foo.dll and foo.drv
1079
1080 The documentation for LoadLibrary says:
1081 If no file name extension is specified in the lpFileName
1082 parameter, the default library extension .dll is
1083 appended. However, the file name string can include a trailing
1084 point character (.) to indicate that the module name has no
1085 extension. */
1086
1087 size_t bufsize = pathlen(dll_name) + 10;
1088 buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
1089
1090 /* These are ordered by probability of success and order we'd like them */
1091 const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
1092 const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
1093
1094 int cFormat;
1095 int cFlag;
1096 int flags_start = 1; // Assume we don't support the new API
1097
1098 /* Detect if newer API are available, if not, skip the first flags entry */
1099 if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
1100 flags_start = 0;
1101 }
1102
1103 /* Iterate through the possible flags and formats */
1104 for (cFlag = flags_start; cFlag < 2; cFlag++)
1105 {
1106 for (cFormat = 0; cFormat < 4; cFormat++)
1107 {
1108 snwprintf(buf, bufsize, formats[cFormat], dll_name);
1109 instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
1110 if (instance == NULL)
1111 {
1112 if (GetLastError() != ERROR_MOD_NOT_FOUND)
1113 {
1114 goto error;
1115 }
1116 }
1117 else
1118 {
1119 break; // We're done. DLL has been loaded.
1120 }
1121 }
1122 }
1123
1124 // Check if we managed to load the DLL
1125 if (instance == NULL) {
1126 goto error;
1127 }
1128
1129 stgFree(buf);
1130
1131 addDLLHandle(dll_name, instance);
1132
1133 return NULL;
1134
1135 error:
1136 stgFree(buf);
1137 sysErrorBelch("addDLL: %" PATH_FMT " (Win32 error %lu)", dll_name, GetLastError());
1138
1139 /* LoadLibrary failed; return a ptr to the error msg. */
1140 return "addDLL: could not load DLL";
1141
1142 # else
1143 barf("addDLL: not implemented on this platform");
1144 # endif
1145 }
1146
1147 /* -----------------------------------------------------------------------------
1148 * Searches the system directories to determine if there is a system DLL that
1149 * satisfies the given name. This prevent GHCi from linking against a static
1150 * library if a DLL is available.
1151 *
1152 * Returns: NULL on failure or no DLL found, else the full path to the DLL
1153 * that can be loaded.
1154 */
1155 pathchar* findSystemLibrary(pathchar* dll_name)
1156 {
1157 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));
1158
1159 #if defined(OBJFORMAT_PEi386)
1160 const unsigned int init_buf_size = 1024;
1161 unsigned int bufsize = init_buf_size;
1162 wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
1163 DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
1164
1165 if (wResult > bufsize) {
1166 result = realloc(result, sizeof(wchar_t) * wResult);
1167 wResult = SearchPathW(NULL, dll_name, NULL, wResult, result, NULL);
1168 }
1169
1170
1171 if (!wResult) {
1172 free(result);
1173 return NULL;
1174 }
1175
1176 return result;
1177 #else
1178 (void)(dll_name); // Function not implemented for other platforms.
1179 return NULL;
1180 #endif
1181 }
1182
1183 /* -----------------------------------------------------------------------------
1184 * Emits a warning determining that the system is missing a required security
1185 * update that we need to get access to the proper APIs
1186 */
1187 void warnMissingKBLibraryPaths( void )
1188 {
1189 static HsBool missing_update_warn = HS_BOOL_FALSE;
1190 if (!missing_update_warn) {
1191 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
1192 missing_update_warn = HS_BOOL_TRUE;
1193 }
1194 }
1195
1196 /* -----------------------------------------------------------------------------
1197 * appends a directory to the process DLL Load path so LoadLibrary can find it
1198 *
1199 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
1200 * restore the search path to what it was before this call.
1201 */
1202 HsPtr addLibrarySearchPath(pathchar* dll_path)
1203 {
1204 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));
1205
1206 #if defined(OBJFORMAT_PEi386)
1207 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1208 LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
1209
1210 HsPtr result = NULL;
1211
1212 const unsigned int init_buf_size = 4096;
1213 int bufsize = init_buf_size;
1214
1215 // Make sure the path is an absolute path
1216 WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
1217 DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
1218 if (!wResult){
1219 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1220 }
1221 else if (wResult > init_buf_size) {
1222 abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
1223 if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
1224 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1225 }
1226 }
1227
1228 if (AddDllDirectory) {
1229 result = AddDllDirectory(abs_path);
1230 }
1231 else
1232 {
1233 warnMissingKBLibraryPaths();
1234 WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
1235 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1236
1237 if (wResult > init_buf_size) {
1238 str = realloc(str, sizeof(WCHAR) * wResult);
1239 bufsize = wResult;
1240 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1241 if (!wResult) {
1242 sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1243 }
1244 }
1245
1246 bufsize = wResult + 2 + pathlen(abs_path);
1247 wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
1248
1249 wcscpy(newPath, abs_path);
1250 wcscat(newPath, L";");
1251 wcscat(newPath, str);
1252 if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
1253 sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1254 }
1255
1256 free(newPath);
1257 free(abs_path);
1258
1259 return str;
1260 }
1261
1262 if (!result) {
1263 sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1264 free(abs_path);
1265 return NULL;
1266 }
1267
1268 free(abs_path);
1269 return result;
1270 #else
1271 (void)(dll_path); // Function not implemented for other platforms.
1272 return NULL;
1273 #endif
1274 }
1275
1276 /* -----------------------------------------------------------------------------
1277 * removes a directory from the process DLL Load path
1278 *
1279 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
1280 */
1281 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
1282 {
1283 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n", dll_path_index));
1284
1285 #if defined(OBJFORMAT_PEi386)
1286 HsBool result = 0;
1287
1288 if (dll_path_index != NULL) {
1289 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1290 LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
1291
1292 if (RemoveDllDirectory) {
1293 result = RemoveDllDirectory(dll_path_index);
1294 // dll_path_index is now invalid, do not use it after this point.
1295 }
1296 else
1297 {
1298 warnMissingKBLibraryPaths();
1299 result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
1300 free(dll_path_index);
1301 }
1302
1303 if (!result) {
1304 sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
1305 return HS_BOOL_FALSE;
1306 }
1307 }
1308
1309 return result == 0 ? HS_BOOL_TRUE : HS_BOOL_FALSE;
1310 #else
1311 (void)(dll_path_index); // Function not implemented for other platforms.
1312 return HS_BOOL_FALSE;
1313 #endif
1314 }
1315
1316 /* -----------------------------------------------------------------------------
1317 * insert a symbol in the hash table
1318 *
1319 * Returns: 0 on failure, nozero on success
1320 */
1321 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
1322 {
1323 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
1324 }
1325
1326 /* -----------------------------------------------------------------------------
1327 * lookup a symbol in the hash table
1328 */
1329 static SymbolAddr* lookupSymbol_ (SymbolName* lbl)
1330 {
1331 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
1332
1333 ASSERT(symhash != NULL);
1334 RtsSymbolInfo *pinfo;
1335
1336 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
1337 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
1338
1339 # if defined(OBJFORMAT_ELF)
1340 return internal_dlsym(lbl);
1341 # elif defined(OBJFORMAT_MACHO)
1342
1343 /* HACK: On OS X, all symbols are prefixed with an underscore.
1344 However, dlsym wants us to omit the leading underscore from the
1345 symbol name -- the dlsym routine puts it back on before searching
1346 for the symbol. For now, we simply strip it off here (and ONLY
1347 here).
1348 */
1349 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
1350 ASSERT(lbl[0] == '_');
1351 return internal_dlsym(lbl + 1);
1352 # elif defined(OBJFORMAT_PEi386)
1353 SymbolAddr* sym;
1354
1355 /* See Note [mingw-w64 name decoration scheme] */
1356 #ifndef x86_64_HOST_ARCH
1357 zapTrailingAtSign ( (unsigned char*)lbl );
1358 #endif
1359 sym = lookupSymbolInDLLs((unsigned char*)lbl);
1360 return sym; // might be NULL if not found
1361
1362 # else
1363 ASSERT(2+2 == 5);
1364 return NULL;
1365 # endif
1366 } else {
1367 SymbolAddr* val = pinfo->value;
1368 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, val));
1369
1370 int r;
1371 ObjectCode* oc = pinfo->owner;
1372
1373 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
1374 See Note [runtime-linker-phases] */
1375 if (oc && oc->status == OBJECT_LOADED) {
1376 oc->status = OBJECT_NEEDED;
1377 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand loading symbol '%s'\n", lbl));
1378 r = ocTryLoad(oc);
1379
1380 if (!r) {
1381 errorBelch("Could not on-demand load symbol '%s'\n", lbl);
1382 return NULL;
1383 }
1384 }
1385
1386 return val;
1387 }
1388 }
1389
1390 SymbolAddr* lookupSymbol( SymbolName* lbl )
1391 {
1392 ACQUIRE_LOCK(&linker_mutex);
1393 SymbolAddr* r = lookupSymbol_(lbl);
1394 RELEASE_LOCK(&linker_mutex);
1395 return r;
1396 }
1397
1398 /* -----------------------------------------------------------------------------
1399 Create a StablePtr for a foreign export. This is normally called by
1400 a C function with __attribute__((constructor)), which is generated
1401 by GHC and linked into the module.
1402
1403 If the object code is being loaded dynamically, then we remember
1404 which StablePtrs were allocated by the constructors and free them
1405 again in unloadObj().
1406 -------------------------------------------------------------------------- */
1407
1408 static ObjectCode *loading_obj = NULL;
1409
1410 StgStablePtr foreignExportStablePtr (StgPtr p)
1411 {
1412 ForeignExportStablePtr *fe_sptr;
1413 StgStablePtr *sptr;
1414
1415 sptr = getStablePtr(p);
1416
1417 if (loading_obj != NULL) {
1418 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
1419 "foreignExportStablePtr");
1420 fe_sptr->stable_ptr = sptr;
1421 fe_sptr->next = loading_obj->stable_ptrs;
1422 loading_obj->stable_ptrs = fe_sptr;
1423 }
1424
1425 return sptr;
1426 }
1427
1428
1429 /* -----------------------------------------------------------------------------
1430 * Debugging aid: look in GHCi's object symbol tables for symbols
1431 * within DELTA bytes of the specified address, and show their names.
1432 */
1433 #ifdef DEBUG
1434 void ghci_enquire ( SymbolAddr* addr );
1435
1436 void ghci_enquire(SymbolAddr* addr)
1437 {
1438 int i;
1439 SymbolName* sym;
1440 RtsSymbolInfo* a;
1441 const int DELTA = 64;
1442 ObjectCode* oc;
1443
1444 for (oc = objects; oc; oc = oc->next) {
1445 for (i = 0; i < oc->n_symbols; i++) {
1446 sym = oc->symbols[i];
1447 if (sym == NULL) continue;
1448 a = NULL;
1449 if (a == NULL) {
1450 ghciLookupSymbolInfo(symhash, sym, &a);
1451 }
1452 if (a == NULL) {
1453 // debugBelch("ghci_enquire: can't find %s\n", sym);
1454 }
1455 else if ( a->value
1456 && (char*)addr-DELTA <= (char*)a->value
1457 && (char*)a->value <= (char*)addr+DELTA) {
1458 debugBelch("%p + %3d == `%s'\n", addr, (int)((char*)a->value - (char*)addr), sym);
1459 }
1460 }
1461 }
1462 }
1463 #endif
1464
1465 #if RTS_LINKER_USE_MMAP
1466 //
1467 // Returns NULL on failure.
1468 //
1469 void *
1470 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1471 {
1472 void *map_addr = NULL;
1473 void *result;
1474 size_t size;
1475 static uint32_t fixed = 0;
1476
1477 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1478 size = roundUpToPage(bytes);
1479
1480 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1481 mmap_again:
1482
1483 if (mmap_32bit_base != 0) {
1484 map_addr = mmap_32bit_base;
1485 }
1486 #endif
1487
1488 IF_DEBUG(linker,
1489 debugBelch("mmapForLinker: \tprotection %#0x\n",
1490 PROT_EXEC | PROT_READ | PROT_WRITE));
1491 IF_DEBUG(linker,
1492 debugBelch("mmapForLinker: \tflags %#0x\n",
1493 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1494
1495 result = mmap(map_addr, size,
1496 PROT_EXEC|PROT_READ|PROT_WRITE,
1497 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1498
1499 if (result == MAP_FAILED) {
1500 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1501 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1502 return NULL;
1503 }
1504
1505 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1506 if (mmap_32bit_base != 0) {
1507 if (result == map_addr) {
1508 mmap_32bit_base = (StgWord8*)map_addr + size;
1509 } else {
1510 if ((W_)result > 0x80000000) {
1511 // oops, we were given memory over 2Gb
1512 munmap(result,size);
1513 #if defined(freebsd_HOST_OS) || \
1514 defined(kfreebsdgnu_HOST_OS) || \
1515 defined(dragonfly_HOST_OS)
1516 // Some platforms require MAP_FIXED. This is normally
1517 // a bad idea, because MAP_FIXED will overwrite
1518 // existing mappings.
1519 fixed = MAP_FIXED;
1520 goto mmap_again;
1521 #else
1522 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1523 "asked for %lu bytes at %p. "
1524 "Try specifying an address with +RTS -xm<addr> -RTS",
1525 size, map_addr);
1526 return NULL;
1527 #endif
1528 } else {
1529 // hmm, we were given memory somewhere else, but it's
1530 // still under 2Gb so we can use it. Next time, ask
1531 // for memory right after the place we just got some
1532 mmap_32bit_base = (StgWord8*)result + size;
1533 }
1534 }
1535 } else {
1536 if ((W_)result > 0x80000000) {
1537 // oops, we were given memory over 2Gb
1538 // ... try allocating memory somewhere else?;
1539 debugTrace(DEBUG_linker,
1540 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1541 bytes, result);
1542 munmap(result, size);
1543
1544 // Set a base address and try again... (guess: 1Gb)
1545 mmap_32bit_base = (void*)0x40000000;
1546 goto mmap_again;
1547 }
1548 }
1549 #endif
1550
1551 IF_DEBUG(linker,
1552 debugBelch("mmapForLinker: mapped %" FMT_Word
1553 " bytes starting at %p\n", (W_)size, result));
1554 IF_DEBUG(linker,
1555 debugBelch("mmapForLinker: done\n"));
1556
1557 return result;
1558 }
1559 #endif
1560
1561 /*
1562 * Remove symbols from the symbol table, and free oc->symbols.
1563 * This operation is idempotent.
1564 */
1565 static void removeOcSymbols (ObjectCode *oc)
1566 {
1567 if (oc->symbols == NULL) return;
1568
1569 // Remove all the mappings for the symbols within this object..
1570 int i;
1571 for (i = 0; i < oc->n_symbols; i++) {
1572 if (oc->symbols[i] != NULL) {
1573 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1574 }
1575 }
1576
1577 stgFree(oc->symbols);
1578 oc->symbols = NULL;
1579 }
1580
1581 /*
1582 * Release StablePtrs and free oc->stable_ptrs.
1583 * This operation is idempotent.
1584 */
1585 static void freeOcStablePtrs (ObjectCode *oc)
1586 {
1587 // Release any StablePtrs that were created when this
1588 // object module was initialized.
1589 ForeignExportStablePtr *fe_ptr, *next;
1590
1591 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1592 next = fe_ptr->next;
1593 freeStablePtr(fe_ptr->stable_ptr);
1594 stgFree(fe_ptr);
1595 }
1596 oc->stable_ptrs = NULL;
1597 }
1598
1599 static void
1600 freePreloadObjectFile (ObjectCode *oc)
1601 {
1602 #if defined(mingw32_HOST_OS)
1603
1604 VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
1605
1606 IndirectAddr *ia, *ia_next;
1607 ia = indirects;
1608 while (ia != NULL) {
1609 ia_next = ia->next;
1610 stgFree(ia);
1611 ia = ia_next;
1612 }
1613 indirects = NULL;
1614
1615 #else
1616
1617 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1618 munmap(oc->image, oc->fileSize);
1619 }
1620 else {
1621 stgFree(oc->image);
1622 }
1623
1624 #endif
1625
1626 oc->image = NULL;
1627 oc->fileSize = 0;
1628 }
1629
1630 /*
1631 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1632 * the GC when a previously unloaded ObjectCode has been determined to be
1633 * unused, and when an error occurs during loadObj().
1634 */
1635 void freeObjectCode (ObjectCode *oc)
1636 {
1637 freePreloadObjectFile(oc);
1638
1639 if (oc->symbols != NULL) {
1640 stgFree(oc->symbols);
1641 oc->symbols = NULL;
1642 }
1643
1644 if (oc->extraInfos != NULL) {
1645 freeHashTable(oc->extraInfos, NULL);
1646 oc->extraInfos = NULL;
1647 }
1648
1649 if (oc->sections != NULL) {
1650 int i;
1651 for (i=0; i < oc->n_sections; i++) {
1652 if (oc->sections[i].start != NULL) {
1653 switch(oc->sections[i].alloc){
1654 #if RTS_LINKER_USE_MMAP
1655 case SECTION_MMAP:
1656 munmap(oc->sections[i].mapped_start,
1657 oc->sections[i].mapped_size);
1658 break;
1659 case SECTION_M32:
1660 m32_free(oc->sections[i].start,
1661 oc->sections[i].size);
1662 break;
1663 #endif
1664 case SECTION_MALLOC:
1665 stgFree(oc->sections[i].start);
1666 break;
1667 default:
1668 break;
1669 }
1670 }
1671 }
1672 stgFree(oc->sections);
1673 }
1674
1675 freeProddableBlocks(oc);
1676
1677 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1678 * alongside the image, so we don't need to free. */
1679 #if NEED_SYMBOL_EXTRAS && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
1680 if (RTS_LINKER_USE_MMAP) {
1681 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1682 m32_free(oc->symbol_extras,
1683 sizeof(SymbolExtra) * oc->n_symbol_extras);
1684 }
1685 }
1686 else {
1687 stgFree(oc->symbol_extras);
1688 }
1689 #endif
1690
1691 stgFree(oc->fileName);
1692 stgFree(oc->archiveMemberName);
1693
1694 stgFree(oc);
1695 }
1696
1697 /* -----------------------------------------------------------------------------
1698 * Sets the initial status of a fresh ObjectCode
1699 */
1700 static void setOcInitialStatus(ObjectCode* oc) {
1701 if (oc->isImportLib == HS_BOOL_TRUE) {
1702 oc->status = OBJECT_DONT_RESOLVE;
1703 } else if (oc->archiveMemberName == NULL) {
1704 oc->status = OBJECT_NEEDED;
1705 } else {
1706 oc->status = OBJECT_LOADED;
1707 }
1708 }
1709
1710 static ObjectCode*
1711 mkOc( pathchar *path, char *image, int imageSize,
1712 rtsBool mapped, char *archiveMemberName, int misalignment ) {
1713 ObjectCode* oc;
1714
1715 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1716 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1717
1718 # if defined(OBJFORMAT_ELF)
1719 oc->formatName = "ELF";
1720 # elif defined(OBJFORMAT_PEi386)
1721 oc->formatName = "PEi386";
1722 # elif defined(OBJFORMAT_MACHO)
1723 oc->formatName = "Mach-O";
1724 # else
1725 stgFree(oc);
1726 barf("loadObj: not implemented on this platform");
1727 # endif
1728
1729 oc->image = image;
1730 oc->fileName = pathdup(path);
1731
1732 if (archiveMemberName) {
1733 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1734 strcpy(oc->archiveMemberName, archiveMemberName);
1735 } else {
1736 oc->archiveMemberName = NULL;
1737 }
1738
1739 setOcInitialStatus( oc );
1740
1741 oc->fileSize = imageSize;
1742 oc->symbols = NULL;
1743 oc->n_sections = 0;
1744 oc->sections = NULL;
1745 oc->proddables = NULL;
1746 oc->stable_ptrs = NULL;
1747 #if NEED_SYMBOL_EXTRAS
1748 oc->symbol_extras = NULL;
1749 #endif
1750 oc->imageMapped = mapped;
1751
1752 oc->misalignment = misalignment;
1753 oc->extraInfos = NULL;
1754
1755 /* chain it onto the list of objects */
1756 oc->next = NULL;
1757
1758 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1759 return oc;
1760 }
1761
1762 /* -----------------------------------------------------------------------------
1763 * Check if an object or archive is already loaded.
1764 *
1765 * Returns: 1 if the path is already loaded, 0 otherwise.
1766 */
1767 static HsInt
1768 isAlreadyLoaded( pathchar *path )
1769 {
1770 ObjectCode *o;
1771 for (o = objects; o; o = o->next) {
1772 if (0 == pathcmp(o->fileName, path)) {
1773 return 1; /* already loaded */
1774 }
1775 }
1776 return 0; /* not loaded yet */
1777 }
1778
1779 static HsInt loadArchive_ (pathchar *path)
1780 {
1781 ObjectCode* oc;
1782 char *image;
1783 int memberSize;
1784 FILE *f;
1785 int n;
1786 size_t thisFileNameSize;
1787 char *fileName;
1788 size_t fileNameSize;
1789 int isObject, isGnuIndex, isThin, isImportLib;
1790 char tmp[20];
1791 char *gnuFileIndex;
1792 int gnuFileIndexSize;
1793 #if defined(darwin_HOST_OS)
1794 int i;
1795 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
1796 #if defined(i386_HOST_ARCH)
1797 const uint32_t mycputype = CPU_TYPE_X86;
1798 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
1799 #elif defined(x86_64_HOST_ARCH)
1800 const uint32_t mycputype = CPU_TYPE_X86_64;
1801 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
1802 #elif defined(powerpc_HOST_ARCH)
1803 const uint32_t mycputype = CPU_TYPE_POWERPC;
1804 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1805 #elif defined(powerpc64_HOST_ARCH)
1806 const uint32_t mycputype = CPU_TYPE_POWERPC64;
1807 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1808 #else
1809 #error Unknown Darwin architecture
1810 #endif
1811 #endif
1812 int misalignment = 0;
1813
1814 /* TODO: don't call barf() on error, instead return an error code, freeing
1815 * all resources correctly. This function is pretty complex, so it needs
1816 * to be refactored to make this practical. */
1817
1818 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
1819 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
1820
1821 /* Check that we haven't already loaded this archive.
1822 Ignore requests to load multiple times */
1823 if (isAlreadyLoaded(path)) {
1824 IF_DEBUG(linker,
1825 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1826 return 1; /* success */
1827 }
1828
1829 gnuFileIndex = NULL;
1830 gnuFileIndexSize = 0;
1831
1832 fileNameSize = 32;
1833 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
1834
1835 isThin = 0;
1836 isImportLib = 0;
1837
1838 f = pathopen(path, WSTR("rb"));
1839 if (!f)
1840 barf("loadObj: can't read `%" PATH_FMT "'", path);
1841
1842 /* Check if this is an archive by looking for the magic "!<arch>\n"
1843 * string. Usually, if this fails, we barf and quit. On Darwin however,
1844 * we may have a fat archive, which contains archives for more than
1845 * one architecture. Fat archives start with the magic number 0xcafebabe,
1846 * always stored big endian. If we find a fat_header, we scan through
1847 * the fat_arch structs, searching through for one for our host
1848 * architecture. If a matching struct is found, we read the offset
1849 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
1850 * from the start of the file.
1851 *
1852 * A subtlety is that all of the members of the fat_header and fat_arch
1853 * structs are stored big endian, so we need to call byte order
1854 * conversion functions.
1855 *
1856 * If we find the appropriate architecture in a fat archive, we gobble
1857 * its magic "!<arch>\n" string and continue processing just as if
1858 * we had a single architecture archive.
1859 */
1860
1861 n = fread ( tmp, 1, 8, f );
1862 if (n != 8)
1863 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
1864 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
1865 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
1866 *
1867 * ar thin libraries have the exact same format as normal archives except they
1868 * have a different magic string and they don't copy the object files into the
1869 * archive.
1870 *
1871 * Instead each header entry points to the location of the object file on disk.
1872 * This is useful when a library is only created to satisfy a compile time dependency
1873 * instead of to be distributed. This saves the time required for copying.
1874 *
1875 * Thin archives are always flattened. They always only contain simple headers
1876 * pointing to the object file and so we need not allocate more memory than needed
1877 * to find the object file.
1878 *
1879 */
1880 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
1881 isThin = 1;
1882 }
1883 #if defined(darwin_HOST_OS)
1884 /* Not a standard archive, look for a fat archive magic number: */
1885 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
1886 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
1887 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
1888 nfat_offset = 0;
1889
1890 for (i = 0; i < (int)nfat_arch; i++) {
1891 /* search for the right arch */
1892 n = fread( tmp, 1, 20, f );
1893 if (n != 8)
1894 barf("loadArchive: Failed reading arch from `%s'", path);
1895 cputype = ntohl(*(uint32_t *)tmp);
1896 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
1897
1898 if (cputype == mycputype && cpusubtype == mycpusubtype) {
1899 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
1900 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
1901 break;
1902 }
1903 }
1904
1905 if (nfat_offset == 0) {
1906 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
1907 }
1908 else {
1909 n = fseek( f, nfat_offset, SEEK_SET );
1910 if (n != 0)
1911 barf("loadArchive: Failed to seek to arch in `%s'", path);
1912 n = fread ( tmp, 1, 8, f );
1913 if (n != 8)
1914 barf("loadArchive: Failed reading header from `%s'", path);
1915 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1916 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
1917 }
1918 }
1919 }
1920 else {
1921 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
1922 }
1923 #else
1924 else {
1925 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
1926 }
1927 #endif
1928
1929 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
1930
1931 while (1) {
1932 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
1933 n = fread ( fileName, 1, 16, f );
1934 if (n != 16) {
1935 if (feof(f)) {
1936 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
1937 break;
1938 }
1939 else {
1940 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
1941 }
1942 }
1943
1944 #if defined(darwin_HOST_OS)
1945 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
1946 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
1947 break;
1948 }
1949 #endif
1950
1951 n = fread ( tmp, 1, 12, f );
1952 if (n != 12)
1953 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
1954 n = fread ( tmp, 1, 6, f );
1955 if (n != 6)
1956 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
1957 n = fread ( tmp, 1, 6, f );
1958 if (n != 6)
1959 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
1960 n = fread ( tmp, 1, 8, f );
1961 if (n != 8)
1962 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
1963 n = fread ( tmp, 1, 10, f );
1964 if (n != 10)
1965 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
1966 tmp[10] = '\0';
1967 for (n = 0; isdigit(tmp[n]); n++);
1968 tmp[n] = '\0';
1969 memberSize = atoi(tmp);
1970
1971 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
1972 n = fread ( tmp, 1, 2, f );
1973 if (n != 2)
1974 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
1975 if (strncmp(tmp, "\x60\x0A", 2) != 0)
1976 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
1977 path, ftell(f), tmp[0], tmp[1]);
1978
1979 isGnuIndex = 0;
1980 /* Check for BSD-variant large filenames */
1981 if (0 == strncmp(fileName, "#1/", 3)) {
1982 fileName[16] = '\0';
1983 if (isdigit(fileName[3])) {
1984 for (n = 4; isdigit(fileName[n]); n++);
1985 fileName[n] = '\0';
1986 thisFileNameSize = atoi(fileName + 3);
1987 memberSize -= thisFileNameSize;
1988 if (thisFileNameSize >= fileNameSize) {
1989 /* Double it to avoid potentially continually
1990 increasing it by 1 */
1991 fileNameSize = thisFileNameSize * 2;
1992 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
1993 }
1994 n = fread ( fileName, 1, thisFileNameSize, f );
1995 if (n != (int)thisFileNameSize) {
1996 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
1997 path);
1998 }
1999 fileName[thisFileNameSize] = 0;
2000
2001 /* On OS X at least, thisFileNameSize is the size of the
2002 fileName field, not the length of the fileName
2003 itself. */
2004 thisFileNameSize = strlen(fileName);
2005 }
2006 else {
2007 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
2008 }
2009 }
2010 /* Check for GNU file index file */
2011 else if (0 == strncmp(fileName, "//", 2)) {
2012 fileName[0] = '\0';
2013 thisFileNameSize = 0;
2014 isGnuIndex = 1;
2015 }
2016 /* Check for a file in the GNU file index */
2017 else if (fileName[0] == '/') {
2018 if (isdigit(fileName[1])) {
2019 int i;
2020
2021 for (n = 2; isdigit(fileName[n]); n++);
2022 fileName[n] = '\0';
2023 n = atoi(fileName + 1);
2024
2025 if (gnuFileIndex == NULL) {
2026 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
2027 }
2028 if (n < 0 || n > gnuFileIndexSize) {
2029 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
2030 }
2031 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
2032 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
2033 }
2034 for (i = n; gnuFileIndex[i] != '\n'; i++);
2035 thisFileNameSize = i - n - 1;
2036 if (thisFileNameSize >= fileNameSize) {
2037 /* Double it to avoid potentially continually
2038 increasing it by 1 */
2039 fileNameSize = thisFileNameSize * 2;
2040 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2041 }
2042 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
2043 fileName[thisFileNameSize] = '\0';
2044 }
2045 else if (fileName[1] == ' ') {
2046 fileName[0] = '\0';
2047 thisFileNameSize = 0;
2048 }
2049 else {
2050 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
2051 }
2052 }
2053 /* Finally, the case where the filename field actually contains
2054 the filename */
2055 else {
2056 /* GNU ar terminates filenames with a '/', this allowing
2057 spaces in filenames. So first look to see if there is a
2058 terminating '/'. */
2059 for (thisFileNameSize = 0;
2060 thisFileNameSize < 16;
2061 thisFileNameSize++) {
2062 if (fileName[thisFileNameSize] == '/') {
2063 fileName[thisFileNameSize] = '\0';
2064 break;
2065 }
2066 }
2067 /* If we didn't find a '/', then a space teminates the
2068 filename. Note that if we don't find one, then
2069 thisFileNameSize ends up as 16, and we already have the
2070 '\0' at the end. */
2071 if (thisFileNameSize == 16) {
2072 for (thisFileNameSize = 0;
2073 thisFileNameSize < 16;
2074 thisFileNameSize++) {
2075 if (fileName[thisFileNameSize] == ' ') {
2076 fileName[thisFileNameSize] = '\0';
2077 break;
2078 }
2079 }
2080 }
2081 }
2082
2083 IF_DEBUG(linker,
2084 debugBelch("loadArchive: Found member file `%s'\n", fileName));
2085
2086 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
2087 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
2088
2089 #if defined(OBJFORMAT_PEi386)
2090 /*
2091 * Note [MSVC import files (ext .lib)]
2092 * MSVC compilers store the object files in
2093 * the import libraries with extension .dll
2094 * so on Windows we should look for those too.
2095 * The PE COFF format doesn't specify any specific file name
2096 * for sections. So on windows, just try to load it all.
2097 *
2098 * Linker members (e.g. filename / are skipped since they are not needed)
2099 */
2100 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
2101
2102 /*
2103 * Note [GCC import files (ext .dll.a)]
2104 * GCC stores import information in the same binary format
2105 * as the object file normally has. The only difference is that
2106 * all the information are put in .idata sections. The only real
2107 * way to tell if we're dealing with an import lib is by looking
2108 * at the file extension.
2109 */
2110 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
2111 #endif // windows
2112
2113 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
2114 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
2115
2116 if (isObject) {
2117 char *archiveMemberName;
2118
2119 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
2120
2121 #if defined(mingw32_HOST_OS)
2122 // TODO: We would like to use allocateExec here, but allocateExec
2123 // cannot currently allocate blocks large enough.
2124 image = allocateImageAndTrampolines(path, fileName,
2125 #if defined(x86_64_HOST_ARCH)
2126 f,
2127 #endif
2128 memberSize, isThin);
2129 #elif defined(darwin_HOST_OS)
2130 if (RTS_LINKER_USE_MMAP)
2131 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
2132 else {
2133 /* See loadObj() */
2134 misalignment = machoGetMisalignment(f);
2135 image = stgMallocBytes(memberSize + misalignment,
2136 "loadArchive(image)");
2137 image += misalignment;
2138 }
2139
2140 #else // not windows or darwin
2141 image = stgMallocBytes(memberSize, "loadArchive(image)");
2142 #endif
2143 if (isThin) {
2144 FILE *member;
2145 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
2146
2147 /* Allocate and setup the dirname of the archive. We'll need
2148 this to locate the thin member */
2149 pathCopy = pathdup(path); // Convert the char* to a pathchar*
2150 dirName = pathdir(pathCopy);
2151
2152 /* Append the relative member name to the dirname. This should be
2153 be the full path to the actual thin member. */
2154 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
2155 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
2156 objFileName = mkPath(fileName);
2157 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
2158 stgFree(objFileName);
2159 stgFree(dirName);
2160
2161 member = pathopen(memberPath, WSTR("rb"));
2162 if (!member)
2163 barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
2164
2165 n = fread ( image, 1, memberSize, member );
2166 if (n != memberSize) {
2167 barf("loadArchive: error whilst reading `%s'", fileName);
2168 }
2169
2170 fclose(member);
2171 stgFree(memberPath);
2172 stgFree(pathCopy);
2173 }
2174 else
2175 {
2176 n = fread ( image, 1, memberSize, f );
2177 if (n != memberSize) {
2178 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2179 }
2180 }
2181
2182 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
2183 "loadArchive(file)");
2184 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
2185 path, (int)thisFileNameSize, fileName);
2186
2187 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
2188 , misalignment);
2189
2190 stgFree(archiveMemberName);
2191
2192 if (0 == loadOc(oc)) {
2193 stgFree(fileName);
2194 fclose(f);
2195 return 0;
2196 } else {
2197 #if defined(OBJFORMAT_PEi386)
2198 if (isImportLib)
2199 {
2200 findAndLoadImportLibrary(oc);
2201 stgFree(oc);
2202 oc = NULL;
2203 break;
2204 } else {
2205 #endif
2206 oc->next = objects;
2207 objects = oc;
2208 #if defined(OBJFORMAT_PEi386)
2209 }
2210 #endif
2211 }
2212 }
2213 else if (isGnuIndex) {
2214 if (gnuFileIndex != NULL) {
2215 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
2216 }
2217 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
2218 #if RTS_LINKER_USE_MMAP
2219 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
2220 #else
2221 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
2222 #endif
2223 n = fread ( gnuFileIndex, 1, memberSize, f );
2224 if (n != memberSize) {
2225 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2226 }
2227 gnuFileIndex[memberSize] = '/';
2228 gnuFileIndexSize = memberSize;
2229 }
2230 else if (isImportLib) {
2231 #if defined(OBJFORMAT_PEi386)
2232 if (checkAndLoadImportLibrary(path, fileName, f)) {
2233 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
2234 }
2235 else {
2236 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
2237 n = fseek(f, memberSize, SEEK_CUR);
2238 if (n != 0)
2239 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2240 memberSize, path);
2241 }
2242 #endif
2243 }
2244 else {
2245 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
2246 if (!isThin || thisFileNameSize == 0) {
2247 n = fseek(f, memberSize, SEEK_CUR);
2248 if (n != 0)
2249 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2250 memberSize, path);
2251 }
2252 }
2253
2254 /* .ar files are 2-byte aligned */
2255 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
2256 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
2257 n = fread ( tmp, 1, 1, f );
2258 if (n != 1) {
2259 if (feof(f)) {
2260 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
2261 break;
2262 }
2263 else {
2264 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
2265 }
2266 }
2267 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
2268 }
2269 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
2270 }
2271
2272 fclose(f);
2273
2274 stgFree(fileName);
2275 if (gnuFileIndex != NULL) {
2276 #if RTS_LINKER_USE_MMAP
2277 munmap(gnuFileIndex, gnuFileIndexSize + 1);
2278 #else
2279 stgFree(gnuFileIndex);
2280 #endif
2281 }
2282
2283 if (RTS_LINKER_USE_MMAP)
2284 m32_allocator_flush();
2285
2286 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
2287 return 1;
2288 }
2289
2290 HsInt loadArchive (pathchar *path)
2291 {
2292 ACQUIRE_LOCK(&linker_mutex);
2293 HsInt r = loadArchive_(path);
2294 RELEASE_LOCK(&linker_mutex);
2295 return r;
2296 }
2297
2298 //
2299 // Load the object file into memory. This will not be its final resting place,
2300 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
2301 // address space, properly aligned.
2302 //
2303 static ObjectCode *
2304 preloadObjectFile (pathchar *path)
2305 {
2306 int fileSize;
2307 struct_stat st;
2308 int r;
2309 void *image;
2310 ObjectCode *oc;
2311 int misalignment = 0;
2312
2313 r = pathstat(path, &st);
2314 if (r == -1) {
2315 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
2316 return NULL;
2317 }
2318
2319 fileSize = st.st_size;
2320
2321 #if RTS_LINKER_USE_MMAP
2322 int fd;
2323
2324 /* On many architectures malloc'd memory isn't executable, so we need to use
2325 * mmap. */
2326
2327 #if defined(openbsd_HOST_OS)
2328 fd = open(path, O_RDONLY, S_IRUSR);
2329 #else
2330 fd = open(path, O_RDONLY);
2331 #endif
2332 if (fd == -1) {
2333 errorBelch("loadObj: can't open %s", path);
2334 return NULL;
2335 }
2336
2337 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
2338 MAP_PRIVATE, fd, 0);
2339 // not 32-bit yet, we'll remap later
2340 close(fd);
2341
2342 #else /* !RTS_LINKER_USE_MMAP */
2343 FILE *f;
2344
2345 /* load the image into memory */
2346 /* coverity[toctou] */
2347 f = pathopen(path, WSTR("rb"));
2348 if (!f) {
2349 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
2350 return NULL;
2351 }
2352
2353 # if defined(mingw32_HOST_OS)
2354
2355 // TODO: We would like to use allocateExec here, but allocateExec
2356 // cannot currently allocate blocks large enough.
2357 image = allocateImageAndTrampolines(path, "itself",
2358 #if defined(x86_64_HOST_ARCH)
2359 f,
2360 #endif
2361 fileSize, HS_BOOL_FALSE);
2362 if (image == NULL) {
2363 fclose(f);
2364 return NULL;
2365 }
2366
2367 # elif defined(darwin_HOST_OS)
2368
2369 // In a Mach-O .o file, all sections can and will be misaligned
2370 // if the total size of the headers is not a multiple of the
2371 // desired alignment. This is fine for .o files that only serve
2372 // as input for the static linker, but it's not fine for us,
2373 // as SSE (used by gcc for floating point) and Altivec require
2374 // 16-byte alignment.
2375 // We calculate the correct alignment from the header before
2376 // reading the file, and then we misalign image on purpose so
2377 // that the actual sections end up aligned again.
2378 misalignment = machoGetMisalignment(f);
2379 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
2380 image += misalignment;
2381
2382 # else /* !defined(mingw32_HOST_OS) */
2383
2384 image = stgMallocBytes(fileSize, "loadObj(image)");
2385
2386 #endif
2387
2388 int n;
2389 n = fread ( image, 1, fileSize, f );
2390 fclose(f);
2391 if (n != fileSize) {
2392 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
2393 stgFree(image);
2394 return NULL;
2395 }
2396
2397 #endif /* RTS_LINKER_USE_MMAP */
2398
2399 oc = mkOc(path, image, fileSize, rtsTrue, NULL, misalignment);
2400
2401 return oc;
2402 }
2403
2404 /* -----------------------------------------------------------------------------
2405 * Load an obj (populate the global symbol table, but don't resolve yet)
2406 *
2407 * Returns: 1 if ok, 0 on error.
2408 */
2409 static HsInt loadObj_ (pathchar *path)
2410 {
2411 ObjectCode* oc;
2412 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
2413
2414 /* debugBelch("loadObj %s\n", path ); */
2415
2416 /* Check that we haven't already loaded this object.
2417 Ignore requests to load multiple times */
2418
2419 if (isAlreadyLoaded(path)) {
2420 IF_DEBUG(linker,
2421 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
2422 return 1; /* success */
2423 }
2424
2425 oc = preloadObjectFile(path);
2426 if (oc == NULL) return 0;
2427
2428 if (! loadOc(oc)) {
2429 // failed; free everything we've allocated
2430 removeOcSymbols(oc);
2431 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
2432 freeObjectCode(oc);
2433 return 0;
2434 }
2435
2436 oc->next = objects;
2437 objects = oc;
2438 return 1;
2439 }
2440
2441 HsInt loadObj (pathchar *path)
2442 {
2443 ACQUIRE_LOCK(&linker_mutex);
2444 HsInt r = loadObj_(path);
2445 RELEASE_LOCK(&linker_mutex);
2446 return r;
2447 }
2448
2449 static HsInt loadOc (ObjectCode* oc)
2450 {
2451 int r;
2452
2453 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
2454
2455 /* verify the in-memory image */
2456 # if defined(OBJFORMAT_ELF)
2457 r = ocVerifyImage_ELF ( oc );
2458 # elif defined(OBJFORMAT_PEi386)
2459 r = ocVerifyImage_PEi386 ( oc );
2460 # elif defined(OBJFORMAT_MACHO)
2461 r = ocVerifyImage_MachO ( oc );
2462 # else
2463 barf("loadObj: no verify method");
2464 # endif
2465 if (!r) {
2466 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
2467 return r;
2468 }
2469
2470 #if NEED_SYMBOL_EXTRAS
2471 # if defined(OBJFORMAT_MACHO)
2472 r = ocAllocateSymbolExtras_MachO ( oc );
2473 if (!r) {
2474 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
2475 return r;
2476 }
2477 # elif defined(OBJFORMAT_ELF)
2478 r = ocAllocateSymbolExtras_ELF ( oc );
2479 if (!r) {
2480 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
2481 return r;
2482 }
2483 # elif defined(OBJFORMAT_PEi386)
2484 ocAllocateSymbolExtras_PEi386 ( oc );
2485 # endif
2486 #endif
2487
2488 /* build the symbol list for this image */
2489 # if defined(OBJFORMAT_ELF)
2490 r = ocGetNames_ELF ( oc );
2491 # elif defined(OBJFORMAT_PEi386)
2492 r = ocGetNames_PEi386 ( oc );
2493 # elif defined(OBJFORMAT_MACHO)
2494 r = ocGetNames_MachO ( oc );
2495 # else
2496 barf("loadObj: no getNames method");
2497 # endif
2498 if (!r) {
2499 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
2500 return r;
2501 }
2502
2503 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
2504 setOcInitialStatus( oc );
2505 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
2506
2507 return 1;
2508 }
2509
2510 /* -----------------------------------------------------------------------------
2511 * try to load and initialize an ObjectCode into memory
2512 *
2513 * Returns: 1 if ok, 0 on error.
2514 */
2515 int ocTryLoad (ObjectCode* oc) {
2516 int r;
2517
2518 if (oc->status != OBJECT_NEEDED) {
2519 return 1;
2520 }
2521
2522 /* Check for duplicate symbols by looking into `symhash`.
2523 Duplicate symbols are any symbols which exist
2524 in different ObjectCodes that have both been loaded, or
2525 are to be loaded by this call.
2526
2527 This call is intended to have no side-effects when a non-duplicate
2528 symbol is re-inserted.
2529
2530 We set the Address to NULL since that is not used to distinguish
2531 symbols. Duplicate symbols are distinguished by name and oc.
2532 */
2533 int x;
2534 SymbolName* symbol;
2535 for (x = 0; x < oc->n_symbols; x++) {
2536 symbol = oc->symbols[x];
2537 if ( symbol
2538 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL, isSymbolWeak(oc, symbol), oc)) {
2539 return 0;
2540 }
2541 }
2542
2543 # if defined(OBJFORMAT_ELF)
2544 r = ocResolve_ELF ( oc );
2545 # elif defined(OBJFORMAT_PEi386)
2546 r = ocResolve_PEi386 ( oc );
2547 # elif defined(OBJFORMAT_MACHO)
2548 r = ocResolve_MachO ( oc );
2549 # else
2550 barf("ocTryLoad: not implemented on this platform");
2551 # endif
2552 if (!r) { return r; }
2553
2554 // run init/init_array/ctors/mod_init_func
2555
2556 loading_obj = oc; // tells foreignExportStablePtr what to do
2557 #if defined(OBJFORMAT_ELF)
2558 r = ocRunInit_ELF ( oc );
2559 #elif defined(OBJFORMAT_PEi386)
2560 r = ocRunInit_PEi386 ( oc );
2561 #elif defined(OBJFORMAT_MACHO)
2562 r = ocRunInit_MachO ( oc );
2563 #else
2564 barf("ocTryLoad: initializers not implemented on this platform");
2565 #endif
2566 loading_obj = NULL;
2567
2568 if (!r) { return r; }
2569
2570 oc->status = OBJECT_RESOLVED;
2571
2572 return 1;
2573 }
2574
2575 /* -----------------------------------------------------------------------------
2576 * resolve all the currently unlinked objects in memory
2577 *
2578 * Returns: 1 if ok, 0 on error.
2579 */
2580 static HsInt resolveObjs_ (void)
2581 {
2582 ObjectCode *oc;
2583 int r;
2584
2585 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
2586
2587 for (oc = objects; oc; oc = oc->next) {
2588 r = ocTryLoad(oc);
2589 if (!r)
2590 {
2591 return r;
2592 }
2593 }
2594
2595 #ifdef PROFILING
2596 // collect any new cost centres & CCSs that were defined during runInit
2597 initProfiling2();
2598 #endif
2599
2600 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
2601 return 1;
2602 }
2603
2604 HsInt resolveObjs (void)
2605 {
2606 ACQUIRE_LOCK(&linker_mutex);
2607 HsInt r = resolveObjs_();
2608 RELEASE_LOCK(&linker_mutex);
2609 return r;
2610 }
2611
2612 /* -----------------------------------------------------------------------------
2613 * delete an object from the pool
2614 */
2615 static HsInt unloadObj_ (pathchar *path, rtsBool just_purge)
2616 {
2617 ObjectCode *oc, *prev, *next;
2618 HsBool unloadedAnyObj = HS_BOOL_FALSE;
2619
2620 ASSERT(symhash != NULL);
2621 ASSERT(objects != NULL);
2622
2623 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
2624
2625 prev = NULL;
2626 for (oc = objects; oc; oc = next) {
2627 next = oc->next; // oc might be freed
2628
2629 if (!pathcmp(oc->fileName,path)) {
2630
2631 // these are both idempotent, so in just_purge mode we can
2632 // later call unloadObj() to really unload the object.
2633 removeOcSymbols(oc);
2634 freeOcStablePtrs(oc);
2635
2636 if (!just_purge) {
2637 if (prev == NULL) {
2638 objects = oc->next;
2639 } else {
2640 prev->next = oc->next;
2641 }
2642 ACQUIRE_LOCK(&linker_unloaded_mutex);
2643 oc->next = unloaded_objects;
2644 unloaded_objects = oc;
2645 oc->status = OBJECT_UNLOADED;
2646 RELEASE_LOCK(&linker_unloaded_mutex);
2647 // We do not own oc any more; it can be released at any time by
2648 // the GC in checkUnload().
2649 } else {
2650 prev = oc;
2651 }
2652
2653 /* This could be a member of an archive so continue
2654 * unloading other members. */
2655 unloadedAnyObj = HS_BOOL_TRUE;
2656 } else {
2657 prev = oc;
2658 }
2659 }
2660
2661 if (unloadedAnyObj) {
2662 return 1;
2663 }
2664 else {
2665 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
2666 return 0;
2667 }
2668 }
2669
2670 HsInt unloadObj (pathchar *path)
2671 {
2672 ACQUIRE_LOCK(&linker_mutex);
2673 HsInt r = unloadObj_(path, rtsFalse);
2674 RELEASE_LOCK(&linker_mutex);
2675 return r;
2676 }
2677
2678 HsInt purgeObj (pathchar *path)
2679 {
2680 ACQUIRE_LOCK(&linker_mutex);
2681 HsInt r = unloadObj_(path, rtsTrue);
2682 RELEASE_LOCK(&linker_mutex);
2683 return r;
2684 }
2685
2686 /* -----------------------------------------------------------------------------
2687 * Sanity checking. For each ObjectCode, maintain a list of address ranges
2688 * which may be prodded during relocation, and abort if we try and write
2689 * outside any of these.
2690 */
2691 static void
2692 addProddableBlock ( ObjectCode* oc, void* start, int size )
2693 {
2694 ProddableBlock* pb
2695 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
2696
2697 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
2698 ASSERT(size > 0);
2699 pb->start = start;
2700 pb->size = size;
2701 pb->next = oc->proddables;
2702 oc->proddables = pb;
2703 }
2704
2705 static void
2706 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
2707 {
2708 ProddableBlock* pb;
2709
2710 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
2711 char* s = (char*)(pb->start);
2712 char* e = s + pb->size;
2713 char* a = (char*)addr;
2714 if (a >= s && (a+size) <= e) return;
2715 }
2716 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
2717 }
2718
2719 static void freeProddableBlocks (ObjectCode *oc)
2720 {
2721 ProddableBlock *pb, *next;
2722
2723 for (pb = oc->proddables; pb != NULL; pb = next) {
2724 next = pb->next;
2725 stgFree(pb);
2726 }
2727 oc->proddables = NULL;
2728 }
2729
2730 /* -----------------------------------------------------------------------------
2731 * Section management.
2732 */
2733 static void
2734 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
2735 void* start, StgWord size, StgWord mapped_offset,
2736 void* mapped_start, StgWord mapped_size)
2737 {
2738 s->start = start; /* actual start of section in memory */
2739 s->size = size; /* actual size of section in memory */
2740 s->kind = kind;
2741 s->alloc = alloc;
2742 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
2743
2744 s->mapped_start = mapped_start; /* start of mmap() block */
2745 s->mapped_size = mapped_size; /* size of mmap() block */
2746
2747 IF_DEBUG(linker,
2748 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
2749 start, (void*)((StgWord)start + size),
2750 size, kind ));
2751 }
2752
2753
2754 /* --------------------------------------------------------------------------
2755 * Symbol Extras.
2756 * This is about allocating a small chunk of memory for every symbol in the
2757 * object file. We make sure that the SymboLExtras are always "in range" of
2758 * limited-range PC-relative instructions on various platforms by allocating
2759 * them right next to the object code itself.
2760 */
2761
2762 #if NEED_SYMBOL_EXTRAS
2763 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2764
2765 /*
2766 ocAllocateSymbolExtras
2767
2768 Allocate additional space at the end of the object file image to make room
2769 for jump islands (powerpc, x86_64, arm) and GOT entries (x86_64).
2770
2771 PowerPC relative branch instructions have a 24 bit displacement field.
2772 As PPC code is always 4-byte-aligned, this yields a +-32MB range.
2773 If a particular imported symbol is outside this range, we have to redirect
2774 the jump to a short piece of new code that just loads the 32bit absolute
2775 address and jumps there.
2776 On x86_64, PC-relative jumps and PC-relative accesses to the GOT are limited
2777 to 32 bits (+-2GB).
2778
2779 This function just allocates space for one SymbolExtra for every
2780 undefined symbol in the object file. The code for the jump islands is
2781 filled in by makeSymbolExtra below.
2782 */
2783
2784 static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
2785 {
2786 size_t n;
2787
2788 if (RTS_LINKER_USE_MMAP && USE_CONTIGUOUS_MMAP) {
2789 n = roundUpToPage(oc->fileSize);
2790
2791 /* Keep image and symbol_extras contiguous */
2792 void *new = mmapForLinker(n + (sizeof(SymbolExtra) * count),
2793 MAP_ANONYMOUS, -1, 0);
2794 if (new) {
2795 memcpy(new, oc->image, oc->fileSize);
2796 if (oc->imageMapped) {
2797 munmap(oc->image, n);
2798 }
2799 oc->image = new;
2800 oc->imageMapped = rtsTrue;
2801 oc->fileSize = n + (sizeof(SymbolExtra) * count);
2802 oc->symbol_extras = (SymbolExtra *) (oc->image + n);
2803 }
2804 else {
2805 oc->symbol_extras = NULL;
2806 return 0;
2807 }
2808 }
2809 else if( count > 0 ) {
2810 if (RTS_LINKER_USE_MMAP) {
2811 n = roundUpToPage(oc->fileSize);
2812
2813 oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8);
2814 if (oc->symbol_extras == NULL) return 0;
2815 }
2816 else {
2817 // round up to the nearest 4
2818 int aligned = (oc->fileSize + 3) & ~3;
2819 int misalignment = oc->misalignment;
2820
2821 oc->image -= misalignment;
2822 oc->image = stgReallocBytes( oc->image,
2823 misalignment +
2824 aligned + sizeof (SymbolExtra) * count,
2825 "ocAllocateSymbolExtras" );
2826 oc->image += misalignment;
2827
2828 oc->symbol_extras = (SymbolExtra *) (oc->image + aligned);
2829 }
2830 }
2831
2832 if (oc->symbol_extras != NULL) {
2833 memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
2834 }
2835
2836 oc->first_symbol_extra = first;
2837 oc->n_symbol_extras = count;
2838
2839 return 1;
2840 }
2841
2842 #endif
2843 #endif // NEED_SYMBOL_EXTRAS
2844
2845 #if defined(arm_HOST_ARCH)
2846
2847 static void
2848 ocFlushInstructionCache( ObjectCode *oc )
2849 {
2850 int i;
2851 // Object code
2852 for (i=0; i < oc->n_sections; i++) {
2853 Section *s = &oc->sections[i];
2854 // This is a bit too broad but we don't have any way to determine what
2855 // is certainly code
2856 if (s->kind == SECTIONKIND_CODE_OR_RODATA)
2857 __clear_cache(s->start, (void*) ((uintptr_t) s->start + s->size));
2858 }
2859
2860 // Jump islands
2861 // Note the (+1) to ensure that the last symbol extra is covered by the
2862 // flush.
2863 __clear_cache(oc->symbol_extras, &oc->symbol_extras[oc->n_symbol_extras+1]);
2864 }
2865
2866 #endif
2867
2868 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2869 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2870
2871 static SymbolExtra* makeSymbolExtra( ObjectCode* oc,
2872 unsigned long symbolNumber,
2873 unsigned long target )
2874 {
2875 SymbolExtra *extra;
2876
2877 ASSERT( symbolNumber >= oc->first_symbol_extra
2878 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2879
2880 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2881
2882 #ifdef powerpc_HOST_ARCH
2883 // lis r12, hi16(target)
2884 extra->jumpIsland.lis_r12 = 0x3d80;
2885 extra->jumpIsland.hi_addr = target >> 16;
2886
2887 // ori r12, r12, lo16(target)
2888 extra->jumpIsland.ori_r12_r12 = 0x618c;
2889 extra->jumpIsland.lo_addr = target & 0xffff;
2890
2891 // mtctr r12
2892 extra->jumpIsland.mtctr_r12 = 0x7d8903a6;
2893
2894 // bctr
2895 extra->jumpIsland.bctr = 0x4e800420;
2896 #endif
2897 #ifdef x86_64_HOST_ARCH
2898 // jmp *-14(%rip)
2899 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
2900 extra->addr = target;
2901 memcpy(extra->jumpIsland, jmp, 6);
2902 #endif
2903
2904 return extra;
2905 }
2906
2907 #endif
2908 #endif // defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2909
2910 #ifdef arm_HOST_ARCH
2911 static SymbolExtra* makeArmSymbolExtra( ObjectCode* oc,
2912 unsigned long symbolNumber,
2913 unsigned long target,
2914 int fromThumb,
2915 int toThumb )
2916 {
2917 SymbolExtra *extra;
2918
2919 ASSERT( symbolNumber >= oc->first_symbol_extra
2920 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2921
2922 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2923
2924 // Make sure instruction mode bit is set properly
2925 if (toThumb)
2926 target |= 1;
2927 else
2928 target &= ~1;
2929
2930 if (!fromThumb) {
2931 // In ARM encoding:
2932 // movw r12, #0
2933 // movt r12, #0
2934 // bx r12
2935 uint32_t code[] = { 0xe300c000, 0xe340c000, 0xe12fff1c };
2936
2937 // Patch lower half-word into movw
2938 code[0] |= ((target>>12) & 0xf) << 16;
2939 code[0] |= target & 0xfff;
2940 // Patch upper half-word into movt
2941 target >>= 16;
2942 code[1] |= ((target>>12) & 0xf) << 16;
2943 code[1] |= target & 0xfff;
2944
2945 memcpy(extra->jumpIsland, code, 12);
2946
2947 } else {
2948 // In Thumb encoding:
2949 // movw r12, #0
2950 // movt r12, #0
2951 // bx r12
2952 uint16_t code[] = { 0xf240, 0x0c00,
2953 0xf2c0, 0x0c00,
2954 0x4760 };
2955
2956 // Patch lower half-word into movw
2957 code[0] |= (target>>12) & 0xf;
2958 code[0] |= ((target>>11) & 0x1) << 10;
2959 code[1] |= ((target>>8) & 0x7) << 12;
2960 code[1] |= target & 0xff;
2961 // Patch upper half-word into movt
2962 target >>= 16;
2963 code[2] |= (target>>12) & 0xf;
2964 code[2] |= ((target>>11) & 0x1) << 10;
2965 code[3] |= ((target>>8) & 0x7) << 12;
2966 code[3] |= target & 0xff;
2967
2968 memcpy(extra->jumpIsland, code, 10);
2969 }
2970
2971 return extra;
2972 }
2973 #endif // arm_HOST_ARCH
2974
2975 /* --------------------------------------------------------------------------
2976 * PowerPC specifics (instruction cache flushing)
2977 * ------------------------------------------------------------------------*/
2978
2979 #ifdef powerpc_HOST_ARCH
2980 /*
2981 ocFlushInstructionCache
2982
2983 Flush the data & instruction caches.
2984 Because the PPC has split data/instruction caches, we have to
2985 do that whenever we modify code at runtime.
2986 */
2987
2988 static void
2989 ocFlushInstructionCacheFrom(void* begin, size_t length)
2990 {
2991 size_t n = (length + 3) / 4;
2992 unsigned long* p = begin;
2993
2994 while (n--)
2995 {
2996 __asm__ volatile ( "dcbf 0,%0\n\t"
2997 "sync\n\t"
2998 "icbi 0,%0"
2999 :
3000 : "r" (p)
3001 );
3002 p++;
3003 }
3004 __asm__ volatile ( "sync\n\t"
3005 "isync"
3006 );
3007 }
3008
3009 static void
3010 ocFlushInstructionCache( ObjectCode *oc )
3011 {
3012 /* The main object code */
3013 ocFlushInstructionCacheFrom(oc->image + oc->misalignment, oc->fileSize);
3014
3015 /* Jump Islands */
3016 ocFlushInstructionCacheFrom(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
3017 }
3018 #endif /* powerpc_HOST_ARCH */
3019
3020
3021 /* --------------------------------------------------------------------------
3022 * PEi386(+) specifics (Win32 targets)
3023 * ------------------------------------------------------------------------*/
3024
3025 /* The information for this linker comes from
3026 Microsoft Portable Executable
3027 and Common Object File Format Specification
3028 revision 8.3 February 2013
3029
3030 It can be found online at:
3031
3032 https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx
3033
3034 Things move, so if that fails, try searching for it via
3035
3036 http://www.google.com/search?q=PE+COFF+specification
3037
3038 The ultimate reference for the PE format is the Winnt.h
3039 header file that comes with the Platform SDKs; as always,
3040 implementations will drift wrt their documentation.
3041
3042 A good background article on the PE format is Matt Pietrek's
3043 March 1994 article in Microsoft System Journal (MSJ)
3044 (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
3045 Win32 Portable Executable File Format." The info in there
3046 has recently been updated in a two part article in
3047 MSDN magazine, issues Feb and March 2002,
3048 "Inside Windows: An In-Depth Look into the Win32 Portable
3049 Executable File Format"
3050
3051 John Levine's book "Linkers and Loaders" contains useful
3052 info on PE too.
3053
3054 The PE specification doesn't specify how to do the actual
3055 relocations. For this reason, and because both PE and ELF are
3056 based on COFF, the relocations for the PEi386+ code is based on
3057 the ELF relocations for the equivalent relocation type.
3058
3059 The ELF ABI can be found at
3060
3061 http://www.x86-64.org/documentation/abi.pdf
3062
3063 The current code is based on version 0.99.6 - October 2013
3064 */
3065
3066
3067 #if defined(OBJFORMAT_PEi386)
3068
3069 static int verifyCOFFHeader ( COFF_header *hdr, pathchar *filename);
3070
3071 /* We assume file pointer is right at the
3072 beginning of COFF object.
3073 */
3074 static char *
3075 allocateImageAndTrampolines (
3076 pathchar* arch_name, char* member_name,
3077 #if defined(x86_64_HOST_ARCH)
3078 FILE* f,
3079 #endif
3080 int size,
3081 int isThin)
3082 {
3083 char* image;
3084 #if defined(x86_64_HOST_ARCH)
3085 if (!isThin)
3086 {
3087 /* PeCoff contains number of symbols right in it's header, so
3088 we can reserve the room for symbolExtras right here. */
3089 COFF_header hdr;
3090 size_t n;
3091
3092 n = fread(&hdr, 1, sizeof_COFF_header, f);
3093 if (n != sizeof(COFF_header)) {
3094 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%S'",
3095 member_name, arch_name);
3096 return NULL;
3097 }
3098 fseek(f, -sizeof_COFF_header, SEEK_CUR);
3099
3100 if (!verifyCOFFHeader(&hdr, arch_name)) {
3101 return 0;
3102 }
3103
3104 /* We get back 8-byte aligned memory (is that guaranteed?), but
3105 the offsets to the sections within the file are all 4 mod 8
3106 (is that guaranteed?). We therefore need to offset the image
3107 by 4, so that all the pointers are 8-byte aligned, so that
3108 pointer tagging works. */
3109 /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
3110 which equals to 4 for 64-bit case and 0 for 32-bit case. */
3111 /* We allocate trampolines area for all symbols right behind
3112 image data, aligned on 8. */
3113 size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
3114 + hdr.NumberOfSymbols * sizeof(SymbolExtra);
3115 }
3116 #endif
3117 image = VirtualAlloc(NULL, size,
3118 MEM_RESERVE | MEM_COMMIT,
3119 PAGE_EXECUTE_READWRITE);
3120
3121 if (image == NULL) {
3122 errorBelch("%" PATH_FMT ": failed to allocate memory for image for %s",
3123 arch_name, member_name);
3124 return NULL;
3125 }
3126
3127 return image + PEi386_IMAGE_OFFSET;
3128 }
3129
3130 static int findAndLoadImportLibrary(ObjectCode* oc)
3131 {
3132 int i;
3133
3134 COFF_header* hdr;
3135 COFF_section* sectab;
3136 COFF_symbol* symtab;
3137 UChar* strtab;
3138
3139 hdr = (COFF_header*)(oc->image);
3140 sectab = (COFF_section*)(
3141 ((UChar*)(oc->image))
3142 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3143 );
3144
3145 symtab = (COFF_symbol*)(
3146 ((UChar*)(oc->image))
3147 + hdr->PointerToSymbolTable
3148 );
3149
3150 strtab = ((UChar*)symtab)
3151 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3152
3153 for (i = 0; i < oc->n_sections; i++)
3154 {
3155 COFF_section* sectab_i
3156 = (COFF_section*)myindex(sizeof_COFF_section, sectab, i);
3157
3158 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3159
3160 // Find the first entry containing a valid .idata$7 section.
3161 if (strcmp(secname, ".idata$7") == 0) {
3162 /* First load the containing DLL if not loaded. */
3163 Section section = oc->sections[i];
3164
3165 pathchar* dirName = pathdir(oc->fileName);
3166 HsPtr token = addLibrarySearchPath(dirName);
3167 stgFree(dirName);
3168 char* dllName = (char*)section.start;
3169
3170 if (strlen(dllName) == 0 || dllName[0] == ' ')
3171 {
3172 continue;
3173 }
3174
3175 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand '%ls' => `%s'\n", oc->fileName, dllName));
3176
3177 pathchar* dll = mkPath(dllName);
3178 removeLibrarySearchPath(token);
3179
3180 const char* result = addDLL(dll);
3181 stgFree(dll);
3182
3183 if (result != NULL) {
3184 errorBelch("Could not load `%s'. Reason: %s\n", (char*)dllName, result);
3185 return 0;
3186 }
3187
3188 break;
3189 }
3190
3191 stgFree(secname);
3192 }
3193
3194 return 1;
3195 }
3196
3197 static int checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f)
3198 {
3199 char* image;
3200 static HsBool load_dll_warn = HS_BOOL_FALSE;
3201
3202 if (load_dll_warn) { return 0; }
3203
3204 /* Based on Import Library specification. PE Spec section 7.1 */
3205
3206 COFF_import_header hdr;
3207 size_t n;
3208
3209 n = fread(&hdr, 1, sizeof_COFF_import_Header, f);
3210 if (n != sizeof(COFF_header)) {
3211 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%" PATH_FMT "'\n",
3212 member_name, arch_name);
3213 return 0;
3214 }
3215
3216 if (hdr.Sig1 != 0x0 || hdr.Sig2 != 0xFFFF) {
3217 fseek(f, -sizeof_COFF_import_Header, SEEK_CUR);
3218 IF_DEBUG(linker, debugBelch("loadArchive: Object `%s` is not an import lib. Skipping...\n", member_name));
3219 return 0;
3220 }
3221
3222 IF_DEBUG(linker, debugBelch("loadArchive: reading %d bytes at %ld\n", hdr.SizeOfData, ftell(f)));
3223
3224 image = malloc(hdr.SizeOfData);
3225 n = fread(image, 1, hdr.SizeOfData, f);
3226 if (n != hdr.SizeOfData) {
3227 errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n",
3228 member_name, arch_name);
3229 }
3230
3231 char* symbol = strtok(image, "\0");
3232 int symLen = strlen(symbol) + 1;
3233 int nameLen = n - symLen;
3234 char* dllName = malloc(sizeof(char) * nameLen);
3235 dllName = strncpy(dllName, image + symLen, nameLen);
3236 pathchar* dll = malloc(sizeof(wchar_t) * nameLen);
3237 mbstowcs(dll, dllName, nameLen);
3238 free(dllName);
3239
3240 IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%ls'\n", symbol, dll));
3241 const char* result = addDLL(dll);
3242
3243 free(image);
3244
3245 if (result != NULL) {
3246 errorBelch("Could not load `%ls'. Reason: %s\n", dll, result);
3247 load_dll_warn = HS_BOOL_TRUE;
3248
3249 free(dll);
3250 fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
3251 return 0;
3252 }
3253
3254 free(dll);
3255 return 1;
3256 }
3257
3258 /* We use myindex to calculate array addresses, rather than
3259 simply doing the normal subscript thing. That's because
3260 some of the above structs have sizes which are not
3261 a whole number of words. GCC rounds their sizes up to a
3262 whole number of words, which means that the address calcs
3263 arising from using normal C indexing or pointer arithmetic
3264 are just plain wrong. Sigh.
3265 */
3266 static UChar *
3267 myindex ( int scale, void* base, int index )
3268 {
3269 return
3270 ((UChar*)base) + scale * index;
3271 }
3272
3273
3274 static void
3275 printName ( UChar* name, UChar* strtab )
3276 {
3277 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3278 UInt32 strtab_offset = * (UInt32*)(name+4);
3279 debugBelch("%s", strtab + strtab_offset );
3280 } else {
3281 int i;
3282 for (i = 0; i < 8; i++) {
3283 if (name[i] == 0) break;
3284 debugBelch("%c", name[i] );
3285 }
3286 }
3287 }
3288
3289
3290 static void
3291 copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
3292 {
3293 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3294 UInt32 strtab_offset = * (UInt32*)(name+4);
3295 strncpy ( (char*)dst, (char*)strtab+strtab_offset, dstSize );
3296 dst[dstSize-1] = 0;
3297 } else {
3298 int i = 0;
3299 while (1) {
3300 if (i >= 8) break;
3301 if (name[i] == 0) break;
3302 dst[i] = name[i];
3303 i++;
3304 }
3305 dst[i] = 0;
3306 }
3307 }
3308
3309
3310 static UChar *
3311 cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
3312 {
3313 UChar* newstr;
3314 /* If the string is longer than 8 bytes, look in the
3315 string table for it -- this will be correctly zero terminated.
3316 */
3317 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3318 UInt32 strtab_offset = * (UInt32*)(name+4);
3319 return ((UChar*)strtab) + strtab_offset;
3320 }
3321 /* Otherwise, if shorter than 8 bytes, return the original,
3322 which by defn is correctly terminated.
3323 */
3324 if (name[7]==0) return name;
3325 /* The annoying case: 8 bytes. Copy into a temporary
3326 (XXX which is never freed ...)
3327 */
3328 newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
3329 ASSERT(newstr);
3330 strncpy((char*)newstr,(char*)name,8);
3331 newstr[8] = 0;
3332 return newstr;
3333 }
3334
3335 /* Getting the name of a section is mildly tricky, so we make a
3336 function for it. Sadly, in one case we have to copy the string
3337 (when it is exactly 8 bytes long there's no trailing '\0'), so for
3338 consistency we *always* copy the string; the caller must free it
3339 */
3340 static char *
3341 cstring_from_section_name (UChar* name, UChar* strtab)
3342 {
3343 char *newstr;
3344
3345 if (name[0]=='/') {
3346 int strtab_offset = strtol((char*)name+1,NULL,10);
3347 int len = strlen(((char*)strtab) + strtab_offset);
3348
3349 newstr = stgMallocBytes(len+1, "cstring_from_section_symbol_name");
3350 strcpy((char*)newstr, (char*)((UChar*)strtab) + strtab_offset);
3351 return newstr;
3352 }
3353 else
3354 {
3355 newstr = stgMallocBytes(9, "cstring_from_section_symbol_name");
3356 ASSERT(newstr);
3357 strncpy((char*)newstr,(char*)name,8);
3358 newstr[8] = 0;
3359 return newstr;
3360 }
3361 }
3362
3363 /* See Note [mingw-w64 name decoration scheme] */
3364 #ifndef x86_64_HOST_ARCH
3365 static void
3366 zapTrailingAtSign ( UChar* sym )
3367 {
3368 # define my_isdigit(c) ((c) >= '0' && (c) <= '9')
3369 int i, j;
3370 if (sym[0] == 0) return;
3371 i = 0;
3372 while (sym[i] != 0) i++;
3373 i--;
3374 j = i;
3375 while (j > 0 && my_isdigit(sym[j])) j--;
3376 if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
3377 # undef my_isdigit
3378 }
3379 #endif
3380
3381 /* See Note [mingw-w64 name decoration scheme] */
3382 #ifndef x86_64_HOST_ARCH
3383 #define STRIP_LEADING_UNDERSCORE 1
3384 #else
3385 #define STRIP_LEADING_UNDERSCORE 0
3386 #endif
3387
3388 /*
3389 Note [mingw-w64 name decoration scheme]
3390
3391 What's going on with name decoration? Well, original code
3392 have some crufty and ad-hocish paths related mostly to very old
3393 mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty
3394 uniform and MS-compatible decoration scheme across its tools and runtime.
3395
3396 The scheme is pretty straightforward: on 32 bit objects symbols are exported
3397 with underscore prepended (and @ + stack size suffix appended for stdcall
3398 functions), on 64 bits no underscore is prepended and no suffix is appended
3399 because we have no stdcall convention on 64 bits.
3400
3401 See #9218
3402 */
3403
3404 static SymbolAddr*
3405 lookupSymbolInDLLs ( UChar *lbl )
3406 {
3407 OpenedDLL* o_dll;
3408 SymbolAddr* sym;
3409
3410 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
3411 /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
3412
3413 sym = GetProcAddress(o_dll->instance, (char*)(lbl+STRIP_LEADING_UNDERSCORE));
3414 if (sym != NULL) {
3415 /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
3416 return sym;
3417 }
3418
3419 /* Ticket #2283.
3420 Long description: http://support.microsoft.com/kb/132044
3421 tl;dr:
3422 If C/C++ compiler sees __declspec(dllimport) ... foo ...
3423 it generates call *__imp_foo, and __imp_foo here has exactly
3424 the same semantics as in __imp_foo = GetProcAddress(..., "foo")
3425 */
3426 if (sym == NULL && strncmp ((const char*)lbl, "__imp_", 6) == 0) {
3427 sym = GetProcAddress(o_dll->instance, (char*)(lbl+6+STRIP_LEADING_UNDERSCORE));
3428 if (sym != NULL) {
3429 IndirectAddr* ret;
3430 ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
3431 ret->addr = sym;
3432 ret->next = indirects;
3433 indirects = ret;
3434 IF_DEBUG(linker,
3435 debugBelch("warning: %s from %S is linked instead of %s\n",
3436 (char*)(lbl+6+STRIP_LEADING_UNDERSCORE), o_dll->name, (char*)lbl));
3437 return (void*) & ret->addr;
3438 }
3439 }
3440
3441 sym = GetProcAddress(o_dll->instance, (char*)lbl);
3442 if (sym != NULL) {
3443 /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
3444 return sym;
3445 }
3446 }
3447 return NULL;
3448 }
3449
3450 static int
3451 verifyCOFFHeader (COFF_header *hdr, pathchar *fileName)
3452 {
3453 #if defined(i386_HOST_ARCH)
3454 if (hdr->Machine != 0x14c) {
3455 errorBelch("%" PATH_FMT ": Not x86 PEi386", fileName);
3456 return 0;
3457 }
3458 #elif defined(x86_64_HOST_ARCH)
3459 if (hdr->Machine != 0x8664) {
3460 errorBelch("%" PATH_FMT ": Not x86_64 PEi386", fileName);
3461 return 0;
3462 }
3463 #else
3464 errorBelch("PEi386 not supported on this arch");
3465 #endif
3466
3467 if (hdr->SizeOfOptionalHeader != 0) {
3468 errorBelch("%" PATH_FMT ": PEi386 with nonempty optional header",
3469 fileName);
3470 return 0;
3471 }
3472 if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
3473 (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
3474 (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
3475 (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
3476 errorBelch("%" PATH_FMT ": Not a PEi386 object file", fileName);
3477 return 0;
3478 }
3479 if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
3480 /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
3481 errorBelch("%" PATH_FMT ": Invalid PEi386 word size or endiannness: %d",
3482 fileName,
3483 (int)(hdr->Characteristics));
3484 return 0;
3485 }
3486 return 1;
3487 }
3488
3489 static int
3490 ocVerifyImage_PEi386 ( ObjectCode* oc )
3491 {
3492 int i;
3493 UInt32 j, noRelocs;
3494 COFF_header* hdr;
3495 COFF_section* sectab;
3496 COFF_symbol* symtab;
3497 UChar* strtab;
3498 /* debugBelch("\nLOADING %s\n", oc->fileName); */
3499 hdr = (COFF_header*)(oc->image);
3500 sectab = (COFF_section*) (
3501 ((UChar*)(oc->image))
3502 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3503 );
3504 symtab = (COFF_symbol*) (
3505 ((UChar*)(oc->image))
3506 + hdr->PointerToSymbolTable
3507 );
3508 strtab = ((UChar*)symtab)
3509 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3510
3511 if (!verifyCOFFHeader(hdr, oc->fileName)) {
3512 return 0;
3513 }
3514
3515 /* If the string table size is way crazy, this might indicate that
3516 there are more than 64k relocations, despite claims to the
3517 contrary. Hence this test. */
3518 /* debugBelch("strtab size %d\n", * (UInt32*)strtab); */
3519 #if 0
3520 if ( (*(UInt32*)strtab) > 600000 ) {
3521 /* Note that 600k has no special significance other than being
3522 big enough to handle the almost-2MB-sized lumps that
3523 constitute HSwin32*.o. */
3524 debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
3525 return 0;
3526 }
3527 #endif
3528
3529 /* .BSS Section is initialized in ocGetNames_PEi386
3530 but we need the Sections array initialized here already. */
3531 Section *sections;
3532 sections = (Section*)stgCallocBytes(
3533 sizeof(Section),
3534 hdr->NumberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */
3535 "ocVerifyImage_PEi386(sections)");
3536 oc->sections = sections;
3537 oc->n_sections = hdr->NumberOfSections + 1;
3538
3539 /* Initialize the Sections */
3540 for (i = 0; i < hdr->NumberOfSections; i++) {
3541 COFF_section* sectab_i
3542 = (COFF_section*)
3543 myindex(sizeof_COFF_section, sectab, i);
3544
3545 /* Calculate the start of the data section */
3546 sections[i].start = oc->image + sectab_i->PointerToRawData;
3547 }
3548
3549 /* No further verification after this point; only debug printing. */
3550 i = 0;
3551 IF_DEBUG(linker, i=1);
3552 if (i == 0) return 1;
3553
3554 debugBelch("sectab offset = %" FMT_SizeT "\n",
3555 ((UChar*)sectab) - ((UChar*)hdr) );
3556 debugBelch("symtab offset = %" FMT_SizeT "\n",
3557 ((UChar*)symtab) - ((UChar*)hdr) );
3558 debugBelch("strtab offset = %" FMT_SizeT "\n",
3559 ((UChar*)strtab) - ((UChar*)hdr) );
3560
3561 debugBelch("\n" );
3562 debugBelch( "Machine: 0x%x\n", (UInt32)(hdr->Machine) );
3563 debugBelch( "# sections: %d\n", (UInt32)(hdr->NumberOfSections) );
3564 debugBelch( "time/date: 0x%x\n", (UInt32)(hdr->TimeDateStamp) );
3565 debugBelch( "symtab offset: %d\n", (UInt32)(hdr->PointerToSymbolTable) );
3566 debugBelch( "# symbols: %d\n", (UInt32)(hdr->NumberOfSymbols) );
3567 debugBelch( "sz of opt hdr: %d\n", (UInt32)(hdr->SizeOfOptionalHeader) );
3568 debugBelch( "characteristics: 0x%x\n", (UInt32)(hdr->Characteristics) );
3569
3570 /* Print the section table. */
3571 debugBelch("\n" );
3572 for (i = 0; i < hdr->NumberOfSections; i++) {
3573 COFF_reloc* reltab;
3574 COFF_section* sectab_i
3575 = (COFF_section*)
3576 myindex ( sizeof_COFF_section, sectab, i );
3577 Section section = sections[i];
3578 debugBelch(
3579 "\n"
3580 "section %d\n"
3581 " name `",
3582 i
3583 );
3584 printName ( sectab_i->Name, strtab );
3585 debugBelch(
3586 "'\n"
3587 " vsize %d\n"
3588 " vaddr %d\n"
3589 " data sz %d\n"
3590 " data off 0x%p\n"
3591 " num rel %d\n"
3592 " off rel %d\n"
3593 " ptr raw 0x%x\n",
3594 sectab_i->VirtualSize,
3595 sectab_i->VirtualAddress,
3596 sectab_i->SizeOfRawData,
3597 section.start,
3598 sectab_i->NumberOfRelocations,
3599 sectab_i->PointerToRelocations,
3600 sectab_i->PointerToRawData
3601 );
3602 reltab = (COFF_reloc*) (
3603 ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
3604 );
3605
3606 if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
3607 /* If the relocation field (a short) has overflowed, the
3608 * real count can be found in the first reloc entry.
3609 *
3610 * See Section 4.1 (last para) of the PE spec (rev6.0).
3611 */
3612 COFF_reloc* rel = (COFF_reloc*)
3613 myindex ( sizeof_COFF_reloc, reltab, 0 );
3614 noRelocs = rel->VirtualAddress;
3615 j = 1;
3616 } else {
3617 noRelocs = sectab_i->NumberOfRelocations;
3618 j = 0;
3619 }
3620
3621 for (; j < noRelocs; j++) {
3622 COFF_symbol* sym;
3623 COFF_reloc* rel = (COFF_reloc*)
3624 myindex ( sizeof_COFF_reloc, reltab, j );
3625 debugBelch(
3626 " type 0x%-4x vaddr 0x%-8x name `",
3627 (UInt32)rel->Type,
3628 rel->VirtualAddress );
3629 sym = (COFF_symbol*)
3630 myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
3631 /* Hmm..mysterious looking offset - what's it for? SOF */
3632 printName ( sym->Name, strtab -10 );
3633 debugBelch("'\n" );
3634 }
3635
3636 debugBelch("\n" );
3637 }
3638 debugBelch("\n" );
3639 debugBelch("string table has size 0x%x\n", * (UInt32*)strtab );
3640 debugBelch("---START of string table---\n");
3641 for (i = 4; i < *(Int32*)strtab; i++) {
3642 if (strtab[i] == 0)
3643 debugBelch("\n"); else
3644 debugBelch("%c", strtab[i] );
3645 }
3646 debugBelch("--- END of string table---\n");
3647
3648 debugBelch("\n" );
3649 i = 0;
3650 while (1) {
3651 COFF_symbol* symtab_i;
3652 if (i >= (Int32)(hdr->NumberOfSymbols)) break;
3653 symtab_i = (COFF_symbol*)
3654 myindex ( sizeof_COFF_symbol, symtab, i );
3655 debugBelch(
3656 "symbol %d\n"
3657 " name `",
3658 i
3659 );
3660 printName ( symtab_i->Name, strtab );
3661 debugBelch(
3662 "'\n"
3663 " value 0x%x\n"
3664 " 1+sec# %d\n"
3665 " type 0x%x\n"
3666 " sclass 0x%x\n"
3667 " nAux %d\n",
3668 symtab_i->Value,
3669 (Int32)(symtab_i->SectionNumber),
3670 (UInt32)symtab_i->Type,
3671 (UInt32)symtab_i->StorageClass,
3672 (UInt32)symtab_i->NumberOfAuxSymbols
3673 );
3674 i += symtab_i->NumberOfAuxSymbols;
3675 i++;
3676 }
3677
3678 debugBelch("\n" );
3679 return 1;
3680 }
3681
3682
3683 static int
3684 ocGetNames_PEi386 ( ObjectCode* oc )
3685 {
3686 COFF_header* hdr;
3687 COFF_section* sectab;
3688 COFF_symbol* symtab;
3689 UChar* strtab;
3690
3691 UChar* sname;
3692 SymbolAddr* addr;
3693 int i;
3694
3695 hdr = (COFF_header*)(oc->image);
3696 sectab = (COFF_section*) (
3697 ((UChar*)(oc->image))
3698 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3699 );
3700 symtab = (COFF_symbol*) (
3701 ((UChar*)(oc->image))
3702 + hdr->PointerToSymbolTable
3703 );
3704 strtab = ((UChar*)(oc->image))
3705 + hdr->PointerToSymbolTable
3706 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3707
3708 /* Allocate space for any (local, anonymous) .bss sections. */
3709
3710 for (i = 0; i < hdr->NumberOfSections; i++) {
3711 UInt32 bss_sz;
3712 UChar* zspace;
3713 COFF_section* sectab_i
3714 = (COFF_section*)
3715 myindex ( sizeof_COFF_section, sectab, i );
3716
3717 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3718
3719 if (0 != strcmp(secname, ".bss")) {
3720 stgFree(secname);
3721 continue;
3722 }
3723
3724 stgFree(secname);
3725
3726 /* sof 10/05: the PE spec text isn't too clear regarding what
3727 * the SizeOfRawData field is supposed to hold for object
3728 * file sections containing just uninitialized data -- for executables,
3729 * it is supposed to be zero; unclear what it's supposed to be
3730 * for object files. However, VirtualSize is guaranteed to be
3731 * zero for object files, which definitely suggests that SizeOfRawData
3732 * will be non-zero (where else would the size of this .bss section be
3733 * stored?) Looking at the COFF_section info for incoming object files,
3734 * this certainly appears to be the case.
3735 *
3736 * => I suspect we've been incorrectly handling .bss sections in (relocatable)
3737 * object files up until now. This turned out to bite us with ghc-6.4.1's use
3738 * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
3739 * variable decls into the .bss section. (The specific function in Q which
3740 * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
3741 */
3742 if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
3743 /* This is a non-empty .bss section.
3744 Allocate zeroed space for it */
3745 bss_sz = sectab_i->VirtualSize;
3746 if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
3747 zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
3748 oc->sections[i].start = zspace;
3749 addProddableBlock(oc, zspace, bss_sz);
3750 /* debugBelch("BSS anon section at 0x%x\n", zspace); */
3751 }
3752
3753 /* Copy section information into the ObjectCode. */
3754
3755 for (i = 0; i < hdr->NumberOfSections; i++) {
3756 UChar* start;
3757 UChar* end;
3758 UInt32 sz;
3759
3760 /* By default consider all section as CODE or DATA, which means we want to load them. */
3761 SectionKind kind
3762 = SECTIONKIND_CODE_OR_RODATA;
3763 COFF_section* sectab_i
3764 = (COFF_section*)
3765 myindex ( sizeof_COFF_section, sectab, i );
3766 Section section = oc->sections[i];
3767
3768 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3769
3770 IF_DEBUG(linker, debugBelch("section name = %s\n", secname ));
3771
3772 /* The PE file section flag indicates whether the section contains code or data. */
3773 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE ||
3774 sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
3775 kind = SECTIONKIND_CODE_OR_RODATA;
3776
3777 /* Check next if it contains any uninitialized data */
3778 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_UNINITIALIZED_DATA)
3779 kind = SECTIONKIND_RWDATA;
3780
3781 /* Finally check if it can be discarded. This will also ignore .debug sections */
3782 if (sectab_i->Characteristics & MYIMAGE_SCN_MEM_DISCARDABLE ||
3783 sectab_i->Characteristics & MYIMAGE_SCN_LNK_REMOVE)
3784 kind = SECTIONKIND_OTHER;
3785
3786 if (0==strcmp(".ctors", (char*)secname))
3787 kind = SECTIONKIND_INIT_ARRAY;
3788
3789 ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->VirtualSize == 0);
3790 sz = sectab_i->SizeOfRawData;
3791 if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;
3792
3793 start = section.start;
3794 end = start + sz - 1;
3795
3796 if (kind != SECTIONKIND_OTHER && end >= start) {
3797 addSection(&oc->sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
3798 addProddableBlock(oc, start, sz);
3799 }
3800
3801 stgFree(secname);
3802 }
3803
3804 /* Copy exported symbols into the ObjectCode. */
3805
3806 oc->n_symbols = hdr->NumberOfSymbols;
3807 oc->symbols = stgCallocBytes(sizeof(SymbolName*), oc->n_symbols,
3808 "ocGetNames_PEi386(oc->symbols)");
3809
3810 /* Work out the size of the global BSS section */
3811 StgWord globalBssSize = 0;
3812 for (i=0; i < (int)hdr->NumberOfSymbols; i++) {
3813 COFF_symbol* symtab_i;
3814 symtab_i = (COFF_symbol*)
3815 myindex ( sizeof_COFF_symbol, symtab, i );
3816 if (symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3817 && symtab_i->Value > 0
3818 && symtab_i->StorageClass != MYIMAGE_SYM_CLASS_SECTION) {
3819 globalBssSize += symtab_i->Value;
3820 }
3821 i += symtab_i->NumberOfAuxSymbols;
3822 }
3823
3824 /* Allocate BSS space */
3825 SymbolAddr* bss = NULL;
3826 if (globalBssSize > 0) {
3827 bss = stgCallocBytes(1, globalBssSize,
3828 "ocGetNames_PEi386(non-anonymous bss)");
3829 addSection(&oc->sections[oc->n_sections-1],
3830 SECTIONKIND_RWDATA, SECTION_MALLOC,
3831 bss, globalBssSize, 0, 0, 0);
3832 IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
3833 addProddableBlock(oc, bss, globalBssSize);
3834 } else {
3835 addSection(&oc->sections[oc->n_sections-1],
3836 SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
3837 }
3838
3839 for (i = 0; i < oc->n_symbols; i++) {
3840 COFF_symbol* symtab_i;
3841 symtab_i = (COFF_symbol*)
3842 myindex ( sizeof_COFF_symbol, symtab, i );
3843
3844 addr = NULL;
3845 HsBool isWeak = HS_BOOL_FALSE;
3846 if ( symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED
3847 && symtab_i->SectionNumber > 0) {
3848 /* This symbol is global and defined, viz, exported */
3849 /* for MYIMAGE_SYMCLASS_EXTERNAL
3850 && !MYIMAGE_SYM_UNDEFINED,
3851 the address of the symbol is:
3852 address of relevant section + offset in section
3853 */
3854 COFF_section* sectabent
3855 = (COFF_section*) myindex ( sizeof_COFF_section,
3856 sectab,
3857 symtab_i->SectionNumber-1 );
3858 if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL
3859 || ( symtab_i->StorageClass == MYIMAGE_SYM_CLASS_STATIC
3860 && sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT)
3861 ) {
3862 addr = (void*)((size_t)oc->sections[symtab_i->SectionNumber-1].start
3863 + symtab_i->Value);
3864 if (sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT) {
3865 isWeak = HS_BOOL_TRUE;
3866 }
3867 }
3868 }
3869 else if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_WEAK_EXTERNAL) {
3870 isWeak = HS_BOOL_TRUE;
3871 }
3872 else if ( symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3873 && symtab_i->Value > 0) {
3874 /* This symbol isn't in any section at all, ie, global bss.
3875 Allocate zeroed space for it from the BSS section */
3876 addr = bss;
3877 bss = (SymbolAddr*)((StgWord)bss + (StgWord)symtab_i->Value);
3878 IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symtab_i->Value));
3879 }
3880
3881 sname = cstring_from_COFF_symbol_name(symtab_i->Name, strtab);
3882 if (addr != NULL || isWeak == HS_BOOL_TRUE) {
3883
3884 /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
3885 IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname);)
3886 ASSERT(i >= 0 && i < oc->n_symbols);
3887 /* cstring_from_COFF_symbol_name always succeeds. */
3888 oc->symbols[i] = (SymbolName*)sname;
3889 if (isWeak == HS_BOOL_TRUE) {
3890 setWeakSymbol(oc, sname);
3891 }
3892
3893 if (! ghciInsertSymbolTable(oc->fileName, symhash, (SymbolName*)sname, addr,
3894 isWeak, oc)) {
3895 return 0;
3896 }
3897 } else {
3898 /* We're skipping the symbol, but if we ever load this
3899 object file we'll want to skip it then too. */
3900 oc->symbols[i] = NULL;
3901
3902 # if 0
3903 debugBelch(
3904 "IGNORING symbol %d\n"
3905 " name `",
3906 i
3907 );
3908 printName ( symtab_i->Name, strtab );
3909 debugBelch(
3910 "'\n"
3911 " value 0x%x\n"
3912 " 1+sec# %d\n"
3913 " type 0x%x\n"
3914 " sclass 0x%x\n"
3915 " nAux %d\n",
3916 symtab_i->Value,
3917 (Int32)(symtab_i->SectionNumber),
3918 (UInt32)symtab_i->Type,
3919 (UInt32)symtab_i->StorageClass,
3920