Do not segfault if no common root can be found
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2012
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 #include "Rts.h"
14 #include "HsFFI.h"
15
16 #include "sm/Storage.h"
17 #include "Stats.h"
18 #include "Hash.h"
19 #include "LinkerInternals.h"
20 #include "RtsUtils.h"
21 #include "Trace.h"
22 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
23 #include "Proftimer.h"
24 #include "GetEnv.h"
25 #include "Stable.h"
26 #include "RtsSymbols.h"
27 #include "RtsSymbolInfo.h"
28 #include "Profiling.h"
29 #include "sm/OSMem.h"
30 #include "linker/M32Alloc.h"
31
32 #if !defined(mingw32_HOST_OS)
33 #include "posix/Signals.h"
34 #endif
35
36 // get protos for is*()
37 #include <ctype.h>
38
39 #ifdef HAVE_SYS_TYPES_H
40 #include <sys/types.h>
41 #endif
42
43 #include <inttypes.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <stdio.h>
47 #include <assert.h>
48 #include <libgen.h>
49
50 #ifdef HAVE_SYS_STAT_H
51 #include <sys/stat.h>
52 #endif
53
54 #if defined(HAVE_DLFCN_H)
55 #include <dlfcn.h>
56 #endif
57
58 /* PowerPC and ARM have relative branch instructions with only 24 bit
59 * displacements and therefore need jump islands contiguous with each object
60 * code module.
61 */
62 #if defined(powerpc_HOST_ARCH)
63 #define SHORT_REL_BRANCH 1
64 #endif
65 #if defined(arm_HOST_ARCH)
66 #define SHORT_REL_BRANCH 1
67 #endif
68
69 #if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
70 #define USE_CONTIGUOUS_MMAP 1
71 #else
72 #define USE_CONTIGUOUS_MMAP 0
73 #endif
74
75 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
76 # define OBJFORMAT_ELF
77 # include <regex.h> // regex is already used by dlopen() so this is OK
78 // to use here without requiring an additional lib
79 #elif defined (mingw32_HOST_OS)
80 # define OBJFORMAT_PEi386
81 # include <windows.h>
82 # include <shfolder.h> /* SHGetFolderPathW */
83 # include <math.h>
84 # include <wchar.h>
85 #elif defined(darwin_HOST_OS)
86 # define OBJFORMAT_MACHO
87 # include <regex.h>
88 # include <mach/machine.h>
89 # include <mach-o/fat.h>
90 # include <mach-o/loader.h>
91 # include <mach-o/nlist.h>
92 # include <mach-o/reloc.h>
93 #if defined(powerpc_HOST_ARCH)
94 # include <mach-o/ppc/reloc.h>
95 #endif
96 #if defined(x86_64_HOST_ARCH)
97 # include <mach-o/x86_64/reloc.h>
98 #endif
99 #endif
100
101 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
102 #define ALWAYS_PIC
103 #endif
104
105 #if defined(dragonfly_HOST_OS)
106 #include <sys/tls.h>
107 #endif
108
109 /* SymbolInfo tracks a symbol's address, the object code from which
110 it originated, and whether or not it's weak.
111
112 RtsSymbolInfo is used to track the state of the symbols currently
113 loaded or to be loaded by the Linker.
114
115 Where the information in the `ObjectCode` is used to track the
116 original status of the symbol inside the `ObjectCode`.
117
118 A weak symbol that has been used will still be marked as weak
119 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
120 */
121 typedef struct _RtsSymbolInfo {
122 SymbolAddr* value;
123 ObjectCode *owner;
124 HsBool weak;
125 } RtsSymbolInfo;
126
127 /* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
128 This hashtable will contain information on all symbols
129 that we know of, however the .o they are in may not be loaded.
130
131 Until the ObjectCode the symbol belongs to is actually
132 loaded this symbol may be replaced. So do not rely on
133 addresses of unloaded symbols.
134
135 Note [runtime-linker-phases]
136 --------------------------------------
137 Broadly the behavior of the runtime linker can be
138 split into the following four phases:
139
140 - Indexing (e.g. ocVerifyImage and ocGetNames)
141 - Initialization (e.g. ocResolve and ocRunInit)
142 - Resolve (e.g. resolveObjs())
143 - Lookup (e.g. lookupSymbol)
144
145 This is to enable lazy loading of symbols. Eager loading is problematic
146 as it means that all symbols must be available, even those which we will
147 never use. This is especially painful of Windows, where the number of
148 libraries required to link things like mingwex grows to be quite high.
149
150 We proceed through these stages as follows,
151
152 * During Indexing we verify and open the ObjectCode and
153 perform a quick scan/indexing of the ObjectCode. All the work
154 required to actually load the ObjectCode is done.
155
156 All symbols from the ObjectCode is also inserted into
157 `symhash`, where possible duplicates are handled via the semantics
158 described in `ghciInsertSymbolTable`.
159
160 This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
161 depending on whether they are an archive members or not.
162
163 * During initialization we load ObjectCode, perform relocations, execute
164 static constructors etc. This phase may trigger other ObjectCodes to
165 be loaded because of the calls to lookupSymbol.
166
167 This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
168 previous status was `OBJECT_LOADED`.
169
170 * During resolve we attempt to resolve all the symbols needed for the
171 initial link. This essentially means, that for any ObjectCode given
172 directly to the command-line we perform lookupSymbols on the required
173 symbols. lookupSymbols may trigger the loading of additional ObjectCode
174 if required.
175
176 This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
177 the previous status was `OBJECT_NEEDED`.
178
179 * Lookup symbols is used to lookup any symbols required, both during initial
180 link and during statement and expression compilations in the REPL.
181 Declaration of e.g. an foreign import, will eventually call lookupSymbol
182 which will either fail (symbol unknown) or succeed (and possibly triggered a
183 load).
184
185 This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
186
187 When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
188 by calling unloadObj and starting over.
189 When a new declaration or statement is performed ultimately lookupSymbol is called
190 without doing a re-link.
191
192 The goal of these different phases is to allow the linker to be able to perform
193 "lazy loading" of ObjectCode. The reason for this is that we want to only link
194 in symbols that are actually required for the link. This reduces:
195
196 1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
197 then we don't need to load libB. This means the dependency chain for libraries
198 such as mingw32 and mingwex can be broken down.
199
200 2) The number of duplicate symbols, since now only symbols that are
201 true duplicates will display the error.
202 */
203 static /*Str*/HashTable *symhash;
204
205 /* List of currently loaded objects */
206 ObjectCode *objects = NULL; /* initially empty */
207
208 /* List of objects that have been unloaded via unloadObj(), but are waiting
209 to be actually freed via checkUnload() */
210 ObjectCode *unloaded_objects = NULL; /* initially empty */
211
212 #ifdef THREADED_RTS
213 /* This protects all the Linker's global state except unloaded_objects */
214 Mutex linker_mutex;
215 /*
216 * This protects unloaded_objects. We have a separate mutex for this, because
217 * the GC needs to access unloaded_objects in checkUnload, while the linker only
218 * needs to access unloaded_objects in unloadObj(), so this allows most linker
219 * operations proceed concurrently with the GC.
220 */
221 Mutex linker_unloaded_mutex;
222 #endif
223
224 /* Type of the initializer */
225 typedef void (*init_t) (int argc, char **argv, char **env);
226
227 static HsInt isAlreadyLoaded( pathchar *path );
228 static HsInt loadOc( ObjectCode* oc );
229 static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
230 rtsBool mapped, char *archiveMemberName,
231 int misalignment
232 );
233
234 // Use wchar_t for pathnames on Windows (#5697)
235 #if defined(mingw32_HOST_OS)
236 #define pathcmp wcscmp
237 #define pathlen wcslen
238 #define pathopen _wfopen
239 #define pathstat _wstat
240 #define struct_stat struct _stat
241 #define open wopen
242 #define WSTR(s) L##s
243 #define pathprintf swprintf
244 #define pathsize sizeof(wchar_t)
245 #else
246 #define pathcmp strcmp
247 #define pathlen strlen
248 #define pathopen fopen
249 #define pathstat stat
250 #define struct_stat struct stat
251 #define WSTR(s) s
252 #define pathprintf snprintf
253 #define pathsize sizeof(char)
254 #endif
255
256 static pathchar* pathdup(pathchar *path)
257 {
258 pathchar *ret;
259 #if defined(mingw32_HOST_OS)
260 ret = wcsdup(path);
261 #else
262 /* sigh, strdup() isn't a POSIX function, so do it the long way */
263 ret = stgMallocBytes( strlen(path)+1, "pathdup" );
264 strcpy(ret, path);
265 #endif
266 return ret;
267 }
268
269 static pathchar* pathdir(pathchar *path)
270 {
271 pathchar *ret;
272 #if defined(mingw32_HOST_OS)
273 pathchar *drive, *dirName;
274 size_t memberLen = pathlen(path) + 1;
275 dirName = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
276 ret = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
277 drive = stgMallocBytes(pathsize * _MAX_DRIVE, "pathdir(path)");
278 _wsplitpath_s(path, drive, _MAX_DRIVE, dirName, pathsize * pathlen(path), NULL, 0, NULL, 0);
279 pathprintf(ret, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), drive, dirName);
280 stgFree(drive);
281 stgFree(dirName);
282 #else
283 pathchar* dirName = dirname(path);
284 size_t memberLen = pathlen(dirName);
285 ret = stgMallocBytes(pathsize * (memberLen + 2), "pathdir(path)");
286 strcpy(ret, dirName);
287 ret[memberLen ] = '/';
288 ret[memberLen+1] = '\0';
289 #endif
290 return ret;
291 }
292
293 static pathchar* mkPath(char* path)
294 {
295 #if defined(mingw32_HOST_OS)
296 size_t required = mbstowcs(NULL, path, 0);
297 pathchar *ret = stgMallocBytes(sizeof(pathchar) * (required + 1), "mkPath");
298 if (mbstowcs(ret, path, required) == (size_t)-1)
299 {
300 barf("mkPath failed converting char* to wchar_t*");
301 }
302 ret[required] = '\0';
303 return ret;
304 #else
305 return pathdup(path);
306 #endif
307 }
308
309 /* Generic wrapper function to try and Resolve and RunInit oc files */
310 int ocTryLoad( ObjectCode* oc );
311
312 #if defined(OBJFORMAT_ELF)
313 static int ocVerifyImage_ELF ( ObjectCode* oc );
314 static int ocGetNames_ELF ( ObjectCode* oc );
315 static int ocResolve_ELF ( ObjectCode* oc );
316 static int ocRunInit_ELF ( ObjectCode* oc );
317 #if NEED_SYMBOL_EXTRAS
318 static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
319 #endif
320 #elif defined(OBJFORMAT_PEi386)
321 static int ocVerifyImage_PEi386 ( ObjectCode* oc );
322 static int ocGetNames_PEi386 ( ObjectCode* oc );
323 static int ocResolve_PEi386 ( ObjectCode* oc );
324 static int ocRunInit_PEi386 ( ObjectCode* oc );
325 static void *lookupSymbolInDLLs ( unsigned char *lbl );
326 /* See Note [mingw-w64 name decoration scheme] */
327 #ifndef x86_64_HOST_ARCH
328 static void zapTrailingAtSign ( unsigned char *sym );
329 #endif
330
331 #if defined(x86_64_HOST_ARCH)
332 #define USED_IF_x86_64_HOST_ARCH /* Nothing */
333 #else
334 #define USED_IF_x86_64_HOST_ARCH STG_UNUSED
335 #endif
336
337 static char *allocateImageAndTrampolines (
338 pathchar* arch_name, char* member_name,
339 FILE* f,
340 int size,
341 int isThin);
342 #if defined(x86_64_HOST_ARCH)
343 static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
344 static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
345 #define PEi386_IMAGE_OFFSET 4
346 #else
347 #define PEi386_IMAGE_OFFSET 0
348 #endif
349 #elif defined(OBJFORMAT_MACHO)
350 static int ocVerifyImage_MachO ( ObjectCode* oc );
351 static int ocGetNames_MachO ( ObjectCode* oc );
352 static int ocResolve_MachO ( ObjectCode* oc );
353 static int ocRunInit_MachO ( ObjectCode* oc );
354 static int machoGetMisalignment( FILE * );
355 #if NEED_SYMBOL_EXTRAS
356 static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
357 #endif
358 #ifdef powerpc_HOST_ARCH
359 static void machoInitSymbolsWithoutUnderscore( void );
360 #endif
361 #endif
362
363 #if defined(OBJFORMAT_PEi386)
364 /* string utility function */
365 static HsBool endsWithPath(pathchar* base, pathchar* str) {
366 int blen = pathlen(base);
367 int slen = pathlen(str);
368 return (blen >= slen) && (0 == pathcmp(base + blen - slen, str));
369 }
370
371 static int checkAndLoadImportLibrary(
372 pathchar* arch_name,
373 char* member_name,
374 FILE* f);
375
376 static int findAndLoadImportLibrary(
377 ObjectCode* oc
378 );
379
380 static UChar *myindex(
381 int scale,
382 void* base,
383 int index);
384 static UChar *cstring_from_COFF_symbol_name(
385 UChar* name,
386 UChar* strtab);
387 static char *cstring_from_section_name(
388 UChar* name,
389 UChar* strtab);
390
391
392 /* Add ld symbol for PE image base. */
393 #if defined(__GNUC__)
394 #define __ImageBase __MINGW_LSYMBOL(_image_base__)
395 #endif
396
397 /* Get the base of the module. */
398 /* This symbol is defined by ld. */
399 extern IMAGE_DOS_HEADER __ImageBase;
400 #define __image_base (void*)((HINSTANCE)&__ImageBase)
401
402 // MingW-w64 is missing these from the implementation. So we have to look them up
403 typedef DLL_DIRECTORY_COOKIE(WINAPI *LPAddDLLDirectory)(PCWSTR NewDirectory);
404 typedef WINBOOL(WINAPI *LPRemoveDLLDirectory)(DLL_DIRECTORY_COOKIE Cookie);
405 #endif /* OBJFORMAT_PEi386 */
406
407 static void freeProddableBlocks (ObjectCode *oc);
408
409 /* on x86_64 we have a problem with relocating symbol references in
410 * code that was compiled without -fPIC. By default, the small memory
411 * model is used, which assumes that symbol references can fit in a
412 * 32-bit slot. The system dynamic linker makes this work for
413 * references to shared libraries by either (a) allocating a jump
414 * table slot for code references, or (b) moving the symbol at load
415 * time (and copying its contents, if necessary) for data references.
416 *
417 * We unfortunately can't tell whether symbol references are to code
418 * or data. So for now we assume they are code (the vast majority
419 * are), and allocate jump-table slots. Unfortunately this will
420 * SILENTLY generate crashing code for data references. This hack is
421 * enabled by X86_64_ELF_NONPIC_HACK.
422 *
423 * One workaround is to use shared Haskell libraries. This is
424 * coming. Another workaround is to keep the static libraries but
425 * compile them with -fPIC, because that will generate PIC references
426 * to data which can be relocated. The PIC code is still too green to
427 * do this systematically, though.
428 *
429 * See bug #781
430 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
431 *
432 * Naming Scheme for Symbol Macros
433 *
434 * SymI_*: symbol is internal to the RTS. It resides in an object
435 * file/library that is statically.
436 * SymE_*: symbol is external to the RTS library. It might be linked
437 * dynamically.
438 *
439 * Sym*_HasProto : the symbol prototype is imported in an include file
440 * or defined explicitly
441 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
442 * default proto extern void sym(void);
443 */
444 #define X86_64_ELF_NONPIC_HACK 1
445
446 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
447 * small memory model on this architecture (see gcc docs,
448 * -mcmodel=small).
449 *
450 * MAP_32BIT not available on OpenBSD/amd64
451 */
452 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
453 #define TRY_MAP_32BIT MAP_32BIT
454 #else
455 #define TRY_MAP_32BIT 0
456 #endif
457
458 /*
459 Note [The ARM/Thumb Story]
460 ~~~~~~~~~~~~~~~~~~~~~~~~~~
461
462 Support for the ARM architecture is complicated by the fact that ARM has not
463 one but several instruction encodings. The two relevant ones here are the original
464 ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
465 of the instruction set.
466
467 How the CPU decodes a particular instruction is determined by a mode bit. This
468 mode bit is set on jump instructions, the value being determined by the low
469 bit of the target address: An odd address means the target is a procedure
470 encoded in the Thumb encoding whereas an even address means it's a traditional
471 ARM procedure (the actual address jumped to is even regardless of the encoding bit).
472
473 Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
474 is tricky. If the linker needs to link a call by an ARM object into Thumb code
475 (or vice-versa) it will produce a jump island. This, however, is incompatible with
476 GHC's tables-next-to-code. For this reason, it is critical that GHC emit
477 exclusively ARM or Thumb objects for all Haskell code.
478
479 We still do, however, need to worry about foreign code.
480 */
481
482 /*
483 * Due to the small memory model (see above), on x86_64 we have to map
484 * all our non-PIC object files into the low 2Gb of the address space
485 * (why 2Gb and not 4Gb? Because all addresses must be reachable
486 * using a 32-bit signed PC-relative offset). On Linux we can do this
487 * using the MAP_32BIT flag to mmap(), however on other OSs
488 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
489 * can't do this. So on these systems, we have to pick a base address
490 * in the low 2Gb of the address space and try to allocate memory from
491 * there.
492 *
493 * We pick a default address based on the OS, but also make this
494 * configurable via an RTS flag (+RTS -xm)
495 */
496 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
497
498 #if defined(MAP_32BIT)
499 // Try to use MAP_32BIT
500 #define MMAP_32BIT_BASE_DEFAULT 0
501 #else
502 // A guess: 1Gb.
503 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
504 #endif
505
506 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
507 #endif
508
509 static void ghciRemoveSymbolTable(HashTable *table, const SymbolName* key,
510 ObjectCode *owner)
511 {
512 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
513 if (!pinfo || owner != pinfo->owner) return;
514 removeStrHashTable(table, key, NULL);
515 stgFree(pinfo);
516 }
517
518 /* -----------------------------------------------------------------------------
519 * Insert symbols into hash tables, checking for duplicates.
520 *
521 * Returns: 0 on failure, nonzero on success
522 */
523 /*
524 Note [weak-symbols-support]
525 -------------------------------------
526 While ghciInsertSymbolTable does implement extensive
527 logic for weak symbol support, weak symbols are not currently
528 fully supported by the RTS. This code is mostly here for COMDAT
529 support which uses the weak symbols support.
530
531 Linking weak symbols defined purely in C code with other C code
532 should also work, probably. Observing weak symbols in Haskell
533 won't.
534
535 Some test have been written for weak symbols but have been disabled
536 mostly because it's unsure how the weak symbols support should look.
537 See Trac #11223
538 */
539 static int ghciInsertSymbolTable(
540 pathchar* obj_name,
541 HashTable *table,
542 const SymbolName* key,
543 SymbolAddr* data,
544 HsBool weak,
545 ObjectCode *owner)
546 {
547 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
548 if (!pinfo) /* new entry */
549 {
550 pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
551 pinfo->value = data;
552 pinfo->owner = owner;
553 pinfo->weak = weak;
554 insertStrHashTable(table, key, pinfo);
555 return 1;
556 }
557 else if (weak && data && pinfo->weak && !pinfo->value)
558 {
559 /* The existing symbol is weak with a zero value; replace it with the new symbol. */
560 pinfo->value = data;
561 pinfo->owner = owner;
562 return 1;
563 }
564 else if (weak)
565 {
566 return 1; /* weak symbol, because the symbol is weak, data = 0 and we
567 already know of another copy throw this one away.
568
569 or both weak symbols have a nonzero value. Keep the existing one.
570
571 This also preserves the semantics of linking against
572 the first symbol we find. */
573 }
574 else if (pinfo->weak && !weak) /* weak symbol is in the table */
575 {
576 /* override the weak definition with the non-weak one */
577 pinfo->value = data;
578 pinfo->owner = owner;
579 pinfo->weak = HS_BOOL_FALSE;
580 return 1;
581 }
582 else if ( pinfo->owner
583 && pinfo->owner->status != OBJECT_RESOLVED
584 && pinfo->owner->status != OBJECT_NEEDED)
585 {
586 /* If the other symbol hasn't been loaded or will be loaded and we want to
587 explicitly load the new one, we can just swap it out and load the one
588 that has been requested. If not, just keep the first one encountered.
589
590 Because the `symHash' table consists symbols we've also not loaded but
591 found during the initial scan this is safe to do. If however the existing
592 symbol has been loaded then it means we have a duplicate.
593
594 This is essentially emulating the behavior of a linker wherein it will always
595 link in object files that are .o file arguments, but only take object files
596 from archives as needed. */
597 if (owner && (owner->status == OBJECT_NEEDED || owner->status == OBJECT_RESOLVED)) {
598 pinfo->value = data;
599 pinfo->owner = owner;
600 pinfo->weak = weak;
601 }
602
603 return 1;
604 }
605 else if (pinfo->owner == owner)
606 {
607 /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
608 return 1;
609 }
610 else if (owner && owner->status == OBJECT_LOADED)
611 {
612 /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in discovery of an
613 member. It's not a real duplicate yet. If the Oc Becomes OBJECT_NEEDED then ocTryLoad will
614 call this function again to trigger the duplicate error. */
615 return 1;
616 }
617
618 pathchar* archiveName = NULL;
619 debugBelch(
620 "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
621 " %s\n"
622 "whilst processing object file\n"
623 " %" PATH_FMT "\n"
624 "The symbol was previously defined in\n"
625 " %" PATH_FMT "\n"
626 "This could be caused by:\n"
627 " * Loading two different object files which export the same symbol\n"
628 " * Specifying the same object file twice on the GHCi command line\n"
629 " * An incorrect `package.conf' entry, causing some object to be\n"
630 " loaded twice.\n",
631 (char*)key,
632 obj_name,
633 pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
634 pinfo->owner->archiveMemberName ? archiveName = mkPath(pinfo->owner->archiveMemberName)
635 : pinfo->owner->fileName
636 );
637
638 if (archiveName)
639 {
640 stgFree(archiveName);
641 archiveName = NULL;
642 }
643 return 0;
644 }
645
646 /* -----------------------------------------------------------------------------
647 * Looks up symbols into hash tables.
648 *
649 * Returns: 0 on failure and result is not set,
650 * nonzero on success and result set to nonzero pointer
651 */
652 static HsBool ghciLookupSymbolInfo(HashTable *table,
653 const SymbolName* key, RtsSymbolInfo **result)
654 {
655 RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
656 if (!pinfo) {
657 *result = NULL;
658 return HS_BOOL_FALSE;
659 }
660 if (pinfo->weak)
661 IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
662 /* Once it's looked up, it can no longer be overridden */
663 pinfo->weak = HS_BOOL_FALSE;
664
665 *result = pinfo;
666 return HS_BOOL_TRUE;
667 }
668
669 /* -----------------------------------------------------------------------------
670 * initialize the object linker
671 */
672
673
674 static int linker_init_done = 0 ;
675
676 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
677 static void *dl_prog_handle;
678 static regex_t re_invalid;
679 static regex_t re_realso;
680 #ifdef THREADED_RTS
681 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
682 #endif
683 #elif defined(OBJFORMAT_PEi386)
684 void addDLLHandle(pathchar* dll_name, HINSTANCE instance);
685 #endif
686
687 void initLinker (void)
688 {
689 // default to retaining CAFs for backwards compatibility. Most
690 // users will want initLinker_(0): otherwise unloadObj() will not
691 // be able to unload object files when they contain CAFs.
692 initLinker_(1);
693 }
694
695 void
696 initLinker_ (int retain_cafs)
697 {
698 RtsSymbolVal *sym;
699 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
700 int compileResult;
701 #endif
702
703 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
704
705 /* Make initLinker idempotent, so we can call it
706 before every relevant operation; that means we
707 don't need to initialise the linker separately */
708 if (linker_init_done == 1) {
709 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
710 return;
711 } else {
712 linker_init_done = 1;
713 }
714
715 objects = NULL;
716 unloaded_objects = NULL;
717
718 #if defined(THREADED_RTS)
719 initMutex(&linker_mutex);
720 initMutex(&linker_unloaded_mutex);
721 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
722 initMutex(&dl_mutex);
723 #endif
724 #endif
725
726 symhash = allocStrHashTable();
727
728 /* populate the symbol table with stuff from the RTS */
729 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
730 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
731 symhash, sym->lbl, sym->addr, HS_BOOL_FALSE, NULL)) {
732 barf("ghciInsertSymbolTable failed");
733 }
734 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
735 }
736 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
737 machoInitSymbolsWithoutUnderscore();
738 # endif
739 /* GCC defines a special symbol __dso_handle which is resolved to NULL if
740 referenced from a statically linked module. We need to mimic this, but
741 we cannot use NULL because we use it to mean nonexistent symbols. So we
742 use an arbitrary (hopefully unique) address here.
743 */
744 if (! ghciInsertSymbolTable(WSTR("(GHCi special symbols)"),
745 symhash, "__dso_handle", (void *)0x12345687, HS_BOOL_FALSE, NULL)) {
746 barf("ghciInsertSymbolTable failed");
747 }
748
749 #if defined(OBJFORMAT_PEi386)
750 if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
751 symhash, "__image_base__", __image_base, HS_BOOL_TRUE, NULL)) {
752 barf("ghciInsertSymbolTable failed");
753 }
754 #endif /* OBJFORMAT_PEi386 */
755
756
757 // Redirect newCAF to newRetainedCAF if retain_cafs is true.
758 if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
759 MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
760 retain_cafs ? newRetainedCAF : newGCdCAF,
761 HS_BOOL_FALSE, NULL)) {
762 barf("ghciInsertSymbolTable failed");
763 }
764
765 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
766 # if defined(RTLD_DEFAULT)
767 dl_prog_handle = RTLD_DEFAULT;
768 # else
769 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
770 # endif /* RTLD_DEFAULT */
771
772 compileResult = regcomp(&re_invalid,
773 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
774 REG_EXTENDED);
775 if (compileResult != 0) {
776 barf("Compiling re_invalid failed");
777 }
778 compileResult = regcomp(&re_realso,
779 "(GROUP|INPUT) *\\( *([^ )]+)",
780 REG_EXTENDED);
781 if (compileResult != 0) {
782 barf("Compiling re_realso failed");
783 }
784 # endif
785
786 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
787 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
788 // User-override for mmap_32bit_base
789 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
790 }
791 #endif
792
793 #if defined(mingw32_HOST_OS)
794 /*
795 * These two libraries cause problems when added to the static link,
796 * but are necessary for resolving symbols in GHCi, hence we load
797 * them manually here.
798 */
799 addDLL(WSTR("msvcrt"));
800 addDLL(WSTR("kernel32"));
801 addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
802 #endif
803
804 if (RTS_LINKER_USE_MMAP)
805 m32_allocator_init();
806
807 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
808 return;
809 }
810
811 void
812 exitLinker( void ) {
813 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
814 if (linker_init_done == 1) {
815 regfree(&re_invalid);
816 regfree(&re_realso);
817 #ifdef THREADED_RTS
818 closeMutex(&dl_mutex);
819 #endif
820 }
821 #endif
822 if (linker_init_done == 1) {
823 freeHashTable(symhash, free);
824 }
825 #ifdef THREADED_RTS
826 closeMutex(&linker_mutex);
827 #endif
828 }
829
830 /* -----------------------------------------------------------------------------
831 * Loading DLL or .so dynamic libraries
832 * -----------------------------------------------------------------------------
833 *
834 * Add a DLL from which symbols may be found. In the ELF case, just
835 * do RTLD_GLOBAL-style add, so no further messing around needs to
836 * happen in order that symbols in the loaded .so are findable --
837 * lookupSymbol() will subsequently see them by dlsym on the program's
838 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
839 *
840 * In the PEi386 case, open the DLLs and put handles to them in a
841 * linked list. When looking for a symbol, try all handles in the
842 * list. This means that we need to load even DLLs that are guaranteed
843 * to be in the ghc.exe image already, just so we can get a handle
844 * to give to loadSymbol, so that we can find the symbols. For such
845 * libraries, the LoadLibrary call should be a no-op except for returning
846 * the handle.
847 *
848 */
849
850 #if defined(OBJFORMAT_PEi386)
851 /* A record for storing handles into DLLs. */
852
853 typedef
854 struct _OpenedDLL {
855 pathchar* name;
856 struct _OpenedDLL* next;
857 HINSTANCE instance;
858 }
859 OpenedDLL;
860
861 /* A list thereof. */
862 static OpenedDLL* opened_dlls = NULL;
863
864 /* A record for storing indirectly linked functions from DLLs. */
865 typedef
866 struct _IndirectAddr {
867 SymbolAddr* addr;
868 struct _IndirectAddr* next;
869 }
870 IndirectAddr;
871
872 /* A list thereof. */
873 static IndirectAddr* indirects = NULL;
874
875 /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
876 void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
877 OpenedDLL* o_dll;
878 o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );
879 o_dll->name = dll_name ? pathdup(dll_name) : NULL;
880 o_dll->instance = instance;
881 o_dll->next = opened_dlls;
882 opened_dlls = o_dll;
883 }
884
885 #endif
886
887 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
888
889 /* Suppose in ghci we load a temporary SO for a module containing
890 f = 1
891 and then modify the module, recompile, and load another temporary
892 SO with
893 f = 2
894 Then as we don't unload the first SO, dlsym will find the
895 f = 1
896 symbol whereas we want the
897 f = 2
898 symbol. We therefore need to keep our own SO handle list, and
899 try SOs in the right order. */
900
901 typedef
902 struct _OpenedSO {
903 struct _OpenedSO* next;
904 void *handle;
905 }
906 OpenedSO;
907
908 /* A list thereof. */
909 static OpenedSO* openedSOs = NULL;
910
911 static const char *
912 internal_dlopen(const char *dll_name)
913 {
914 OpenedSO* o_so;
915 void *hdl;
916 const char *errmsg;
917 char *errmsg_copy;
918
919 // omitted: RTLD_NOW
920 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
921 IF_DEBUG(linker,
922 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
923
924 //-------------- Begin critical section ------------------
925 // This critical section is necessary because dlerror() is not
926 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
927 // Also, the error message returned must be copied to preserve it
928 // (see POSIX also)
929
930 ACQUIRE_LOCK(&dl_mutex);
931 hdl = dlopen(dll_name, RTLD_LAZY|RTLD_LOCAL); /* see Note [RTLD_LOCAL] */
932
933 errmsg = NULL;
934 if (hdl == NULL) {
935 /* dlopen failed; return a ptr to the error msg. */
936 errmsg = dlerror();
937 if (errmsg == NULL) errmsg = "addDLL: unknown error";
938 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
939 strcpy(errmsg_copy, errmsg);
940 errmsg = errmsg_copy;
941 } else {
942 o_so = stgMallocBytes(sizeof(OpenedSO), "addDLL");
943 o_so->handle = hdl;
944 o_so->next = openedSOs;
945 openedSOs = o_so;
946 }
947
948 RELEASE_LOCK(&dl_mutex);
949 //--------------- End critical section -------------------
950
951 return errmsg;
952 }
953
954 /*
955 Note [RTLD_LOCAL]
956
957 In GHCi we want to be able to override previous .so's with newly
958 loaded .so's when we recompile something. This further implies that
959 when we look up a symbol in internal_dlsym() we have to iterate
960 through the loaded libraries (in order from most recently loaded to
961 oldest) looking up the symbol in each one until we find it.
962
963 However, this can cause problems for some symbols that are copied
964 by the linker into the executable image at runtime - see #8935 for a
965 lengthy discussion. To solve that problem we need to look up
966 symbols in the main executable *first*, before attempting to look
967 them up in the loaded .so's. But in order to make that work, we
968 have to always call dlopen with RTLD_LOCAL, so that the loaded
969 libraries don't populate the global symbol table.
970 */
971
972 static void *
973 internal_dlsym(const char *symbol) {
974 OpenedSO* o_so;
975 void *v;
976
977 // We acquire dl_mutex as concurrent dl* calls may alter dlerror
978 ACQUIRE_LOCK(&dl_mutex);
979 dlerror();
980 // look in program first
981 v = dlsym(dl_prog_handle, symbol);
982 if (dlerror() == NULL) {
983 RELEASE_LOCK(&dl_mutex);
984 return v;
985 }
986
987 for (o_so = openedSOs; o_so != NULL; o_so = o_so->next) {
988 v = dlsym(o_so->handle, symbol);
989 if (dlerror() == NULL) {
990 RELEASE_LOCK(&dl_mutex);
991 return v;
992 }
993 }
994 RELEASE_LOCK(&dl_mutex);
995 return v;
996 }
997 # endif
998
999 const char *
1000 addDLL( pathchar *dll_name )
1001 {
1002 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1003 /* ------------------- ELF DLL loader ------------------- */
1004
1005 #define NMATCH 5
1006 regmatch_t match[NMATCH];
1007 const char *errmsg;
1008 FILE* fp;
1009 size_t match_length;
1010 #define MAXLINE 1000
1011 char line[MAXLINE];
1012 int result;
1013
1014 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
1015 errmsg = internal_dlopen(dll_name);
1016
1017 if (errmsg == NULL) {
1018 return NULL;
1019 }
1020
1021 // GHC Trac ticket #2615
1022 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
1023 // contain linker scripts rather than ELF-format object code. This
1024 // code handles the situation by recognizing the real object code
1025 // file name given in the linker script.
1026 //
1027 // If an "invalid ELF header" error occurs, it is assumed that the
1028 // .so file contains a linker script instead of ELF object code.
1029 // In this case, the code looks for the GROUP ( ... ) linker
1030 // directive. If one is found, the first file name inside the
1031 // parentheses is treated as the name of a dynamic library and the
1032 // code attempts to dlopen that file. If this is also unsuccessful,
1033 // an error message is returned.
1034
1035 // see if the error message is due to an invalid ELF header
1036 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
1037 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
1038 IF_DEBUG(linker, debugBelch("result = %i\n", result));
1039 if (result == 0) {
1040 // success -- try to read the named file as a linker script
1041 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
1042 MAXLINE-1);
1043 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
1044 line[match_length] = '\0'; // make sure string is null-terminated
1045 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
1046 if ((fp = fopen(line, "r")) == NULL) {
1047 return errmsg; // return original error if open fails
1048 }
1049 // try to find a GROUP or INPUT ( ... ) command
1050 while (fgets(line, MAXLINE, fp) != NULL) {
1051 IF_DEBUG(linker, debugBelch("input line = %s", line));
1052 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
1053 // success -- try to dlopen the first named file
1054 IF_DEBUG(linker, debugBelch("match%s\n",""));
1055 line[match[2].rm_eo] = '\0';
1056 stgFree((void*)errmsg); // Free old message before creating new one
1057 errmsg = internal_dlopen(line+match[2].rm_so);
1058 break;
1059 }
1060 // if control reaches here, no GROUP or INPUT ( ... ) directive
1061 // was found and the original error message is returned to the
1062 // caller
1063 }
1064 fclose(fp);
1065 }
1066 return errmsg;
1067
1068 # elif defined(OBJFORMAT_PEi386)
1069 /* ------------------- Win32 DLL loader ------------------- */
1070
1071 pathchar* buf;
1072 OpenedDLL* o_dll;
1073 HINSTANCE instance;
1074
1075 IF_DEBUG(linker, debugBelch("\naddDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
1076
1077 /* See if we've already got it, and ignore if so. */
1078 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
1079 if (0 == pathcmp(o_dll->name, dll_name))
1080 return NULL;
1081 }
1082
1083 /* The file name has no suffix (yet) so that we can try
1084 both foo.dll and foo.drv
1085
1086 The documentation for LoadLibrary says:
1087 If no file name extension is specified in the lpFileName
1088 parameter, the default library extension .dll is
1089 appended. However, the file name string can include a trailing
1090 point character (.) to indicate that the module name has no
1091 extension. */
1092
1093 size_t bufsize = pathlen(dll_name) + 10;
1094 buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
1095
1096 /* These are ordered by probability of success and order we'd like them */
1097 const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
1098 const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
1099
1100 int cFormat;
1101 int cFlag;
1102 int flags_start = 1; // Assume we don't support the new API
1103
1104 /* Detect if newer API are available, if not, skip the first flags entry */
1105 if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
1106 flags_start = 0;
1107 }
1108
1109 /* Iterate through the possible flags and formats */
1110 for (cFlag = flags_start; cFlag < 2; cFlag++)
1111 {
1112 for (cFormat = 0; cFormat < 4; cFormat++)
1113 {
1114 snwprintf(buf, bufsize, formats[cFormat], dll_name);
1115 instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
1116 if (instance == NULL)
1117 {
1118 if (GetLastError() != ERROR_MOD_NOT_FOUND)
1119 {
1120 goto error;
1121 }
1122 }
1123 else
1124 {
1125 break; // We're done. DLL has been loaded.
1126 }
1127 }
1128 }
1129
1130 // Check if we managed to load the DLL
1131 if (instance == NULL) {
1132 goto error;
1133 }
1134
1135 stgFree(buf);
1136
1137 addDLLHandle(dll_name, instance);
1138
1139 return NULL;
1140
1141 error:
1142 stgFree(buf);
1143 sysErrorBelch("addDLL: %" PATH_FMT " (Win32 error %lu)", dll_name, GetLastError());
1144
1145 /* LoadLibrary failed; return a ptr to the error msg. */
1146 return "addDLL: could not load DLL";
1147
1148 # else
1149 barf("addDLL: not implemented on this platform");
1150 # endif
1151 }
1152
1153 /* -----------------------------------------------------------------------------
1154 * Searches the system directories to determine if there is a system DLL that
1155 * satisfies the given name. This prevent GHCi from linking against a static
1156 * library if a DLL is available.
1157 *
1158 * Returns: NULL on failure or no DLL found, else the full path to the DLL
1159 * that can be loaded.
1160 */
1161 pathchar* findSystemLibrary(pathchar* dll_name)
1162 {
1163 IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%" PATH_FMT "'\n", dll_name));
1164
1165 #if defined(OBJFORMAT_PEi386)
1166 const unsigned int init_buf_size = 1024;
1167 unsigned int bufsize = init_buf_size;
1168 wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
1169 DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
1170
1171 if (wResult > bufsize) {
1172 result = realloc(result, sizeof(wchar_t) * wResult);
1173 wResult = SearchPathW(NULL, dll_name, NULL, wResult, result, NULL);
1174 }
1175
1176
1177 if (!wResult) {
1178 free(result);
1179 return NULL;
1180 }
1181
1182 return result;
1183 #else
1184 (void)(dll_name); // Function not implemented for other platforms.
1185 return NULL;
1186 #endif
1187 }
1188
1189 /* -----------------------------------------------------------------------------
1190 * Emits a warning determining that the system is missing a required security
1191 * update that we need to get access to the proper APIs
1192 */
1193 void warnMissingKBLibraryPaths( void )
1194 {
1195 static HsBool missing_update_warn = HS_BOOL_FALSE;
1196 if (!missing_update_warn) {
1197 debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
1198 missing_update_warn = HS_BOOL_TRUE;
1199 }
1200 }
1201
1202 /* -----------------------------------------------------------------------------
1203 * appends a directory to the process DLL Load path so LoadLibrary can find it
1204 *
1205 * Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
1206 * restore the search path to what it was before this call.
1207 */
1208 HsPtr addLibrarySearchPath(pathchar* dll_path)
1209 {
1210 IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%" PATH_FMT "'\n", dll_path));
1211
1212 #if defined(OBJFORMAT_PEi386)
1213 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1214 LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
1215
1216 HsPtr result = NULL;
1217
1218 const unsigned int init_buf_size = 4096;
1219 int bufsize = init_buf_size;
1220
1221 // Make sure the path is an absolute path
1222 WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
1223 DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
1224 if (!wResult){
1225 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1226 }
1227 else if (wResult > init_buf_size) {
1228 abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
1229 if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
1230 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1231 }
1232 }
1233
1234 if (AddDllDirectory) {
1235 result = AddDllDirectory(abs_path);
1236 }
1237 else
1238 {
1239 warnMissingKBLibraryPaths();
1240 WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
1241 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1242
1243 if (wResult > init_buf_size) {
1244 str = realloc(str, sizeof(WCHAR) * wResult);
1245 bufsize = wResult;
1246 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
1247 if (!wResult) {
1248 sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
1249 }
1250 }
1251
1252 bufsize = wResult + 2 + pathlen(abs_path);
1253 wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
1254
1255 wcscpy(newPath, abs_path);
1256 wcscat(newPath, L";");
1257 wcscat(newPath, str);
1258 if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
1259 sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1260 }
1261
1262 free(newPath);
1263 free(abs_path);
1264
1265 return str;
1266 }
1267
1268 if (!result) {
1269 sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
1270 free(abs_path);
1271 return NULL;
1272 }
1273
1274 free(abs_path);
1275 return result;
1276 #else
1277 (void)(dll_path); // Function not implemented for other platforms.
1278 return NULL;
1279 #endif
1280 }
1281
1282 /* -----------------------------------------------------------------------------
1283 * removes a directory from the process DLL Load path
1284 *
1285 * Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
1286 */
1287 HsBool removeLibrarySearchPath(HsPtr dll_path_index)
1288 {
1289 IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n", dll_path_index));
1290
1291 #if defined(OBJFORMAT_PEi386)
1292 HsBool result = 0;
1293
1294 if (dll_path_index != NULL) {
1295 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
1296 LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
1297
1298 if (RemoveDllDirectory) {
1299 result = RemoveDllDirectory(dll_path_index);
1300 // dll_path_index is now invalid, do not use it after this point.
1301 }
1302 else
1303 {
1304 warnMissingKBLibraryPaths();
1305 result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
1306 free(dll_path_index);
1307 }
1308
1309 if (!result) {
1310 sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
1311 return HS_BOOL_FALSE;
1312 }
1313 }
1314
1315 return result == 0 ? HS_BOOL_TRUE : HS_BOOL_FALSE;
1316 #else
1317 (void)(dll_path_index); // Function not implemented for other platforms.
1318 return HS_BOOL_FALSE;
1319 #endif
1320 }
1321
1322 /* -----------------------------------------------------------------------------
1323 * insert a symbol in the hash table
1324 *
1325 * Returns: 0 on failure, nozero on success
1326 */
1327 HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
1328 {
1329 return ghciInsertSymbolTable(obj_name, symhash, key, data, HS_BOOL_FALSE, NULL);
1330 }
1331
1332 /* -----------------------------------------------------------------------------
1333 * lookup a symbol in the hash table
1334 */
1335 static SymbolAddr* lookupSymbol_ (SymbolName* lbl)
1336 {
1337 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
1338
1339 ASSERT(symhash != NULL);
1340 RtsSymbolInfo *pinfo;
1341
1342 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
1343 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
1344
1345 # if defined(OBJFORMAT_ELF)
1346 return internal_dlsym(lbl);
1347 # elif defined(OBJFORMAT_MACHO)
1348
1349 /* HACK: On OS X, all symbols are prefixed with an underscore.
1350 However, dlsym wants us to omit the leading underscore from the
1351 symbol name -- the dlsym routine puts it back on before searching
1352 for the symbol. For now, we simply strip it off here (and ONLY
1353 here).
1354 */
1355 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
1356 ASSERT(lbl[0] == '_');
1357 return internal_dlsym(lbl + 1);
1358 # elif defined(OBJFORMAT_PEi386)
1359 SymbolAddr* sym;
1360
1361 /* See Note [mingw-w64 name decoration scheme] */
1362 #ifndef x86_64_HOST_ARCH
1363 zapTrailingAtSign ( (unsigned char*)lbl );
1364 #endif
1365 sym = lookupSymbolInDLLs((unsigned char*)lbl);
1366 return sym; // might be NULL if not found
1367
1368 # else
1369 ASSERT(2+2 == 5);
1370 return NULL;
1371 # endif
1372 } else {
1373 #if defined(mingw32_HOST_OS)
1374 // If Windows, perform initialization of uninitialized
1375 // Symbols from the C runtime which was loaded above.
1376 // We do this on lookup to prevent the hit when
1377 // The symbol isn't being used.
1378 if (pinfo->value == (void*)0xBAADF00D)
1379 {
1380 char symBuffer[50];
1381 sprintf(symBuffer, "_%s", lbl);
1382 pinfo->value = GetProcAddress(GetModuleHandle("msvcrt"), symBuffer);
1383 }
1384 #endif
1385 SymbolAddr* val = pinfo->value;
1386 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, val));
1387
1388 int r;
1389 ObjectCode* oc = pinfo->owner;
1390
1391 /* Symbol can be found during linking, but hasn't been relocated. Do so now.
1392 See Note [runtime-linker-phases] */
1393 if (oc && oc->status == OBJECT_LOADED) {
1394 oc->status = OBJECT_NEEDED;
1395 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand loading symbol '%s'\n", lbl));
1396 r = ocTryLoad(oc);
1397
1398 if (!r) {
1399 errorBelch("Could not on-demand load symbol '%s'\n", lbl);
1400 return NULL;
1401 }
1402 #ifdef PROFILING
1403 // collect any new cost centres & CCSs
1404 // that were defined during runInit
1405 initProfiling2();
1406 #endif
1407 }
1408
1409 return val;
1410 }
1411 }
1412
1413 SymbolAddr* lookupSymbol( SymbolName* lbl )
1414 {
1415 ACQUIRE_LOCK(&linker_mutex);
1416 SymbolAddr* r = lookupSymbol_(lbl);
1417 RELEASE_LOCK(&linker_mutex);
1418 return r;
1419 }
1420
1421 /* -----------------------------------------------------------------------------
1422 Create a StablePtr for a foreign export. This is normally called by
1423 a C function with __attribute__((constructor)), which is generated
1424 by GHC and linked into the module.
1425
1426 If the object code is being loaded dynamically, then we remember
1427 which StablePtrs were allocated by the constructors and free them
1428 again in unloadObj().
1429 -------------------------------------------------------------------------- */
1430
1431 static ObjectCode *loading_obj = NULL;
1432
1433 StgStablePtr foreignExportStablePtr (StgPtr p)
1434 {
1435 ForeignExportStablePtr *fe_sptr;
1436 StgStablePtr *sptr;
1437
1438 sptr = getStablePtr(p);
1439
1440 if (loading_obj != NULL) {
1441 fe_sptr = stgMallocBytes(sizeof(ForeignExportStablePtr),
1442 "foreignExportStablePtr");
1443 fe_sptr->stable_ptr = sptr;
1444 fe_sptr->next = loading_obj->stable_ptrs;
1445 loading_obj->stable_ptrs = fe_sptr;
1446 }
1447
1448 return sptr;
1449 }
1450
1451
1452 /* -----------------------------------------------------------------------------
1453 * Debugging aid: look in GHCi's object symbol tables for symbols
1454 * within DELTA bytes of the specified address, and show their names.
1455 */
1456 #ifdef DEBUG
1457 void ghci_enquire ( SymbolAddr* addr );
1458
1459 void ghci_enquire(SymbolAddr* addr)
1460 {
1461 int i;
1462 SymbolName* sym;
1463 RtsSymbolInfo* a;
1464 const int DELTA = 64;
1465 ObjectCode* oc;
1466
1467 for (oc = objects; oc; oc = oc->next) {
1468 for (i = 0; i < oc->n_symbols; i++) {
1469 sym = oc->symbols[i];
1470 if (sym == NULL) continue;
1471 a = NULL;
1472 if (a == NULL) {
1473 ghciLookupSymbolInfo(symhash, sym, &a);
1474 }
1475 if (a == NULL) {
1476 // debugBelch("ghci_enquire: can't find %s\n", sym);
1477 }
1478 else if ( a->value
1479 && (char*)addr-DELTA <= (char*)a->value
1480 && (char*)a->value <= (char*)addr+DELTA) {
1481 debugBelch("%p + %3d == `%s'\n", addr, (int)((char*)a->value - (char*)addr), sym);
1482 }
1483 }
1484 }
1485 }
1486 #endif
1487
1488 #if RTS_LINKER_USE_MMAP
1489 //
1490 // Returns NULL on failure.
1491 //
1492 void *
1493 mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset)
1494 {
1495 void *map_addr = NULL;
1496 void *result;
1497 size_t size;
1498 static uint32_t fixed = 0;
1499
1500 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1501 size = roundUpToPage(bytes);
1502
1503 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1504 mmap_again:
1505
1506 if (mmap_32bit_base != 0) {
1507 map_addr = mmap_32bit_base;
1508 }
1509 #endif
1510
1511 IF_DEBUG(linker,
1512 debugBelch("mmapForLinker: \tprotection %#0x\n",
1513 PROT_EXEC | PROT_READ | PROT_WRITE));
1514 IF_DEBUG(linker,
1515 debugBelch("mmapForLinker: \tflags %#0x\n",
1516 MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1517
1518 result = mmap(map_addr, size,
1519 PROT_EXEC|PROT_READ|PROT_WRITE,
1520 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, offset);
1521
1522 if (result == MAP_FAILED) {
1523 sysErrorBelch("mmap %" FMT_Word " bytes at %p",(W_)size,map_addr);
1524 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1525 return NULL;
1526 }
1527
1528 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1529 if (mmap_32bit_base != 0) {
1530 if (result == map_addr) {
1531 mmap_32bit_base = (StgWord8*)map_addr + size;
1532 } else {
1533 if ((W_)result > 0x80000000) {
1534 // oops, we were given memory over 2Gb
1535 munmap(result,size);
1536 #if defined(freebsd_HOST_OS) || \
1537 defined(kfreebsdgnu_HOST_OS) || \
1538 defined(dragonfly_HOST_OS)
1539 // Some platforms require MAP_FIXED. This is normally
1540 // a bad idea, because MAP_FIXED will overwrite
1541 // existing mappings.
1542 fixed = MAP_FIXED;
1543 goto mmap_again;
1544 #else
1545 errorBelch("loadObj: failed to mmap() memory below 2Gb; "
1546 "asked for %lu bytes at %p. "
1547 "Try specifying an address with +RTS -xm<addr> -RTS",
1548 size, map_addr);
1549 return NULL;
1550 #endif
1551 } else {
1552 // hmm, we were given memory somewhere else, but it's
1553 // still under 2Gb so we can use it. Next time, ask
1554 // for memory right after the place we just got some
1555 mmap_32bit_base = (StgWord8*)result + size;
1556 }
1557 }
1558 } else {
1559 if ((W_)result > 0x80000000) {
1560 // oops, we were given memory over 2Gb
1561 // ... try allocating memory somewhere else?;
1562 debugTrace(DEBUG_linker,
1563 "MAP_32BIT didn't work; gave us %lu bytes at 0x%p",
1564 bytes, result);
1565 munmap(result, size);
1566
1567 // Set a base address and try again... (guess: 1Gb)
1568 mmap_32bit_base = (void*)0x40000000;
1569 goto mmap_again;
1570 }
1571 }
1572 #endif
1573
1574 IF_DEBUG(linker,
1575 debugBelch("mmapForLinker: mapped %" FMT_Word
1576 " bytes starting at %p\n", (W_)size, result));
1577 IF_DEBUG(linker,
1578 debugBelch("mmapForLinker: done\n"));
1579
1580 return result;
1581 }
1582 #endif
1583
1584 /*
1585 * Remove symbols from the symbol table, and free oc->symbols.
1586 * This operation is idempotent.
1587 */
1588 static void removeOcSymbols (ObjectCode *oc)
1589 {
1590 if (oc->symbols == NULL) return;
1591
1592 // Remove all the mappings for the symbols within this object..
1593 int i;
1594 for (i = 0; i < oc->n_symbols; i++) {
1595 if (oc->symbols[i] != NULL) {
1596 ghciRemoveSymbolTable(symhash, oc->symbols[i], oc);
1597 }
1598 }
1599
1600 stgFree(oc->symbols);
1601 oc->symbols = NULL;
1602 }
1603
1604 /*
1605 * Release StablePtrs and free oc->stable_ptrs.
1606 * This operation is idempotent.
1607 */
1608 static void freeOcStablePtrs (ObjectCode *oc)
1609 {
1610 // Release any StablePtrs that were created when this
1611 // object module was initialized.
1612 ForeignExportStablePtr *fe_ptr, *next;
1613
1614 for (fe_ptr = oc->stable_ptrs; fe_ptr != NULL; fe_ptr = next) {
1615 next = fe_ptr->next;
1616 freeStablePtr(fe_ptr->stable_ptr);
1617 stgFree(fe_ptr);
1618 }
1619 oc->stable_ptrs = NULL;
1620 }
1621
1622 static void
1623 freePreloadObjectFile (ObjectCode *oc)
1624 {
1625 #if defined(mingw32_HOST_OS)
1626
1627 VirtualFree(oc->image - PEi386_IMAGE_OFFSET, 0, MEM_RELEASE);
1628
1629 IndirectAddr *ia, *ia_next;
1630 ia = indirects;
1631 while (ia != NULL) {
1632 ia_next = ia->next;
1633 stgFree(ia);
1634 ia = ia_next;
1635 }
1636 indirects = NULL;
1637
1638 #else
1639
1640 if (RTS_LINKER_USE_MMAP && oc->imageMapped) {
1641 munmap(oc->image, oc->fileSize);
1642 }
1643 else {
1644 stgFree(oc->image);
1645 }
1646
1647 #endif
1648
1649 oc->image = NULL;
1650 oc->fileSize = 0;
1651 }
1652
1653 /*
1654 * freeObjectCode() releases all the pieces of an ObjectCode. It is called by
1655 * the GC when a previously unloaded ObjectCode has been determined to be
1656 * unused, and when an error occurs during loadObj().
1657 */
1658 void freeObjectCode (ObjectCode *oc)
1659 {
1660 freePreloadObjectFile(oc);
1661
1662 if (oc->symbols != NULL) {
1663 stgFree(oc->symbols);
1664 oc->symbols = NULL;
1665 }
1666
1667 if (oc->extraInfos != NULL) {
1668 freeHashTable(oc->extraInfos, NULL);
1669 oc->extraInfos = NULL;
1670 }
1671
1672 if (oc->sections != NULL) {
1673 int i;
1674 for (i=0; i < oc->n_sections; i++) {
1675 if (oc->sections[i].start != NULL) {
1676 switch(oc->sections[i].alloc){
1677 #if RTS_LINKER_USE_MMAP
1678 case SECTION_MMAP:
1679 munmap(oc->sections[i].mapped_start,
1680 oc->sections[i].mapped_size);
1681 break;
1682 case SECTION_M32:
1683 m32_free(oc->sections[i].start,
1684 oc->sections[i].size);
1685 break;
1686 #endif
1687 case SECTION_MALLOC:
1688 stgFree(oc->sections[i].start);
1689 break;
1690 default:
1691 break;
1692 }
1693 }
1694 }
1695 stgFree(oc->sections);
1696 }
1697
1698 freeProddableBlocks(oc);
1699
1700 /* Free symbol_extras. On x86_64 Windows, symbol_extras are allocated
1701 * alongside the image, so we don't need to free. */
1702 #if NEED_SYMBOL_EXTRAS && (!defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS))
1703 if (RTS_LINKER_USE_MMAP) {
1704 if (!USE_CONTIGUOUS_MMAP && oc->symbol_extras != NULL) {
1705 m32_free(oc->symbol_extras,
1706 sizeof(SymbolExtra) * oc->n_symbol_extras);
1707 }
1708 }
1709 else {
1710 stgFree(oc->symbol_extras);
1711 }
1712 #endif
1713
1714 stgFree(oc->fileName);
1715 stgFree(oc->archiveMemberName);
1716
1717 stgFree(oc);
1718 }
1719
1720 /* -----------------------------------------------------------------------------
1721 * Sets the initial status of a fresh ObjectCode
1722 */
1723 static void setOcInitialStatus(ObjectCode* oc) {
1724 if (oc->archiveMemberName == NULL) {
1725 oc->status = OBJECT_NEEDED;
1726 } else {
1727 oc->status = OBJECT_LOADED;
1728 }
1729 }
1730
1731 static ObjectCode*
1732 mkOc( pathchar *path, char *image, int imageSize,
1733 rtsBool mapped, char *archiveMemberName, int misalignment ) {
1734 ObjectCode* oc;
1735
1736 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1737 oc = stgMallocBytes(sizeof(ObjectCode), "mkOc(oc)");
1738
1739 # if defined(OBJFORMAT_ELF)
1740 oc->formatName = "ELF";
1741 # elif defined(OBJFORMAT_PEi386)
1742 oc->formatName = "PEi386";
1743 # elif defined(OBJFORMAT_MACHO)
1744 oc->formatName = "Mach-O";
1745 # else
1746 stgFree(oc);
1747 barf("loadObj: not implemented on this platform");
1748 # endif
1749
1750 oc->image = image;
1751 oc->fileName = pathdup(path);
1752
1753 if (archiveMemberName) {
1754 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1755 strcpy(oc->archiveMemberName, archiveMemberName);
1756 } else {
1757 oc->archiveMemberName = NULL;
1758 }
1759
1760 setOcInitialStatus( oc );
1761
1762 oc->fileSize = imageSize;
1763 oc->symbols = NULL;
1764 oc->n_sections = 0;
1765 oc->sections = NULL;
1766 oc->proddables = NULL;
1767 oc->stable_ptrs = NULL;
1768 #if NEED_SYMBOL_EXTRAS
1769 oc->symbol_extras = NULL;
1770 #endif
1771 oc->imageMapped = mapped;
1772
1773 oc->misalignment = misalignment;
1774 oc->extraInfos = NULL;
1775
1776 /* chain it onto the list of objects */
1777 oc->next = NULL;
1778
1779 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1780 return oc;
1781 }
1782
1783 /* -----------------------------------------------------------------------------
1784 * Check if an object or archive is already loaded.
1785 *
1786 * Returns: 1 if the path is already loaded, 0 otherwise.
1787 */
1788 static HsInt
1789 isAlreadyLoaded( pathchar *path )
1790 {
1791 ObjectCode *o;
1792 for (o = objects; o; o = o->next) {
1793 if (0 == pathcmp(o->fileName, path)) {
1794 return 1; /* already loaded */
1795 }
1796 }
1797 return 0; /* not loaded yet */
1798 }
1799
1800 static HsInt loadArchive_ (pathchar *path)
1801 {
1802 ObjectCode* oc;
1803 char *image;
1804 int memberSize;
1805 FILE *f;
1806 int n;
1807 size_t thisFileNameSize;
1808 char *fileName;
1809 size_t fileNameSize;
1810 int isObject, isGnuIndex, isThin, isImportLib;
1811 char tmp[20];
1812 char *gnuFileIndex;
1813 int gnuFileIndexSize;
1814 #if defined(darwin_HOST_OS)
1815 int i;
1816 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
1817 #if defined(i386_HOST_ARCH)
1818 const uint32_t mycputype = CPU_TYPE_X86;
1819 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
1820 #elif defined(x86_64_HOST_ARCH)
1821 const uint32_t mycputype = CPU_TYPE_X86_64;
1822 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
1823 #elif defined(powerpc_HOST_ARCH)
1824 const uint32_t mycputype = CPU_TYPE_POWERPC;
1825 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1826 #elif defined(powerpc64_HOST_ARCH)
1827 const uint32_t mycputype = CPU_TYPE_POWERPC64;
1828 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1829 #else
1830 #error Unknown Darwin architecture
1831 #endif
1832 #endif
1833 int misalignment = 0;
1834
1835 /* TODO: don't call barf() on error, instead return an error code, freeing
1836 * all resources correctly. This function is pretty complex, so it needs
1837 * to be refactored to make this practical. */
1838
1839 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
1840 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
1841
1842 /* Check that we haven't already loaded this archive.
1843 Ignore requests to load multiple times */
1844 if (isAlreadyLoaded(path)) {
1845 IF_DEBUG(linker,
1846 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
1847 return 1; /* success */
1848 }
1849
1850 gnuFileIndex = NULL;
1851 gnuFileIndexSize = 0;
1852
1853 fileNameSize = 32;
1854 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
1855
1856 isThin = 0;
1857 isImportLib = 0;
1858
1859 f = pathopen(path, WSTR("rb"));
1860 if (!f)
1861 barf("loadObj: can't read `%" PATH_FMT "'", path);
1862
1863 /* Check if this is an archive by looking for the magic "!<arch>\n"
1864 * string. Usually, if this fails, we barf and quit. On Darwin however,
1865 * we may have a fat archive, which contains archives for more than
1866 * one architecture. Fat archives start with the magic number 0xcafebabe,
1867 * always stored big endian. If we find a fat_header, we scan through
1868 * the fat_arch structs, searching through for one for our host
1869 * architecture. If a matching struct is found, we read the offset
1870 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
1871 * from the start of the file.
1872 *
1873 * A subtlety is that all of the members of the fat_header and fat_arch
1874 * structs are stored big endian, so we need to call byte order
1875 * conversion functions.
1876 *
1877 * If we find the appropriate architecture in a fat archive, we gobble
1878 * its magic "!<arch>\n" string and continue processing just as if
1879 * we had a single architecture archive.
1880 */
1881
1882 n = fread ( tmp, 1, 8, f );
1883 if (n != 8)
1884 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
1885 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
1886 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
1887 *
1888 * ar thin libraries have the exact same format as normal archives except they
1889 * have a different magic string and they don't copy the object files into the
1890 * archive.
1891 *
1892 * Instead each header entry points to the location of the object file on disk.
1893 * This is useful when a library is only created to satisfy a compile time dependency
1894 * instead of to be distributed. This saves the time required for copying.
1895 *
1896 * Thin archives are always flattened. They always only contain simple headers
1897 * pointing to the object file and so we need not allocate more memory than needed
1898 * to find the object file.
1899 *
1900 */
1901 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
1902 isThin = 1;
1903 }
1904 #if defined(darwin_HOST_OS)
1905 /* Not a standard archive, look for a fat archive magic number: */
1906 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
1907 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
1908 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
1909 nfat_offset = 0;
1910
1911 for (i = 0; i < (int)nfat_arch; i++) {
1912 /* search for the right arch */
1913 n = fread( tmp, 1, 20, f );
1914 if (n != 8)
1915 barf("loadArchive: Failed reading arch from `%s'", path);
1916 cputype = ntohl(*(uint32_t *)tmp);
1917 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
1918
1919 if (cputype == mycputype && cpusubtype == mycpusubtype) {
1920 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
1921 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
1922 break;
1923 }
1924 }
1925
1926 if (nfat_offset == 0) {
1927 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
1928 }
1929 else {
1930 n = fseek( f, nfat_offset, SEEK_SET );
1931 if (n != 0)
1932 barf("loadArchive: Failed to seek to arch in `%s'", path);
1933 n = fread ( tmp, 1, 8, f );
1934 if (n != 8)
1935 barf("loadArchive: Failed reading header from `%s'", path);
1936 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1937 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
1938 }
1939 }
1940 }
1941 else {
1942 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
1943 }
1944 #else
1945 else {
1946 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
1947 }
1948 #endif
1949
1950 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
1951
1952 while (1) {
1953 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
1954 n = fread ( fileName, 1, 16, f );
1955 if (n != 16) {
1956 if (feof(f)) {
1957 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
1958 break;
1959 }
1960 else {
1961 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
1962 }
1963 }
1964
1965 #if defined(darwin_HOST_OS)
1966 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
1967 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
1968 break;
1969 }
1970 #endif
1971
1972 n = fread ( tmp, 1, 12, f );
1973 if (n != 12)
1974 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
1975 n = fread ( tmp, 1, 6, f );
1976 if (n != 6)
1977 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
1978 n = fread ( tmp, 1, 6, f );
1979 if (n != 6)
1980 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
1981 n = fread ( tmp, 1, 8, f );
1982 if (n != 8)
1983 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
1984 n = fread ( tmp, 1, 10, f );
1985 if (n != 10)
1986 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
1987 tmp[10] = '\0';
1988 for (n = 0; isdigit(tmp[n]); n++);
1989 tmp[n] = '\0';
1990 memberSize = atoi(tmp);
1991
1992 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
1993 n = fread ( tmp, 1, 2, f );
1994 if (n != 2)
1995 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
1996 if (strncmp(tmp, "\x60\x0A", 2) != 0)
1997 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
1998 path, ftell(f), tmp[0], tmp[1]);
1999
2000 isGnuIndex = 0;
2001 /* Check for BSD-variant large filenames */
2002 if (0 == strncmp(fileName, "#1/", 3)) {
2003 fileName[16] = '\0';
2004 if (isdigit(fileName[3])) {
2005 for (n = 4; isdigit(fileName[n]); n++);
2006 fileName[n] = '\0';
2007 thisFileNameSize = atoi(fileName + 3);
2008 memberSize -= thisFileNameSize;
2009 if (thisFileNameSize >= fileNameSize) {
2010 /* Double it to avoid potentially continually
2011 increasing it by 1 */
2012 fileNameSize = thisFileNameSize * 2;
2013 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2014 }
2015 n = fread ( fileName, 1, thisFileNameSize, f );
2016 if (n != (int)thisFileNameSize) {
2017 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
2018 path);
2019 }
2020 fileName[thisFileNameSize] = 0;
2021
2022 /* On OS X at least, thisFileNameSize is the size of the
2023 fileName field, not the length of the fileName
2024 itself. */
2025 thisFileNameSize = strlen(fileName);
2026 }
2027 else {
2028 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
2029 }
2030 }
2031 /* Check for GNU file index file */
2032 else if (0 == strncmp(fileName, "//", 2)) {
2033 fileName[0] = '\0';
2034 thisFileNameSize = 0;
2035 isGnuIndex = 1;
2036 }
2037 /* Check for a file in the GNU file index */
2038 else if (fileName[0] == '/') {
2039 if (isdigit(fileName[1])) {
2040 int i;
2041
2042 for (n = 2; isdigit(fileName[n]); n++);
2043 fileName[n] = '\0';
2044 n = atoi(fileName + 1);
2045
2046 if (gnuFileIndex == NULL) {
2047 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
2048 }
2049 if (n < 0 || n > gnuFileIndexSize) {
2050 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
2051 }
2052 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
2053 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
2054 }
2055 for (i = n; gnuFileIndex[i] != '\n'; i++);
2056 thisFileNameSize = i - n - 1;
2057 if (thisFileNameSize >= fileNameSize) {
2058 /* Double it to avoid potentially continually
2059 increasing it by 1 */
2060 fileNameSize = thisFileNameSize * 2;
2061 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2062 }
2063 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
2064 fileName[thisFileNameSize] = '\0';
2065 }
2066 else if (fileName[1] == ' ') {
2067 fileName[0] = '\0';
2068 thisFileNameSize = 0;
2069 }
2070 else {
2071 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
2072 }
2073 }
2074 /* Finally, the case where the filename field actually contains
2075 the filename */
2076 else {
2077 /* GNU ar terminates filenames with a '/', this allowing
2078 spaces in filenames. So first look to see if there is a
2079 terminating '/'. */
2080 for (thisFileNameSize = 0;
2081 thisFileNameSize < 16;
2082 thisFileNameSize++) {
2083 if (fileName[thisFileNameSize] == '/') {
2084 fileName[thisFileNameSize] = '\0';
2085 break;
2086 }
2087 }
2088 /* If we didn't find a '/', then a space teminates the
2089 filename. Note that if we don't find one, then
2090 thisFileNameSize ends up as 16, and we already have the
2091 '\0' at the end. */
2092 if (thisFileNameSize == 16) {
2093 for (thisFileNameSize = 0;
2094 thisFileNameSize < 16;
2095 thisFileNameSize++) {
2096 if (fileName[thisFileNameSize] == ' ') {
2097 fileName[thisFileNameSize] = '\0';
2098 break;
2099 }
2100 }
2101 }
2102 }
2103
2104 IF_DEBUG(linker,
2105 debugBelch("loadArchive: Found member file `%s'\n", fileName));
2106
2107 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
2108 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
2109
2110 #if defined(OBJFORMAT_PEi386)
2111 /*
2112 * Note [MSVC import files (ext .lib)]
2113 * MSVC compilers store the object files in
2114 * the import libraries with extension .dll
2115 * so on Windows we should look for those too.
2116 * The PE COFF format doesn't specify any specific file name
2117 * for sections. So on windows, just try to load it all.
2118 *
2119 * Linker members (e.g. filename / are skipped since they are not needed)
2120 */
2121 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
2122
2123 /*
2124 * Note [GCC import files (ext .dll.a)]
2125 * GCC stores import information in the same binary format
2126 * as the object file normally has. The only difference is that
2127 * all the information are put in .idata sections. The only real
2128 * way to tell if we're dealing with an import lib is by looking
2129 * at the file extension.
2130 */
2131 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
2132 #endif // windows
2133
2134 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
2135 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
2136
2137 if (isObject) {
2138 char *archiveMemberName;
2139
2140 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
2141
2142 #if defined(mingw32_HOST_OS)
2143 // TODO: We would like to use allocateExec here, but allocateExec
2144 // cannot currently allocate blocks large enough.
2145 image = allocateImageAndTrampolines(path, fileName, f, memberSize,
2146 isThin);
2147 #elif defined(darwin_HOST_OS)
2148 if (RTS_LINKER_USE_MMAP)
2149 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
2150 else {
2151 /* See loadObj() */
2152 misalignment = machoGetMisalignment(f);
2153 image = stgMallocBytes(memberSize + misalignment,
2154 "loadArchive(image)");
2155 image += misalignment;
2156 }
2157
2158 #else // not windows or darwin
2159 image = stgMallocBytes(memberSize, "loadArchive(image)");
2160 #endif
2161 if (isThin) {
2162 FILE *member;
2163 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
2164
2165 /* Allocate and setup the dirname of the archive. We'll need
2166 this to locate the thin member */
2167 pathCopy = pathdup(path); // Convert the char* to a pathchar*
2168 dirName = pathdir(pathCopy);
2169
2170 /* Append the relative member name to the dirname. This should be
2171 be the full path to the actual thin member. */
2172 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
2173 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
2174 objFileName = mkPath(fileName);
2175 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
2176 stgFree(objFileName);
2177 stgFree(dirName);
2178
2179 member = pathopen(memberPath, WSTR("rb"));
2180 if (!member)
2181 barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
2182
2183 n = fread ( image, 1, memberSize, member );
2184 if (n != memberSize) {
2185 barf("loadArchive: error whilst reading `%s'", fileName);
2186 }
2187
2188 fclose(member);
2189 stgFree(memberPath);
2190 stgFree(pathCopy);
2191 }
2192 else
2193 {
2194 n = fread ( image, 1, memberSize, f );
2195 if (n != memberSize) {
2196 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2197 }
2198 }
2199
2200 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
2201 "loadArchive(file)");
2202 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
2203 path, (int)thisFileNameSize, fileName);
2204
2205 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
2206 , misalignment);
2207
2208 stgFree(archiveMemberName);
2209
2210 if (0 == loadOc(oc)) {
2211 stgFree(fileName);
2212 fclose(f);
2213 return 0;
2214 } else {
2215 #if defined(OBJFORMAT_PEi386)
2216 if (isImportLib)
2217 {
2218 findAndLoadImportLibrary(oc);
2219 stgFree(oc);
2220 oc = NULL;
2221 break;
2222 } else {
2223 #endif
2224 oc->next = objects;
2225 objects = oc;
2226 #if defined(OBJFORMAT_PEi386)
2227 }
2228 #endif
2229 }
2230 }
2231 else if (isGnuIndex) {
2232 if (gnuFileIndex != NULL) {
2233 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
2234 }
2235 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
2236 #if RTS_LINKER_USE_MMAP
2237 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
2238 #else
2239 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
2240 #endif
2241 n = fread ( gnuFileIndex, 1, memberSize, f );
2242 if (n != memberSize) {
2243 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
2244 }
2245 gnuFileIndex[memberSize] = '/';
2246 gnuFileIndexSize = memberSize;
2247 }
2248 else if (isImportLib) {
2249 #if defined(OBJFORMAT_PEi386)
2250 if (checkAndLoadImportLibrary(path, fileName, f)) {
2251 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
2252 }
2253 else {
2254 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
2255 n = fseek(f, memberSize, SEEK_CUR);
2256 if (n != 0)
2257 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2258 memberSize, path);
2259 }
2260 #endif
2261 }
2262 else {
2263 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
2264 if (!isThin || thisFileNameSize == 0) {
2265 n = fseek(f, memberSize, SEEK_CUR);
2266 if (n != 0)
2267 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
2268 memberSize, path);
2269 }
2270 }
2271
2272 /* .ar files are 2-byte aligned */
2273 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
2274 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
2275 n = fread ( tmp, 1, 1, f );
2276 if (n != 1) {
2277 if (feof(f)) {
2278 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
2279 break;
2280 }
2281 else {
2282 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
2283 }
2284 }
2285 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
2286 }
2287 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
2288 }
2289
2290 fclose(f);
2291
2292 stgFree(fileName);
2293 if (gnuFileIndex != NULL) {
2294 #if RTS_LINKER_USE_MMAP
2295 munmap(gnuFileIndex, gnuFileIndexSize + 1);
2296 #else
2297 stgFree(gnuFileIndex);
2298 #endif
2299 }
2300
2301 if (RTS_LINKER_USE_MMAP)
2302 m32_allocator_flush();
2303
2304 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
2305 return 1;
2306 }
2307
2308 HsInt loadArchive (pathchar *path)
2309 {
2310 ACQUIRE_LOCK(&linker_mutex);
2311 HsInt r = loadArchive_(path);
2312 RELEASE_LOCK(&linker_mutex);
2313 return r;
2314 }
2315
2316 //
2317 // Load the object file into memory. This will not be its final resting place,
2318 // as on 64-bit platforms we need to map its segments into the low 2Gb of the
2319 // address space, properly aligned.
2320 //
2321 static ObjectCode *
2322 preloadObjectFile (pathchar *path)
2323 {
2324 int fileSize;
2325 struct_stat st;
2326 int r;
2327 void *image;
2328 ObjectCode *oc;
2329 int misalignment = 0;
2330
2331 r = pathstat(path, &st);
2332 if (r == -1) {
2333 errorBelch("loadObj: %" PATH_FMT ": file doesn't exist", path);
2334 return NULL;
2335 }
2336
2337 fileSize = st.st_size;
2338
2339 #if RTS_LINKER_USE_MMAP
2340 int fd;
2341
2342 /* On many architectures malloc'd memory isn't executable, so we need to use
2343 * mmap. */
2344
2345 #if defined(openbsd_HOST_OS)
2346 fd = open(path, O_RDONLY, S_IRUSR);
2347 #else
2348 fd = open(path, O_RDONLY);
2349 #endif
2350 if (fd == -1) {
2351 errorBelch("loadObj: can't open %s", path);
2352 return NULL;
2353 }
2354
2355 image = mmap(NULL, fileSize, PROT_READ|PROT_WRITE|PROT_EXEC,
2356 MAP_PRIVATE, fd, 0);
2357 // not 32-bit yet, we'll remap later
2358 close(fd);
2359
2360 #else /* !RTS_LINKER_USE_MMAP */
2361 FILE *f;
2362
2363 /* load the image into memory */
2364 /* coverity[toctou] */
2365 f = pathopen(path, WSTR("rb"));
2366 if (!f) {
2367 errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
2368 return NULL;
2369 }
2370
2371 # if defined(mingw32_HOST_OS)
2372
2373 // TODO: We would like to use allocateExec here, but allocateExec
2374 // cannot currently allocate blocks large enough.
2375 image = allocateImageAndTrampolines(path, "itself", f, fileSize,
2376 HS_BOOL_FALSE);
2377 if (image == NULL) {
2378 fclose(f);
2379 return NULL;
2380 }
2381
2382 # elif defined(darwin_HOST_OS)
2383
2384 // In a Mach-O .o file, all sections can and will be misaligned
2385 // if the total size of the headers is not a multiple of the
2386 // desired alignment. This is fine for .o files that only serve
2387 // as input for the static linker, but it's not fine for us,
2388 // as SSE (used by gcc for floating point) and Altivec require
2389 // 16-byte alignment.
2390 // We calculate the correct alignment from the header before
2391 // reading the file, and then we misalign image on purpose so
2392 // that the actual sections end up aligned again.
2393 misalignment = machoGetMisalignment(f);
2394 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
2395 image += misalignment;
2396
2397 # else /* !defined(mingw32_HOST_OS) */
2398
2399 image = stgMallocBytes(fileSize, "loadObj(image)");
2400
2401 #endif
2402
2403 int n;
2404 n = fread ( image, 1, fileSize, f );
2405 fclose(f);
2406 if (n != fileSize) {
2407 errorBelch("loadObj: error whilst reading `%" PATH_FMT "'", path);
2408 stgFree(image);
2409 return NULL;
2410 }
2411
2412 #endif /* RTS_LINKER_USE_MMAP */
2413
2414 oc = mkOc(path, image, fileSize, rtsTrue, NULL, misalignment);
2415
2416 return oc;
2417 }
2418
2419 /* -----------------------------------------------------------------------------
2420 * Load an obj (populate the global symbol table, but don't resolve yet)
2421 *
2422 * Returns: 1 if ok, 0 on error.
2423 */
2424 static HsInt loadObj_ (pathchar *path)
2425 {
2426 ObjectCode* oc;
2427 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
2428
2429 /* debugBelch("loadObj %s\n", path ); */
2430
2431 /* Check that we haven't already loaded this object.
2432 Ignore requests to load multiple times */
2433
2434 if (isAlreadyLoaded(path)) {
2435 IF_DEBUG(linker,
2436 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
2437 return 1; /* success */
2438 }
2439
2440 oc = preloadObjectFile(path);
2441 if (oc == NULL) return 0;
2442
2443 if (! loadOc(oc)) {
2444 // failed; free everything we've allocated
2445 removeOcSymbols(oc);
2446 // no need to freeOcStablePtrs, they aren't created until resolveObjs()
2447 freeObjectCode(oc);
2448 return 0;
2449 }
2450
2451 oc->next = objects;
2452 objects = oc;
2453 return 1;
2454 }
2455
2456 HsInt loadObj (pathchar *path)
2457 {
2458 ACQUIRE_LOCK(&linker_mutex);
2459 HsInt r = loadObj_(path);
2460 RELEASE_LOCK(&linker_mutex);
2461 return r;
2462 }
2463
2464 static HsInt loadOc (ObjectCode* oc)
2465 {
2466 int r;
2467
2468 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
2469
2470 /* verify the in-memory image */
2471 # if defined(OBJFORMAT_ELF)
2472 r = ocVerifyImage_ELF ( oc );
2473 # elif defined(OBJFORMAT_PEi386)
2474 r = ocVerifyImage_PEi386 ( oc );
2475 # elif defined(OBJFORMAT_MACHO)
2476 r = ocVerifyImage_MachO ( oc );
2477 # else
2478 barf("loadObj: no verify method");
2479 # endif
2480 if (!r) {
2481 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
2482 return r;
2483 }
2484
2485 #if NEED_SYMBOL_EXTRAS
2486 # if defined(OBJFORMAT_MACHO)
2487 r = ocAllocateSymbolExtras_MachO ( oc );
2488 if (!r) {
2489 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
2490 return r;
2491 }
2492 # elif defined(OBJFORMAT_ELF)
2493 r = ocAllocateSymbolExtras_ELF ( oc );
2494 if (!r) {
2495 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
2496 return r;
2497 }
2498 # elif defined(OBJFORMAT_PEi386)
2499 ocAllocateSymbolExtras_PEi386 ( oc );
2500 # endif
2501 #endif
2502
2503 /* build the symbol list for this image */
2504 # if defined(OBJFORMAT_ELF)
2505 r = ocGetNames_ELF ( oc );
2506 # elif defined(OBJFORMAT_PEi386)
2507 r = ocGetNames_PEi386 ( oc );
2508 # elif defined(OBJFORMAT_MACHO)
2509 r = ocGetNames_MachO ( oc );
2510 # else
2511 barf("loadObj: no getNames method");
2512 # endif
2513 if (!r) {
2514 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
2515 return r;
2516 }
2517
2518 /* loaded, but not resolved yet, ensure the OC is in a consistent state */
2519 setOcInitialStatus( oc );
2520 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
2521
2522 return 1;
2523 }
2524
2525 /* -----------------------------------------------------------------------------
2526 * try to load and initialize an ObjectCode into memory
2527 *
2528 * Returns: 1 if ok, 0 on error.
2529 */
2530 int ocTryLoad (ObjectCode* oc) {
2531 int r;
2532
2533 if (oc->status != OBJECT_NEEDED) {
2534 return 1;
2535 }
2536
2537 /* Check for duplicate symbols by looking into `symhash`.
2538 Duplicate symbols are any symbols which exist
2539 in different ObjectCodes that have both been loaded, or
2540 are to be loaded by this call.
2541
2542 This call is intended to have no side-effects when a non-duplicate
2543 symbol is re-inserted.
2544
2545 We set the Address to NULL since that is not used to distinguish
2546 symbols. Duplicate symbols are distinguished by name and oc.
2547 */
2548 int x;
2549 SymbolName* symbol;
2550 for (x = 0; x < oc->n_symbols; x++) {
2551 symbol = oc->symbols[x];
2552 if ( symbol
2553 && !ghciInsertSymbolTable(oc->fileName, symhash, symbol, NULL, isSymbolWeak(oc, symbol), oc)) {
2554 return 0;
2555 }
2556 }
2557
2558 # if defined(OBJFORMAT_ELF)
2559 r = ocResolve_ELF ( oc );
2560 # elif defined(OBJFORMAT_PEi386)
2561 r = ocResolve_PEi386 ( oc );
2562 # elif defined(OBJFORMAT_MACHO)
2563 r = ocResolve_MachO ( oc );
2564 # else
2565 barf("ocTryLoad: not implemented on this platform");
2566 # endif
2567 if (!r) { return r; }
2568
2569 // run init/init_array/ctors/mod_init_func
2570
2571 loading_obj = oc; // tells foreignExportStablePtr what to do
2572 #if defined(OBJFORMAT_ELF)
2573 r = ocRunInit_ELF ( oc );
2574 #elif defined(OBJFORMAT_PEi386)
2575 r = ocRunInit_PEi386 ( oc );
2576 #elif defined(OBJFORMAT_MACHO)
2577 r = ocRunInit_MachO ( oc );
2578 #else
2579 barf("ocTryLoad: initializers not implemented on this platform");
2580 #endif
2581 loading_obj = NULL;
2582
2583 if (!r) { return r; }
2584
2585 oc->status = OBJECT_RESOLVED;
2586
2587 return 1;
2588 }
2589
2590 /* -----------------------------------------------------------------------------
2591 * resolve all the currently unlinked objects in memory
2592 *
2593 * Returns: 1 if ok, 0 on error.
2594 */
2595 static HsInt resolveObjs_ (void)
2596 {
2597 ObjectCode *oc;
2598 int r;
2599
2600 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
2601
2602 for (oc = objects; oc; oc = oc->next) {
2603 r = ocTryLoad(oc);
2604 if (!r)
2605 {
2606 return r;
2607 }
2608 }
2609
2610 #ifdef PROFILING
2611 // collect any new cost centres & CCSs that were defined during runInit
2612 initProfiling2();
2613 #endif
2614
2615 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
2616 return 1;
2617 }
2618
2619 HsInt resolveObjs (void)
2620 {
2621 ACQUIRE_LOCK(&linker_mutex);
2622 HsInt r = resolveObjs_();
2623 RELEASE_LOCK(&linker_mutex);
2624 return r;
2625 }
2626
2627 /* -----------------------------------------------------------------------------
2628 * delete an object from the pool
2629 */
2630 static HsInt unloadObj_ (pathchar *path, rtsBool just_purge)
2631 {
2632 ObjectCode *oc, *prev, *next;
2633 HsBool unloadedAnyObj = HS_BOOL_FALSE;
2634
2635 ASSERT(symhash != NULL);
2636 ASSERT(objects != NULL);
2637
2638 IF_DEBUG(linker, debugBelch("unloadObj: %" PATH_FMT "\n", path));
2639
2640 prev = NULL;
2641 for (oc = objects; oc; oc = next) {
2642 next = oc->next; // oc might be freed
2643
2644 if (!pathcmp(oc->fileName,path)) {
2645
2646 // these are both idempotent, so in just_purge mode we can
2647 // later call unloadObj() to really unload the object.
2648 removeOcSymbols(oc);
2649 freeOcStablePtrs(oc);
2650
2651 if (!just_purge) {
2652 if (prev == NULL) {
2653 objects = oc->next;
2654 } else {
2655 prev->next = oc->next;
2656 }
2657 ACQUIRE_LOCK(&linker_unloaded_mutex);
2658 oc->next = unloaded_objects;
2659 unloaded_objects = oc;
2660 oc->status = OBJECT_UNLOADED;
2661 RELEASE_LOCK(&linker_unloaded_mutex);
2662 // We do not own oc any more; it can be released at any time by
2663 // the GC in checkUnload().
2664 } else {
2665 prev = oc;
2666 }
2667
2668 /* This could be a member of an archive so continue
2669 * unloading other members. */
2670 unloadedAnyObj = HS_BOOL_TRUE;
2671 } else {
2672 prev = oc;
2673 }
2674 }
2675
2676 if (unloadedAnyObj) {
2677 return 1;
2678 }
2679 else {
2680 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
2681 return 0;
2682 }
2683 }
2684
2685 HsInt unloadObj (pathchar *path)
2686 {
2687 ACQUIRE_LOCK(&linker_mutex);
2688 HsInt r = unloadObj_(path, rtsFalse);
2689 RELEASE_LOCK(&linker_mutex);
2690 return r;
2691 }
2692
2693 HsInt purgeObj (pathchar *path)
2694 {
2695 ACQUIRE_LOCK(&linker_mutex);
2696 HsInt r = unloadObj_(path, rtsTrue);
2697 RELEASE_LOCK(&linker_mutex);
2698 return r;
2699 }
2700
2701 /* -----------------------------------------------------------------------------
2702 * Sanity checking. For each ObjectCode, maintain a list of address ranges
2703 * which may be prodded during relocation, and abort if we try and write
2704 * outside any of these.
2705 */
2706 static void
2707 addProddableBlock ( ObjectCode* oc, void* start, int size )
2708 {
2709 ProddableBlock* pb
2710 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
2711
2712 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
2713 ASSERT(size > 0);
2714 pb->start = start;
2715 pb->size = size;
2716 pb->next = oc->proddables;
2717 oc->proddables = pb;
2718 }
2719
2720 static void
2721 checkProddableBlock (ObjectCode *oc, void *addr, size_t size )
2722 {
2723 ProddableBlock* pb;
2724
2725 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
2726 char* s = (char*)(pb->start);
2727 char* e = s + pb->size;
2728 char* a = (char*)addr;
2729 if (a >= s && (a+size) <= e) return;
2730 }
2731 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
2732 }
2733
2734 static void freeProddableBlocks (ObjectCode *oc)
2735 {
2736 ProddableBlock *pb, *next;
2737
2738 for (pb = oc->proddables; pb != NULL; pb = next) {
2739 next = pb->next;
2740 stgFree(pb);
2741 }
2742 oc->proddables = NULL;
2743 }
2744
2745 /* -----------------------------------------------------------------------------
2746 * Section management.
2747 */
2748 static void
2749 addSection (Section *s, SectionKind kind, SectionAlloc alloc,
2750 void* start, StgWord size, StgWord mapped_offset,
2751 void* mapped_start, StgWord mapped_size)
2752 {
2753 s->start = start; /* actual start of section in memory */
2754 s->size = size; /* actual size of section in memory */
2755 s->kind = kind;
2756 s->alloc = alloc;
2757 s->mapped_offset = mapped_offset; /* offset from the image of mapped_start */
2758
2759 s->mapped_start = mapped_start; /* start of mmap() block */
2760 s->mapped_size = mapped_size; /* size of mmap() block */
2761
2762 IF_DEBUG(linker,
2763 debugBelch("addSection: %p-%p (size %" FMT_Word "), kind %d\n",
2764 start, (void*)((StgWord)start + size),
2765 size, kind ));
2766 }
2767
2768
2769 /* --------------------------------------------------------------------------
2770 * Symbol Extras.
2771 * This is about allocating a small chunk of memory for every symbol in the
2772 * object file. We make sure that the SymboLExtras are always "in range" of
2773 * limited-range PC-relative instructions on various platforms by allocating
2774 * them right next to the object code itself.
2775 */
2776
2777 #if NEED_SYMBOL_EXTRAS
2778 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2779
2780 /*
2781 ocAllocateSymbolExtras
2782
2783 Allocate additional space at the end of the object file image to make room
2784 for jump islands (powerpc, x86_64, arm) and GOT entries (x86_64).
2785
2786 PowerPC relative branch instructions have a 24 bit displacement field.
2787 As PPC code is always 4-byte-aligned, this yields a +-32MB range.
2788 If a particular imported symbol is outside this range, we have to redirect
2789 the jump to a short piece of new code that just loads the 32bit absolute
2790 address and jumps there.
2791 On x86_64, PC-relative jumps and PC-relative accesses to the GOT are limited
2792 to 32 bits (+-2GB).
2793
2794 This function just allocates space for one SymbolExtra for every
2795 undefined symbol in the object file. The code for the jump islands is
2796 filled in by makeSymbolExtra below.
2797 */
2798
2799 static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
2800 {
2801 size_t n;
2802
2803 if (RTS_LINKER_USE_MMAP && USE_CONTIGUOUS_MMAP) {
2804 n = roundUpToPage(oc->fileSize);
2805
2806 /* Keep image and symbol_extras contiguous */
2807 void *new = mmapForLinker(n + (sizeof(SymbolExtra) * count),
2808 MAP_ANONYMOUS, -1, 0);
2809 if (new) {
2810 memcpy(new, oc->image, oc->fileSize);
2811 if (oc->imageMapped) {
2812 munmap(oc->image, n);
2813 }
2814 oc->image = new;
2815 oc->imageMapped = rtsTrue;
2816 oc->fileSize = n + (sizeof(SymbolExtra) * count);
2817 oc->symbol_extras = (SymbolExtra *) (oc->image + n);
2818 }
2819 else {
2820 oc->symbol_extras = NULL;
2821 return 0;
2822 }
2823 }
2824 else if( count > 0 ) {
2825 if (RTS_LINKER_USE_MMAP) {
2826 n = roundUpToPage(oc->fileSize);
2827
2828 oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8);
2829 if (oc->symbol_extras == NULL) return 0;
2830 }
2831 else {
2832 // round up to the nearest 4
2833 int aligned = (oc->fileSize + 3) & ~3;
2834 int misalignment = oc->misalignment;
2835
2836 oc->image -= misalignment;
2837 oc->image = stgReallocBytes( oc->image,
2838 misalignment +
2839 aligned + sizeof (SymbolExtra) * count,
2840 "ocAllocateSymbolExtras" );
2841 oc->image += misalignment;
2842
2843 oc->symbol_extras = (SymbolExtra *) (oc->image + aligned);
2844 }
2845 }
2846
2847 if (oc->symbol_extras != NULL) {
2848 memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
2849 }
2850
2851 oc->first_symbol_extra = first;
2852 oc->n_symbol_extras = count;
2853
2854 return 1;
2855 }
2856
2857 #endif
2858 #endif // NEED_SYMBOL_EXTRAS
2859
2860 #if defined(arm_HOST_ARCH)
2861
2862 static void
2863 ocFlushInstructionCache( ObjectCode *oc )
2864 {
2865 int i;
2866 // Object code
2867 for (i=0; i < oc->n_sections; i++) {
2868 Section *s = &oc->sections[i];
2869 // This is a bit too broad but we don't have any way to determine what
2870 // is certainly code
2871 if (s->kind == SECTIONKIND_CODE_OR_RODATA)
2872 __clear_cache(s->start, (void*) ((uintptr_t) s->start + s->size));
2873 }
2874
2875 // Jump islands
2876 // Note the (+1) to ensure that the last symbol extra is covered by the
2877 // flush.
2878 __clear_cache(oc->symbol_extras, &oc->symbol_extras[oc->n_symbol_extras+1]);
2879 }
2880
2881 #endif
2882
2883 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2884 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2885
2886 static SymbolExtra* makeSymbolExtra( ObjectCode* oc,
2887 unsigned long symbolNumber,
2888 unsigned long target )
2889 {
2890 SymbolExtra *extra;
2891
2892 ASSERT( symbolNumber >= oc->first_symbol_extra
2893 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2894
2895 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2896
2897 #ifdef powerpc_HOST_ARCH
2898 // lis r12, hi16(target)
2899 extra->jumpIsland.lis_r12 = 0x3d80;
2900 extra->jumpIsland.hi_addr = target >> 16;
2901
2902 // ori r12, r12, lo16(target)
2903 extra->jumpIsland.ori_r12_r12 = 0x618c;
2904 extra->jumpIsland.lo_addr = target & 0xffff;
2905
2906 // mtctr r12
2907 extra->jumpIsland.mtctr_r12 = 0x7d8903a6;
2908
2909 // bctr
2910 extra->jumpIsland.bctr = 0x4e800420;
2911 #endif
2912 #ifdef x86_64_HOST_ARCH
2913 // jmp *-14(%rip)
2914 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
2915 extra->addr = target;
2916 memcpy(extra->jumpIsland, jmp, 6);
2917 #endif
2918
2919 return extra;
2920 }
2921
2922 #endif
2923 #endif // defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2924
2925 #ifdef arm_HOST_ARCH
2926 static SymbolExtra* makeArmSymbolExtra( ObjectCode* oc,
2927 unsigned long symbolNumber,
2928 unsigned long target,
2929 int fromThumb,
2930 int toThumb )
2931 {
2932 SymbolExtra *extra;
2933
2934 ASSERT( symbolNumber >= oc->first_symbol_extra
2935 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2936
2937 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2938
2939 // Make sure instruction mode bit is set properly
2940 if (toThumb)
2941 target |= 1;
2942 else
2943 target &= ~1;
2944
2945 if (!fromThumb) {
2946 // In ARM encoding:
2947 // movw r12, #0
2948 // movt r12, #0
2949 // bx r12
2950 uint32_t code[] = { 0xe300c000, 0xe340c000, 0xe12fff1c };
2951
2952 // Patch lower half-word into movw
2953 code[0] |= ((target>>12) & 0xf) << 16;
2954 code[0] |= target & 0xfff;
2955 // Patch upper half-word into movt
2956 target >>= 16;
2957 code[1] |= ((target>>12) & 0xf) << 16;
2958 code[1] |= target & 0xfff;
2959
2960 memcpy(extra->jumpIsland, code, 12);
2961
2962 } else {
2963 // In Thumb encoding:
2964 // movw r12, #0
2965 // movt r12, #0
2966 // bx r12
2967 uint16_t code[] = { 0xf240, 0x0c00,
2968 0xf2c0, 0x0c00,
2969 0x4760 };
2970
2971 // Patch lower half-word into movw
2972 code[0] |= (target>>12) & 0xf;
2973 code[0] |= ((target>>11) & 0x1) << 10;
2974 code[1] |= ((target>>8) & 0x7) << 12;
2975 code[1] |= target & 0xff;
2976 // Patch upper half-word into movt
2977 target >>= 16;
2978 code[2] |= (target>>12) & 0xf;
2979 code[2] |= ((target>>11) & 0x1) << 10;
2980 code[3] |= ((target>>8) & 0x7) << 12;
2981 code[3] |= target & 0xff;
2982
2983 memcpy(extra->jumpIsland, code, 10);
2984 }
2985
2986 return extra;
2987 }
2988 #endif // arm_HOST_ARCH
2989
2990 /* --------------------------------------------------------------------------
2991 * PowerPC specifics (instruction cache flushing)
2992 * ------------------------------------------------------------------------*/
2993
2994 #ifdef powerpc_HOST_ARCH
2995 /*
2996 ocFlushInstructionCache
2997
2998 Flush the data & instruction caches.
2999 Because the PPC has split data/instruction caches, we have to
3000 do that whenever we modify code at runtime.
3001 */
3002
3003 static void
3004 ocFlushInstructionCacheFrom(void* begin, size_t length)
3005 {
3006 size_t n = (length + 3) / 4;
3007 unsigned long* p = begin;
3008
3009 while (n--)
3010 {
3011 __asm__ volatile ( "dcbf 0,%0\n\t"
3012 "sync\n\t"
3013 "icbi 0,%0"
3014 :
3015 : "r" (p)
3016 );
3017 p++;
3018 }
3019 __asm__ volatile ( "sync\n\t"
3020 "isync"
3021 );
3022 }
3023
3024 static void
3025 ocFlushInstructionCache( ObjectCode *oc )
3026 {
3027 /* The main object code */
3028 ocFlushInstructionCacheFrom(oc->image + oc->misalignment, oc->fileSize);
3029
3030 /* Jump Islands */
3031 ocFlushInstructionCacheFrom(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
3032 }
3033 #endif /* powerpc_HOST_ARCH */
3034
3035
3036 /* --------------------------------------------------------------------------
3037 * PEi386(+) specifics (Win32 targets)
3038 * ------------------------------------------------------------------------*/
3039
3040 /* The information for this linker comes from
3041 Microsoft Portable Executable
3042 and Common Object File Format Specification
3043 revision 8.3 February 2013
3044
3045 It can be found online at:
3046
3047 https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx
3048
3049 Things move, so if that fails, try searching for it via
3050
3051 http://www.google.com/search?q=PE+COFF+specification
3052
3053 The ultimate reference for the PE format is the Winnt.h
3054 header file that comes with the Platform SDKs; as always,
3055 implementations will drift wrt their documentation.
3056
3057 A good background article on the PE format is Matt Pietrek's
3058 March 1994 article in Microsoft System Journal (MSJ)
3059 (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
3060 Win32 Portable Executable File Format." The info in there
3061 has recently been updated in a two part article in
3062 MSDN magazine, issues Feb and March 2002,
3063 "Inside Windows: An In-Depth Look into the Win32 Portable
3064 Executable File Format"
3065
3066 John Levine's book "Linkers and Loaders" contains useful
3067 info on PE too.
3068
3069 The PE specification doesn't specify how to do the actual
3070 relocations. For this reason, and because both PE and ELF are
3071 based on COFF, the relocations for the PEi386+ code is based on
3072 the ELF relocations for the equivalent relocation type.
3073
3074 The ELF ABI can be found at
3075
3076 http://www.x86-64.org/documentation/abi.pdf
3077
3078 The current code is based on version 0.99.6 - October 2013
3079 */
3080
3081
3082 #if defined(OBJFORMAT_PEi386)
3083
3084 static int verifyCOFFHeader ( COFF_header *hdr, pathchar *filename);
3085
3086 /* We assume file pointer is right at the
3087 beginning of COFF object.
3088 */
3089 static char *
3090 allocateImageAndTrampolines (
3091 pathchar* arch_name, char* member_name,
3092 FILE* f USED_IF_x86_64_HOST_ARCH,
3093 int size,
3094 int isThin USED_IF_x86_64_HOST_ARCH)
3095 {
3096 char* image;
3097 #if defined(x86_64_HOST_ARCH)
3098 if (!isThin)
3099 {
3100 /* PeCoff contains number of symbols right in it's header, so
3101 we can reserve the room for symbolExtras right here. */
3102 COFF_header hdr;
3103 size_t n;
3104
3105 n = fread(&hdr, 1, sizeof_COFF_header, f);
3106 if (n != sizeof(COFF_header)) {
3107 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%S'",
3108 member_name, arch_name);
3109 return NULL;
3110 }
3111 fseek(f, -sizeof_COFF_header, SEEK_CUR);
3112
3113 if (!verifyCOFFHeader(&hdr, arch_name)) {
3114 return 0;
3115 }
3116
3117 /* We get back 8-byte aligned memory (is that guaranteed?), but
3118 the offsets to the sections within the file are all 4 mod 8
3119 (is that guaranteed?). We therefore need to offset the image
3120 by 4, so that all the pointers are 8-byte aligned, so that
3121 pointer tagging works. */
3122 /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
3123 which equals to 4 for 64-bit case and 0 for 32-bit case. */
3124 /* We allocate trampolines area for all symbols right behind
3125 image data, aligned on 8. */
3126 size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
3127 + hdr.NumberOfSymbols * sizeof(SymbolExtra);
3128 }
3129 #endif
3130 image = VirtualAlloc(NULL, size,
3131 MEM_RESERVE | MEM_COMMIT,
3132 PAGE_EXECUTE_READWRITE);
3133
3134 if (image == NULL) {
3135 errorBelch("%" PATH_FMT ": failed to allocate memory for image for %s",
3136 arch_name, member_name);
3137 return NULL;
3138 }
3139
3140 return image + PEi386_IMAGE_OFFSET;
3141 }
3142
3143 static int findAndLoadImportLibrary(ObjectCode* oc)
3144 {
3145 int i;
3146
3147 COFF_header* hdr;
3148 COFF_section* sectab;
3149 COFF_symbol* symtab;
3150 UChar* strtab;
3151
3152 hdr = (COFF_header*)(oc->image);
3153 sectab = (COFF_section*)(
3154 ((UChar*)(oc->image))
3155 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3156 );
3157
3158 symtab = (COFF_symbol*)(
3159 ((UChar*)(oc->image))
3160 + hdr->PointerToSymbolTable
3161 );
3162
3163 strtab = ((UChar*)symtab)
3164 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3165
3166 for (i = 0; i < oc->n_sections; i++)
3167 {
3168 COFF_section* sectab_i
3169 = (COFF_section*)myindex(sizeof_COFF_section, sectab, i);
3170
3171 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3172
3173 // Find the first entry containing a valid .idata$7 section.
3174 if (strcmp(secname, ".idata$7") == 0) {
3175 /* First load the containing DLL if not loaded. */
3176 Section section = oc->sections[i];
3177
3178 pathchar* dirName = pathdir(oc->fileName);
3179 HsPtr token = addLibrarySearchPath(dirName);
3180 stgFree(dirName);
3181 char* dllName = (char*)section.start;
3182
3183 if (strlen(dllName) == 0 || dllName[0] == ' ')
3184 {
3185 continue;
3186 }
3187
3188 IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand '%ls' => `%s'\n", oc->fileName, dllName));
3189
3190 pathchar* dll = mkPath(dllName);
3191 removeLibrarySearchPath(token);
3192
3193 const char* result = addDLL(dll);
3194 stgFree(dll);
3195
3196 if (result != NULL) {
3197 errorBelch("Could not load `%s'. Reason: %s\n", (char*)dllName, result);
3198 return 0;
3199 }
3200
3201 break;
3202 }
3203
3204 stgFree(secname);
3205 }
3206
3207 return 1;
3208 }
3209
3210 static int checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f)
3211 {
3212 char* image;
3213 static HsBool load_dll_warn = HS_BOOL_FALSE;
3214
3215 if (load_dll_warn) { return 0; }
3216
3217 /* Based on Import Library specification. PE Spec section 7.1 */
3218
3219 COFF_import_header hdr;
3220 size_t n;
3221
3222 n = fread(&hdr, 1, sizeof_COFF_import_Header, f);
3223 if (n != sizeof(COFF_header)) {
3224 errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%" PATH_FMT "'\n",
3225 member_name, arch_name);
3226 return 0;
3227 }
3228
3229 if (hdr.Sig1 != 0x0 || hdr.Sig2 != 0xFFFF) {
3230 fseek(f, -sizeof_COFF_import_Header, SEEK_CUR);
3231 IF_DEBUG(linker, debugBelch("loadArchive: Object `%s` is not an import lib. Skipping...\n", member_name));
3232 return 0;
3233 }
3234
3235 IF_DEBUG(linker, debugBelch("loadArchive: reading %d bytes at %ld\n", hdr.SizeOfData, ftell(f)));
3236
3237 image = malloc(hdr.SizeOfData);
3238 n = fread(image, 1, hdr.SizeOfData, f);
3239 if (n != hdr.SizeOfData) {
3240 errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n",
3241 member_name, arch_name);
3242 }
3243
3244 char* symbol = strtok(image, "\0");
3245 int symLen = strlen(symbol) + 1;
3246 int nameLen = n - symLen;
3247 char* dllName = malloc(sizeof(char) * nameLen);
3248 dllName = strncpy(dllName, image + symLen, nameLen);
3249 pathchar* dll = malloc(sizeof(wchar_t) * nameLen);
3250 mbstowcs(dll, dllName, nameLen);
3251 free(dllName);
3252
3253 IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%ls'\n", symbol, dll));
3254 const char* result = addDLL(dll);
3255
3256 free(image);
3257
3258 if (result != NULL) {
3259 errorBelch("Could not load `%ls'. Reason: %s\n", dll, result);
3260 load_dll_warn = HS_BOOL_TRUE;
3261
3262 free(dll);
3263 fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
3264 return 0;
3265 }
3266
3267 free(dll);
3268 return 1;
3269 }
3270
3271 /* We use myindex to calculate array addresses, rather than
3272 simply doing the normal subscript thing. That's because
3273 some of the above structs have sizes which are not
3274 a whole number of words. GCC rounds their sizes up to a
3275 whole number of words, which means that the address calcs
3276 arising from using normal C indexing or pointer arithmetic
3277 are just plain wrong. Sigh.
3278 */
3279 static UChar *
3280 myindex ( int scale, void* base, int index )
3281 {
3282 return
3283 ((UChar*)base) + scale * index;
3284 }
3285
3286
3287 static void
3288 printName ( UChar* name, UChar* strtab )
3289 {
3290 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3291 UInt32 strtab_offset = * (UInt32*)(name+4);
3292 debugBelch("%s", strtab + strtab_offset );
3293 } else {
3294 int i;
3295 for (i = 0; i < 8; i++) {
3296 if (name[i] == 0) break;
3297 debugBelch("%c", name[i] );
3298 }
3299 }
3300 }
3301
3302
3303 static void
3304 copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
3305 {
3306 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3307 UInt32 strtab_offset = * (UInt32*)(name+4);
3308 strncpy ( (char*)dst, (char*)strtab+strtab_offset, dstSize );
3309 dst[dstSize-1] = 0;
3310 } else {
3311 int i = 0;
3312 while (1) {
3313 if (i >= 8) break;
3314 if (name[i] == 0) break;
3315 dst[i] = name[i];
3316 i++;
3317 }
3318 dst[i] = 0;
3319 }
3320 }
3321
3322
3323 static UChar *
3324 cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
3325 {
3326 UChar* newstr;
3327 /* If the string is longer than 8 bytes, look in the
3328 string table for it -- this will be correctly zero terminated.
3329 */
3330 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
3331 UInt32 strtab_offset = * (UInt32*)(name+4);
3332 return ((UChar*)strtab) + strtab_offset;
3333 }
3334 /* Otherwise, if shorter than 8 bytes, return the original,
3335 which by defn is correctly terminated.
3336 */
3337 if (name[7]==0) return name;
3338 /* The annoying case: 8 bytes. Copy into a temporary
3339 (XXX which is never freed ...)
3340 */
3341 newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
3342 ASSERT(newstr);
3343 strncpy((char*)newstr,(char*)name,8);
3344 newstr[8] = 0;
3345 return newstr;
3346 }
3347
3348 /* Getting the name of a section is mildly tricky, so we make a
3349 function for it. Sadly, in one case we have to copy the string
3350 (when it is exactly 8 bytes long there's no trailing '\0'), so for
3351 consistency we *always* copy the string; the caller must free it
3352 */
3353 static char *
3354 cstring_from_section_name (UChar* name, UChar* strtab)
3355 {
3356 char *newstr;
3357
3358 if (name[0]=='/') {
3359 int strtab_offset = strtol((char*)name+1,NULL,10);
3360 int len = strlen(((char*)strtab) + strtab_offset);
3361
3362 newstr = stgMallocBytes(len+1, "cstring_from_section_symbol_name");
3363 strcpy((char*)newstr, (char*)((UChar*)strtab) + strtab_offset);
3364 return newstr;
3365 }
3366 else
3367 {
3368 newstr = stgMallocBytes(9, "cstring_from_section_symbol_name");
3369 ASSERT(newstr);
3370 strncpy((char*)newstr,(char*)name,8);
3371 newstr[8] = 0;
3372 return newstr;
3373 }
3374 }
3375
3376 /* See Note [mingw-w64 name decoration scheme] */
3377 #ifndef x86_64_HOST_ARCH
3378 static void
3379 zapTrailingAtSign ( UChar* sym )
3380 {
3381 # define my_isdigit(c) ((c) >= '0' && (c) <= '9')
3382 int i, j;
3383 if (sym[0] == 0) return;
3384 i = 0;
3385 while (sym[i] != 0) i++;
3386 i--;
3387 j = i;
3388 while (j > 0 && my_isdigit(sym[j])) j--;
3389 if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
3390 # undef my_isdigit
3391 }
3392 #endif
3393
3394 /* See Note [mingw-w64 name decoration scheme] */
3395 #ifndef x86_64_HOST_ARCH
3396 #define STRIP_LEADING_UNDERSCORE 1
3397 #else
3398 #define STRIP_LEADING_UNDERSCORE 0
3399 #endif
3400
3401 /*
3402 Note [mingw-w64 name decoration scheme]
3403
3404 What's going on with name decoration? Well, original code
3405 have some crufty and ad-hocish paths related mostly to very old
3406 mingw gcc/binutils/runtime combinations. Now mingw-w64 offers pretty
3407 uniform and MS-compatible decoration scheme across its tools and runtime.
3408
3409 The scheme is pretty straightforward: on 32 bit objects symbols are exported
3410 with underscore prepended (and @ + stack size suffix appended for stdcall
3411 functions), on 64 bits no underscore is prepended and no suffix is appended
3412 because we have no stdcall convention on 64 bits.
3413
3414 See #9218
3415 */
3416
3417 static SymbolAddr*
3418 lookupSymbolInDLLs ( UChar *lbl )
3419 {
3420 OpenedDLL* o_dll;
3421 SymbolAddr* sym;
3422
3423 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
3424 /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
3425
3426 sym = GetProcAddress(o_dll->instance, (char*)(lbl+STRIP_LEADING_UNDERSCORE));
3427 if (sym != NULL) {
3428 /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
3429 return sym;
3430 }
3431
3432 /* Ticket #2283.
3433 Long description: http://support.microsoft.com/kb/132044
3434 tl;dr:
3435 If C/C++ compiler sees __declspec(dllimport) ... foo ...
3436 it generates call *__imp_foo, and __imp_foo here has exactly
3437 the same semantics as in __imp_foo = GetProcAddress(..., "foo")
3438 */
3439 if (sym == NULL && strncmp ((const char*)lbl, "__imp_", 6) == 0) {
3440 sym = GetProcAddress(o_dll->instance, (char*)(lbl+6+STRIP_LEADING_UNDERSCORE));
3441 if (sym != NULL) {
3442 IndirectAddr* ret;
3443 ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
3444 ret->addr = sym;
3445 ret->next = indirects;
3446 indirects = ret;
3447 IF_DEBUG(linker,
3448 debugBelch("warning: %s from %S is linked instead of %s\n",
3449 (char*)(lbl+6+STRIP_LEADING_UNDERSCORE), o_dll->name, (char*)lbl));
3450 return (void*) & ret->addr;
3451 }
3452 }
3453
3454 sym = GetProcAddress(o_dll->instance, (char*)lbl);
3455 if (sym != NULL) {
3456 /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
3457 return sym;
3458 }
3459 }
3460 return NULL;
3461 }
3462
3463 static int
3464 verifyCOFFHeader (COFF_header *hdr, pathchar *fileName)
3465 {
3466 #if defined(i386_HOST_ARCH)
3467 if (hdr->Machine != 0x14c) {
3468 errorBelch("%" PATH_FMT ": Not x86 PEi386", fileName);
3469 return 0;
3470 }
3471 #elif defined(x86_64_HOST_ARCH)
3472 if (hdr->Machine != 0x8664) {
3473 errorBelch("%" PATH_FMT ": Not x86_64 PEi386", fileName);
3474 return 0;
3475 }
3476 #else
3477 errorBelch("PEi386 not supported on this arch");
3478 #endif
3479
3480 if (hdr->SizeOfOptionalHeader != 0) {
3481 errorBelch("%" PATH_FMT ": PEi386 with nonempty optional header",
3482 fileName);
3483 return 0;
3484 }
3485 if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
3486 (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
3487 (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
3488 (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
3489 errorBelch("%" PATH_FMT ": Not a PEi386 object file", fileName);
3490 return 0;
3491 }
3492 if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
3493 /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
3494 errorBelch("%" PATH_FMT ": Invalid PEi386 word size or endiannness: %d",
3495 fileName,
3496 (int)(hdr->Characteristics));
3497 return 0;
3498 }
3499 return 1;
3500 }
3501
3502 static int
3503 ocVerifyImage_PEi386 ( ObjectCode* oc )
3504 {
3505 int i;
3506 UInt32 j, noRelocs;
3507 COFF_header* hdr;
3508 COFF_section* sectab;
3509 COFF_symbol* symtab;
3510 UChar* strtab;
3511 /* debugBelch("\nLOADING %s\n", oc->fileName); */
3512 hdr = (COFF_header*)(oc->image);
3513 sectab = (COFF_section*) (
3514 ((UChar*)(oc->image))
3515 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3516 );
3517 symtab = (COFF_symbol*) (
3518 ((UChar*)(oc->image))
3519 + hdr->PointerToSymbolTable
3520 );
3521 strtab = ((UChar*)symtab)
3522 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3523
3524 if (!verifyCOFFHeader(hdr, oc->fileName)) {
3525 return 0;
3526 }
3527
3528 /* If the string table size is way crazy, this might indicate that
3529 there are more than 64k relocations, despite claims to the
3530 contrary. Hence this test. */
3531 /* debugBelch("strtab size %d\n", * (UInt32*)strtab); */
3532 #if 0
3533 if ( (*(UInt32*)strtab) > 600000 ) {
3534 /* Note that 600k has no special significance other than being
3535 big enough to handle the almost-2MB-sized lumps that
3536 constitute HSwin32*.o. */
3537 debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
3538 return 0;
3539 }
3540 #endif
3541
3542 /* .BSS Section is initialized in ocGetNames_PEi386
3543 but we need the Sections array initialized here already. */
3544 Section *sections;
3545 sections = (Section*)stgCallocBytes(
3546 sizeof(Section),
3547 hdr->NumberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */
3548 "ocVerifyImage_PEi386(sections)");
3549 oc->sections = sections;
3550 oc->n_sections = hdr->NumberOfSections + 1;
3551
3552 /* Initialize the Sections */
3553 for (i = 0; i < hdr->NumberOfSections; i++) {
3554 COFF_section* sectab_i
3555 = (COFF_section*)
3556 myindex(sizeof_COFF_section, sectab, i);
3557
3558 /* Calculate the start of the data section */
3559 sections[i].start = oc->image + sectab_i->PointerToRawData;
3560 }
3561
3562 /* No further verification after this point; only debug printing. */
3563 i = 0;
3564 IF_DEBUG(linker, i=1);
3565 if (i == 0) return 1;
3566
3567 debugBelch("sectab offset = %" FMT_SizeT "\n",
3568 ((UChar*)sectab) - ((UChar*)hdr) );
3569 debugBelch("symtab offset = %" FMT_SizeT "\n",
3570 ((UChar*)symtab) - ((UChar*)hdr) );
3571 debugBelch("strtab offset = %" FMT_SizeT "\n",
3572 ((UChar*)strtab) - ((UChar*)hdr) );
3573
3574 debugBelch("\n" );
3575 debugBelch( "Machine: 0x%x\n", (UInt32)(hdr->Machine) );
3576 debugBelch( "# sections: %d\n", (UInt32)(hdr->NumberOfSections) );
3577 debugBelch( "time/date: 0x%x\n", (UInt32)(hdr->TimeDateStamp) );
3578 debugBelch( "symtab offset: %d\n", (UInt32)(hdr->PointerToSymbolTable) );
3579 debugBelch( "# symbols: %d\n", (UInt32)(hdr->NumberOfSymbols) );
3580 debugBelch( "sz of opt hdr: %d\n", (UInt32)(hdr->SizeOfOptionalHeader) );
3581 debugBelch( "characteristics: 0x%x\n", (UInt32)(hdr->Characteristics) );
3582
3583 /* Print the section table. */
3584 debugBelch("\n" );
3585 for (i = 0; i < hdr->NumberOfSections; i++) {
3586 COFF_reloc* reltab;
3587 COFF_section* sectab_i
3588 = (COFF_section*)
3589 myindex ( sizeof_COFF_section, sectab, i );
3590 Section section = sections[i];
3591 debugBelch(
3592 "\n"
3593 "section %d\n"
3594 " name `",
3595 i
3596 );
3597 printName ( sectab_i->Name, strtab );
3598 debugBelch(
3599 "'\n"
3600 " vsize %d\n"
3601 " vaddr %d\n"
3602 " data sz %d\n"
3603 " data off 0x%p\n"
3604 " num rel %d\n"
3605 " off rel %d\n"
3606 " ptr raw 0x%x\n",
3607 sectab_i->VirtualSize,
3608 sectab_i->VirtualAddress,
3609 sectab_i->SizeOfRawData,
3610 section.start,
3611 sectab_i->NumberOfRelocations,
3612 sectab_i->PointerToRelocations,
3613 sectab_i->PointerToRawData
3614 );
3615 reltab = (COFF_reloc*) (
3616 ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
3617 );
3618
3619 if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
3620 /* If the relocation field (a short) has overflowed, the
3621 * real count can be found in the first reloc entry.
3622 *
3623 * See Section 4.1 (last para) of the PE spec (rev6.0).
3624 */
3625 COFF_reloc* rel = (COFF_reloc*)
3626 myindex ( sizeof_COFF_reloc, reltab, 0 );
3627 noRelocs = rel->VirtualAddress;
3628 j = 1;
3629 } else {
3630 noRelocs = sectab_i->NumberOfRelocations;
3631 j = 0;
3632 }
3633
3634 for (; j < noRelocs; j++) {
3635 COFF_symbol* sym;
3636 COFF_reloc* rel = (COFF_reloc*)
3637 myindex ( sizeof_COFF_reloc, reltab, j );
3638 debugBelch(
3639 " type 0x%-4x vaddr 0x%-8x name `",
3640 (UInt32)rel->Type,
3641 rel->VirtualAddress );
3642 sym = (COFF_symbol*)
3643 myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
3644 /* Hmm..mysterious looking offset - what's it for? SOF */
3645 printName ( sym->Name, strtab -10 );
3646 debugBelch("'\n" );
3647 }
3648
3649 debugBelch("\n" );
3650 }
3651 debugBelch("\n" );
3652 debugBelch("string table has size 0x%x\n", * (UInt32*)strtab );
3653 debugBelch("---START of string table---\n");
3654 for (i = 4; i < *(Int32*)strtab; i++) {
3655 if (strtab[i] == 0)
3656 debugBelch("\n"); else
3657 debugBelch("%c", strtab[i] );
3658 }
3659 debugBelch("--- END of string table---\n");
3660
3661 debugBelch("\n" );
3662 i = 0;
3663 while (1) {
3664 COFF_symbol* symtab_i;
3665 if (i >= (Int32)(hdr->NumberOfSymbols)) break;
3666 symtab_i = (COFF_symbol*)
3667 myindex ( sizeof_COFF_symbol, symtab, i );
3668 debugBelch(
3669 "symbol %d\n"
3670 " name `",
3671 i
3672 );
3673 printName ( symtab_i->Name, strtab );
3674 debugBelch(
3675 "'\n"
3676 " value 0x%x\n"
3677 " 1+sec# %d\n"
3678 " type 0x%x\n"
3679 " sclass 0x%x\n"
3680 " nAux %d\n",
3681 symtab_i->Value,
3682 (Int32)(symtab_i->SectionNumber),
3683 (UInt32)symtab_i->Type,
3684 (UInt32)symtab_i->StorageClass,
3685 (UInt32)symtab_i->NumberOfAuxSymbols
3686 );
3687 i += symtab_i->NumberOfAuxSymbols;
3688 i++;
3689 }
3690
3691 debugBelch("\n" );
3692 return 1;
3693 }
3694
3695
3696 static int
3697 ocGetNames_PEi386 ( ObjectCode* oc )
3698 {
3699 COFF_header* hdr;
3700 COFF_section* sectab;
3701 COFF_symbol* symtab;
3702 UChar* strtab;
3703
3704 UChar* sname;
3705 SymbolAddr* addr;
3706 int i;
3707
3708 hdr = (COFF_header*)(oc->image);
3709 sectab = (COFF_section*) (
3710 ((UChar*)(oc->image))
3711 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3712 );
3713 symtab = (COFF_symbol*) (
3714 ((UChar*)(oc->image))
3715 + hdr->PointerToSymbolTable
3716 );
3717 strtab = ((UChar*)(oc->image))
3718 + hdr->PointerToSymbolTable
3719 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3720
3721 /* Allocate space for any (local, anonymous) .bss sections. */
3722
3723 for (i = 0; i < hdr->NumberOfSections; i++) {
3724 UInt32 bss_sz;
3725 UChar* zspace;
3726 COFF_section* sectab_i
3727 = (COFF_section*)
3728 myindex ( sizeof_COFF_section, sectab, i );
3729
3730 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3731
3732 if (0 != strcmp(secname, ".bss")) {
3733 stgFree(secname);
3734 continue;
3735 }
3736
3737 stgFree(secname);
3738
3739 /* sof 10/05: the PE spec text isn't too clear regarding what
3740 * the SizeOfRawData field is supposed to hold for object
3741 * file sections containing just uninitialized data -- for executables,
3742 * it is supposed to be zero; unclear what it's supposed to be
3743 * for object files. However, VirtualSize is guaranteed to be
3744 * zero for object files, which definitely suggests that SizeOfRawData
3745 * will be non-zero (where else would the size of this .bss section be
3746 * stored?) Looking at the COFF_section info for incoming object files,
3747 * this certainly appears to be the case.
3748 *
3749 * => I suspect we've been incorrectly handling .bss sections in (relocatable)
3750 * object files up until now. This turned out to bite us with ghc-6.4.1's use
3751 * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
3752 * variable decls into the .bss section. (The specific function in Q which
3753 * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
3754 */
3755 if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
3756 /* This is a non-empty .bss section.
3757 Allocate zeroed space for it */
3758 bss_sz = sectab_i->VirtualSize;
3759 if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
3760 zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
3761 oc->sections[i].start = zspace;
3762 addProddableBlock(oc, zspace, bss_sz);
3763 /* debugBelch("BSS anon section at 0x%x\n", zspace); */
3764 }
3765
3766 /* Copy section information into the ObjectCode. */
3767
3768 for (i = 0; i < hdr->NumberOfSections; i++) {
3769 UChar* start;
3770 UChar* end;
3771 UInt32 sz;
3772
3773 /* By default consider all section as CODE or DATA, which means we want to load them. */
3774 SectionKind kind
3775 = SECTIONKIND_CODE_OR_RODATA;
3776 COFF_section* sectab_i
3777 = (COFF_section*)
3778 myindex ( sizeof_COFF_section, sectab, i );
3779 Section section = oc->sections[i];
3780
3781 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3782
3783 IF_DEBUG(linker, debugBelch("section name = %s\n", secname ));
3784
3785 /* The PE file section flag indicates whether the section contains code or data. */
3786 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE ||
3787 sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
3788 kind = SECTIONKIND_CODE_OR_RODATA;
3789
3790 /* Check next if it contains any uninitialized data */
3791 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_UNINITIALIZED_DATA)
3792 kind = SECTIONKIND_RWDATA;
3793
3794 /* Finally check if it can be discarded. This will also ignore .debug sections */
3795 if (sectab_i->Characteristics & MYIMAGE_SCN_MEM_DISCARDABLE ||
3796 sectab_i->Characteristics & MYIMAGE_SCN_LNK_REMOVE)
3797 kind = SECTIONKIND_OTHER;
3798
3799 if (0==strcmp(".ctors", (char*)secname))
3800 kind = SECTIONKIND_INIT_ARRAY;
3801
3802 ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->VirtualSize == 0);
3803 sz = sectab_i->SizeOfRawData;
3804 if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;
3805
3806 start = section.start;
3807 end = start + sz - 1;
3808
3809 if (kind != SECTIONKIND_OTHER && end >= start) {
3810 addSection(&oc->sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
3811 addProddableBlock(oc, start, sz);
3812 }
3813
3814 stgFree(secname);
3815 }
3816
3817 /* Copy exported symbols into the ObjectCode. */
3818
3819 oc->n_symbols = hdr->NumberOfSymbols;
3820 oc->symbols = stgCallocBytes(sizeof(SymbolName*), oc->n_symbols,
3821 "ocGetNames_PEi386(oc->symbols)");
3822
3823 /* Work out the size of the global BSS section */
3824 StgWord globalBssSize = 0;
3825 for (i=0; i < (int)hdr->NumberOfSymbols; i++) {
3826 COFF_symbol* symtab_i;
3827 symtab_i = (COFF_symbol*)
3828 myindex ( sizeof_COFF_symbol, symtab, i );
3829 if (symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3830 && symtab_i->Value > 0
3831 && symtab_i->StorageClass != MYIMAGE_SYM_CLASS_SECTION) {
3832 globalBssSize += symtab_i->Value;
3833 }
3834 i += symtab_i->NumberOfAuxSymbols;
3835 }
3836
3837 /* Allocate BSS space */
3838 SymbolAddr* bss = NULL;
3839 if (globalBssSize > 0) {
3840 bss = stgCallocBytes(1, globalBssSize,
3841 "ocGetNames_PEi386(non-anonymous bss)");
3842 addSection(&oc->sections[oc->n_sections-1],
3843 SECTIONKIND_RWDATA, SECTION_MALLOC,
3844 bss, globalBssSize, 0, 0, 0);
3845 IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
3846 addProddableBlock(oc, bss, globalBssSize);
3847 } else {
3848 addSection(&oc->sections[oc->n_sections-1],
3849 SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
3850 }
3851
3852 for (i = 0; i < oc->n_symbols; i++) {
3853 COFF_symbol* symtab_i;
3854 symtab_i = (COFF_symbol*)
3855 myindex ( sizeof_COFF_symbol, symtab, i );
3856
3857 addr = NULL;
3858 HsBool isWeak = HS_BOOL_FALSE;
3859 if ( symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED
3860 && symtab_i->SectionNumber > 0) {
3861 /* This symbol is global and defined, viz, exported */
3862 /* for MYIMAGE_SYMCLASS_EXTERNAL
3863 && !MYIMAGE_SYM_UNDEFINED,
3864 the address of the symbol is:
3865 address of relevant section + offset in section
3866 */
3867 COFF_section* sectabent
3868 = (COFF_section*) myindex ( sizeof_COFF_section,
3869 sectab,
3870 symtab_i->SectionNumber-1 );
3871 if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL
3872 || ( symtab_i->StorageClass == MYIMAGE_SYM_CLASS_STATIC
3873 && sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT)
3874 ) {
3875 addr = (void*)((size_t)oc->sections[symtab_i->SectionNumber-1].start
3876 + symtab_i->Value);
3877 if (sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT) {
3878 isWeak = HS_BOOL_TRUE;
3879 }
3880 }
3881 }
3882 else if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_WEAK_EXTERNAL) {
3883 isWeak = HS_BOOL_TRUE;
3884 }
3885 else if ( symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3886 && symtab_i->Value > 0) {
3887 /* This symbol isn't in any section at all, ie, global bss.
3888 Allocate zeroed space for it from the BSS section */
3889 addr = bss;
3890 bss = (SymbolAddr*)((StgWord)bss + (StgWord)symtab_i->Value);
3891 IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symtab_i->Value));
3892 }
3893
3894 sname = cstring_from_COFF_symbol_name(symtab_i->Name, strtab);
3895 if (addr != NULL || isWeak == HS_BOOL_TRUE) {
3896
3897 /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
3898 IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname);)
3899 ASSERT(i >= 0 && i < oc->n_symbols);
3900 /* cstring_from_COFF_symbol_name always succeeds. */
3901 oc->symbols[i] = (SymbolName*)sname;
3902 if (isWeak == HS_BOOL_TRUE) {
3903 setWeakSymbol(oc, sname);
3904 }
3905
3906 if (! ghciInsertSymbolTable(oc->fileName, symhash, (SymbolName*)sname, addr,
3907 isWeak, oc)) {
3908 return 0;
3909 }
3910 } else {
3911 /* We're skipping the symbol, but if we ever load this
3912 object file we'll want to skip it then too. */
3913 oc->symbols[i] = NULL;
3914
3915 # if 0
3916 debugBelch(
3917 "IGNORING symbol %d\n"
3918 " name `",
3919 i
3920 );