4dbb6291f96a473c6dd71311356fd407a7df631a
[ghc.git] / rts / linker / PEi386.c
1 /* --------------------------------------------------------------------------
2 * PEi386(+) specifics (Win32 targets)
3 * ------------------------------------------------------------------------*/
4
5 /* The information for this linker comes from
6 Microsoft Portable Executable
7 and Common Object File Format Specification
8 revision 8.3 February 2013
9
10 It can be found online at:
11
12 https://msdn.microsoft.com/en-us/windows/hardware/gg463119.aspx
13
14 Things move, so if that fails, try searching for it via
15
16 http://www.google.com/search?q=PE+COFF+specification
17
18 The ultimate reference for the PE format is the Winnt.h
19 header file that comes with the Platform SDKs; as always,
20 implementations will drift wrt their documentation.
21
22 A good background article on the PE format is Matt Pietrek's
23 March 1994 article in Microsoft System Journal (MSJ)
24 (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
25 Win32 Portable Executable File Format." The info in there
26 has recently been updated in a two part article in
27 MSDN magazine, issues Feb and March 2002,
28 "Inside Windows: An In-Depth Look into the Win32 Portable
29 Executable File Format"
30
31 John Levine's book "Linkers and Loaders" contains useful
32 info on PE too.
33
34 The PE specification doesn't specify how to do the actual
35 relocations. For this reason, and because both PE and ELF are
36 based on COFF, the relocations for the PEi386+ code is based on
37 the ELF relocations for the equivalent relocation type.
38
39 The ELF ABI can be found at
40
41 http://www.x86-64.org/documentation/abi.pdf
42
43 The current code is based on version 0.99.6 - October 2013
44
45 The current GHCi linker supports the following four object file formats:
46
47 * PE/PE+ obj - The normal COFF_ANON_OBJ format which is generated by default
48 from Windows compilers
49
50 * PE/PE+ big-obj - The big object format COFF_ANON_BIG_OBJ which extends the
51 number of sections to 2^31 and the number of symbols in each section. This
52 requires a flag but all Windows compilers can produce it.
53
54 * PE Import format - The import library format defined in the PE standard
55 COFF_IMPORT_LIB and commonly has the file extension .lib
56
57 * GNU BFD import format - The import library format defined and used by GNU
58 tools. See note below.
59
60 Note [BFD import library]
61 ~~~~~~~~~~~~~~~~~~~~~~~~~
62
63 On Windows, compilers don't link directly to dynamic libraries.
64 The reason for this is that the exports are not always by symbol, the
65 Import Address Table (IAT) also allows exports by ordinal number
66 or raw addresses.
67
68 So to solve the linking issue, import libraries were added. Import libraries
69 can be seen as a specification of how to link implicitly against a dynamic
70 library. As a side note, import libraries are also the mechanism which
71 can be used to break mutual dependencies between shared libraries and to
72 implement delay loading or override the location of a shared library at
73 startup.
74
75 Linkers use these import libraries to populate the IAT of the resulting
76 binary. At startup the system dynamic loader processes the IAT entries
77 and populates the symbols with the correct addresses.
78
79 Anyway, the Windows PE format specifies a simple and efficient format for
80 this: It's essentially a list, saying these X symbols can be found in DLL y.
81 Commonly, y is a versioned name. e.g. liby_43.dll. This is an artifact of
82 the days when Windows did not support side-by-side assemblies. So the
83 solution was to version the DLLs by renaming them to include explicit
84 version numbers, and to then use the import libraries to point to the right
85 version, having the linker do the leg work.
86
87 The format in the PE specification is commonly named using the suffix .lib.
88 Unfortunately, GCC/binutils decided not to implement this format, and instead
89 have created their own format. This format is either named using the suffix
90 .dll.a or .a depending on the tool that makes them. This format is
91 undocumented. However, the source of dlltool.c in binutils is pretty handy for
92 understanding it.
93
94 To understand the implementation in GHC, this is what is important:
95
96 the .idata section group is used to hold this information. An import library
97 object file will always have these section groups, but the specific
98 configuration depends on what the purpose of the file is. They will also
99 never have a CODE or DATA section, though depending on the tool that creates
100 them they may have the section headers, which will mostly be empty.
101
102 There are two different possible configurations:
103
104 1) Those that define a redirection. In this case the .idata$7 section will
105 contain the name of the actual dll to load. This will be the only content
106 of the section. In the symbol table, the last symbol will be the name
107 used to refer to the dll in the relocation tables. This name will always
108 be in the format "symbol_name_iname", however when referred to, the format
109 "_head_symbol_name" is used.
110
111 We record this symbol early on during GetNames and load the dll and use
112 the module handle as the symbol address.
113
114 2) Symbol definitions. In this case .idata$6 will contain the symbol to load.
115 This is stored in the fixed format of 2-byte ordinals followed by a null
116 terminated string with the symbol name. The ordinal is to be used when
117 the dll does not export symbols by name. (NOTE: We don't currently
118 support this in the runtime linker, but it's easy to add should it be
119 needed). The last symbol in the symbol table of the section will contain
120 the name symbol which contains the dll name to use to resolve the
121 reference.
122
123 As a technicality, this also means that the GCC format will allow us to use
124 one library to store references to multiple dlls. This can't be produced by
125 dlltool, but it can be combined using ar. This is an important feature
126 required for dynamic linking support for GHC. So the runtime linker now
127 supports this too.
128
129 Note [Memory allocation]
130 ~~~~~~~~~~~~~~~~~~~~~~~~
131
132 Previously on Windows we would use VirtualAlloc to allocate enough space for
133 loading the entire object file into memory and keep it there for the duration
134 until the entire object file has been unloaded.
135
136 This has a couple of problems. First off, VirtualAlloc and the other Virtual
137 functions interact directly with the memory manager. Requesting memory from
138 VirtualAlloc will always return whole pages (32k), aligned on a 4k boundary.
139
140 This means for an object file of size N kbytes, we're always wasting 32-N
141 kbytes of memory. Nothing else can access this memory.
142
143 Because of this we're now using HeapAlloc and other heap functions to create
144 a private heap. Another solution would have been to write our own memory
145 manager to keep track of where we have free memory, but the private heap
146 solution is simpler.
147
148 The private heap is created with full rights just as the pages we used to get
149 from VirtualAlloc (e.g. READ/WRITE/EXECUTE). In the end we end up using
150 memory much more efficiently than before. The downside is that heap memory
151 is always Allocated AND Committed, thus when the heap resizes the new size is
152 committed. It becomes harder to see how much we're actually using. This makes
153 it seem like for small programs that we're using more memory than before.
154 Certainly a clean GHCi startup will have a slightly higher commit count.
155
156 The second major change in how we allocate memory is that we no longer need
157 the entire object file. We now allocate the object file using normal malloc
158 and instead read bits from it. All tables are stored in the Object file info
159 table and are discarded as soon as they are no longer needed, e.g. after
160 relocation is finished. Only section data is kept around, but this data is
161 copied into the private heap.
162
163 The major knock on effect of this is that we have more memory to use in the
164 sub 2GB range, which means that Template Haskell should fail a lot less as we
165 will violate the small memory model much less than before.
166
167 Note [Section alignment]
168 ~~~~~~~~~~~~~~~~~~~~~~~~
169
170 The Windows linker aligns memory to its section alignment requirement by
171 aligning it during the copying to the private heap. We also ensure that the
172 trampoline "region" we reserve is 8 bytes aligned.
173 */
174
175 #include "Rts.h"
176
177 #if defined(x86_64_HOST_ARCH)
178 #define USED_IF_x86_64_HOST_ARCH /* Nothing */
179 #else
180 #define USED_IF_x86_64_HOST_ARCH STG_UNUSED
181 #endif
182
183 #if defined(mingw32_HOST_OS)
184
185 #include "RtsUtils.h"
186 #include "RtsSymbolInfo.h"
187 #include "GetEnv.h"
188 #include "linker/PEi386.h"
189 #include "linker/PEi386Types.h"
190 #include "LinkerInternals.h"
191
192 #include <windows.h>
193 #include <shfolder.h> /* SHGetFolderPathW */
194 #include <math.h>
195 #include <wchar.h>
196 #include <stdbool.h>
197 #include <stdint.h>
198
199 #include <inttypes.h>
200 #include <dbghelp.h>
201 #include <stdlib.h>
202 #include <psapi.h>
203
204 #if defined(x86_64_HOST_ARCH)
205 static size_t makeSymbolExtra_PEi386(
206 ObjectCode* oc,
207 uint64_t index,
208 size_t s,
209 SymbolName* symbol);
210 #endif
211
212 static void addDLLHandle(
213 pathchar* dll_name,
214 HINSTANCE instance);
215
216 static bool verifyCOFFHeader(
217 uint16_t machine,
218 IMAGE_FILE_HEADER *hdr,
219 pathchar *fileName);
220
221 static bool checkIfDllLoaded(
222 HINSTANCE instance);
223
224 static uint32_t getSectionAlignment(
225 Section section);
226
227 static uint8_t* getAlignedMemory(
228 uint8_t* value,
229 Section section);
230
231 static size_t getAlignedValue(
232 size_t value,
233 Section section);
234
235 static void addCopySection(
236 ObjectCode *oc,
237 Section *s,
238 SectionKind kind,
239 SectionAlloc alloc,
240 void* start,
241 StgWord size);
242
243 static void releaseOcInfo(
244 ObjectCode* oc);
245
246 /* Add ld symbol for PE image base. */
247 #if defined(__GNUC__)
248 #define __ImageBase __MINGW_LSYMBOL(_image_base__)
249 #endif
250
251 /* Get the base of the module. */
252 /* This symbol is defined by ld. */
253 extern IMAGE_DOS_HEADER __ImageBase;
254 #define __image_base (void*)((HINSTANCE)&__ImageBase)
255
256 const Alignments pe_alignments[] = {
257 { IMAGE_SCN_ALIGN_1BYTES , 1 },
258 { IMAGE_SCN_ALIGN_2BYTES , 2 },
259 { IMAGE_SCN_ALIGN_4BYTES , 4 },
260 { IMAGE_SCN_ALIGN_8BYTES , 8 },
261 { IMAGE_SCN_ALIGN_16BYTES , 16 },
262 { IMAGE_SCN_ALIGN_32BYTES , 32 },
263 { IMAGE_SCN_ALIGN_64BYTES , 64 },
264 { IMAGE_SCN_ALIGN_128BYTES , 128 },
265 { IMAGE_SCN_ALIGN_256BYTES , 256 },
266 { IMAGE_SCN_ALIGN_512BYTES , 512 },
267 { IMAGE_SCN_ALIGN_1024BYTES, 1024},
268 { IMAGE_SCN_ALIGN_2048BYTES, 2048},
269 { IMAGE_SCN_ALIGN_4096BYTES, 4096},
270 { IMAGE_SCN_ALIGN_8192BYTES, 8192},
271 };
272
273 const int pe_alignments_cnt = sizeof (pe_alignments) / sizeof (Alignments);
274 const int default_alignment = 8;
275 const int initHeapSizeMB = 15;
276 static HANDLE code_heap = NULL;
277
/* Heap information values that mingw-w64's headers do not provide.
   HEAP_LFH selects the Low Fragmentation Heap, which tries to keep the heap
   from growing when freed space can simply be reclaimed. */
#define HEAP_LFH 2
#define HeapOptimizeResources 3
283
284 void initLinker_PEi386()
285 {
286 if (!ghciInsertSymbolTable(WSTR("(GHCi/Ld special symbols)"),
287 symhash, "__image_base__", __image_base, HS_BOOL_TRUE, NULL)) {
288 barf("ghciInsertSymbolTable failed");
289 }
290
291 #if defined(mingw32_HOST_OS)
292 addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL));
293 /*
294 * Most of these are included by base, but GCC always includes them
295 * So lets make sure we always have them too.
296 *
297 * In most cases they would have been loaded by the
298 * addDLLHandle above.
299 */
300 addDLL(WSTR("msvcrt"));
301 addDLL(WSTR("kernel32"));
302 addDLL(WSTR("advapi32"));
303 addDLL(WSTR("shell32"));
304 addDLL(WSTR("user32"));
305 #endif
306
307 /* See Note [Memory allocation]. */
308 /* Create a private heap which we will use to store all code and data. */
309 SYSTEM_INFO sSysInfo;
310 GetSystemInfo(&sSysInfo);
311 code_heap = HeapCreate (HEAP_CREATE_ENABLE_EXECUTE,
312 initHeapSizeMB * sSysInfo.dwPageSize , 0);
313 if (!code_heap)
314 barf ("Could not create private heap during initialization. Aborting.");
315
316 /* Set some flags for the new code heap. */
317 HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption, NULL, 0);
318 unsigned long HeapInformation = HEAP_LFH;
319 HeapSetInformation(code_heap, HeapEnableTerminationOnCorruption,
320 &HeapInformation, sizeof(HeapInformation));
321 HeapSetInformation(code_heap, HeapOptimizeResources, NULL, 0);
322 }
323
324 void exitLinker_PEi386()
325 {
326 /* See Note [Memory allocation]. */
327 if (code_heap) {
328 HeapDestroy (code_heap);
329 code_heap = NULL;
330 }
331 }
332
333 /* A list thereof. */
334 static OpenedDLL* opened_dlls = NULL;
335
336 /* A list thereof. */
337 static IndirectAddr* indirects = NULL;
338
339 /* Adds a DLL instance to the list of DLLs in which to search for symbols. */
340 static void addDLLHandle(pathchar* dll_name, HINSTANCE instance) {
341
342 /* At this point, we actually know what was loaded.
343 So bail out if it's already been loaded. */
344 if (checkIfDllLoaded(instance))
345 {
346 return;
347 }
348
349 OpenedDLL* o_dll;
350 o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLLHandle" );
351 o_dll->name = dll_name ? pathdup(dll_name) : NULL;
352 o_dll->instance = instance;
353 o_dll->next = opened_dlls;
354 opened_dlls = o_dll;
355
356 /* Now discover the dependencies of dll_name that were
357 just loaded in our process space. The reason is we have access to them
358 without the user having to explicitly specify them. */
359 PIMAGE_NT_HEADERS header =
360 (PIMAGE_NT_HEADERS)((BYTE *)instance +
361 ((PIMAGE_DOS_HEADER)instance)->e_lfanew);
362 PIMAGE_IMPORT_DESCRIPTOR imports =
363 (PIMAGE_IMPORT_DESCRIPTOR)((BYTE *)instance + header->
364 OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress);
365
366 bool importTableMissing =
367 header->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size == 0;
368
369 if (importTableMissing) {
370 return;
371 }
372
373 /* Ignore these compatibility shims. */
374 const pathchar* ms_dll = WSTR("api-ms-win-");
375 const int len = wcslen(ms_dll);
376
377 do {
378 pathchar* module = mkPath((char*)(BYTE *)instance + imports->Name);
379 HINSTANCE module_instance = GetModuleHandleW(module);
380 if (0 != wcsncmp(module, ms_dll, len)
381 && module_instance
382 && !checkIfDllLoaded(module_instance))
383 {
384 IF_DEBUG(linker, debugBelch("Loading dependency %" PATH_FMT " -> %" PATH_FMT ".\n", dll_name, module));
385 /* Now recursively load dependencies too. */
386 addDLLHandle(module, module_instance);
387 }
388 stgFree(module);
389 imports++;
390 } while (imports->Name);
391 }
392
393 static OpenedDLL* findLoadedDll(HINSTANCE instance)
394 {
395 for (OpenedDLL* o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
396 if (o_dll->instance == instance)
397 {
398 return o_dll;
399 }
400 }
401
402 return NULL;
403 }
404
405 static bool checkIfDllLoaded(HINSTANCE instance)
406 {
407 return findLoadedDll (instance) != NULL;
408 }
409
410 void freePreloadObjectFile_PEi386(ObjectCode *oc)
411 {
412 if (oc->image) {
413 stgFree (oc->image);
414 oc->image = NULL;
415 }
416
417 if (oc->info->image) {
418 HeapFree(code_heap, 0, oc->info->image);
419 oc->info->image = NULL;
420 }
421
422 if (oc->info) {
423 if (oc->info->ch_info)
424 stgFree (oc->info->ch_info);
425 stgFree (oc->info);
426 oc->info = NULL;
427 }
428
429 IndirectAddr *ia, *ia_next;
430 ia = indirects;
431 while (ia != NULL) {
432 ia_next = ia->next;
433 stgFree(ia);
434 ia = ia_next;
435 }
436 indirects = NULL;
437 }
438
439 static void releaseOcInfo(ObjectCode* oc) {
440 if (!oc) return;
441
442 if (oc->info) {
443 stgFree (oc->info->ch_info);
444 stgFree (oc->info->str_tab);
445 stgFree (oc->info->symbols);
446 stgFree (oc->info);
447 oc->info = NULL;
448 }
449 for (int i = 0; i < oc->n_sections; i++){
450 Section section = oc->sections[i];
451 if (section.info) {
452 stgFree (section.info->name);
453 if (section.info->relocs) {
454 stgFree (section.info->relocs);
455 section.info->relocs = NULL;
456 }
457 stgFree (section.info);
458 section.info = NULL;
459 }
460 }
461 }
462
463 /*************
464 * This function determines what kind of COFF image we are dealing with.
465 * This is needed in order to correctly load and verify objects and their
466 * sections.
467 *************/
468 COFF_OBJ_TYPE getObjectType ( char* image, pathchar* fileName )
469 {
470 /* {D1BAA1C7-BAEE-4ba9-AF20-FAF66AA4DCB8} */
471 static const char header_bigobj_classid[16] =
472 {
473 0xC7, 0xA1, 0xBA, 0xD1,
474 0xEE, 0xBA,
475 0xa9, 0x4b,
476 0xAF, 0x20,
477 0xFA, 0xF6, 0x6A, 0xA4, 0xDC, 0xB8
478 };
479
480 WORD machine;
481 COFF_OBJ_TYPE ret = COFF_UNKNOWN;
482 /* First check if we have an ANON_OBJECT_HEADER signature. */
483 ANON_OBJECT_HEADER* anon = (ANON_OBJECT_HEADER*)image;
484 if ( anon->Sig1 == IMAGE_FILE_MACHINE_UNKNOWN
485 && anon->Sig2 == IMPORT_OBJECT_HDR_SIG2)
486 {
487 machine = anon->Machine;
488 if (verifyCOFFHeader (machine, NULL, fileName))
489 {
490 switch (anon->Version)
491 {
492 case 0:
493 ret = COFF_IMPORT_LIB;
494 break;
495 case 1:
496 ret = COFF_ANON_OBJ;
497 break;
498 case 2:
499 if (memcmp (&anon->ClassID, header_bigobj_classid, 16) == 0)
500 ret = COFF_ANON_BIG_OBJ;
501 break;
502 default:
503 break;
504 }
505 }
506 } else {
507 /* If it's not an ANON_OBJECT then try an image file. */
508 IMAGE_FILE_HEADER* img = (IMAGE_FILE_HEADER*)image;
509 machine = img->Machine;
510 if (verifyCOFFHeader (machine, img, fileName))
511 ret = COFF_IMAGE;
512 }
513 return ret;
514 }
515
516 /*************
517 * Retrieve common header information
518 *************/
519 COFF_HEADER_INFO* getHeaderInfo ( ObjectCode* oc )
520 {
521 COFF_OBJ_TYPE coff_type = getObjectType (oc->image, oc->fileName);
522
523 COFF_HEADER_INFO* info
524 = stgMallocBytes (sizeof(COFF_HEADER_INFO), "getHeaderInfo");
525 memset (info, 0, sizeof(COFF_HEADER_INFO));
526 info->type = coff_type;
527 switch (coff_type)
528 {
529 case COFF_IMAGE:
530 {
531 IMAGE_FILE_HEADER* hdr = (IMAGE_FILE_HEADER*)oc->image;
532 info->sizeOfHeader = sizeof(IMAGE_FILE_HEADER);
533 info->sizeOfOptionalHeader = hdr->SizeOfOptionalHeader;
534 info->pointerToSymbolTable = hdr->PointerToSymbolTable;
535 info->numberOfSymbols = hdr->NumberOfSymbols;
536 info->numberOfSections = hdr->NumberOfSections;
537 }
538 break;
539 case COFF_ANON_BIG_OBJ:
540 {
541 ANON_OBJECT_HEADER_BIGOBJ* hdr = (ANON_OBJECT_HEADER_BIGOBJ*)oc->image;
542 info->sizeOfHeader = sizeof(ANON_OBJECT_HEADER_BIGOBJ);
543 info->sizeOfOptionalHeader = 0;
544 info->pointerToSymbolTable = hdr->PointerToSymbolTable;
545 info->numberOfSymbols = hdr->NumberOfSymbols;
546 info->numberOfSections = hdr->NumberOfSections;
547 }
548 break;
549 default:
550 {
551 stgFree (info);
552 info = NULL;
553 errorBelch ("Unknown COFF %d type in getHeaderInfo.", coff_type);
554 }
555 break;
556 }
557
558 return info;
559 }
560
561 /*************
562 * Symbol utility functions
563 *************/
564 __attribute__ ((always_inline)) inline
565 size_t getSymbolSize ( COFF_HEADER_INFO *info )
566 {
567 ASSERT(info);
568 switch (info->type)
569 {
570 case COFF_ANON_BIG_OBJ:
571 return sizeof_COFF_symbol_ex;
572 default:
573 return sizeof_COFF_symbol_og;
574 }
575 }
576
577 __attribute__ ((always_inline)) inline
578 int32_t getSymSectionNumber ( COFF_HEADER_INFO *info, COFF_symbol* sym )
579 {
580 ASSERT(info);
581 ASSERT(sym);
582 switch (info->type)
583 {
584 case COFF_ANON_BIG_OBJ:
585 return sym->ex.SectionNumber;
586 default:
587 return sym->og.SectionNumber;
588 }
589 }
590
591 __attribute__ ((always_inline)) inline
592 uint32_t getSymValue ( COFF_HEADER_INFO *info, COFF_symbol* sym )
593 {
594 ASSERT(info);
595 ASSERT(sym);
596 switch (info->type)
597 {
598 case COFF_ANON_BIG_OBJ:
599 return sym->ex.Value;
600 default:
601 return sym->og.Value;
602 }
603 }
604
605 __attribute__ ((always_inline)) inline
606 uint8_t getSymStorageClass ( COFF_HEADER_INFO *info, COFF_symbol* sym )
607 {
608 ASSERT(info);
609 ASSERT(sym);
610 switch (info->type)
611 {
612 case COFF_ANON_BIG_OBJ:
613 return sym->ex.StorageClass;
614 default:
615 return sym->og.StorageClass;
616 }
617 }
618
619 __attribute__ ((always_inline)) inline
620 uint8_t getSymNumberOfAuxSymbols ( COFF_HEADER_INFO *info, COFF_symbol* sym )
621 {
622 ASSERT(info);
623 ASSERT(sym);
624 switch (info->type)
625 {
626 case COFF_ANON_BIG_OBJ:
627 return sym->ex.NumberOfAuxSymbols;
628 default:
629 return sym->og.NumberOfAuxSymbols;
630 }
631 }
632
633 __attribute__ ((always_inline)) inline
634 uint16_t getSymType ( COFF_HEADER_INFO *info, COFF_symbol* sym )
635 {
636 ASSERT(info);
637 ASSERT(sym);
638 switch (info->type)
639 {
640 case COFF_ANON_BIG_OBJ:
641 return sym->ex.Type;
642 default:
643 return sym->og.Type;
644 }
645 }
646
647 __attribute__ ((always_inline)) inline
648 uint8_t* getSymShortName ( COFF_HEADER_INFO *info, COFF_symbol* sym )
649 {
650 ASSERT(info);
651 ASSERT(sym);
652 switch (info->type)
653 {
654 case COFF_ANON_BIG_OBJ:
655 return sym->ex.N.ShortName;
656 default:
657 return sym->og.N.ShortName;
658 }
659 }
660
661 const char *
662 addDLL_PEi386( pathchar *dll_name, HINSTANCE *loaded )
663 {
664 /* ------------------- Win32 DLL loader ------------------- */
665
666 pathchar* buf;
667 HINSTANCE instance;
668
669 IF_DEBUG(linker, debugBelch("addDLL; dll_name = `%" PATH_FMT "'\n", dll_name));
670
671 /* The file name has no suffix (yet) so that we can try
672 both foo.dll and foo.drv
673
674 The documentation for LoadLibrary says:
675 If no file name extension is specified in the lpFileName
676 parameter, the default library extension .dll is
677 appended. However, the file name string can include a trailing
678 point character (.) to indicate that the module name has no
679 extension. */
680
681 size_t bufsize = pathlen(dll_name) + 10;
682 buf = stgMallocBytes(bufsize * sizeof(wchar_t), "addDLL");
683
684 /* These are ordered by probability of success and order we'd like them. */
685 const wchar_t *formats[] = { L"%ls.DLL", L"%ls.DRV", L"lib%ls.DLL", L"%ls" };
686 const DWORD flags[] = { LOAD_LIBRARY_SEARCH_USER_DIRS | LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, 0 };
687
688 int cFormat, cFlag;
689 int flags_start = 1; /* Assume we don't support the new API. */
690
691 /* Detect if newer API are available, if not, skip the first flags entry. */
692 if (GetProcAddress((HMODULE)LoadLibraryW(L"Kernel32.DLL"), "AddDllDirectory")) {
693 flags_start = 0;
694 }
695
696 /* Iterate through the possible flags and formats. */
697 for (cFlag = flags_start; cFlag < 2; cFlag++)
698 {
699 for (cFormat = 0; cFormat < 4; cFormat++)
700 {
701 snwprintf(buf, bufsize, formats[cFormat], dll_name);
702 instance = LoadLibraryExW(buf, NULL, flags[cFlag]);
703 if (instance == NULL) {
704 if (GetLastError() != ERROR_MOD_NOT_FOUND)
705 {
706 goto error;
707 }
708 }
709 else
710 {
711 break; /* We're done. DLL has been loaded. */
712 }
713 }
714 }
715
716 /* Check if we managed to load the DLL. */
717 if (instance == NULL) {
718 goto error;
719 }
720
721 addDLLHandle(buf, instance);
722 if (loaded) {
723 *loaded = instance;
724 }
725 stgFree(buf);
726
727 return NULL;
728
729 error:
730 stgFree(buf);
731
732 char* errormsg = malloc(sizeof(char) * 80);
733 snprintf(errormsg, 80, "addDLL: %" PATH_FMT " or dependencies not loaded. (Win32 error %lu)", dll_name, GetLastError());
734 /* LoadLibrary failed; return a ptr to the error msg. */
735 return errormsg;
736 }
737
738 pathchar* findSystemLibrary_PEi386( pathchar* dll_name )
739 {
740 const unsigned int init_buf_size = 1024;
741 unsigned int bufsize = init_buf_size;
742 wchar_t* result = malloc(sizeof(wchar_t) * bufsize);
743 DWORD wResult = SearchPathW(NULL, dll_name, NULL, bufsize, result, NULL);
744
745 if (wResult > bufsize) {
746 result = realloc(result, sizeof(wchar_t) * wResult);
747 wResult = SearchPathW(NULL, dll_name, NULL, wResult, result, NULL);
748 }
749
750
751 if (!wResult) {
752 free(result);
753 return NULL;
754 }
755
756 return result;
757 }
758
759 HsPtr addLibrarySearchPath_PEi386(pathchar* dll_path)
760 {
761 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
762 LPAddDLLDirectory AddDllDirectory = (LPAddDLLDirectory)GetProcAddress((HMODULE)hDLL, "AddDllDirectory");
763
764 HsPtr result = NULL;
765
766 const unsigned int init_buf_size = 4096;
767 int bufsize = init_buf_size;
768
769 // Make sure the path is an absolute path
770 WCHAR* abs_path = malloc(sizeof(WCHAR) * init_buf_size);
771 DWORD wResult = GetFullPathNameW(dll_path, bufsize, abs_path, NULL);
772 if (!wResult){
773 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
774 }
775 else if (wResult > init_buf_size) {
776 abs_path = realloc(abs_path, sizeof(WCHAR) * wResult);
777 if (!GetFullPathNameW(dll_path, bufsize, abs_path, NULL)) {
778 sysErrorBelch("addLibrarySearchPath[GetFullPathNameW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
779 }
780 }
781
782 if (AddDllDirectory) {
783 result = AddDllDirectory(abs_path);
784 }
785 else
786 {
787 warnMissingKBLibraryPaths();
788 WCHAR* str = malloc(sizeof(WCHAR) * init_buf_size);
789 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
790
791 if (wResult > init_buf_size) {
792 str = realloc(str, sizeof(WCHAR) * wResult);
793 bufsize = wResult;
794 wResult = GetEnvironmentVariableW(L"PATH", str, bufsize);
795 if (!wResult) {
796 sysErrorBelch("addLibrarySearchPath[GetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", dll_path, GetLastError());
797 }
798 }
799
800 bufsize = wResult + 2 + pathlen(abs_path);
801 wchar_t* newPath = malloc(sizeof(wchar_t) * bufsize);
802
803 wcscpy(newPath, abs_path);
804 wcscat(newPath, L";");
805 wcscat(newPath, str);
806 if (!SetEnvironmentVariableW(L"PATH", (LPCWSTR)newPath)) {
807 sysErrorBelch("addLibrarySearchPath[SetEnvironmentVariableW]: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
808 }
809
810 free(newPath);
811 free(abs_path);
812
813 return str;
814 }
815
816 if (!result) {
817 sysErrorBelch("addLibrarySearchPath: %" PATH_FMT " (Win32 error %lu)", abs_path, GetLastError());
818 free(abs_path);
819 return NULL;
820 }
821
822 free(abs_path);
823 return result;
824 }
825
826 bool removeLibrarySearchPath_PEi386(HsPtr dll_path_index)
827 {
828 bool result = false;
829
830 if (dll_path_index != NULL) {
831 HINSTANCE hDLL = LoadLibraryW(L"Kernel32.DLL");
832 LPRemoveDLLDirectory RemoveDllDirectory = (LPRemoveDLLDirectory)GetProcAddress((HMODULE)hDLL, "RemoveDllDirectory");
833
834 if (RemoveDllDirectory) {
835 result = RemoveDllDirectory(dll_path_index);
836 // dll_path_index is now invalid, do not use it after this point.
837 }
838 else
839 {
840 warnMissingKBLibraryPaths();
841 result = SetEnvironmentVariableW(L"PATH", (LPCWSTR)dll_path_index);
842 free(dll_path_index);
843 }
844
845 if (!result) {
846 sysErrorBelch("removeLibrarySearchPath: (Win32 error %lu)", GetLastError());
847 return false;
848 }
849 }
850
851 return !result;
852 }
853
854
855 /* We assume file pointer is right at the
856 beginning of COFF object.
857 */
858 static uint32_t getSectionAlignment(
859 Section section) {
860 uint32_t c = section.info->props;
861 for(int i = 0; i < pe_alignments_cnt; i++)
862 {
863 if ((c & 0xF00000) == pe_alignments[i].mask)
864 return pe_alignments[i].value;
865 }
866
867 /* No alignment flag found, assume 8-byte aligned. */
868 return default_alignment;
869 }
870
871 /* ----------------------
872 * return a memory location aligned to the section requirements
873 */
874 static uint8_t* getAlignedMemory(
875 uint8_t* value, Section section) {
876 uint32_t alignment = getSectionAlignment(section);
877 uintptr_t mask = (uintptr_t)alignment - 1;
878 return (uint8_t*)(((uintptr_t)value + mask) & ~mask);
879 }
880
881 /* ----------------------
882 * return a value aligned to the section requirements
883 */
884 static size_t getAlignedValue(
885 size_t value, Section section) {
886 uint32_t alignment = getSectionAlignment(section);
887 uint32_t mask = (uint32_t)alignment - 1;
888 return (size_t)((value + mask) & ~mask);
889 }
890
891 /* -----------------------
892 * This loads import libraries following Microsoft's official standard in the PE
893 * documentation. This is a smaller more efficient format which is just a list
894 * of symbol name => dll.
895 *
896 * This function must fail gracefully and if it does, the filestream needs to
897 * be reset to what it was when the function was called.
898 */
899 bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f )
900 {
901 char* image;
902 static bool load_dll_warn = false;
903
904 if (load_dll_warn) { return 0; }
905
906 /* Based on Import Library specification. PE Spec section 7.1 */
907
908 COFF_import_header hdr;
909 size_t n;
910
911 n = fread(&hdr, 1, sizeof_COFF_import_Header, f);
912 if (n != sizeof_COFF_import_Header) {
913 errorBelch("loadImportLibrary: error whilst reading `%s' header "
914 "in `%" PATH_FMT "'\n",
915 member_name, arch_name);
916 fseek(f, -(long int)sizeof_COFF_import_Header, SEEK_CUR);
917 return false;
918 }
919
920 if ( hdr.Sig1 != IMAGE_FILE_MACHINE_UNKNOWN
921 || hdr.Sig2 != IMPORT_OBJECT_HDR_SIG2
922 || getObjectType ((char*)&hdr, arch_name) != COFF_IMPORT_LIB) {
923 fseek(f, -(long int)sizeof_COFF_import_Header, SEEK_CUR);
924 IF_DEBUG(linker, debugBelch("loadArchive: Object `%s` is not an import lib. Skipping...\n", member_name));
925 return false;
926 }
927
928 IF_DEBUG(linker, debugBelch("loadArchive: reading %lu bytes at %ld\n", hdr.SizeOfData, ftell(f)));
929
930 image = stgMallocBytes(hdr.SizeOfData, "checkAndLoadImportLibrary(image)");
931 n = fread(image, 1, hdr.SizeOfData, f);
932 if (n != hdr.SizeOfData) {
933 errorBelch("loadArchive: error whilst reading `%s' header in `%" PATH_FMT "'. Did not read enough bytes.\n",
934 member_name, arch_name);
935 fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
936 return false;
937 }
938
939 char* symbol = strtok(image, "\0");
940 int symLen = strlen(symbol) + 1;
941 int nameLen = n - symLen;
942 char* dllName = stgMallocBytes(sizeof(char) * nameLen,
943 "checkAndLoadImportLibrary(dllname)");
944 dllName = strncpy(dllName, image + symLen, nameLen);
945 pathchar* dll = stgMallocBytes(sizeof(wchar_t) * nameLen,
946 "checkAndLoadImportLibrary(dll)");
947 mbstowcs(dll, dllName, nameLen);
948 stgFree(dllName);
949
950 IF_DEBUG(linker, debugBelch("loadArchive: read symbol %s from lib `%" PATH_FMT "'\n", symbol, dll));
951 const char* result = addDLL(dll);
952
953 stgFree(image);
954
955 if (result != NULL) {
956 errorBelch("Could not load `%" PATH_FMT "'. Reason: %s\n", dll, result);
957 load_dll_warn = true;
958
959 stgFree(dll);
960 fseek(f, -(n + sizeof_COFF_import_Header), SEEK_CUR);
961 return false;
962 }
963
964 stgFree(dll);
965 return true;
966 }
967
968 static void
969 printName ( uint8_t* name, ObjectCode* oc )
970 {
971 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
972 uint32_t strtab_offset = * (uint32_t*)(name + 4);
973 debugBelch("%s",
974 oc->info->str_tab + strtab_offset - PEi386_STRTAB_OFFSET);
975 } else {
976 int i;
977 for (i = 0; i < 8; i++) {
978 if (name[i] == 0) break;
979 debugBelch("%c", name[i] );
980 }
981 }
982 }
983
984
985 static void
986 copyName ( uint8_t* name, ObjectCode* oc, uint8_t* dst, int dstSize )
987 {
988 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
989 uint32_t strtab_offset = * (uint32_t*)(name + 4);
990 strncpy ((char*)dst,
991 oc->info->str_tab + strtab_offset - PEi386_STRTAB_OFFSET,
992 dstSize);
993 dst[dstSize-1] = 0;
994 } else {
995 int i = 0;
996 while (1) {
997 if (i >= 8) break;
998 if (name[i] == 0) break;
999 dst[i] = name[i];
1000 i++;
1001 }
1002 dst[i] = 0;
1003 }
1004 }
1005
1006
1007 char*
1008 get_sym_name ( uint8_t* name, ObjectCode* oc )
1009 {
1010 char* newstr;
1011 /* If the string is longer than 8 bytes, look in the
1012 string table for it -- this will be correctly zero terminated.
1013 */
1014 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
1015 uint32_t strtab_offset = * (uint32_t*)(name + 4);
1016 return oc->info->str_tab + strtab_offset - PEi386_STRTAB_OFFSET;
1017 }
1018 /* Otherwise, if shorter than 8 bytes, return the original,
1019 which by defn is correctly terminated.
1020 */
1021 if (name[7]==0) return (char*)name;
1022 /* The annoying case: 8 bytes. Copy into a temporary
1023 (XXX which is never freed ...)
1024 */
1025 newstr = stgMallocBytes(9, "get_sym_name");
1026 ASSERT(newstr);
1027 strncpy (newstr, (char*)name,8);
1028 newstr[8] = 0;
1029 return newstr;
1030 }
1031
1032 /* Getting the name of a section is mildly tricky, so we make a
1033 function for it. Sadly, in one case we have to copy the string
1034 (when it is exactly 8 bytes long there's no trailing '\0'), so for
1035 consistency we *always* copy the string; the caller must free it
1036 */
1037 char *
1038 get_name_string (uint8_t* name, ObjectCode* oc)
1039 {
1040 char *newstr;
1041
1042 if (name[0]=='/') {
1043 int strtab_offset = strtol((char*)name+1,NULL,10)-PEi386_STRTAB_OFFSET;
1044 char* str = oc->info->str_tab + strtab_offset;
1045 int len = strlen(str);
1046
1047 newstr = stgMallocBytes(len + 1, "cstring_from_section_symbol_name");
1048 strncpy(newstr, str, len + 1);
1049 return newstr;
1050 }
1051 else
1052 {
1053 newstr = stgMallocBytes(9, "cstring_from_section_symbol_name");
1054 ASSERT(newstr);
1055 strncpy(newstr,(char*)name,8);
1056 newstr[8] = 0;
1057 return newstr;
1058 }
1059 }
1060
1061 /* See Note [mingw-w64 name decoration scheme] */
1062 #if !defined(x86_64_HOST_ARCH)
1063 static void
1064 zapTrailingAtSign ( SymbolName* sym )
1065 {
1066 char* lst = strrchr (sym, '@');
1067 if (lst) lst[0]='\0';
1068 }
1069 #endif
1070
1071 SymbolAddr*
1072 lookupSymbolInDLLs ( const SymbolName* lbl )
1073 {
1074 OpenedDLL* o_dll;
1075 SymbolAddr* sym;
1076
1077 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
1078 /* debugBelch("look in %ls for %s\n", o_dll->name, lbl); */
1079
1080 sym = GetProcAddress(o_dll->instance, lbl+STRIP_LEADING_UNDERSCORE);
1081 if (sym != NULL) {
1082 /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
1083 return sym;
1084 }
1085
1086 /* Ticket #2283.
1087 Long description: http://support.microsoft.com/kb/132044
1088 tl;dr:
1089 If C/C++ compiler sees __declspec(dllimport) ... foo ...
1090 it generates call *__imp_foo, and __imp_foo here has exactly
1091 the same semantics as in __imp_foo = GetProcAddress(..., "foo")
1092 */
1093 if (sym == NULL && strncmp (lbl, "__imp_", 6) == 0) {
1094 sym = GetProcAddress(o_dll->instance,
1095 lbl + 6 + STRIP_LEADING_UNDERSCORE);
1096 if (sym != NULL) {
1097 IndirectAddr* ret;
1098 ret = stgMallocBytes( sizeof(IndirectAddr), "lookupSymbolInDLLs" );
1099 ret->addr = sym;
1100 ret->next = indirects;
1101 indirects = ret;
1102 IF_DEBUG(linker,
1103 debugBelch("warning: %s from %S is linked instead of %s\n",
1104 lbl+6+STRIP_LEADING_UNDERSCORE, o_dll->name, lbl));
1105 return (void*) & ret->addr;
1106 }
1107 }
1108
1109 sym = GetProcAddress(o_dll->instance, lbl);
1110 if (sym != NULL) {
1111 /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
1112 return sym;
1113 }
1114 }
1115 return NULL;
1116 }
1117
1118 static bool
1119 verifyCOFFHeader ( uint16_t machine, IMAGE_FILE_HEADER *hdr,
1120 pathchar *fileName )
1121 {
1122 #if defined(i386_HOST_ARCH)
1123 if (machine != IMAGE_FILE_MACHINE_I386) {
1124 errorBelch("%" PATH_FMT ": Not a x86 PE file.", fileName);
1125 return false;
1126 }
1127 #elif defined(x86_64_HOST_ARCH)
1128 if (machine != IMAGE_FILE_MACHINE_AMD64) {
1129 errorBelch("%" PATH_FMT ": Not a x86_64 PE+ file.", fileName);
1130 return false;
1131 }
1132 #else
1133 errorBelch("PE/PE+ not supported on this arch.");
1134 #endif
1135
1136 if (!hdr)
1137 return true;
1138
1139 if (hdr->SizeOfOptionalHeader != 0) {
1140 errorBelch("%" PATH_FMT ": PE/PE+ with nonempty optional header",
1141 fileName);
1142 return 0;
1143 }
1144 if ( (hdr->Characteristics & IMAGE_FILE_EXECUTABLE_IMAGE) ||
1145 (hdr->Characteristics & IMAGE_FILE_DLL ) ||
1146 (hdr->Characteristics & IMAGE_FILE_SYSTEM ) ) {
1147 errorBelch("%" PATH_FMT ": Not a PE/PE+ object file", fileName);
1148 return false;
1149 }
1150 if ( (hdr->Characteristics & IMAGE_FILE_BYTES_REVERSED_HI)) {
1151 errorBelch("%" PATH_FMT ": Invalid PE/PE+ word size or endianness: %d",
1152 fileName,
1153 (int)(hdr->Characteristics));
1154 return false;
1155 }
1156 return true;
1157 }
1158
1159 bool
1160 ocVerifyImage_PEi386 ( ObjectCode* oc )
1161 {
1162 COFF_HEADER_INFO *info = getHeaderInfo (oc);
1163
1164 uint32_t i, noRelocs;
1165 COFF_section* sectab;
1166 COFF_symbol* symtab;
1167 uint8_t* strtab;
1168
1169 sectab = (COFF_section*) (
1170 ((uint8_t*)(oc->image))
1171 + info->sizeOfHeader + info->sizeOfOptionalHeader
1172 );
1173 symtab = (COFF_symbol*) (
1174 ((uint8_t*)(oc->image))
1175 + info->pointerToSymbolTable
1176 );
1177 strtab = ((uint8_t*)symtab)
1178 + info->numberOfSymbols * getSymbolSize (info);
1179
1180 /* .BSS Section is initialized in ocGetNames_PEi386
1181 but we need the Sections array initialized here already. */
1182 Section *sections;
1183 sections = (Section*)stgCallocBytes(
1184 sizeof(Section),
1185 info->numberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */
1186 "ocVerifyImage_PEi386(sections)");
1187 oc->sections = sections;
1188 oc->n_sections = info->numberOfSections + 1;
1189 oc->info = stgCallocBytes (sizeof(struct ObjectCodeFormatInfo), 1,
1190 "ocVerifyImage_PEi386(info)");
1191 oc->info->secBytesTotal = 0;
1192 oc->info->secBytesUsed = 0;
1193 oc->info->init = NULL;
1194 oc->info->finit = NULL;
1195 oc->info->ch_info = info;
1196
1197 /* Copy the tables over from object-file. Copying these allows us to
1198 simplify the indexing and to release the object file immediately after
1199 this step as all information we need would be in available. After
1200 loading we can also release everything in the info structure as it won't
1201 be needed again further freeing up memory.
1202 COFF_symbol is a union type, so we have to "adjust" the array to be able
1203 to access it using normal subscript notation. This eliminates the complex
1204 indexing later on. */
1205 uint32_t s_symbols = info->numberOfSymbols * sizeof(COFF_symbol);
1206 uint32_t sym_size = getSymbolSize (info);
1207 oc->info->symbols
1208 = stgMallocBytes (s_symbols, "ocVerifyImage_PEi386(oc->info->symbols)");
1209 for (i = 0; i < info->numberOfSymbols; i++)
1210 memcpy (oc->info->symbols+i, (char*)symtab + sym_size * i, sym_size);
1211
1212 uint32_t n_strtab = (*(uint32_t*)strtab) - PEi386_STRTAB_OFFSET;
1213 oc->info->str_tab
1214 = stgMallocBytes (n_strtab, "ocVerifyImage_PEi386(oc->info->str_tab)");
1215 memcpy (oc->info->str_tab, strtab + PEi386_STRTAB_OFFSET, n_strtab);
1216
1217 /* Initialize the Sections */
1218 for (i = 0; i < info->numberOfSections; i++) {
1219 uint32_t relocs_offset;
1220 COFF_section* sectab_i
1221 = (COFF_section*)
1222 myindex(sizeof_COFF_section, sectab, i);
1223
1224 Section *section = &sections[i];
1225 /* Calculate the start of the section data. */
1226 section->start = oc->image + sectab_i->PointerToRawData;
1227 section->size = sectab_i->SizeOfRawData;
1228 section->info = stgCallocBytes (sizeof(struct SectionFormatInfo), 1,
1229 "ocVerifyImage_PEi386(section.info)");
1230 section->info->name = get_name_string (sectab_i->Name, oc);
1231 section->info->alignment = getSectionAlignment (*section);
1232 section->info->props = sectab_i->Characteristics;
1233 section->info->virtualSize = sectab_i->Misc.VirtualSize;
1234 section->info->virtualAddr = sectab_i->VirtualAddress;
1235
1236 COFF_reloc* reltab
1237 = (COFF_reloc*) (oc->image + sectab_i->PointerToRelocations);
1238
1239 if (section->info->props & IMAGE_SCN_LNK_NRELOC_OVFL ) {
1240 /* If the relocation field (a short) has overflowed, the
1241 * real count can be found in the first reloc entry.
1242 *
1243 * See Section 4.1 (last para) of the PE spec (rev6.0).
1244 */
1245 COFF_reloc* rel = (COFF_reloc*)
1246 myindex ( sizeof_COFF_reloc, reltab, 0 );
1247 noRelocs = rel->VirtualAddress;
1248 relocs_offset = 1;
1249 } else {
1250 noRelocs = sectab_i->NumberOfRelocations;
1251 relocs_offset = 0;
1252 }
1253
1254 section->info->noRelocs = noRelocs;
1255 section->info->relocs = NULL;
1256 if (noRelocs > 0) {
1257 section->info->relocs
1258 = stgMallocBytes (noRelocs * sizeof (COFF_reloc),
1259 "ocVerifyImage_PEi386(section->info->relocs)");
1260 memcpy (section->info->relocs, reltab + relocs_offset,
1261 noRelocs * sizeof (COFF_reloc));
1262 }
1263
1264 oc->info->secBytesTotal += getAlignedValue (section->size, *section);
1265 }
1266
1267 /* Initialize the last section's info field which contains the .bss
1268 section, it doesn't need an info so set it to NULL. */
1269 sections[info->numberOfSections].info = NULL;
1270
1271 /* Calculate space for trampolines nearby.
1272 We get back 8-byte aligned memory (is that guaranteed?), but
1273 the offsets to the sections within the file are all 4 mod 8
1274 (is that guaranteed?). We therefore need to offset the image
1275 by 4, so that all the pointers are 8-byte aligned, so that
1276 pointer tagging works. */
1277 /* For 32-bit case we don't need this, hence we use macro
1278 PEi386_IMAGE_OFFSET, which equals to 4 for 64-bit case and 0 for
1279 32-bit case. */
1280 /* We allocate trampolines area for all symbols right behind
1281 image data, aligned on 8. */
1282 oc->info->trampoline
1283 = (PEi386_IMAGE_OFFSET + 2 * default_alignment
1284 + oc->info->secBytesTotal) & ~0x7;
1285 oc->info->secBytesTotal
1286 = oc->info->trampoline + info->numberOfSymbols * sizeof(SymbolExtra);
1287
1288 /* No further verification after this point; only debug printing. */
1289 i = 0;
1290 IF_DEBUG(linker, i=1);
1291 if (i == 0) return true;
1292
1293 debugBelch("sectab offset = %" FMT_SizeT "\n",
1294 ((uint8_t*)sectab) - ((uint8_t*)oc->image) );
1295 debugBelch("symtab offset = %" FMT_SizeT "\n",
1296 ((uint8_t*)symtab) - ((uint8_t*)oc->image) );
1297 debugBelch("strtab offset = %" FMT_SizeT "\n",
1298 ((uint8_t*)strtab) - ((uint8_t*)oc->image) );
1299
1300 debugBelch("\n" );
1301 if (info->type == COFF_IMAGE)
1302 {
1303 IMAGE_FILE_HEADER* hdr = (IMAGE_FILE_HEADER*)oc->image;
1304 debugBelch( "COFF Type: IMAGE_FILE_HEADER\n");
1305 debugBelch( "Machine: 0x%x\n",
1306 (uint32_t)(hdr->Machine) );
1307 debugBelch( "# sections: %d\n",
1308 (uint32_t)(hdr->NumberOfSections) );
1309 debugBelch( "time/date: 0x%x\n",
1310 (uint32_t)(hdr->TimeDateStamp) );
1311 debugBelch( "symtab offset: %d\n",
1312 (uint32_t)(hdr->PointerToSymbolTable) );
1313 debugBelch( "# symbols: %d\n",
1314 (uint32_t)(hdr->NumberOfSymbols) );
1315 debugBelch( "sz of opt hdr: %d\n",
1316 (uint32_t)(hdr->SizeOfOptionalHeader) );
1317 debugBelch( "characteristics: 0x%x\n",
1318 (uint32_t)(hdr->Characteristics) );
1319 }
1320 else if (info->type == COFF_ANON_BIG_OBJ)
1321 {
1322 ANON_OBJECT_HEADER_BIGOBJ* hdr = (ANON_OBJECT_HEADER_BIGOBJ*)oc->image;
1323 debugBelch( "COFF Type: ANON_OBJECT_HEADER_BIGOBJ\n");
1324 debugBelch( "Machine: 0x%x\n",
1325 (uint32_t)(hdr->Machine) );
1326 debugBelch( "# sections: %d\n",
1327 (uint32_t)(hdr->NumberOfSections) );
1328 debugBelch( "time/date: 0x%x\n",
1329 (uint32_t)(hdr->TimeDateStamp) );
1330 debugBelch( "symtab offset: %d\n",
1331 (uint32_t)(hdr->PointerToSymbolTable) );
1332 debugBelch( "# symbols: %d\n",
1333 (uint32_t)(hdr->NumberOfSymbols) );
1334 }
1335 else
1336 {
1337 debugBelch( "COFF Type: UNKNOWN\n");
1338 return false;
1339 }
1340
1341 /* Print the section table. */
1342 debugBelch("\n" );
1343 for (i = 0; i < info->numberOfSections; i++) {
1344 COFF_section* sectab_i
1345 = (COFF_section*)
1346 myindex ( sizeof_COFF_section, sectab, i );
1347 Section section = sections[i];
1348 debugBelch(
1349 "\n"
1350 "section %d\n"
1351 " name `",
1352 i
1353 );
1354 printName (sectab_i->Name, oc);
1355 debugBelch(
1356 "'\n"
1357 " vsize %lu\n"
1358 " vaddr %lu\n"
1359 " data sz %lu\n"
1360 " data off 0x%p\n"
1361 " num rel %hu\n"
1362 " off rel %lu\n"
1363 " ptr raw 0x%lx\n"
1364 " align %u\n"
1365 " data adj %zu\n",
1366 sectab_i->Misc.VirtualSize,
1367 sectab_i->VirtualAddress,
1368 sectab_i->SizeOfRawData,
1369 section.start,
1370 sectab_i->NumberOfRelocations,
1371 sectab_i->PointerToRelocations,
1372 sectab_i->PointerToRawData,
1373 getSectionAlignment (section),
1374 getAlignedValue (section.size, section)
1375 );
1376
1377 noRelocs = section.info->noRelocs;
1378 for (uint32_t j = 0; j < noRelocs; j++) {
1379 COFF_reloc rel = section.info->relocs[j];
1380 debugBelch(
1381 " type 0x%-4x vaddr 0x%-8lx name `",
1382 rel.Type,
1383 rel.VirtualAddress );
1384 COFF_symbol sym = oc->info->symbols[rel.SymbolTableIndex];
1385 printName (getSymShortName (info, &sym), oc);
1386 debugBelch("'\n" );
1387 }
1388
1389 debugBelch("\n" );
1390 }
1391 debugBelch("\n" );
1392 debugBelch("string table has size 0x%x\n", n_strtab + PEi386_STRTAB_OFFSET);
1393 debugBelch("---START of string table---\n");
1394 for (i = 4; i < n_strtab; i++) {
1395 if (strtab[i] == 0)
1396 debugBelch("\n"); else
1397 debugBelch("%c", strtab[i] );
1398 }
1399 debugBelch("--- END of string table---\n");
1400
1401 debugBelch("\n" );
1402
1403 for (i = 0; i < info->numberOfSymbols; i++) {
1404 COFF_symbol* symtab_i = &oc->info->symbols[i];
1405 debugBelch(
1406 "symbol %d\n"
1407 " name `",
1408 i
1409 );
1410 printName (getSymShortName (info, symtab_i), oc);
1411 debugBelch(
1412 "'\n"
1413 " value 0x%x\n"
1414 " 1+sec# %d\n"
1415 " type 0x%x\n"
1416 " sclass 0x%x\n"
1417 " nAux %d\n",
1418 getSymValue (info, symtab_i),
1419 getSymSectionNumber (info, symtab_i),
1420 getSymType (info, symtab_i),
1421 getSymStorageClass (info, symtab_i),
1422 getSymNumberOfAuxSymbols (info, symtab_i)
1423 );
1424 i += getSymNumberOfAuxSymbols (info, symtab_i);
1425 }
1426
1427 debugBelch("\n" );
1428 return true;
1429 }
1430
1431 bool
1432 ocGetNames_PEi386 ( ObjectCode* oc )
1433 {
1434 bool has_code_section = false;
1435
1436 SymbolName* sname;
1437 SymbolAddr* addr;
1438 unsigned int i;
1439
1440 COFF_HEADER_INFO *info = oc->info->ch_info;
1441
1442 /* Copy section information into the ObjectCode. */
1443
1444 for (i = 0; i < info->numberOfSections; i++) {
1445 uint8_t* start;
1446 uint8_t* end;
1447 uint32_t sz;
1448
1449 /* By default consider all section as CODE or DATA,
1450 which means we want to load them. */
1451 SectionKind kind = SECTIONKIND_CODE_OR_RODATA;
1452 Section section = oc->sections[i];
1453
1454 IF_DEBUG(linker, debugBelch("section name = %s\n", section.info->name ));
1455
1456 /* The PE file section flag indicates whether the section
1457 contains code or data. */
1458 if (section.info->props & IMAGE_SCN_CNT_CODE) {
1459 has_code_section = has_code_section || section.size > 0;
1460 kind = SECTIONKIND_CODE_OR_RODATA;
1461 }
1462
1463 if (section.info->props & IMAGE_SCN_CNT_INITIALIZED_DATA)
1464 kind = SECTIONKIND_CODE_OR_RODATA;
1465
1466 /* Check next if it contains any uninitialized data */
1467 if (section.info->props & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
1468 kind = SECTIONKIND_RWDATA;
1469
1470 /* Finally check if it can be discarded.
1471 This will also ignore .debug sections */
1472 if ( section.info->props & IMAGE_SCN_MEM_DISCARDABLE
1473 || section.info->props & IMAGE_SCN_LNK_REMOVE)
1474 kind = SECTIONKIND_OTHER;
1475
1476 if (0==strncmp(".ctors", section.info->name, 6)) {
1477 kind = SECTIONKIND_INIT_ARRAY;
1478 oc->info->init = &oc->sections[i];
1479 }
1480
1481 if (0==strncmp(".dtors", section.info->name, 6)) {
1482 kind = SECTIONKIND_FINIT_ARRAY;
1483 oc->info->finit = &oc->sections[i];
1484 }
1485
1486 if ( 0 == strncmp(".stab" , section.info->name, 5 )
1487 || 0 == strncmp(".stabstr" , section.info->name, 8 )
1488 || 0 == strncmp(".pdata" , section.info->name, 6 )
1489 || 0 == strncmp(".xdata" , section.info->name, 6 )
1490 || 0 == strncmp(".debug" , section.info->name, 6 )
1491 || 0 == strncmp(".rdata$zzz", section.info->name, 10))
1492 kind = SECTIONKIND_DEBUG;
1493
1494 if (0==strncmp(".idata", section.info->name, 6))
1495 kind = SECTIONKIND_IMPORT;
1496
1497 /* See Note [BFD import library]. */
1498 if (0==strncmp(".idata$7", section.info->name, 8))
1499 kind = SECTIONKIND_IMPORT_LIBRARY;
1500
1501 if (0==strncmp(".idata$6", section.info->name, 8)) {
1502 /* The first two bytes contain the ordinal of the function
1503 in the format of lowpart highpart. The two bytes combined
1504 for the total range of 16 bits which is the function export limit
1505 of DLLs. */
1506 sname = (SymbolName*)section.start+2;
1507 COFF_symbol* sym = &oc->info->symbols[info->numberOfSymbols-1];
1508 addr = get_sym_name( getSymShortName (info, sym), oc);
1509
1510 IF_DEBUG(linker,
1511 debugBelch("addImportSymbol `%s' => `%s'\n",
1512 sname, (char*)addr));
1513 /* We're going to free the any data associated with the import
1514 library without copying the sections. So we have to duplicate
1515 the symbol name and values before the pointers become invalid. */
1516 sname = strdup (sname);
1517 addr = strdup (addr);
1518 if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
1519 addr, false, oc)) {
1520 releaseOcInfo (oc);
1521 stgFree (oc->image);
1522 oc->image = NULL;
1523 return false;
1524 }
1525 setImportSymbol (oc, sname);
1526
1527 /* Don't process this oc any futher. Just exit. */
1528 oc->n_symbols = 0;
1529 oc->symbols = NULL;
1530 stgFree (oc->image);
1531 oc->image = NULL;
1532 releaseOcInfo (oc);
1533 return true;
1534 }
1535
1536 /* Allocate space for any (local, anonymous) .bss sections. */
1537 if (0==strncmp(".bss", section.info->name, 4)) {
1538 uint32_t bss_sz;
1539 uint8_t* zspace;
1540
1541 /* sof 10/05: the PE spec text isn't too clear regarding what
1542 * the SizeOfRawData field is supposed to hold for object
1543 * file sections containing just uninitialized data -- for executables,
1544 * it is supposed to be zero; unclear what it's supposed to be
1545 * for object files. However, VirtualSize is guaranteed to be
1546 * zero for object files, which definitely suggests that SizeOfRawData
1547 * will be non-zero (where else would the size of this .bss section be
1548 * stored?) Looking at the COFF_section info for incoming object files,
1549 * this certainly appears to be the case.
1550 *
1551 * => I suspect we've been incorrectly handling .bss sections in
1552 * (relocatable) object files up until now. This turned out to bite us
1553 * with ghc-6.4.1's use of gcc-3.4.x, which has started to emit
1554 * initially-zeroed-out local 'static' variable decls into the .bss
1555 * section. (The specific function in Q which triggered this is
1556 * libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
1557 *
1558 * TODO: check if this comment is still relevant.
1559 */
1560 if (section.info->virtualSize == 0 && section.size == 0) continue;
1561 /* This is a non-empty .bss section.
1562 Allocate zeroed space for it */
1563 bss_sz = section.info->virtualSize;
1564 if (bss_sz < section.size) { bss_sz = section.size; }
1565 bss_sz = section.info->alignment;
1566 zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
1567 oc->sections[i].start = getAlignedMemory(zspace, section);
1568 oc->sections[i].size = bss_sz;
1569 addProddableBlock(oc, zspace, bss_sz);
1570 /* debugBelch("BSS anon section at 0x%x\n", zspace); */
1571 }
1572
1573 /* Allocate space for the sections since we have a real oc.
1574 We initially mark it the region as non-accessible. But will adjust
1575 as we go along. */
1576 if (!oc->info->image) {
1577 /* See Note [Memory allocation]. */
1578 ASSERT(code_heap);
1579 oc->info->image
1580 = HeapAlloc (code_heap, HEAP_ZERO_MEMORY, oc->info->secBytesTotal);
1581 if (!oc->info->image)
1582 barf ("Could not allocate any heap memory from private heap.");
1583 }
1584
1585 ASSERT(section.size == 0 || section.info->virtualSize == 0);
1586 sz = section.size;
1587 if (sz < section.info->virtualSize) sz = section.info->virtualSize;
1588
1589 start = section.start;
1590 end = start + sz - 1;
1591
1592 if (kind != SECTIONKIND_OTHER && end >= start) {
1593 /* See Note [Section alignment]. */
1594 addCopySection(oc, &oc->sections[i], kind, SECTION_NOMEM, start, sz);
1595 addProddableBlock(oc, oc->sections[i].start, sz);
1596 }
1597 }
1598
1599 /* Copy exported symbols into the ObjectCode. */
1600
1601 oc->n_symbols = info->numberOfSymbols;
1602 oc->symbols = stgCallocBytes(sizeof(SymbolName*), oc->n_symbols,
1603 "ocGetNames_PEi386(oc->symbols)");
1604
1605 /* Work out the size of the global BSS section */
1606 StgWord globalBssSize = 0;
1607 for (i=0; i < info->numberOfSymbols; i++) {
1608 COFF_symbol* sym = &oc->info->symbols[i];
1609 if (getSymSectionNumber (info, sym) == IMAGE_SYM_UNDEFINED
1610 && getSymValue (info, sym) > 0
1611 && getSymStorageClass (info, sym) != IMAGE_SYM_CLASS_SECTION) {
1612 globalBssSize += getSymValue (info, sym);
1613 }
1614 i += getSymNumberOfAuxSymbols (info, sym);
1615 }
1616
1617 /* Allocate BSS space */
1618 SymbolAddr* bss = NULL;
1619 if (globalBssSize > 0) {
1620 bss = stgCallocBytes(1, globalBssSize,
1621 "ocGetNames_PEi386(non-anonymous bss)");
1622 addSection(&oc->sections[oc->n_sections-1],
1623 SECTIONKIND_RWDATA, SECTION_MALLOC,
1624 bss, globalBssSize, 0, 0, 0);
1625 IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
1626 addProddableBlock(oc, bss, globalBssSize);
1627 } else {
1628 addSection(&oc->sections[oc->n_sections-1],
1629 SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
1630 }
1631
1632 /* At this point we're done with oc->image and all relevant memory have
1633 been copied. Release it to free up the memory. */
1634 stgFree (oc->image);
1635 oc->image = NULL;
1636
1637 for (i = 0; i < (uint32_t)oc->n_symbols; i++) {
1638 COFF_symbol* sym = &oc->info->symbols[i];
1639
1640 int32_t secNumber = getSymSectionNumber (info, sym);
1641 uint32_t symValue = getSymValue (info, sym);
1642 uint8_t symStorageClass = getSymStorageClass (info, sym);
1643
1644 addr = NULL;
1645 bool isWeak = false;
1646 sname = get_sym_name (getSymShortName (info, sym), oc);
1647 Section *section = secNumber > 0 ? &oc->sections[secNumber-1] : NULL;
1648
1649 if ( secNumber != IMAGE_SYM_UNDEFINED
1650 && secNumber > 0
1651 && section
1652 && section->kind != SECTIONKIND_IMPORT_LIBRARY) {
1653 /* This symbol is global and defined, viz, exported */
1654 /* for IMAGE_SYMCLASS_EXTERNAL
1655 && !IMAGE_SYM_UNDEFINED,
1656 the address of the symbol is:
1657 address of relevant section + offset in section
1658 */
1659 if (symStorageClass == IMAGE_SYM_CLASS_EXTERNAL
1660 || ( symStorageClass == IMAGE_SYM_CLASS_STATIC
1661 && section->info->props & IMAGE_SCN_LNK_COMDAT)
1662 ) {
1663 addr = (SymbolAddr*)((size_t)section->start + symValue);
1664 isWeak = section->info->props & IMAGE_SCN_LNK_COMDAT;
1665 }
1666 }
1667 else if (symStorageClass == IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
1668 isWeak = true;
1669 }
1670 else if ( secNumber == IMAGE_SYM_UNDEFINED && symValue > 0) {
1671 /* This symbol isn't in any section at all, ie, global bss.
1672 Allocate zeroed space for it from the BSS section */
1673 addr = bss;
1674 bss = (SymbolAddr*)((StgWord)bss + (StgWord)symValue);
1675 IF_DEBUG(linker, debugBelch("bss symbol @ %p %u\n", addr, symValue));
1676 }
1677 else if (secNumber > 0
1678 && section
1679 && section->kind == SECTIONKIND_IMPORT_LIBRARY) {
1680 /* This is an import section. We should load the dll and lookup
1681 the symbols.
1682 See Note [BFD import library]. */
1683 char* dllName = section->start;
1684 if (strlen(dllName) == 0 || dllName[0] == 0 || has_code_section)
1685 continue;
1686
1687 pathchar* dirName = pathdir(oc->fileName);
1688 HsPtr token = addLibrarySearchPath(dirName);
1689 stgFree(dirName);
1690
1691 sym = &oc->info->symbols[oc->n_symbols-1];
1692 sname = get_sym_name (getSymShortName (info, sym), oc);
1693
1694 IF_DEBUG(linker,
1695 debugBelch("loading symbol `%s' from dll: '%ls' => `%s'\n",
1696 sname, oc->fileName, dllName));
1697
1698 pathchar* dll = mkPath(dllName);
1699 HINSTANCE dllInstance = 0;
1700 const char* result = addDLL_PEi386(dll, &dllInstance);
1701 removeLibrarySearchPath(token);
1702 stgFree(dll);
1703
1704 if (result != NULL || dllInstance == 0) {
1705 errorBelch("Could not load `%s'. Reason: %s\n",
1706 (char*)dllName, result);
1707 return false;
1708 }
1709
1710 /* Set the _dll_iname symbol to the dll's handle. */
1711 addr = (SymbolAddr*)dllInstance;
1712
1713 /* the symbols are named <name>_iname when defined, but are named
1714 _head_<name> when looked up. (Ugh. thanks GCC.) So correct it when
1715 stored so we don't have to correct it each time when retrieved. */
1716 int size = strlen(sname)+1;
1717 char *tmp = stgMallocBytes(size * sizeof(char),
1718 "ocGetNames_PEi386");
1719 strncpy (tmp, sname, size);
1720 char *pos = strstr(tmp, "_iname");
1721 /* drop anything after the name. There are some inconsistencies with
1722 whitespaces trailing the name. */
1723 if (pos) pos[0] = '\0';
1724 int start = 0;
1725
1726 /* msys2 project's import lib builder has some inconsistent name
1727 mangling. Their names start with _ or __ yet they drop this when
1728 making the _head_ symbol. So do the same. */
1729 while (tmp[start]=='_')
1730 start++;
1731
1732 snprintf (sname, size, "_head_%s", tmp+start);
1733 sname[size-start]='\0';
1734 stgFree(tmp);
1735 sname = strdup (sname);
1736 if (!ghciInsertSymbolTable(oc->fileName, symhash, sname,
1737 addr, false, oc))
1738 return false;
1739
1740 break;
1741 }
1742
1743 if ((addr != NULL || isWeak)
1744 && (!section || (section && section->kind != SECTIONKIND_IMPORT))) {
1745 /* debugBelch("addSymbol %p `%s' Weak:%lld \n", addr, sname, isWeak); */
1746 sname = strdup (sname);
1747 IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr, sname));
1748 ASSERT(i < (uint32_t)oc->n_symbols);
1749 oc->symbols[i] = sname;
1750 if (isWeak) {
1751 setWeakSymbol(oc, sname);
1752 }
1753
1754 if (! ghciInsertSymbolTable(oc->fileName, symhash, sname, addr,
1755 isWeak, oc))
1756 return false;
1757 } else {
1758 /* We're skipping the symbol, but if we ever load this
1759 object file we'll want to skip it then too. */
1760 oc->symbols[i] = NULL;
1761 }
1762
1763 i += getSymNumberOfAuxSymbols (info, sym);
1764 }
1765
1766 return true;
1767 }
1768
1769 #if defined(x86_64_HOST_ARCH)
1770
1771 /* We've already reserved a room for symbol extras in loadObj,
1772 * so simply set correct pointer here.
1773 */
1774 bool
1775 ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc )
1776 {
1777 /* If the ObjectCode was unloaded we don't need a trampoline, it's likely
1778 an import library so we're discarding it earlier. */
1779 if (!oc->info)
1780 return false;
1781
1782 const int mask = default_alignment - 1;
1783 size_t origin = oc->info->trampoline;
1784 oc->symbol_extras
1785 = (SymbolExtra*)((uintptr_t)(oc->info->image + origin + mask) & ~mask);
1786 oc->first_symbol_extra = 0;
1787 COFF_HEADER_INFO *info = oc->info->ch_info;
1788 oc->n_symbol_extras = info->numberOfSymbols;
1789
1790 return true;
1791 }
1792
1793 static size_t
1794 makeSymbolExtra_PEi386( ObjectCode* oc, uint64_t index, size_t s, char* symbol )
1795 {
1796 unsigned int curr_thunk;
1797 SymbolExtra *extra;
1798 curr_thunk = oc->first_symbol_extra + index;
1799 if (index >= oc->n_symbol_extras) {
1800 IF_DEBUG(linker, debugBelch("makeSymbolExtra first:%d, num:%lu, member:%s, index:%llu\n", curr_thunk, oc->n_symbol_extras, oc->archiveMemberName, index));
1801 barf("Can't allocate thunk for `%s' in `%" PATH_FMT "' with member `%s'", symbol, oc->fileName, oc->archiveMemberName);
1802 }
1803
1804 extra = oc->symbol_extras + curr_thunk;
1805
1806 if (!extra->addr)
1807 {
1808 // jmp *-14(%rip)
1809 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
1810 extra->addr = (uint64_t)s;
1811 memcpy(extra->jumpIsland, jmp, 6);
1812 }
1813
1814 return (size_t)extra->jumpIsland;
1815 }
1816
1817 #endif /* x86_64_HOST_ARCH */
1818
1819 bool
1820 ocResolve_PEi386 ( ObjectCode* oc )
1821 {
1822 uint64_t A;
1823 size_t S;
1824 SymbolAddr* pP;
1825
1826 unsigned int i;
1827 uint32_t j, noRelocs;
1828
1829 /* ToDo: should be variable-sized? But is at least safe in the
1830 sense of buffer-overrun-proof. */
1831 uint8_t symbol[1000];
1832 /* debugBelch("resolving for %s\n", oc->fileName); */
1833
1834 COFF_HEADER_INFO *info = oc->info->ch_info;
1835 uint32_t numberOfSections = info->numberOfSections;
1836
1837 for (i = 0; i < numberOfSections; i++) {
1838 Section section = oc->sections[i];
1839
1840 /* Ignore sections called which contain stabs debugging information. */
1841 if (section.kind == SECTIONKIND_DEBUG)
1842 continue;
1843
1844 noRelocs = section.info->noRelocs;
1845 for (j = 0; j < noRelocs; j++) {
1846 COFF_symbol* sym;
1847 COFF_reloc* reloc = &section.info->relocs[j];
1848
1849 /* the location to patch */
1850 pP = (SymbolAddr*)(
1851 (uintptr_t)section.start
1852 + (uintptr_t)reloc->VirtualAddress
1853 - (uintptr_t)section.info->virtualAddr
1854 );
1855 /* the existing contents of pP */
1856 A = *(uint32_t*)pP;
1857 /* the symbol to connect to */
1858 uint64_t symIndex = reloc->SymbolTableIndex;
1859 sym = &oc->info->symbols[symIndex];
1860
1861 IF_DEBUG(linker,
1862 debugBelch(
1863 "reloc sec %2d num %3d: type 0x%-4x "
1864 "vaddr 0x%-8lx name `",
1865 i, j,
1866 reloc->Type,
1867 reloc->VirtualAddress );
1868 printName (getSymShortName (info, sym), oc);
1869 debugBelch("'\n" ));
1870
1871 if (getSymStorageClass (info, sym) == IMAGE_SYM_CLASS_STATIC) {
1872 Section section = oc->sections[getSymSectionNumber (info, sym)-1];
1873 S = ((size_t)(section.start))
1874 + ((size_t)(getSymValue (info, sym)));
1875 } else {
1876 copyName ( getSymShortName (info, sym), oc, symbol,
1877 sizeof(symbol)-1 );
1878 S = (size_t) lookupSymbol_( (char*)symbol );
1879 if ((void*)S == NULL) {
1880 errorBelch(" | %" PATH_FMT ": unknown symbol `%s'", oc->fileName, symbol);
1881 releaseOcInfo (oc);
1882 return false;
1883 }
1884 }
1885 /* All supported relocations write at least 4 bytes */
1886 checkProddableBlock(oc, pP, 4);
1887 switch (reloc->Type) {
1888 #if defined(i386_HOST_ARCH)
1889 case IMAGE_REL_I386_DIR32:
1890 case IMAGE_REL_I386_DIR32NB:
1891 *(uint32_t *)pP = S + A;
1892 break;
1893 case IMAGE_REL_I386_REL32:
1894 /* Tricky. We have to insert a displacement at
1895 pP which, when added to the PC for the _next_
1896 insn, gives the address of the target (S).
1897 Problem is to know the address of the next insn
1898 when we only know pP. We assume that this
1899 literal field is always the last in the insn,
1900 so that the address of the next insn is pP+4
1901 -- hence the constant 4.
1902 Also I don't know if A should be added, but so
1903 far it has always been zero.
1904
1905 SOF 05/2005: 'A' (old contents of *pP) have been observed
1906 to contain values other than zero (the 'wx' object file
1907 that came with wxhaskell-0.9.4; dunno how it was compiled..).
1908 So, add displacement to old value instead of asserting
1909 A to be zero. Fixes wxhaskell-related crashes, and no other
1910 ill effects have been observed.
1911
1912 Update: the reason why we're seeing these more elaborate
1913 relocations is due to a switch in how the NCG compiles SRTs
1914 and offsets to them from info tables. SRTs live in .(ro)data,
1915 while info tables live in .text, causing GAS to emit REL32/DISP32
1916 relocations with non-zero values. Adding the displacement is
1917 the right thing to do.
1918 */
1919 *(uint32_t *)pP = ((uint32_t)S) + A - ((uint32_t)(size_t)pP) - 4;
1920 break;
1921 #elif defined(x86_64_HOST_ARCH)
1922 case 1: /* R_X86_64_64 (ELF constant 1) - IMAGE_REL_AMD64_ADDR64 (PE constant 1) */
1923 {
1924 uint64_t A;
1925 checkProddableBlock(oc, pP, 8);
1926 A = *(uint64_t*)pP;
1927 *(uint64_t *)pP = S + A;
1928 break;
1929 }
1930 case 2: /* R_X86_64_32 (ELF constant 10) - IMAGE_REL_AMD64_ADDR32 (PE constant 2) */
1931 case 3: /* R_X86_64_32S (ELF constant 11) - IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */
1932 case 17: /* R_X86_64_32S ELF constant, no PE mapping. See note [ELF constant in PE file] */
1933 {
1934 uint64_t v;
1935 v = S + A;
1936 if (v >> 32) {
1937 copyName (getSymShortName (info, sym), oc,
1938 symbol, sizeof(symbol)-1);
1939 S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol);
1940 /* And retry */
1941 v = S + A;
1942 if (v >> 32) {
1943 barf("IMAGE_REL_AMD64_ADDR32[NB]: High bits are set in %zx for %s",
1944 v, (char *)symbol);
1945 }
1946 }
1947 *(uint32_t *)pP = (uint32_t)v;
1948 break;
1949 }
1950 case 4: /* R_X86_64_PC32 (ELF constant 2) - IMAGE_REL_AMD64_REL32 (PE constant 4) */
1951 {
1952 intptr_t v;
1953 v = S + (int32_t)A - ((intptr_t)pP) - 4;
1954 if ((v >> 32) && ((-v) >> 32)) {
1955 /* Make the trampoline then */
1956 copyName (getSymShortName (info, sym),
1957 oc, symbol, sizeof(symbol)-1);
1958 S = makeSymbolExtra_PEi386(oc, symIndex, S, (char *)symbol);
1959 /* And retry */
1960 v = S + (int32_t)A - ((intptr_t)pP) - 4;
1961 if ((v >> 32) && ((-v) >> 32)) {
1962 barf("IMAGE_REL_AMD64_REL32: High bits are set in %zx for %s",
1963 v, (char *)symbol);
1964 }
1965 }
1966 *(uint32_t *)pP = (uint32_t)v;
1967 break;
1968 }
1969 #endif
1970 default:
1971 debugBelch("%" PATH_FMT ": unhandled PEi386 relocation type %d\n",
1972 oc->fileName, reloc->Type);
1973 releaseOcInfo (oc);
1974 return false;
1975 }
1976
1977 }
1978 }
1979
1980 IF_DEBUG(linker, debugBelch("completed %" PATH_FMT "\n", oc->fileName));
1981 return true;
1982 }
1983
1984 /*
1985 Note [ELF constant in PE file]
1986
   For some reason, the PE files produced by GHC contain the Linux (ELF)
   relocation constant 17 (0x11) in the object files. As far as I (Phyx-) can
   tell, this constant does not come from GHC itself; at least, I could not
   find anything in the .s output that GHC produces which specifies this
   relocation type.

   This leads me to believe that this is a bug in GAS. However, because the
   constant is there, we must deal with it. This is done by mapping it to the
   behaviourally equivalent PE relocation constant 0x03.
1995
1996 See #9907
1997 */
1998
1999 bool
2000 ocRunInit_PEi386 ( ObjectCode *oc )
2001 {
2002 if (!oc || !oc->info || !oc->info->init) {
2003 return true;
2004 }
2005
2006 int argc, envc;
2007 char **argv, **envv;
2008
2009 getProgArgv(&argc, &argv);
2010 getProgEnvv(&envc, &envv);
2011
2012 Section section = *oc->info->init;
2013 ASSERT(SECTIONKIND_INIT_ARRAY == section.kind);
2014
2015 uint8_t *init_startC = section.start;
2016 init_t *init_start = (init_t*)init_startC;
2017 init_t *init_end = (init_t*)(init_startC + section.size);
2018
2019 // ctors are run *backwards*!
2020 for (init_t *init = init_end - 1; init >= init_start; init--)
2021 (*init)(argc, argv, envv);
2022
2023 freeProgEnvv(envc, envv);
2024 releaseOcInfo (oc);
2025 return true;
2026 }
2027
2028 SymbolAddr *lookupSymbol_PEi386(SymbolName *lbl)
2029 {
2030 RtsSymbolInfo *pinfo;
2031
2032 if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
2033 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol '%s' not found\n", lbl));
2034
2035 SymbolAddr* sym;
2036
2037 /* See Note [mingw-w64 name decoration scheme] */
2038 #if !defined(x86_64_HOST_ARCH)
2039 zapTrailingAtSign ( lbl );
2040 #endif
2041 sym = lookupSymbolInDLLs(lbl);
2042 return sym; // might be NULL if not found
2043 } else {
2044 #if defined(mingw32_HOST_OS)
2045 // If Windows, perform initialization of uninitialized
2046 // Symbols from the C runtime which was loaded above.
2047 // We do this on lookup to prevent the hit when
2048 // The symbol isn't being used.
2049 if (pinfo->value == (void*)0xBAADF00D)
2050 {
2051 char symBuffer[50];
2052 sprintf(symBuffer, "_%s", lbl);
2053 static HMODULE msvcrt = NULL;
2054 if (!msvcrt) msvcrt = GetModuleHandle("msvcrt");
2055 pinfo->value = GetProcAddress(msvcrt, symBuffer);
2056 }
2057 else if (pinfo && pinfo->owner && isSymbolImport (pinfo->owner, lbl))
2058 {
2059 /* See Note [BFD import library]. */
2060 HINSTANCE dllInstance = (HINSTANCE)lookupSymbol(pinfo->value);
2061 if (!dllInstance && pinfo->value)
2062 return pinfo->value;
2063
2064 if (!dllInstance)
2065 {
2066 errorBelch("Unable to load import dll symbol `%s'. "
2067 "No _iname symbol.", lbl);
2068 return NULL;
2069 }
2070 IF_DEBUG(linker,
2071 debugBelch("indexing import %s => %s using dll instance %p\n",
2072 lbl, (char*)pinfo->value, dllInstance));
2073 pinfo->value = GetProcAddress((HMODULE)dllInstance, lbl);
2074 clearImportSymbol (pinfo->owner, lbl);
2075 return pinfo->value;
2076 }
2077 #endif
2078 return loadSymbol(lbl, pinfo);
2079 }
2080 }
2081
2082 /* -----------------------------------------------------------------------------
2083 * Section management.
2084 */
2085
2086 /* See Note [Section alignment]. */
2087 static void
2088 addCopySection (ObjectCode *oc, Section *s, SectionKind kind,
2089 SectionAlloc alloc, void* start, StgWord size) {
2090 char* pos = oc->info->image + oc->info->secBytesUsed;
2091 char* newStart = (char*)getAlignedMemory ((uint8_t*)pos, *s);
2092 memcpy (newStart, start, size);
2093 uintptr_t offset = (uintptr_t)newStart - (uintptr_t)oc->info->image;
2094 oc->info->secBytesUsed = (size_t)offset + size;
2095 start = newStart;
2096
2097 /* Initially I wanted to apply the right memory protection to the region and
2098 which would leaved the gaps in between the regions as inaccessible memory
2099 to prevent exploits.
2100 The problem is protection is always on page granularity, so we can use
2101 less memory and be insecure or use more memory and be secure.
2102 For now, I've chosen lower memory over secure as the first pass, this
2103 doesn't regress security over the current implementation. After this
2104 patch I will change to different implementation that will fix the mem
2105 protection and keep the memory size small. */
2106 addSection (s, kind, alloc, start, size, 0, 0, 0);
2107 }
2108
2109 /* -----------------------------------------------------------------------------
2110 * Debugging operations.
2111 */
2112
2113 pathchar*
2114 resolveSymbolAddr_PEi386 (pathchar* buffer, int size,
2115 SymbolAddr* symbol, uintptr_t* top ){
2116 SYMBOL_INFO sym;
2117 ZeroMemory (&sym, sizeof(SYMBOL_INFO));
2118 sym.MaxNameLen = sizeof(char) * 1024;
2119
2120 DWORD64 uDisplacement = 0;
2121 HANDLE hProcess = GetCurrentProcess();
2122 ObjectCode* obj = NULL;
2123 uintptr_t start, end;
2124 *top = 0;
2125
2126 pathprintf (buffer, size, WSTR("0x%" PRIxPTR), symbol);
2127
2128 if (SymFromAddr (hProcess, (uintptr_t)symbol, &uDisplacement, &sym))
2129 {
2130 /* Try using Windows symbols. */
2131 wcscat (buffer, WSTR(" "));
2132 pathchar* name = mkPath (sym.Name);
2133 wcscat (buffer, name);
2134 stgFree (name);
2135 if (uDisplacement != 0)
2136 {
2137 int64_t displacement = (int64_t)uDisplacement;
2138 pathchar s_disp[50];
2139 if (displacement < 0)
2140 pathprintf ((pathchar*)s_disp, 50, WSTR("-%ld"), -displacement);
2141 else
2142 pathprintf ((pathchar*)s_disp, 50, WSTR("+%ld"), displacement);
2143
2144 wcscat (buffer, s_disp);
2145 }
2146 }
2147 else
2148 {
2149 /* Try to calculate from information inside the rts. */
2150 uintptr_t loc = (uintptr_t)symbol;
2151 for (ObjectCode* oc = objects; oc; oc = oc->next) {
2152 for (int i = 0; i < oc->n_sections; i++) {
2153 Section section = oc->sections[i];
2154 start = (uintptr_t)section.start;
2155 end = start + section.size;
2156 if (loc > start && loc <= end)
2157 {
2158 wcscat (buffer, WSTR(" "));
2159 if (oc->archiveMemberName)
2160 {
2161 pathchar* name = mkPath (oc->archiveMemberName);
2162 wcscat (buffer, name);
2163 stgFree (name);
2164 }
2165 else
2166 {
2167 wcscat (buffer, oc->fileName);
2168 }
2169 pathchar s_disp[50];
2170 pathprintf (s_disp, 50, WSTR("+0x%" PRIxPTR), loc - start);
2171 wcscat (buffer, s_disp);
2172 obj = oc;
2173 goto exit_loop;
2174 }
2175 }
2176 }
2177
2178 /* If we managed to make it here, we must not have any symbols nor be
2179 dealing with code we've linked. The only thing left is an internal
2180 segfault or one in a dynamic library. So let's enumerate the module
2181 address space. */
2182 HMODULE *hMods = NULL;
2183 DWORD cbNeeded;
2184 EnumProcessModules (hProcess, hMods, 0, &cbNeeded);
2185 hMods = stgMallocBytes (cbNeeded, "resolveSymbolAddr_PEi386");
2186 if (EnumProcessModules (hProcess, hMods, cbNeeded, &cbNeeded))
2187 {
2188 uintptr_t loc = (uintptr_t)symbol;
2189 MODULEINFO info;
2190 for (uint32_t i = 0; i < cbNeeded / sizeof(HMODULE); i++) {
2191 ZeroMemory (&info, sizeof (MODULEINFO));
2192 if (GetModuleInformation (hProcess, hMods[i], &info,
2193 sizeof(MODULEINFO)))
2194 {
2195 uintptr_t start = (uintptr_t)info.lpBaseOfDll;
2196 uintptr_t end = start + info.SizeOfImage;
2197 if (loc >= start && loc < end)
2198 {
2199 /* Hoera, finally found some information. */
2200 pathchar tmp[MAX_PATH];
2201 if (GetModuleFileNameExW (hProcess, hMods[i], tmp, MAX_PATH))
2202 {
2203 wcscat (buffer, WSTR(" "));
2204 wcscat (buffer, tmp);
2205 pathprintf (tmp, MAX_PATH, WSTR("+0x%" PRIxPTR), loc - start);
2206 wcscat (buffer, tmp);
2207 }
2208 break;
2209 }
2210 }
2211 }
2212 }
2213
2214 stgFree(hMods);
2215 }
2216
2217 /* Finally any file/line number. */
2218 IMAGEHLP_LINE64 lineInfo = {0};
2219 DWORD dwDisplacement = 0;
2220 exit_loop:
2221 if (SymGetLineFromAddr64(hProcess, (uintptr_t)symbol, &dwDisplacement,
2222 &lineInfo))
2223 {
2224 /* Try using Windows symbols. */
2225 pathchar s_line[512];
2226 pathprintf ((pathchar*) s_line, 512, WSTR(" %ls (%lu)"),
2227 lineInfo.FileName, lineInfo.LineNumber);
2228 wcscat (buffer, s_line);
2229 if (dwDisplacement != 0)
2230 {
2231 pathprintf ((pathchar*) s_line, 512, WSTR(" +%lu byte%s"),
2232 dwDisplacement,
2233 (dwDisplacement == 1 ? WSTR("") : WSTR("s")));
2234 }
2235 wcscat (buffer, s_line);
2236 }
2237 else if (obj)
2238 {
2239 /* Try to calculate from information inside the rts. */
2240 typedef struct _SymX { SymbolName* name; uintptr_t loc; } SymX;
2241 SymX* locs = stgCallocBytes (sizeof(SymX), obj->n_symbols,
2242 "resolveSymbolAddr");
2243 int blanks = 0;
2244 for (int i = 0; i < obj->n_symbols; i++) {
2245 SymbolName* sym = obj->symbols[i];
2246 if (sym == NULL)
2247 {
2248 blanks++;
2249 continue;
2250 }
2251 RtsSymbolInfo* a = NULL;
2252 ghciLookupSymbolInfo(symhash, sym, &a);
2253 if (a) {
2254 SymX sx = {0};
2255 sx.name = sym;
2256 sx.loc = (uintptr_t)a->value;
2257 locs[i] = sx;
2258 }
2259 }
2260 int comp (const void * elem1, const void * elem2)
2261 {
2262 SymX f = *((SymX*)elem1);
2263 SymX s = *((SymX*)elem2);
2264 if (f.loc > s.loc) return 1;
2265 if (f.loc < s.loc) return -1;
2266 return 0;
2267 }
2268 qsort (locs, obj->n_symbols, sizeof (SymX), comp);
2269 uintptr_t key = (uintptr_t)symbol;
2270 SymX* res = NULL;
2271
2272 for (int x = blanks; x < obj->n_symbols; x++) {
2273 if (x < (obj->n_symbols -1)) {
2274 if (locs[x].loc >= key && key < locs[x+1].loc) {
2275 res = &locs[x];
2276 break;
2277 }
2278 }
2279 else
2280 {
2281 if (locs[x].loc >= key) {
2282 res = &locs[x];
2283 break;
2284 }
2285 }
2286 }
2287
2288 if (res) {
2289 pathchar s_disp[512];
2290 *top = (uintptr_t)res->loc;
2291 pathprintf ((pathchar*)s_disp, 512,
2292 WSTR("\n\t\t (%s+0x%" PRIxPTR ")"),
2293 res->name, res->loc - key);
2294 wcscat (buffer, s_disp);
2295 }
2296 stgFree (locs);
2297 }
2298
2299 return buffer;
2300 }
2301 #endif /* mingw32_HOST_OS */