Updated PE linker, section alignment and cleanup.
[ghc.git] / rts / linker / LoadArchive.c
1 #include <Rts.h>
2 #include "PathUtils.h"
3
4 #include "sm/Storage.h"
5 #include "sm/OSMem.h"
6 #include "RtsUtils.h"
7 #include "LinkerInternals.h"
8 #include "linker/M32Alloc.h"
9
10 /* Platform specific headers */
11 #if defined(OBJFORMAT_PEi386)
12 # include "linker/PEi386.h"
13 #elif defined(OBJFORMAT_MACHO)
14 # include "linker/MachO.h"
15 # include <regex.h>
16 # include <mach/machine.h>
17 # include <mach-o/fat.h>
18 #elif defined(OBJFORMAT_ELF)
19 #include "linker/Elf.h"
20 #endif
21
22 #include <string.h>
23 #include <stddef.h>
24 #include <ctype.h>
25 #include <fs_rts.h>
26
27 #define FAIL(...) do {\
28 errorBelch("loadArchive: "__VA_ARGS__); \
29 goto fail;\
30 } while (0)
31
32 #define DEBUG_LOG(...) IF_DEBUG(linker, debugBelch("loadArchive: " __VA_ARGS__))
33
34 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
35 /* Read 4 bytes and convert to host byte order */
36 static uint32_t read4Bytes(const char buf[static 4])
37 {
38 return ntohl(*(uint32_t*)buf);
39 }
40
41 static StgBool loadFatArchive(char tmp[static 20], FILE* f, pathchar* path)
42 {
43 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
44 #if defined(i386_HOST_ARCH)
45 const uint32_t mycputype = CPU_TYPE_X86;
46 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
47 #elif defined(x86_64_HOST_ARCH)
48 const uint32_t mycputype = CPU_TYPE_X86_64;
49 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
50 #elif defined(powerpc_HOST_ARCH)
51 const uint32_t mycputype = CPU_TYPE_POWERPC;
52 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
53 #elif defined(powerpc64_HOST_ARCH)
54 const uint32_t mycputype = CPU_TYPE_POWERPC64;
55 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
56 #elif defined(aarch64_HOST_ARCH)
57 const uint32_t mycputype = CPU_TYPE_ARM64;
58 const uint32_t mycpusubtype = CPU_SUBTYPE_ARM64_ALL;
59 #else
60 #error Unknown Darwin architecture
61 #endif
62
63 nfat_arch = read4Bytes(tmp + 4);
64 DEBUG_LOG("found a fat archive containing %d architectures\n", nfat_arch);
65 nfat_offset = 0;
66 for (uint32_t i = 0; i < nfat_arch; i++) {
67 /* search for the right arch */
68 int n = fread(tmp, 1, 12, f);
69 if (n != 12) {
70 errorBelch("Failed reading arch from `%" PATH_FMT "'", path);
71 return false;
72 }
73 cputype = read4Bytes(tmp);
74 cpusubtype = read4Bytes(tmp + 4);
75 if (cputype == mycputype && cpusubtype == mycpusubtype) {
76 DEBUG_LOG("found my archive in a fat archive\n");
77 nfat_offset = read4Bytes(tmp + 8);
78 break;
79 }
80 }
81 if (nfat_offset == 0) {
82 errorBelch("Fat archive contains %d architectures, "
83 "but none of them are compatible with the host",
84 (int)nfat_arch);
85 return false;
86 } else {
87 /* Seek to the correct architecture */
88 int n = fseek(f, nfat_offset, SEEK_SET);
89 if (n != 0) {
90 errorBelch("Failed to seek to arch in `%" PATH_FMT "'", path);
91 return false;
92 }
93
94 /* Read the header */
95 n = fread(tmp, 1, 8, f);
96 if (n != 8) {
97 errorBelch("Failed reading header from `%" PATH_FMT "'", path);
98 return false;
99 }
100
101 /* Check the magic number */
102 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
103 errorBelch("couldn't find archive in `%" PATH_FMT "'"
104 "at offset %d", path, nfat_offset);
105 return false;
106 }
107 }
108 return true;
109 }
110 #endif
111
112 static StgBool readThinArchiveMember(int n, int memberSize, pathchar* path,
113 char* fileName, char* image)
114 {
115 StgBool has_succeeded = false;
116 FILE* member = NULL;
117 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
118 memberPath = NULL;
119 /* Allocate and setup the dirname of the archive. We'll need
120 this to locate the thin member */
121 pathCopy = pathdup(path); // Convert the char* to a pathchar*
122 dirName = pathdir(pathCopy);
123 /* Append the relative member name to the dirname. This should be
124 be the full path to the actual thin member. */
125 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
126 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
127 objFileName = mkPath(fileName);
128 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName,
129 objFileName);
130 stgFree(objFileName);
131 stgFree(dirName);
132 member = pathopen(memberPath, WSTR("rb"));
133 if (!member) {
134 errorBelch("loadObj: can't read thin archive `%" PATH_FMT "'",
135 memberPath);
136 goto inner_fail;
137 }
138 n = fread(image, 1, memberSize, member);
139 if (n != memberSize) {
140 errorBelch("loadArchive: error whilst reading `%s'",
141 fileName);
142 goto inner_fail;
143 }
144 has_succeeded = true;
145
146 inner_fail:
147 fclose(member);
148 stgFree(memberPath);
149 stgFree(pathCopy);
150 return has_succeeded;
151 }
152
153 static StgBool checkFatArchive(char magic[static 20], FILE* f, pathchar* path)
154 {
155 StgBool success;
156 success = false;
157 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
158 /* Not a standard archive, look for a fat archive magic number: */
159 if (read4Bytes(magic) == FAT_MAGIC)
160 success = loadFatArchive(magic, f, path);
161 else
162 errorBelch("loadArchive: Neither an archive, nor a fat archive: "
163 "`%" PATH_FMT "'", path);
164 #else
165 (void)magic;
166 (void)f;
167 errorBelch("loadArchive: Not an archive: `%" PATH_FMT "'", path);
168 #endif
169 return success;
170 }
171
172 /**
173 * Look up the filename in the GNU-variant index file pointed to by
174 * gnuFileIndex.
175 * @param fileName_ a pointer to a pointer to the file name to be looked up.
176 * The file name must have been allocated with `StgMallocBytes`, and will
177 * be reallocated on return; the old value is now _invalid_.
178 * @param gnuFileIndexSize The size of the index.
179 */
180 static StgBool
181 lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
182 char* gnuFileIndex, pathchar* path, size_t* thisFileNameSize,
183 size_t* fileNameSize)
184 {
185 int n;
186 char *fileName = *fileName_;
187 if (isdigit(fileName[1])) {
188 int i;
189 for (n = 2; isdigit(fileName[n]); n++)
190 ;
191
192 fileName[n] = '\0';
193 n = atoi(fileName + 1);
194 if (gnuFileIndex == NULL) {
195 errorBelch("loadArchive: GNU-variant filename "
196 "without an index while reading from `%" PATH_FMT "'",
197 path);
198 return false;
199 }
200 if (n < 0 || n > gnuFileIndexSize) {
201 errorBelch("loadArchive: GNU-variant filename "
202 "offset %d out of range [0..%d] "
203 "while reading filename from `%" PATH_FMT "'",
204 n, gnuFileIndexSize, path);
205 return false;
206 }
207 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
208 errorBelch("loadArchive: GNU-variant filename offset "
209 "%d invalid (range [0..%d]) while reading "
210 "filename from `%" PATH_FMT "'",
211 n, gnuFileIndexSize, path);
212 return false;
213 }
214 for (i = n; gnuFileIndex[i] != '\n'; i++)
215 ;
216
217 size_t FileNameSize = i - n - 1;
218 if (FileNameSize >= *fileNameSize) {
219 /* Double it to avoid potentially continually
220 increasing it by 1 */
221 *fileNameSize = FileNameSize * 2;
222 *fileName_ = fileName = stgReallocBytes(fileName, *fileNameSize,
223 "loadArchive(fileName)");
224 }
225 memcpy(fileName, gnuFileIndex + n, FileNameSize);
226 fileName[FileNameSize] = '\0';
227 *thisFileNameSize = FileNameSize;
228 }
229 /* Skip 32-bit symbol table ("/" + 15 blank characters)
230 and 64-bit symbol table ("/SYM64/" + 9 blank characters) */
231 else if (0 == strncmp(fileName + 1, " ", 15) ||
232 0 == strncmp(fileName + 1, "SYM64/ ", 15)) {
233 fileName[0] = '\0';
234 *thisFileNameSize = 0;
235 }
236 else {
237 errorBelch("loadArchive: invalid GNU-variant filename `%.16s' "
238 "while reading filename from `%" PATH_FMT "'",
239 fileName, path);
240 return false;
241 }
242
243 return true;
244 }
245
246 static HsInt loadArchive_ (pathchar *path)
247 {
248 ObjectCode* oc = NULL;
249 char *image = NULL;
250 HsInt retcode = 0;
251 int memberSize;
252 FILE *f = NULL;
253 int n;
254 size_t thisFileNameSize = (size_t)-1; /* shut up bogus GCC warning */
255 char *fileName;
256 size_t fileNameSize;
257 int isObject, isGnuIndex, isThin, isImportLib;
258 char tmp[20];
259 char *gnuFileIndex;
260 int gnuFileIndexSize;
261 int misalignment = 0;
262
263 DEBUG_LOG("start\n");
264 DEBUG_LOG("Loading archive `%" PATH_FMT "'\n", path);
265
266 /* Check that we haven't already loaded this archive.
267 Ignore requests to load multiple times */
268 if (isAlreadyLoaded(path)) {
269 IF_DEBUG(linker,
270 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
271 return 1; /* success */
272 }
273
274 gnuFileIndex = NULL;
275 gnuFileIndexSize = 0;
276
277 fileNameSize = 32;
278 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
279
280 isThin = 0;
281 isImportLib = 0;
282
283 f = pathopen(path, WSTR("rb"));
284 if (!f)
285 FAIL("loadObj: can't read `%" PATH_FMT "'", path);
286
287 /* Check if this is an archive by looking for the magic "!<arch>\n"
288 * string. Usually, if this fails, we belch an error and return. On
289 * Darwin however, we may have a fat archive, which contains archives for
290 * more than one architecture. Fat archives start with the magic number
291 * 0xcafebabe, always stored big endian. If we find a fat_header, we scan
292 * through the fat_arch structs, searching through for one for our host
293 * architecture. If a matching struct is found, we read the offset
294 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
295 * from the start of the file.
296 *
297 * A subtlety is that all of the members of the fat_header and fat_arch
298 * structs are stored big endian, so we need to call byte order
299 * conversion functions.
300 *
301 * If we find the appropriate architecture in a fat archive, we gobble
302 * its magic "!<arch>\n" string and continue processing just as if
303 * we had a single architecture archive.
304 */
305
306 n = fread ( tmp, 1, 8, f );
307 if (n != 8) {
308 FAIL("Failed reading header from `%" PATH_FMT "'", path);
309 }
310 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
311 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
312 *
313 * ar thin libraries have the exact same format as normal archives except they
314 * have a different magic string and they don't copy the object files into the
315 * archive.
316 *
317 * Instead each header entry points to the location of the object file on disk.
318 * This is useful when a library is only created to satisfy a compile time dependency
319 * instead of to be distributed. This saves the time required for copying.
320 *
321 * Thin archives are always flattened. They always only contain simple headers
322 * pointing to the object file and so we need not allocate more memory than needed
323 * to find the object file.
324 *
325 */
326 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
327 isThin = 1;
328 }
329 else {
330 StgBool success = checkFatArchive(tmp, f, path);
331 if (!success)
332 goto fail;
333 }
334 DEBUG_LOG("loading archive contents\n");
335
336 while (1) {
337 DEBUG_LOG("reading at %ld\n", ftell(f));
338 n = fread ( fileName, 1, 16, f );
339 if (n != 16) {
340 if (feof(f)) {
341 DEBUG_LOG("EOF while reading from '%" PATH_FMT "'\n", path);
342 break;
343 }
344 else {
345 FAIL("Failed reading file name from `%" PATH_FMT "'", path);
346 }
347 }
348
349 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
350 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
351 DEBUG_LOG("found the start of another archive, breaking\n");
352 break;
353 }
354 #endif
355
356 n = fread ( tmp, 1, 12, f );
357 if (n != 12)
358 FAIL("Failed reading mod time from `%" PATH_FMT "'", path);
359 n = fread ( tmp, 1, 6, f );
360 if (n != 6)
361 FAIL("Failed reading owner from `%" PATH_FMT "'", path);
362 n = fread ( tmp, 1, 6, f );
363 if (n != 6)
364 FAIL("Failed reading group from `%" PATH_FMT "'", path);
365 n = fread ( tmp, 1, 8, f );
366 if (n != 8)
367 FAIL("Failed reading mode from `%" PATH_FMT "'", path);
368 n = fread ( tmp, 1, 10, f );
369 if (n != 10)
370 FAIL("Failed reading size from `%" PATH_FMT "'", path);
371 tmp[10] = '\0';
372 for (n = 0; isdigit(tmp[n]); n++);
373 tmp[n] = '\0';
374 memberSize = atoi(tmp);
375
376 DEBUG_LOG("size of this archive member is %d\n", memberSize);
377 n = fread ( tmp, 1, 2, f );
378 if (n != 2)
379 FAIL("Failed reading magic from `%" PATH_FMT "'", path);
380 if (strncmp(tmp, "\x60\x0A", 2) != 0)
381 FAIL("Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
382 path, ftell(f), tmp[0], tmp[1]);
383
384 isGnuIndex = 0;
385 /* Check for BSD-variant large filenames */
386 if (0 == strncmp(fileName, "#1/", 3)) {
387 size_t n = 0;
388 fileName[16] = '\0';
389 if (isdigit(fileName[3])) {
390 for (n = 4; isdigit(fileName[n]); n++)
391 ;
392
393 fileName[n] = '\0';
394 thisFileNameSize = atoi(fileName + 3);
395 memberSize -= thisFileNameSize;
396 if (thisFileNameSize >= fileNameSize) {
397 /* Double it to avoid potentially continually
398 increasing it by 1 */
399 fileNameSize = thisFileNameSize * 2;
400 fileName = stgReallocBytes(fileName, fileNameSize,
401 "loadArchive(fileName)");
402 }
403 n = fread(fileName, 1, thisFileNameSize, f);
404 if (n != thisFileNameSize) {
405 errorBelch("Failed reading filename from `%" PATH_FMT "'",
406 path);
407 goto fail;
408 }
409 fileName[thisFileNameSize] = 0;
410 /* On OS X at least, thisFileNameSize is the size of the
411 fileName field, not the length of the fileName
412 itself. */
413 thisFileNameSize = strlen(fileName);
414 } else {
415 errorBelch("BSD-variant filename size not found "
416 "while reading filename from `%" PATH_FMT "'", path);
417 goto fail;
418 }
419 }
420 /* Check for GNU file index file */
421 else if (0 == strncmp(fileName, "//", 2)) {
422 fileName[0] = '\0';
423 thisFileNameSize = 0;
424 isGnuIndex = 1;
425 }
426 /* Check for a file in the GNU file index */
427 else if (fileName[0] == '/') {
428 if (!lookupGNUArchiveIndex(gnuFileIndexSize, &fileName,
429 gnuFileIndex, path, &thisFileNameSize, &fileNameSize)) {
430 goto fail;
431 }
432 }
433 /* Finally, the case where the filename field actually contains
434 the filename */
435 else {
436 /* GNU ar terminates filenames with a '/', this allowing
437 spaces in filenames. So first look to see if there is a
438 terminating '/'. */
439 for (thisFileNameSize = 0;
440 thisFileNameSize < 16;
441 thisFileNameSize++) {
442 if (fileName[thisFileNameSize] == '/') {
443 fileName[thisFileNameSize] = '\0';
444 break;
445 }
446 }
447 /* If we didn't find a '/', then a space teminates the
448 filename. Note that if we don't find one, then
449 thisFileNameSize ends up as 16, and we already have the
450 '\0' at the end. */
451 if (thisFileNameSize == 16) {
452 for (thisFileNameSize = 0;
453 thisFileNameSize < 16;
454 thisFileNameSize++) {
455 if (fileName[thisFileNameSize] == ' ') {
456 fileName[thisFileNameSize] = '\0';
457 break;
458 }
459 }
460 }
461 }
462
463 DEBUG_LOG("Found member file `%s'\n", fileName);
464
465 /* TODO: Stop relying on file extensions to determine input formats.
466 Instead try to match file headers. See Trac #13103. */
467 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
468 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0)
469 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".obj", 4) == 0);
470
471 #if defined(OBJFORMAT_PEi386)
472 /*
473 * Note [MSVC import files (ext .lib)]
474 * MSVC compilers store the object files in
475 * the import libraries with extension .dll
476 * so on Windows we should look for those too.
477 * The PE COFF format doesn't specify any specific file name
478 * for sections. So on windows, just try to load it all.
479 *
480 * Linker members (e.g. filename / are skipped since they are not needed)
481 */
482 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
483 #endif // windows
484
485 DEBUG_LOG("\tthisFileNameSize = %d\n", (int)thisFileNameSize);
486 DEBUG_LOG("\tisObject = %d\n", isObject);
487
488 if (isObject) {
489 char *archiveMemberName;
490
491 DEBUG_LOG("Member is an object file...loading...\n");
492
493 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
494 if (RTS_LINKER_USE_MMAP)
495 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
496 else {
497 /* See loadObj() */
498 misalignment = machoGetMisalignment(f);
499 image = stgMallocBytes(memberSize + misalignment,
500 "loadArchive(image)");
501 image += misalignment;
502 }
503
504 #else // not darwin
505 image = stgMallocBytes(memberSize, "loadArchive(image)");
506 #endif
507 if (isThin) {
508 if (!readThinArchiveMember(n, memberSize, path,
509 fileName, image)) {
510 goto fail;
511 }
512 }
513 else
514 {
515 n = fread ( image, 1, memberSize, f );
516 if (n != memberSize) {
517 FAIL("error whilst reading `%" PATH_FMT "'", path);
518 }
519 }
520
521 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
522 "loadArchive(file)");
523 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
524 path, (int)thisFileNameSize, fileName);
525
526 oc = mkOc(path, image, memberSize, false, archiveMemberName
527 , misalignment);
528 #if defined(OBJFORMAT_MACHO)
529 ocInit_MachO( oc );
530 #endif
531 #if defined(OBJFORMAT_ELF)
532 ocInit_ELF( oc );
533 #endif
534
535 stgFree(archiveMemberName);
536
537 if (0 == loadOc(oc)) {
538 stgFree(fileName);
539 fclose(f);
540 return 0;
541 } else {
542 oc->next = objects;
543 objects = oc;
544 }
545 }
546 else if (isGnuIndex) {
547 if (gnuFileIndex != NULL) {
548 FAIL("GNU-variant index found, but already have an index, \
549 while reading filename from `%" PATH_FMT "'", path);
550 }
551 DEBUG_LOG("Found GNU-variant file index\n");
552 #if RTS_LINKER_USE_MMAP
553 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
554 #else
555 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
556 #endif
557 n = fread ( gnuFileIndex, 1, memberSize, f );
558 if (n != memberSize) {
559 FAIL("error whilst reading `%" PATH_FMT "'", path);
560 }
561 gnuFileIndex[memberSize] = '/';
562 gnuFileIndexSize = memberSize;
563 }
564 else if (isImportLib) {
565 #if defined(OBJFORMAT_PEi386)
566 if (checkAndLoadImportLibrary(path, fileName, f)) {
567 DEBUG_LOG("Member is an import file section... "
568 "Corresponding DLL has been loaded...\n");
569 }
570 else {
571 DEBUG_LOG("Member is not a valid import file section... "
572 "Skipping...\n");
573 n = fseek(f, memberSize, SEEK_CUR);
574 if (n != 0)
575 FAIL("error whilst seeking by %d in `%" PATH_FMT "'",
576 memberSize, path);
577 }
578 #endif
579 }
580 else {
581 DEBUG_LOG("`%s' does not appear to be an object file\n",
582 fileName);
583 if (!isThin || thisFileNameSize == 0) {
584 n = fseek(f, memberSize, SEEK_CUR);
585 if (n != 0)
586 FAIL("error whilst seeking by %d in `%" PATH_FMT "'",
587 memberSize, path);
588 }
589 }
590
591 /* .ar files are 2-byte aligned */
592 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
593 DEBUG_LOG("trying to read one pad byte\n");
594 n = fread ( tmp, 1, 1, f );
595 if (n != 1) {
596 if (feof(f)) {
597 DEBUG_LOG("found EOF while reading one pad byte\n");
598 break;
599 }
600 else {
601 FAIL("Failed reading padding from `%" PATH_FMT "'", path);
602 }
603 }
604 DEBUG_LOG("successfully read one pad byte\n");
605 }
606 DEBUG_LOG("reached end of archive loading while loop\n");
607 }
608 retcode = 1;
609 fail:
610 if (f != NULL)
611 fclose(f);
612
613 if (fileName != NULL)
614 stgFree(fileName);
615 if (gnuFileIndex != NULL) {
616 #if RTS_LINKER_USE_MMAP
617 munmap(gnuFileIndex, gnuFileIndexSize + 1);
618 #else
619 stgFree(gnuFileIndex);
620 #endif
621 }
622
623 if (RTS_LINKER_USE_MMAP)
624 m32_allocator_flush();
625
626 DEBUG_LOG("done\n");
627 return retcode;
628 }
629
630 HsInt loadArchive (pathchar *path)
631 {
632 ACQUIRE_LOCK(&linker_mutex);
633 HsInt r = loadArchive_(path);
634 RELEASE_LOCK(&linker_mutex);
635 return r;
636 }