Merge commit '5229c43ccf77bcbffeced01dccb27398d017fa34'
[ghc.git] / rts / linker / LoadArchive.c
1 #include <Rts.h>
2 #include "PathUtils.h"
3
4 #include "sm/Storage.h"
5 #include "sm/OSMem.h"
6 #include "RtsUtils.h"
7 #include "LinkerInternals.h"
8 #include "linker/M32Alloc.h"
9
10 /* Platform specific headers */
11 #if defined(OBJFORMAT_PEi386)
12 # include "linker/PEi386.h"
13 #elif defined(OBJFORMAT_MACHO)
14 # include "linker/MachO.h"
15 # include <regex.h>
16 # include <mach/machine.h>
17 # include <mach-o/fat.h>
18 #elif defined(OBJFORMAT_ELF)
19 #include "linker/Elf.h"
20 #endif
21
22 #include <string.h>
23 #include <stddef.h>
24 #include <ctype.h>
25
26 #define FAIL(...) do {\
27 errorBelch("loadArchive: "__VA_ARGS__); \
28 goto fail;\
29 } while (0)
30
31 #define DEBUG_LOG(...) IF_DEBUG(linker, debugBelch("loadArchive: " __VA_ARGS__))
32
33 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
34 /* Read 4 bytes and convert to host byte order */
35 static uint32_t read4Bytes(const char buf[static 4])
36 {
37 return ntohl(*(uint32_t*)buf);
38 }
39
40 static StgBool loadFatArchive(char tmp[static 20], FILE* f, pathchar* path)
41 {
42 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
43 #if defined(i386_HOST_ARCH)
44 const uint32_t mycputype = CPU_TYPE_X86;
45 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
46 #elif defined(x86_64_HOST_ARCH)
47 const uint32_t mycputype = CPU_TYPE_X86_64;
48 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
49 #elif defined(powerpc_HOST_ARCH)
50 const uint32_t mycputype = CPU_TYPE_POWERPC;
51 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
52 #elif defined(powerpc64_HOST_ARCH)
53 const uint32_t mycputype = CPU_TYPE_POWERPC64;
54 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
55 #elif defined(aarch64_HOST_ARCH)
56 const uint32_t mycputype = CPU_TYPE_ARM64;
57 const uint32_t mycpusubtype = CPU_SUBTYPE_ARM64_ALL;
58 #else
59 #error Unknown Darwin architecture
60 #endif
61
62 nfat_arch = read4Bytes(tmp + 4);
63 DEBUG_LOG("found a fat archive containing %d architectures\n", nfat_arch);
64 nfat_offset = 0;
65 for (uint32_t i = 0; i < nfat_arch; i++) {
66 /* search for the right arch */
67 int n = fread(tmp, 1, 12, f);
68 if (n != 12) {
69 errorBelch("Failed reading arch from `%" PATH_FMT "'", path);
70 return false;
71 }
72 cputype = read4Bytes(tmp);
73 cpusubtype = read4Bytes(tmp + 4);
74 if (cputype == mycputype && cpusubtype == mycpusubtype) {
75 DEBUG_LOG("found my archive in a fat archive\n");
76 nfat_offset = read4Bytes(tmp + 8);
77 break;
78 }
79 }
80 if (nfat_offset == 0) {
81 errorBelch("Fat archive contains %d architectures, "
82 "but none of them are compatible with the host",
83 (int)nfat_arch);
84 return false;
85 } else {
86 /* Seek to the correct architecture */
87 int n = fseek(f, nfat_offset, SEEK_SET);
88 if (n != 0) {
89 errorBelch("Failed to seek to arch in `%" PATH_FMT "'", path);
90 return false;
91 }
92
93 /* Read the header */
94 n = fread(tmp, 1, 8, f);
95 if (n != 8) {
96 errorBelch("Failed reading header from `%" PATH_FMT "'", path);
97 return false;
98 }
99
100 /* Check the magic number */
101 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
102 errorBelch("couldn't find archive in `%" PATH_FMT "'"
103 "at offset %d", path, nfat_offset);
104 return false;
105 }
106 }
107 return true;
108 }
109 #endif
110
111 static StgBool readThinArchiveMember(int n, int memberSize, pathchar* path,
112 char* fileName, char* image)
113 {
114 StgBool has_succeeded = false;
115 FILE* member = NULL;
116 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
117 memberPath = NULL;
118 /* Allocate and setup the dirname of the archive. We'll need
119 this to locate the thin member */
120 pathCopy = pathdup(path); // Convert the char* to a pathchar*
121 dirName = pathdir(pathCopy);
122 /* Append the relative member name to the dirname. This should be
123 be the full path to the actual thin member. */
124 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
125 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
126 objFileName = mkPath(fileName);
127 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName,
128 objFileName);
129 stgFree(objFileName);
130 stgFree(dirName);
131 member = pathopen(memberPath, WSTR("rb"));
132 if (!member) {
133 errorBelch("loadObj: can't read thin archive `%" PATH_FMT "'",
134 memberPath);
135 goto inner_fail;
136 }
137 n = fread(image, 1, memberSize, member);
138 if (n != memberSize) {
139 errorBelch("loadArchive: error whilst reading `%s'",
140 fileName);
141 goto inner_fail;
142 }
143 has_succeeded = true;
144
145 inner_fail:
146 fclose(member);
147 stgFree(memberPath);
148 stgFree(pathCopy);
149 return has_succeeded;
150 }
151
152 static StgBool checkFatArchive(char magic[static 20], FILE* f, pathchar* path)
153 {
154 StgBool success;
155 success = false;
156 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
157 /* Not a standard archive, look for a fat archive magic number: */
158 if (read4Bytes(magic) == FAT_MAGIC)
159 success = loadFatArchive(magic, f, path);
160 else
161 errorBelch("loadArchive: Neither an archive, nor a fat archive: "
162 "`%" PATH_FMT "'", path);
163 #else
164 (void)magic;
165 (void)f;
166 errorBelch("loadArchive: Not an archive: `%" PATH_FMT "'", path);
167 #endif
168 return success;
169 }
170
171 /**
172 * Look up the filename in the GNU-variant index file pointed to by
173 * gnuFileIndex.
174 * @param fileName_ a pointer to a pointer to the file name to be looked up.
175 * The file name must have been allocated with `StgMallocBytes`, and will
176 * be reallocated on return; the old value is now _invalid_.
177 * @param gnuFileIndexSize The size of the index.
178 */
179 static StgBool
180 lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
181 char* gnuFileIndex, pathchar* path, size_t* thisFileNameSize,
182 size_t* fileNameSize)
183 {
184 int n;
185 char *fileName = *fileName_;
186 if (isdigit(fileName[1])) {
187 int i;
188 for (n = 2; isdigit(fileName[n]); n++)
189 ;
190
191 fileName[n] = '\0';
192 n = atoi(fileName + 1);
193 if (gnuFileIndex == NULL) {
194 errorBelch("loadArchive: GNU-variant filename "
195 "without an index while reading from `%" PATH_FMT "'",
196 path);
197 return false;
198 }
199 if (n < 0 || n > gnuFileIndexSize) {
200 errorBelch("loadArchive: GNU-variant filename "
201 "offset %d out of range [0..%d] "
202 "while reading filename from `%" PATH_FMT "'",
203 n, gnuFileIndexSize, path);
204 return false;
205 }
206 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
207 errorBelch("loadArchive: GNU-variant filename offset "
208 "%d invalid (range [0..%d]) while reading "
209 "filename from `%" PATH_FMT "'",
210 n, gnuFileIndexSize, path);
211 return false;
212 }
213 for (i = n; gnuFileIndex[i] != '\n'; i++)
214 ;
215
216 size_t FileNameSize = i - n - 1;
217 if (FileNameSize >= *fileNameSize) {
218 /* Double it to avoid potentially continually
219 increasing it by 1 */
220 *fileNameSize = FileNameSize * 2;
221 *fileName_ = fileName = stgReallocBytes(fileName, *fileNameSize,
222 "loadArchive(fileName)");
223 }
224 memcpy(fileName, gnuFileIndex + n, FileNameSize);
225 fileName[FileNameSize] = '\0';
226 *thisFileNameSize = FileNameSize;
227 }
228 /* Skip 32-bit symbol table ("/" + 15 blank characters)
229 and 64-bit symbol table ("/SYM64/" + 9 blank characters) */
230 else if (0 == strncmp(fileName + 1, " ", 15) ||
231 0 == strncmp(fileName + 1, "SYM64/ ", 15)) {
232 fileName[0] = '\0';
233 *thisFileNameSize = 0;
234 }
235 else {
236 errorBelch("loadArchive: invalid GNU-variant filename `%.16s' "
237 "while reading filename from `%" PATH_FMT "'",
238 fileName, path);
239 return false;
240 }
241
242 return true;
243 }
244
245 static HsInt loadArchive_ (pathchar *path)
246 {
247 ObjectCode* oc = NULL;
248 char *image = NULL;
249 HsInt retcode = 0;
250 int memberSize;
251 FILE *f = NULL;
252 int n;
253 size_t thisFileNameSize = (size_t)-1; /* shut up bogus GCC warning */
254 char *fileName;
255 size_t fileNameSize;
256 int isObject, isGnuIndex, isThin, isImportLib;
257 char tmp[20];
258 char *gnuFileIndex;
259 int gnuFileIndexSize;
260 int misalignment = 0;
261
262 DEBUG_LOG("start\n");
263 DEBUG_LOG("Loading archive `%" PATH_FMT" '\n", path);
264
265 /* Check that we haven't already loaded this archive.
266 Ignore requests to load multiple times */
267 if (isAlreadyLoaded(path)) {
268 IF_DEBUG(linker,
269 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
270 return 1; /* success */
271 }
272
273 gnuFileIndex = NULL;
274 gnuFileIndexSize = 0;
275
276 fileNameSize = 32;
277 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
278
279 isThin = 0;
280 isImportLib = 0;
281
282 f = pathopen(path, WSTR("rb"));
283 if (!f)
284 FAIL("loadObj: can't read `%" PATH_FMT "'", path);
285
286 /* Check if this is an archive by looking for the magic "!<arch>\n"
287 * string. Usually, if this fails, we belch an error and return. On
288 * Darwin however, we may have a fat archive, which contains archives for
289 * more than one architecture. Fat archives start with the magic number
290 * 0xcafebabe, always stored big endian. If we find a fat_header, we scan
291 * through the fat_arch structs, searching through for one for our host
292 * architecture. If a matching struct is found, we read the offset
293 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
294 * from the start of the file.
295 *
296 * A subtlety is that all of the members of the fat_header and fat_arch
297 * structs are stored big endian, so we need to call byte order
298 * conversion functions.
299 *
300 * If we find the appropriate architecture in a fat archive, we gobble
301 * its magic "!<arch>\n" string and continue processing just as if
302 * we had a single architecture archive.
303 */
304
305 n = fread ( tmp, 1, 8, f );
306 if (n != 8) {
307 FAIL("Failed reading header from `%" PATH_FMT "'", path);
308 }
309 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
310 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
311 *
312 * ar thin libraries have the exact same format as normal archives except they
313 * have a different magic string and they don't copy the object files into the
314 * archive.
315 *
316 * Instead each header entry points to the location of the object file on disk.
317 * This is useful when a library is only created to satisfy a compile time dependency
318 * instead of to be distributed. This saves the time required for copying.
319 *
320 * Thin archives are always flattened. They always only contain simple headers
321 * pointing to the object file and so we need not allocate more memory than needed
322 * to find the object file.
323 *
324 */
325 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
326 isThin = 1;
327 }
328 else {
329 StgBool success = checkFatArchive(tmp, f, path);
330 if (!success)
331 goto fail;
332 }
333 DEBUG_LOG("loading archive contents\n");
334
335 while (1) {
336 DEBUG_LOG("reading at %ld\n", ftell(f));
337 n = fread ( fileName, 1, 16, f );
338 if (n != 16) {
339 if (feof(f)) {
340 DEBUG_LOG("EOF while reading from '%" PATH_FMT "'\n", path);
341 break;
342 }
343 else {
344 FAIL("Failed reading file name from `%" PATH_FMT "'", path);
345 }
346 }
347
348 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
349 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
350 DEBUG_LOG("found the start of another archive, breaking\n");
351 break;
352 }
353 #endif
354
355 n = fread ( tmp, 1, 12, f );
356 if (n != 12)
357 FAIL("Failed reading mod time from `%" PATH_FMT "'", path);
358 n = fread ( tmp, 1, 6, f );
359 if (n != 6)
360 FAIL("Failed reading owner from `%" PATH_FMT "'", path);
361 n = fread ( tmp, 1, 6, f );
362 if (n != 6)
363 FAIL("Failed reading group from `%" PATH_FMT "'", path);
364 n = fread ( tmp, 1, 8, f );
365 if (n != 8)
366 FAIL("Failed reading mode from `%" PATH_FMT "'", path);
367 n = fread ( tmp, 1, 10, f );
368 if (n != 10)
369 FAIL("Failed reading size from `%" PATH_FMT "'", path);
370 tmp[10] = '\0';
371 for (n = 0; isdigit(tmp[n]); n++);
372 tmp[n] = '\0';
373 memberSize = atoi(tmp);
374
375 DEBUG_LOG("size of this archive member is %d\n", memberSize);
376 n = fread ( tmp, 1, 2, f );
377 if (n != 2)
378 FAIL("Failed reading magic from `%" PATH_FMT "'", path);
379 if (strncmp(tmp, "\x60\x0A", 2) != 0)
380 FAIL("Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
381 path, ftell(f), tmp[0], tmp[1]);
382
383 isGnuIndex = 0;
384 /* Check for BSD-variant large filenames */
385 if (0 == strncmp(fileName, "#1/", 3)) {
386 size_t n = 0;
387 fileName[16] = '\0';
388 if (isdigit(fileName[3])) {
389 for (n = 4; isdigit(fileName[n]); n++)
390 ;
391
392 fileName[n] = '\0';
393 thisFileNameSize = atoi(fileName + 3);
394 memberSize -= thisFileNameSize;
395 if (thisFileNameSize >= fileNameSize) {
396 /* Double it to avoid potentially continually
397 increasing it by 1 */
398 fileNameSize = thisFileNameSize * 2;
399 fileName = stgReallocBytes(fileName, fileNameSize,
400 "loadArchive(fileName)");
401 }
402 n = fread(fileName, 1, thisFileNameSize, f);
403 if (n != thisFileNameSize) {
404 errorBelch("Failed reading filename from `%" PATH_FMT "'",
405 path);
406 goto fail;
407 }
408 fileName[thisFileNameSize] = 0;
409 /* On OS X at least, thisFileNameSize is the size of the
410 fileName field, not the length of the fileName
411 itself. */
412 thisFileNameSize = strlen(fileName);
413 } else {
414 errorBelch("BSD-variant filename size not found "
415 "while reading filename from `%" PATH_FMT "'", path);
416 goto fail;
417 }
418 }
419 /* Check for GNU file index file */
420 else if (0 == strncmp(fileName, "//", 2)) {
421 fileName[0] = '\0';
422 thisFileNameSize = 0;
423 isGnuIndex = 1;
424 }
425 /* Check for a file in the GNU file index */
426 else if (fileName[0] == '/') {
427 if (!lookupGNUArchiveIndex(gnuFileIndexSize, &fileName,
428 gnuFileIndex, path, &thisFileNameSize, &fileNameSize)) {
429 goto fail;
430 }
431 }
432 /* Finally, the case where the filename field actually contains
433 the filename */
434 else {
435 /* GNU ar terminates filenames with a '/', this allowing
436 spaces in filenames. So first look to see if there is a
437 terminating '/'. */
438 for (thisFileNameSize = 0;
439 thisFileNameSize < 16;
440 thisFileNameSize++) {
441 if (fileName[thisFileNameSize] == '/') {
442 fileName[thisFileNameSize] = '\0';
443 break;
444 }
445 }
446 /* If we didn't find a '/', then a space teminates the
447 filename. Note that if we don't find one, then
448 thisFileNameSize ends up as 16, and we already have the
449 '\0' at the end. */
450 if (thisFileNameSize == 16) {
451 for (thisFileNameSize = 0;
452 thisFileNameSize < 16;
453 thisFileNameSize++) {
454 if (fileName[thisFileNameSize] == ' ') {
455 fileName[thisFileNameSize] = '\0';
456 break;
457 }
458 }
459 }
460 }
461
462 DEBUG_LOG("Found member file `%s'\n", fileName);
463
464 /* TODO: Stop relying on file extensions to determine input formats.
465 Instead try to match file headers. See Trac #13103. */
466 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
467 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0)
468 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".obj", 4) == 0);
469
470 #if defined(OBJFORMAT_PEi386)
471 /*
472 * Note [MSVC import files (ext .lib)]
473 * MSVC compilers store the object files in
474 * the import libraries with extension .dll
475 * so on Windows we should look for those too.
476 * The PE COFF format doesn't specify any specific file name
477 * for sections. So on windows, just try to load it all.
478 *
479 * Linker members (e.g. filename / are skipped since they are not needed)
480 */
481 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
482 #endif // windows
483
484 DEBUG_LOG("\tthisFileNameSize = %d\n", (int)thisFileNameSize);
485 DEBUG_LOG("\tisObject = %d\n", isObject);
486
487 if (isObject) {
488 char *archiveMemberName;
489
490 DEBUG_LOG("Member is an object file...loading...\n");
491
492 #if defined(mingw32_HOST_OS)
493 // TODO: We would like to use allocateExec here, but allocateExec
494 // cannot currently allocate blocks large enough.
495 image = allocateImageAndTrampolines(path, fileName, f, memberSize,
496 isThin);
497 #elif defined(darwin_HOST_OS) || defined(ios_HOST_OS)
498 if (RTS_LINKER_USE_MMAP)
499 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
500 else {
501 /* See loadObj() */
502 misalignment = machoGetMisalignment(f);
503 image = stgMallocBytes(memberSize + misalignment,
504 "loadArchive(image)");
505 image += misalignment;
506 }
507
508 #else // not windows or darwin
509 image = stgMallocBytes(memberSize, "loadArchive(image)");
510 #endif
511 if (isThin) {
512 if (!readThinArchiveMember(n, memberSize, path,
513 fileName, image)) {
514 goto fail;
515 }
516 }
517 else
518 {
519 n = fread ( image, 1, memberSize, f );
520 if (n != memberSize) {
521 FAIL("error whilst reading `%" PATH_FMT "'", path);
522 }
523 }
524
525 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
526 "loadArchive(file)");
527 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
528 path, (int)thisFileNameSize, fileName);
529
530 oc = mkOc(path, image, memberSize, false, archiveMemberName
531 , misalignment);
532 #if defined(OBJFORMAT_MACHO)
533 ocInit_MachO( oc );
534 #endif
535 #if defined(OBJFORMAT_ELF)
536 ocInit_ELF( oc );
537 #endif
538
539 stgFree(archiveMemberName);
540
541 if (0 == loadOc(oc)) {
542 stgFree(fileName);
543 fclose(f);
544 return 0;
545 } else {
546 oc->next = objects;
547 objects = oc;
548 }
549 }
550 else if (isGnuIndex) {
551 if (gnuFileIndex != NULL) {
552 FAIL("GNU-variant index found, but already have an index, \
553 while reading filename from `%" PATH_FMT "'", path);
554 }
555 DEBUG_LOG("Found GNU-variant file index\n");
556 #if RTS_LINKER_USE_MMAP
557 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
558 #else
559 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
560 #endif
561 n = fread ( gnuFileIndex, 1, memberSize, f );
562 if (n != memberSize) {
563 FAIL("error whilst reading `%" PATH_FMT "'", path);
564 }
565 gnuFileIndex[memberSize] = '/';
566 gnuFileIndexSize = memberSize;
567 }
568 else if (isImportLib) {
569 #if defined(OBJFORMAT_PEi386)
570 if (checkAndLoadImportLibrary(path, fileName, f)) {
571 DEBUG_LOG("Member is an import file section... "
572 "Corresponding DLL has been loaded...\n");
573 }
574 else {
575 DEBUG_LOG("Member is not a valid import file section... "
576 "Skipping...\n");
577 n = fseek(f, memberSize, SEEK_CUR);
578 if (n != 0)
579 FAIL("error whilst seeking by %d in `%" PATH_FMT "'",
580 memberSize, path);
581 }
582 #endif
583 }
584 else {
585 DEBUG_LOG("`%s' does not appear to be an object file\n",
586 fileName);
587 if (!isThin || thisFileNameSize == 0) {
588 n = fseek(f, memberSize, SEEK_CUR);
589 if (n != 0)
590 FAIL("error whilst seeking by %d in `%" PATH_FMT "'",
591 memberSize, path);
592 }
593 }
594
595 /* .ar files are 2-byte aligned */
596 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
597 DEBUG_LOG("trying to read one pad byte\n");
598 n = fread ( tmp, 1, 1, f );
599 if (n != 1) {
600 if (feof(f)) {
601 DEBUG_LOG("found EOF while reading one pad byte\n");
602 break;
603 }
604 else {
605 FAIL("Failed reading padding from `%" PATH_FMT "'", path);
606 }
607 }
608 DEBUG_LOG("successfully read one pad byte\n");
609 }
610 DEBUG_LOG("reached end of archive loading while loop\n");
611 }
612 retcode = 1;
613 fail:
614 if (f != NULL)
615 fclose(f);
616
617 if (fileName != NULL)
618 stgFree(fileName);
619 if (gnuFileIndex != NULL) {
620 #if RTS_LINKER_USE_MMAP
621 munmap(gnuFileIndex, gnuFileIndexSize + 1);
622 #else
623 stgFree(gnuFileIndex);
624 #endif
625 }
626
627 if (RTS_LINKER_USE_MMAP)
628 m32_allocator_flush();
629
630 DEBUG_LOG("done\n");
631 return retcode;
632 }
633
634 HsInt loadArchive (pathchar *path)
635 {
636 ACQUIRE_LOCK(&linker_mutex);
637 HsInt r = loadArchive_(path);
638 RELEASE_LOCK(&linker_mutex);
639 return r;
640 }