Update Trac ticket URLs to point to GitLab
[ghc.git] / rts / linker / LoadArchive.c
1 #include <Rts.h>
2 #include "PathUtils.h"
3
4 #include "sm/Storage.h"
5 #include "sm/OSMem.h"
6 #include "RtsUtils.h"
7 #include "LinkerInternals.h"
8 #include "linker/M32Alloc.h"
9
10 /* Platform specific headers */
11 #if defined(OBJFORMAT_PEi386)
12 # include "linker/PEi386.h"
13 #elif defined(OBJFORMAT_MACHO)
14 # include "linker/MachO.h"
15 # include <regex.h>
16 # include <mach/machine.h>
17 # include <mach-o/fat.h>
18 #elif defined(OBJFORMAT_ELF)
19 #include "linker/Elf.h"
20 #endif
21
22 #include <string.h>
23 #include <stddef.h>
24 #include <ctype.h>
25 #include <fs_rts.h>
26
27 #define FAIL(...) do {\
28 errorBelch("loadArchive: "__VA_ARGS__); \
29 goto fail;\
30 } while (0)
31
32 #define DEBUG_LOG(...) IF_DEBUG(linker, debugBelch("loadArchive: " __VA_ARGS__))
33
34 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
35 /* Read 4 bytes and convert to host byte order */
36 static uint32_t read4Bytes(const char buf[static 4])
37 {
38 return ntohl(*(uint32_t*)buf);
39 }
40
41 static StgBool loadFatArchive(char tmp[static 20], FILE* f, pathchar* path)
42 {
43 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
44 #if defined(i386_HOST_ARCH)
45 const uint32_t mycputype = CPU_TYPE_X86;
46 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
47 #elif defined(x86_64_HOST_ARCH)
48 const uint32_t mycputype = CPU_TYPE_X86_64;
49 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
50 #elif defined(aarch64_HOST_ARCH)
51 const uint32_t mycputype = CPU_TYPE_ARM64;
52 const uint32_t mycpusubtype = CPU_SUBTYPE_ARM64_ALL;
53 #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
54 #error No Darwin support on PowerPC
55 #else
56 #error Unknown Darwin architecture
57 #endif
58
59 nfat_arch = read4Bytes(tmp + 4);
60 DEBUG_LOG("found a fat archive containing %d architectures\n", nfat_arch);
61 nfat_offset = 0;
62 for (uint32_t i = 0; i < nfat_arch; i++) {
63 /* search for the right arch */
64 int n = fread(tmp, 1, 12, f);
65 if (n != 12) {
66 errorBelch("Failed reading arch from `%" PATH_FMT "'", path);
67 return false;
68 }
69 cputype = read4Bytes(tmp);
70 cpusubtype = read4Bytes(tmp + 4);
71 if (cputype == mycputype && cpusubtype == mycpusubtype) {
72 DEBUG_LOG("found my archive in a fat archive\n");
73 nfat_offset = read4Bytes(tmp + 8);
74 break;
75 }
76 }
77 if (nfat_offset == 0) {
78 errorBelch("Fat archive contains %d architectures, "
79 "but none of them are compatible with the host",
80 (int)nfat_arch);
81 return false;
82 } else {
83 /* Seek to the correct architecture */
84 int n = fseek(f, nfat_offset, SEEK_SET);
85 if (n != 0) {
86 errorBelch("Failed to seek to arch in `%" PATH_FMT "'", path);
87 return false;
88 }
89
90 /* Read the header */
91 n = fread(tmp, 1, 8, f);
92 if (n != 8) {
93 errorBelch("Failed reading header from `%" PATH_FMT "'", path);
94 return false;
95 }
96
97 /* Check the magic number */
98 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
99 errorBelch("couldn't find archive in `%" PATH_FMT "'"
100 "at offset %d", path, nfat_offset);
101 return false;
102 }
103 }
104 return true;
105 }
106 #endif
107
108 static StgBool readThinArchiveMember(int n, int memberSize, pathchar* path,
109 char* fileName, char* image)
110 {
111 StgBool has_succeeded = false;
112 FILE* member = NULL;
113 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
114 memberPath = NULL;
115 /* Allocate and setup the dirname of the archive. We'll need
116 this to locate the thin member */
117 pathCopy = pathdup(path); // Convert the char* to a pathchar*
118 dirName = pathdir(pathCopy);
119 /* Append the relative member name to the dirname. This should be
120 be the full path to the actual thin member. */
121 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
122 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
123 objFileName = mkPath(fileName);
124 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName,
125 objFileName);
126 stgFree(objFileName);
127 stgFree(dirName);
128 member = pathopen(memberPath, WSTR("rb"));
129 if (!member) {
130 errorBelch("loadObj: can't read thin archive `%" PATH_FMT "'",
131 memberPath);
132 goto inner_fail;
133 }
134 n = fread(image, 1, memberSize, member);
135 if (n != memberSize) {
136 errorBelch("loadArchive: error whilst reading `%s'",
137 fileName);
138 goto inner_fail;
139 }
140 has_succeeded = true;
141
142 inner_fail:
143 fclose(member);
144 stgFree(memberPath);
145 stgFree(pathCopy);
146 return has_succeeded;
147 }
148
149 static StgBool checkFatArchive(char magic[static 20], FILE* f, pathchar* path)
150 {
151 StgBool success;
152 success = false;
153 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
154 /* Not a standard archive, look for a fat archive magic number: */
155 if (read4Bytes(magic) == FAT_MAGIC)
156 success = loadFatArchive(magic, f, path);
157 else
158 errorBelch("loadArchive: Neither an archive, nor a fat archive: "
159 "`%" PATH_FMT "'", path);
160 #else
161 (void)magic;
162 (void)f;
163 errorBelch("loadArchive: Not an archive: `%" PATH_FMT "'", path);
164 #endif
165 return success;
166 }
167
168 /**
169 * Look up the filename in the GNU-variant index file pointed to by
170 * gnuFileIndex.
171 * @param fileName_ a pointer to a pointer to the file name to be looked up.
172 * The file name must have been allocated with `StgMallocBytes`, and will
173 * be reallocated on return; the old value is now _invalid_.
174 * @param gnuFileIndexSize The size of the index.
175 */
176 static StgBool
177 lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
178 char* gnuFileIndex, pathchar* path, size_t* thisFileNameSize,
179 size_t* fileNameSize)
180 {
181 int n;
182 char *fileName = *fileName_;
183 if (isdigit(fileName[1])) {
184 int i;
185 for (n = 2; isdigit(fileName[n]); n++)
186 ;
187
188 fileName[n] = '\0';
189 n = atoi(fileName + 1);
190 if (gnuFileIndex == NULL) {
191 errorBelch("loadArchive: GNU-variant filename "
192 "without an index while reading from `%" PATH_FMT "'",
193 path);
194 return false;
195 }
196 if (n < 0 || n > gnuFileIndexSize) {
197 errorBelch("loadArchive: GNU-variant filename "
198 "offset %d out of range [0..%d] "
199 "while reading filename from `%" PATH_FMT "'",
200 n, gnuFileIndexSize, path);
201 return false;
202 }
203 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
204 errorBelch("loadArchive: GNU-variant filename offset "
205 "%d invalid (range [0..%d]) while reading "
206 "filename from `%" PATH_FMT "'",
207 n, gnuFileIndexSize, path);
208 return false;
209 }
210 for (i = n; gnuFileIndex[i] != '\n'; i++)
211 ;
212
213 size_t FileNameSize = i - n - 1;
214 if (FileNameSize >= *fileNameSize) {
215 /* Double it to avoid potentially continually
216 increasing it by 1 */
217 *fileNameSize = FileNameSize * 2;
218 *fileName_ = fileName = stgReallocBytes(fileName, *fileNameSize,
219 "loadArchive(fileName)");
220 }
221 memcpy(fileName, gnuFileIndex + n, FileNameSize);
222 fileName[FileNameSize] = '\0';
223 *thisFileNameSize = FileNameSize;
224 }
225 /* Skip 32-bit symbol table ("/" + 15 blank characters)
226 and 64-bit symbol table ("/SYM64/" + 9 blank characters) */
227 else if (0 == strncmp(fileName + 1, " ", 15) ||
228 0 == strncmp(fileName + 1, "SYM64/ ", 15)) {
229 fileName[0] = '\0';
230 *thisFileNameSize = 0;
231 }
232 else {
233 errorBelch("loadArchive: invalid GNU-variant filename `%.16s' "
234 "while reading filename from `%" PATH_FMT "'",
235 fileName, path);
236 return false;
237 }
238
239 return true;
240 }
241
242 static HsInt loadArchive_ (pathchar *path)
243 {
244 ObjectCode* oc = NULL;
245 char *image = NULL;
246 HsInt retcode = 0;
247 int memberSize;
248 FILE *f = NULL;
249 int n;
250 size_t thisFileNameSize = (size_t)-1; /* shut up bogus GCC warning */
251 char *fileName;
252 size_t fileNameSize;
253 int isObject, isGnuIndex, isThin, isImportLib;
254 char tmp[20];
255 char *gnuFileIndex;
256 int gnuFileIndexSize;
257 int misalignment = 0;
258
259 DEBUG_LOG("start\n");
260 DEBUG_LOG("Loading archive `%" PATH_FMT "'\n", path);
261
262 /* Check that we haven't already loaded this archive.
263 Ignore requests to load multiple times */
264 if (isAlreadyLoaded(path)) {
265 IF_DEBUG(linker,
266 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
267 return 1; /* success */
268 }
269
270 gnuFileIndex = NULL;
271 gnuFileIndexSize = 0;
272
273 fileNameSize = 32;
274 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
275
276 isThin = 0;
277 isImportLib = 0;
278
279 f = pathopen(path, WSTR("rb"));
280 if (!f)
281 FAIL("loadObj: can't read `%" PATH_FMT "'", path);
282
283 /* Check if this is an archive by looking for the magic "!<arch>\n"
284 * string. Usually, if this fails, we belch an error and return. On
285 * Darwin however, we may have a fat archive, which contains archives for
286 * more than one architecture. Fat archives start with the magic number
287 * 0xcafebabe, always stored big endian. If we find a fat_header, we scan
288 * through the fat_arch structs, searching through for one for our host
289 * architecture. If a matching struct is found, we read the offset
290 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
291 * from the start of the file.
292 *
293 * A subtlety is that all of the members of the fat_header and fat_arch
294 * structs are stored big endian, so we need to call byte order
295 * conversion functions.
296 *
297 * If we find the appropriate architecture in a fat archive, we gobble
298 * its magic "!<arch>\n" string and continue processing just as if
299 * we had a single architecture archive.
300 */
301
302 n = fread ( tmp, 1, 8, f );
303 if (n != 8) {
304 FAIL("Failed reading header from `%" PATH_FMT "'", path);
305 }
306 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
307 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
308 *
309 * ar thin libraries have the exact same format as normal archives except they
310 * have a different magic string and they don't copy the object files into the
311 * archive.
312 *
313 * Instead each header entry points to the location of the object file on disk.
314 * This is useful when a library is only created to satisfy a compile time dependency
315 * instead of to be distributed. This saves the time required for copying.
316 *
317 * Thin archives are always flattened. They always only contain simple headers
318 * pointing to the object file and so we need not allocate more memory than needed
319 * to find the object file.
320 *
321 */
322 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
323 isThin = 1;
324 }
325 else {
326 StgBool success = checkFatArchive(tmp, f, path);
327 if (!success)
328 goto fail;
329 }
330 DEBUG_LOG("loading archive contents\n");
331
332 while (1) {
333 DEBUG_LOG("reading at %ld\n", ftell(f));
334 n = fread ( fileName, 1, 16, f );
335 if (n != 16) {
336 if (feof(f)) {
337 DEBUG_LOG("EOF while reading from '%" PATH_FMT "'\n", path);
338 break;
339 }
340 else {
341 FAIL("Failed reading file name from `%" PATH_FMT "'", path);
342 }
343 }
344
345 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
346 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
347 DEBUG_LOG("found the start of another archive, breaking\n");
348 break;
349 }
350 #endif
351
352 n = fread ( tmp, 1, 12, f );
353 if (n != 12)
354 FAIL("Failed reading mod time from `%" PATH_FMT "'", path);
355 n = fread ( tmp, 1, 6, f );
356 if (n != 6)
357 FAIL("Failed reading owner from `%" PATH_FMT "'", path);
358 n = fread ( tmp, 1, 6, f );
359 if (n != 6)
360 FAIL("Failed reading group from `%" PATH_FMT "'", path);
361 n = fread ( tmp, 1, 8, f );
362 if (n != 8)
363 FAIL("Failed reading mode from `%" PATH_FMT "'", path);
364 n = fread ( tmp, 1, 10, f );
365 if (n != 10)
366 FAIL("Failed reading size from `%" PATH_FMT "'", path);
367 tmp[10] = '\0';
368 for (n = 0; isdigit(tmp[n]); n++);
369 tmp[n] = '\0';
370 memberSize = atoi(tmp);
371
372 DEBUG_LOG("size of this archive member is %d\n", memberSize);
373 n = fread ( tmp, 1, 2, f );
374 if (n != 2)
375 FAIL("Failed reading magic from `%" PATH_FMT "'", path);
376 if (strncmp(tmp, "\x60\x0A", 2) != 0)
377 FAIL("Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
378 path, ftell(f), tmp[0], tmp[1]);
379
380 isGnuIndex = 0;
381 /* Check for BSD-variant large filenames */
382 if (0 == strncmp(fileName, "#1/", 3)) {
383 size_t n = 0;
384 fileName[16] = '\0';
385 if (isdigit(fileName[3])) {
386 for (n = 4; isdigit(fileName[n]); n++)
387 ;
388
389 fileName[n] = '\0';
390 thisFileNameSize = atoi(fileName + 3);
391 memberSize -= thisFileNameSize;
392 if (thisFileNameSize >= fileNameSize) {
393 /* Double it to avoid potentially continually
394 increasing it by 1 */
395 fileNameSize = thisFileNameSize * 2;
396 fileName = stgReallocBytes(fileName, fileNameSize,
397 "loadArchive(fileName)");
398 }
399 n = fread(fileName, 1, thisFileNameSize, f);
400 if (n != thisFileNameSize) {
401 errorBelch("Failed reading filename from `%" PATH_FMT "'",
402 path);
403 goto fail;
404 }
405 fileName[thisFileNameSize] = 0;
406 /* On OS X at least, thisFileNameSize is the size of the
407 fileName field, not the length of the fileName
408 itself. */
409 thisFileNameSize = strlen(fileName);
410 } else {
411 errorBelch("BSD-variant filename size not found "
412 "while reading filename from `%" PATH_FMT "'", path);
413 goto fail;
414 }
415 }
416 /* Check for GNU file index file */
417 else if (0 == strncmp(fileName, "//", 2)) {
418 fileName[0] = '\0';
419 thisFileNameSize = 0;
420 isGnuIndex = 1;
421 }
422 /* Check for a file in the GNU file index */
423 else if (fileName[0] == '/') {
424 if (!lookupGNUArchiveIndex(gnuFileIndexSize, &fileName,
425 gnuFileIndex, path, &thisFileNameSize, &fileNameSize)) {
426 goto fail;
427 }
428 }
429 /* Finally, the case where the filename field actually contains
430 the filename */
431 else {
432 /* GNU ar terminates filenames with a '/', this allowing
433 spaces in filenames. So first look to see if there is a
434 terminating '/'. */
435 for (thisFileNameSize = 0;
436 thisFileNameSize < 16;
437 thisFileNameSize++) {
438 if (fileName[thisFileNameSize] == '/') {
439 fileName[thisFileNameSize] = '\0';
440 break;
441 }
442 }
443 /* If we didn't find a '/', then a space teminates the
444 filename. Note that if we don't find one, then
445 thisFileNameSize ends up as 16, and we already have the
446 '\0' at the end. */
447 if (thisFileNameSize == 16) {
448 for (thisFileNameSize = 0;
449 thisFileNameSize < 16;
450 thisFileNameSize++) {
451 if (fileName[thisFileNameSize] == ' ') {
452 fileName[thisFileNameSize] = '\0';
453 break;
454 }
455 }
456 }
457 }
458
459 DEBUG_LOG("Found member file `%s'\n", fileName);
460
461 /* TODO: Stop relying on file extensions to determine input formats.
462 Instead try to match file headers. See #13103. */
463 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
464 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0)
465 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".obj", 4) == 0);
466
467 #if defined(OBJFORMAT_PEi386)
468 /*
469 * Note [MSVC import files (ext .lib)]
470 * MSVC compilers store the object files in
471 * the import libraries with extension .dll
472 * so on Windows we should look for those too.
473 * The PE COFF format doesn't specify any specific file name
474 * for sections. So on windows, just try to load it all.
475 *
476 * Linker members (e.g. filename / are skipped since they are not needed)
477 */
478 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
479 #endif // windows
480
481 DEBUG_LOG("\tthisFileNameSize = %d\n", (int)thisFileNameSize);
482 DEBUG_LOG("\tisObject = %d\n", isObject);
483
484 if (isObject) {
485 char *archiveMemberName;
486
487 DEBUG_LOG("Member is an object file...loading...\n");
488
489 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
490 if (RTS_LINKER_USE_MMAP)
491 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
492 else {
493 /* See loadObj() */
494 misalignment = machoGetMisalignment(f);
495 image = stgMallocBytes(memberSize + misalignment,
496 "loadArchive(image)");
497 image += misalignment;
498 }
499
500 #else // not darwin
501 image = stgMallocBytes(memberSize, "loadArchive(image)");
502 #endif
503 if (isThin) {
504 if (!readThinArchiveMember(n, memberSize, path,
505 fileName, image)) {
506 goto fail;
507 }
508 }
509 else
510 {
511 n = fread ( image, 1, memberSize, f );
512 if (n != memberSize) {
513 FAIL("error whilst reading `%" PATH_FMT "'", path);
514 }
515 }
516
517 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
518 "loadArchive(file)");
519 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
520 path, (int)thisFileNameSize, fileName);
521
522 oc = mkOc(path, image, memberSize, false, archiveMemberName
523 , misalignment);
524 #if defined(OBJFORMAT_MACHO)
525 ocInit_MachO( oc );
526 #endif
527 #if defined(OBJFORMAT_ELF)
528 ocInit_ELF( oc );
529 #endif
530
531 stgFree(archiveMemberName);
532
533 if (0 == loadOc(oc)) {
534 stgFree(fileName);
535 fclose(f);
536 return 0;
537 } else {
538 oc->next = objects;
539 objects = oc;
540 }
541 }
542 else if (isGnuIndex) {
543 if (gnuFileIndex != NULL) {
544 FAIL("GNU-variant index found, but already have an index, \
545 while reading filename from `%" PATH_FMT "'", path);
546 }
547 DEBUG_LOG("Found GNU-variant file index\n");
548 #if RTS_LINKER_USE_MMAP
549 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
550 #else
551 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
552 #endif
553 n = fread ( gnuFileIndex, 1, memberSize, f );
554 if (n != memberSize) {
555 FAIL("error whilst reading `%" PATH_FMT "'", path);
556 }
557 gnuFileIndex[memberSize] = '/';
558 gnuFileIndexSize = memberSize;
559 }
560 else if (isImportLib) {
561 #if defined(OBJFORMAT_PEi386)
562 if (checkAndLoadImportLibrary(path, fileName, f)) {
563 DEBUG_LOG("Member is an import file section... "
564 "Corresponding DLL has been loaded...\n");
565 }
566 else {
567 DEBUG_LOG("Member is not a valid import file section... "
568 "Skipping...\n");
569 n = fseek(f, memberSize, SEEK_CUR);
570 if (n != 0)
571 FAIL("error whilst seeking by %d in `%" PATH_FMT "'",
572 memberSize, path);
573 }
574 #endif
575 }
576 else {
577 DEBUG_LOG("`%s' does not appear to be an object file\n",
578 fileName);
579 if (!isThin || thisFileNameSize == 0) {
580 n = fseek(f, memberSize, SEEK_CUR);
581 if (n != 0)
582 FAIL("error whilst seeking by %d in `%" PATH_FMT "'",
583 memberSize, path);
584 }
585 }
586
587 /* .ar files are 2-byte aligned */
588 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
589 DEBUG_LOG("trying to read one pad byte\n");
590 n = fread ( tmp, 1, 1, f );
591 if (n != 1) {
592 if (feof(f)) {
593 DEBUG_LOG("found EOF while reading one pad byte\n");
594 break;
595 }
596 else {
597 FAIL("Failed reading padding from `%" PATH_FMT "'", path);
598 }
599 }
600 DEBUG_LOG("successfully read one pad byte\n");
601 }
602 DEBUG_LOG("reached end of archive loading while loop\n");
603 }
604 retcode = 1;
605 fail:
606 if (f != NULL)
607 fclose(f);
608
609 if (fileName != NULL)
610 stgFree(fileName);
611 if (gnuFileIndex != NULL) {
612 #if RTS_LINKER_USE_MMAP
613 munmap(gnuFileIndex, gnuFileIndexSize + 1);
614 #else
615 stgFree(gnuFileIndex);
616 #endif
617 }
618
619 if (RTS_LINKER_USE_MMAP)
620 m32_allocator_flush();
621
622 DEBUG_LOG("done\n");
623 return retcode;
624 }
625
626 HsInt loadArchive (pathchar *path)
627 {
628 ACQUIRE_LOCK(&linker_mutex);
629 HsInt r = loadArchive_(path);
630 RELEASE_LOCK(&linker_mutex);
631 return r;
632 }