Skip 64-bit symbol tables
[ghc.git] / rts / linker / LoadArchive.c
1 #include <string.h>
2 #include <stddef.h>
3
4 #include <Rts.h>
5
6 #include "sm/Storage.h"
7 #include "sm/OSMem.h"
8 #include "RtsUtils.h"
9 #include "PathUtils.h"
10 #include "LinkerInternals.h"
11 #include "linker/M32Alloc.h"
12
13 /* Platform specific headers */
14 #if defined(OBJFORMAT_PEi386)
15 # include "linker/PEi386.h"
16 #elif defined(darwin_HOST_OS)
17 # include "linker/MachO.h"
18 # include <regex.h>
19 # include <mach/machine.h>
20 # include <mach-o/fat.h>
21 #endif
22
23 #include <ctype.h>
24
25 static HsInt loadArchive_ (pathchar *path)
26 {
27 ObjectCode* oc;
28 char *image;
29 int memberSize;
30 FILE *f;
31 int n;
32 size_t thisFileNameSize;
33 char *fileName;
34 size_t fileNameSize;
35 int isObject, isGnuIndex, isThin, isImportLib;
36 char tmp[20];
37 char *gnuFileIndex;
38 int gnuFileIndexSize;
39 #if defined(darwin_HOST_OS)
40 int i;
41 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
42 #if defined(i386_HOST_ARCH)
43 const uint32_t mycputype = CPU_TYPE_X86;
44 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
45 #elif defined(x86_64_HOST_ARCH)
46 const uint32_t mycputype = CPU_TYPE_X86_64;
47 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
48 #elif defined(powerpc_HOST_ARCH)
49 const uint32_t mycputype = CPU_TYPE_POWERPC;
50 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
51 #elif defined(powerpc64_HOST_ARCH)
52 const uint32_t mycputype = CPU_TYPE_POWERPC64;
53 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
54 #else
55 #error Unknown Darwin architecture
56 #endif
57 #endif
58 int misalignment = 0;
59
60 /* TODO: don't call barf() on error, instead return an error code, freeing
61 * all resources correctly. This function is pretty complex, so it needs
62 * to be refactored to make this practical. */
63
64 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
65 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
66
67 /* Check that we haven't already loaded this archive.
68 Ignore requests to load multiple times */
69 if (isAlreadyLoaded(path)) {
70 IF_DEBUG(linker,
71 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
72 return 1; /* success */
73 }
74
75 gnuFileIndex = NULL;
76 gnuFileIndexSize = 0;
77
78 fileNameSize = 32;
79 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
80
81 isThin = 0;
82 isImportLib = 0;
83
84 f = pathopen(path, WSTR("rb"));
85 if (!f)
86 barf("loadObj: can't read `%" PATH_FMT "'", path);
87
88 /* Check if this is an archive by looking for the magic "!<arch>\n"
89 * string. Usually, if this fails, we barf and quit. On Darwin however,
90 * we may have a fat archive, which contains archives for more than
91 * one architecture. Fat archives start with the magic number 0xcafebabe,
92 * always stored big endian. If we find a fat_header, we scan through
93 * the fat_arch structs, searching through for one for our host
94 * architecture. If a matching struct is found, we read the offset
95 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
96 * from the start of the file.
97 *
98 * A subtlety is that all of the members of the fat_header and fat_arch
99 * structs are stored big endian, so we need to call byte order
100 * conversion functions.
101 *
102 * If we find the appropriate architecture in a fat archive, we gobble
103 * its magic "!<arch>\n" string and continue processing just as if
104 * we had a single architecture archive.
105 */
106
107 n = fread ( tmp, 1, 8, f );
108 if (n != 8)
109 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
110 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
111 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
112 *
113 * ar thin libraries have the exact same format as normal archives except they
114 * have a different magic string and they don't copy the object files into the
115 * archive.
116 *
117 * Instead each header entry points to the location of the object file on disk.
118 * This is useful when a library is only created to satisfy a compile time dependency
119 * instead of to be distributed. This saves the time required for copying.
120 *
121 * Thin archives are always flattened. They always only contain simple headers
122 * pointing to the object file and so we need not allocate more memory than needed
123 * to find the object file.
124 *
125 */
126 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
127 isThin = 1;
128 }
129 #if defined(darwin_HOST_OS)
130 /* Not a standard archive, look for a fat archive magic number: */
131 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
132 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
133 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
134 nfat_offset = 0;
135
136 for (i = 0; i < (int)nfat_arch; i++) {
137 /* search for the right arch */
138 n = fread( tmp, 1, 20, f );
139 if (n != 8)
140 barf("loadArchive: Failed reading arch from `%s'", path);
141 cputype = ntohl(*(uint32_t *)tmp);
142 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
143
144 if (cputype == mycputype && cpusubtype == mycpusubtype) {
145 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
146 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
147 break;
148 }
149 }
150
151 if (nfat_offset == 0) {
152 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
153 }
154 else {
155 n = fseek( f, nfat_offset, SEEK_SET );
156 if (n != 0)
157 barf("loadArchive: Failed to seek to arch in `%s'", path);
158 n = fread ( tmp, 1, 8, f );
159 if (n != 8)
160 barf("loadArchive: Failed reading header from `%s'", path);
161 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
162 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
163 }
164 }
165 }
166 else {
167 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
168 }
169 #else
170 else {
171 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
172 }
173 #endif
174
175 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
176
177 while (1) {
178 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
179 n = fread ( fileName, 1, 16, f );
180 if (n != 16) {
181 if (feof(f)) {
182 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
183 break;
184 }
185 else {
186 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
187 }
188 }
189
190 #if defined(darwin_HOST_OS)
191 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
192 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
193 break;
194 }
195 #endif
196
197 n = fread ( tmp, 1, 12, f );
198 if (n != 12)
199 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
200 n = fread ( tmp, 1, 6, f );
201 if (n != 6)
202 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
203 n = fread ( tmp, 1, 6, f );
204 if (n != 6)
205 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
206 n = fread ( tmp, 1, 8, f );
207 if (n != 8)
208 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
209 n = fread ( tmp, 1, 10, f );
210 if (n != 10)
211 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
212 tmp[10] = '\0';
213 for (n = 0; isdigit(tmp[n]); n++);
214 tmp[n] = '\0';
215 memberSize = atoi(tmp);
216
217 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
218 n = fread ( tmp, 1, 2, f );
219 if (n != 2)
220 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
221 if (strncmp(tmp, "\x60\x0A", 2) != 0)
222 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
223 path, ftell(f), tmp[0], tmp[1]);
224
225 isGnuIndex = 0;
226 /* Check for BSD-variant large filenames */
227 if (0 == strncmp(fileName, "#1/", 3)) {
228 fileName[16] = '\0';
229 if (isdigit(fileName[3])) {
230 for (n = 4; isdigit(fileName[n]); n++);
231 fileName[n] = '\0';
232 thisFileNameSize = atoi(fileName + 3);
233 memberSize -= thisFileNameSize;
234 if (thisFileNameSize >= fileNameSize) {
235 /* Double it to avoid potentially continually
236 increasing it by 1 */
237 fileNameSize = thisFileNameSize * 2;
238 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
239 }
240 n = fread ( fileName, 1, thisFileNameSize, f );
241 if (n != (int)thisFileNameSize) {
242 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
243 path);
244 }
245 fileName[thisFileNameSize] = 0;
246
247 /* On OS X at least, thisFileNameSize is the size of the
248 fileName field, not the length of the fileName
249 itself. */
250 thisFileNameSize = strlen(fileName);
251 }
252 else {
253 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
254 }
255 }
256 /* Check for GNU file index file */
257 else if (0 == strncmp(fileName, "//", 2)) {
258 fileName[0] = '\0';
259 thisFileNameSize = 0;
260 isGnuIndex = 1;
261 }
262 /* Check for a file in the GNU file index */
263 else if (fileName[0] == '/') {
264 if (isdigit(fileName[1])) {
265 int i;
266
267 for (n = 2; isdigit(fileName[n]); n++);
268 fileName[n] = '\0';
269 n = atoi(fileName + 1);
270
271 if (gnuFileIndex == NULL) {
272 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
273 }
274 if (n < 0 || n > gnuFileIndexSize) {
275 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
276 }
277 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
278 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
279 }
280 for (i = n; gnuFileIndex[i] != '\n'; i++);
281 thisFileNameSize = i - n - 1;
282 if (thisFileNameSize >= fileNameSize) {
283 /* Double it to avoid potentially continually
284 increasing it by 1 */
285 fileNameSize = thisFileNameSize * 2;
286 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
287 }
288 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
289 fileName[thisFileNameSize] = '\0';
290 }
291 /* Skip 32-bit symbol table ("/" + 15 blank characters)
292 and 64-bit symbol table ("/SYM64/" + 9 blank characters) */
293 else if (fileName[1] == ' ' || (0 == strncmp(&fileName[1], "SYM64/", 6))) {
294 fileName[0] = '\0';
295 thisFileNameSize = 0;
296 }
297 else {
298 barf("loadArchive: invalid GNU-variant filename `%.16s' while reading filename from `%s'", fileName, path);
299 }
300 }
301 /* Finally, the case where the filename field actually contains
302 the filename */
303 else {
304 /* GNU ar terminates filenames with a '/', this allowing
305 spaces in filenames. So first look to see if there is a
306 terminating '/'. */
307 for (thisFileNameSize = 0;
308 thisFileNameSize < 16;
309 thisFileNameSize++) {
310 if (fileName[thisFileNameSize] == '/') {
311 fileName[thisFileNameSize] = '\0';
312 break;
313 }
314 }
315 /* If we didn't find a '/', then a space teminates the
316 filename. Note that if we don't find one, then
317 thisFileNameSize ends up as 16, and we already have the
318 '\0' at the end. */
319 if (thisFileNameSize == 16) {
320 for (thisFileNameSize = 0;
321 thisFileNameSize < 16;
322 thisFileNameSize++) {
323 if (fileName[thisFileNameSize] == ' ') {
324 fileName[thisFileNameSize] = '\0';
325 break;
326 }
327 }
328 }
329 }
330
331 IF_DEBUG(linker,
332 debugBelch("loadArchive: Found member file `%s'\n", fileName));
333
334 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
335 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
336
337 #if defined(OBJFORMAT_PEi386)
338 /*
339 * Note [MSVC import files (ext .lib)]
340 * MSVC compilers store the object files in
341 * the import libraries with extension .dll
342 * so on Windows we should look for those too.
343 * The PE COFF format doesn't specify any specific file name
344 * for sections. So on windows, just try to load it all.
345 *
346 * Linker members (e.g. filename / are skipped since they are not needed)
347 */
348 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
349
350 /*
351 * Note [GCC import files (ext .dll.a)]
352 * GCC stores import information in the same binary format
353 * as the object file normally has. The only difference is that
354 * all the information are put in .idata sections. The only real
355 * way to tell if we're dealing with an import lib is by looking
356 * at the file extension.
357 */
358 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
359 #endif // windows
360
361 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
362 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
363
364 if (isObject) {
365 char *archiveMemberName;
366
367 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
368
369 #if defined(mingw32_HOST_OS)
370 // TODO: We would like to use allocateExec here, but allocateExec
371 // cannot currently allocate blocks large enough.
372 image = allocateImageAndTrampolines(path, fileName, f, memberSize,
373 isThin);
374 #elif defined(darwin_HOST_OS)
375 if (RTS_LINKER_USE_MMAP)
376 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
377 else {
378 /* See loadObj() */
379 misalignment = machoGetMisalignment(f);
380 image = stgMallocBytes(memberSize + misalignment,
381 "loadArchive(image)");
382 image += misalignment;
383 }
384
385 #else // not windows or darwin
386 image = stgMallocBytes(memberSize, "loadArchive(image)");
387 #endif
388 if (isThin) {
389 FILE *member;
390 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
391
392 /* Allocate and setup the dirname of the archive. We'll need
393 this to locate the thin member */
394 pathCopy = pathdup(path); // Convert the char* to a pathchar*
395 dirName = pathdir(pathCopy);
396
397 /* Append the relative member name to the dirname. This should be
398 be the full path to the actual thin member. */
399 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
400 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
401 objFileName = mkPath(fileName);
402 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
403 stgFree(objFileName);
404 stgFree(dirName);
405
406 member = pathopen(memberPath, WSTR("rb"));
407 if (!member)
408 barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
409
410 n = fread ( image, 1, memberSize, member );
411 if (n != memberSize) {
412 barf("loadArchive: error whilst reading `%s'", fileName);
413 }
414
415 fclose(member);
416 stgFree(memberPath);
417 stgFree(pathCopy);
418 }
419 else
420 {
421 n = fread ( image, 1, memberSize, f );
422 if (n != memberSize) {
423 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
424 }
425 }
426
427 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
428 "loadArchive(file)");
429 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
430 path, (int)thisFileNameSize, fileName);
431
432 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
433 , misalignment);
434
435 stgFree(archiveMemberName);
436
437 if (0 == loadOc(oc)) {
438 stgFree(fileName);
439 fclose(f);
440 return 0;
441 } else {
442 #if defined(OBJFORMAT_PEi386)
443 if (isImportLib)
444 {
445 findAndLoadImportLibrary(oc);
446 stgFree(oc);
447 oc = NULL;
448 break;
449 } else {
450 #endif
451 oc->next = objects;
452 objects = oc;
453 #if defined(OBJFORMAT_PEi386)
454 }
455 #endif
456 }
457 }
458 else if (isGnuIndex) {
459 if (gnuFileIndex != NULL) {
460 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
461 }
462 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
463 #if RTS_LINKER_USE_MMAP
464 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
465 #else
466 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
467 #endif
468 n = fread ( gnuFileIndex, 1, memberSize, f );
469 if (n != memberSize) {
470 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
471 }
472 gnuFileIndex[memberSize] = '/';
473 gnuFileIndexSize = memberSize;
474 }
475 else if (isImportLib) {
476 #if defined(OBJFORMAT_PEi386)
477 if (checkAndLoadImportLibrary(path, fileName, f)) {
478 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
479 }
480 else {
481 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
482 n = fseek(f, memberSize, SEEK_CUR);
483 if (n != 0)
484 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
485 memberSize, path);
486 }
487 #endif
488 }
489 else {
490 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
491 if (!isThin || thisFileNameSize == 0) {
492 n = fseek(f, memberSize, SEEK_CUR);
493 if (n != 0)
494 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
495 memberSize, path);
496 }
497 }
498
499 /* .ar files are 2-byte aligned */
500 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
501 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
502 n = fread ( tmp, 1, 1, f );
503 if (n != 1) {
504 if (feof(f)) {
505 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
506 break;
507 }
508 else {
509 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
510 }
511 }
512 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
513 }
514 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
515 }
516
517 fclose(f);
518
519 stgFree(fileName);
520 if (gnuFileIndex != NULL) {
521 #if RTS_LINKER_USE_MMAP
522 munmap(gnuFileIndex, gnuFileIndexSize + 1);
523 #else
524 stgFree(gnuFileIndex);
525 #endif
526 }
527
528 if (RTS_LINKER_USE_MMAP)
529 m32_allocator_flush();
530
531 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
532 return 1;
533 }
534
535 HsInt loadArchive (pathchar *path)
536 {
537 ACQUIRE_LOCK(&linker_mutex);
538 HsInt r = loadArchive_(path);
539 RELEASE_LOCK(&linker_mutex);
540 return r;
541 }