rts/linker: Move loadArchive to new source file
[ghc.git] / rts / linker / LoadArchive.c
1 #include <string.h>
2 #include <stddef.h>
3
4 #include <Rts.h>
5
6 #include "sm/Storage.h"
7 #include "sm/OSMem.h"
8 #include "RtsUtils.h"
9 #include "PathUtils.h"
10 #include "LinkerInternals.h"
11 #include "linker/M32Alloc.h"
12 #if defined(OBJFORMAT_PEi386)
13 #include "linkers/PEi386.h"
14 #endif
15
16 #include <ctype.h>
17
18 static HsInt loadArchive_ (pathchar *path)
19 {
20 ObjectCode* oc;
21 char *image;
22 int memberSize;
23 FILE *f;
24 int n;
25 size_t thisFileNameSize;
26 char *fileName;
27 size_t fileNameSize;
28 int isObject, isGnuIndex, isThin, isImportLib;
29 char tmp[20];
30 char *gnuFileIndex;
31 int gnuFileIndexSize;
32 #if defined(darwin_HOST_OS)
33 int i;
34 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
35 #if defined(i386_HOST_ARCH)
36 const uint32_t mycputype = CPU_TYPE_X86;
37 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
38 #elif defined(x86_64_HOST_ARCH)
39 const uint32_t mycputype = CPU_TYPE_X86_64;
40 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
41 #elif defined(powerpc_HOST_ARCH)
42 const uint32_t mycputype = CPU_TYPE_POWERPC;
43 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
44 #elif defined(powerpc64_HOST_ARCH)
45 const uint32_t mycputype = CPU_TYPE_POWERPC64;
46 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
47 #else
48 #error Unknown Darwin architecture
49 #endif
50 #endif
51 int misalignment = 0;
52
53 /* TODO: don't call barf() on error, instead return an error code, freeing
54 * all resources correctly. This function is pretty complex, so it needs
55 * to be refactored to make this practical. */
56
57 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
58 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
59
60 /* Check that we haven't already loaded this archive.
61 Ignore requests to load multiple times */
62 if (isAlreadyLoaded(path)) {
63 IF_DEBUG(linker,
64 debugBelch("ignoring repeated load of %" PATH_FMT "\n", path));
65 return 1; /* success */
66 }
67
68 gnuFileIndex = NULL;
69 gnuFileIndexSize = 0;
70
71 fileNameSize = 32;
72 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
73
74 isThin = 0;
75 isImportLib = 0;
76
77 f = pathopen(path, WSTR("rb"));
78 if (!f)
79 barf("loadObj: can't read `%" PATH_FMT "'", path);
80
81 /* Check if this is an archive by looking for the magic "!<arch>\n"
82 * string. Usually, if this fails, we barf and quit. On Darwin however,
83 * we may have a fat archive, which contains archives for more than
84 * one architecture. Fat archives start with the magic number 0xcafebabe,
85 * always stored big endian. If we find a fat_header, we scan through
86 * the fat_arch structs, searching through for one for our host
87 * architecture. If a matching struct is found, we read the offset
88 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
89 * from the start of the file.
90 *
91 * A subtlety is that all of the members of the fat_header and fat_arch
92 * structs are stored big endian, so we need to call byte order
93 * conversion functions.
94 *
95 * If we find the appropriate architecture in a fat archive, we gobble
96 * its magic "!<arch>\n" string and continue processing just as if
97 * we had a single architecture archive.
98 */
99
100 n = fread ( tmp, 1, 8, f );
101 if (n != 8)
102 barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
103 if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
104 /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
105 *
106 * ar thin libraries have the exact same format as normal archives except they
107 * have a different magic string and they don't copy the object files into the
108 * archive.
109 *
110 * Instead each header entry points to the location of the object file on disk.
111 * This is useful when a library is only created to satisfy a compile time dependency
112 * instead of to be distributed. This saves the time required for copying.
113 *
114 * Thin archives are always flattened. They always only contain simple headers
115 * pointing to the object file and so we need not allocate more memory than needed
116 * to find the object file.
117 *
118 */
119 else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
120 isThin = 1;
121 }
122 #if defined(darwin_HOST_OS)
123 /* Not a standard archive, look for a fat archive magic number: */
124 else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
125 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
126 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
127 nfat_offset = 0;
128
129 for (i = 0; i < (int)nfat_arch; i++) {
130 /* search for the right arch */
131 n = fread( tmp, 1, 20, f );
132 if (n != 8)
133 barf("loadArchive: Failed reading arch from `%s'", path);
134 cputype = ntohl(*(uint32_t *)tmp);
135 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
136
137 if (cputype == mycputype && cpusubtype == mycpusubtype) {
138 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
139 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
140 break;
141 }
142 }
143
144 if (nfat_offset == 0) {
145 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
146 }
147 else {
148 n = fseek( f, nfat_offset, SEEK_SET );
149 if (n != 0)
150 barf("loadArchive: Failed to seek to arch in `%s'", path);
151 n = fread ( tmp, 1, 8, f );
152 if (n != 8)
153 barf("loadArchive: Failed reading header from `%s'", path);
154 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
155 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
156 }
157 }
158 }
159 else {
160 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
161 }
162 #else
163 else {
164 barf("loadArchive: Not an archive: `%" PATH_FMT "'", path);
165 }
166 #endif
167
168 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
169
170 while (1) {
171 IF_DEBUG(linker, debugBelch("loadArchive: reading at %ld\n", ftell(f)));
172 n = fread ( fileName, 1, 16, f );
173 if (n != 16) {
174 if (feof(f)) {
175 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
176 break;
177 }
178 else {
179 barf("loadArchive: Failed reading file name from `%" PATH_FMT "'", path);
180 }
181 }
182
183 #if defined(darwin_HOST_OS)
184 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
185 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
186 break;
187 }
188 #endif
189
190 n = fread ( tmp, 1, 12, f );
191 if (n != 12)
192 barf("loadArchive: Failed reading mod time from `%" PATH_FMT "'", path);
193 n = fread ( tmp, 1, 6, f );
194 if (n != 6)
195 barf("loadArchive: Failed reading owner from `%" PATH_FMT "'", path);
196 n = fread ( tmp, 1, 6, f );
197 if (n != 6)
198 barf("loadArchive: Failed reading group from `%" PATH_FMT "'", path);
199 n = fread ( tmp, 1, 8, f );
200 if (n != 8)
201 barf("loadArchive: Failed reading mode from `%" PATH_FMT "'", path);
202 n = fread ( tmp, 1, 10, f );
203 if (n != 10)
204 barf("loadArchive: Failed reading size from `%" PATH_FMT "'", path);
205 tmp[10] = '\0';
206 for (n = 0; isdigit(tmp[n]); n++);
207 tmp[n] = '\0';
208 memberSize = atoi(tmp);
209
210 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
211 n = fread ( tmp, 1, 2, f );
212 if (n != 2)
213 barf("loadArchive: Failed reading magic from `%" PATH_FMT "'", path);
214 if (strncmp(tmp, "\x60\x0A", 2) != 0)
215 barf("loadArchive: Failed reading magic from `%" PATH_FMT "' at %ld. Got %c%c",
216 path, ftell(f), tmp[0], tmp[1]);
217
218 isGnuIndex = 0;
219 /* Check for BSD-variant large filenames */
220 if (0 == strncmp(fileName, "#1/", 3)) {
221 fileName[16] = '\0';
222 if (isdigit(fileName[3])) {
223 for (n = 4; isdigit(fileName[n]); n++);
224 fileName[n] = '\0';
225 thisFileNameSize = atoi(fileName + 3);
226 memberSize -= thisFileNameSize;
227 if (thisFileNameSize >= fileNameSize) {
228 /* Double it to avoid potentially continually
229 increasing it by 1 */
230 fileNameSize = thisFileNameSize * 2;
231 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
232 }
233 n = fread ( fileName, 1, thisFileNameSize, f );
234 if (n != (int)thisFileNameSize) {
235 barf("loadArchive: Failed reading filename from `%" PATH_FMT "'",
236 path);
237 }
238 fileName[thisFileNameSize] = 0;
239
240 /* On OS X at least, thisFileNameSize is the size of the
241 fileName field, not the length of the fileName
242 itself. */
243 thisFileNameSize = strlen(fileName);
244 }
245 else {
246 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
247 }
248 }
249 /* Check for GNU file index file */
250 else if (0 == strncmp(fileName, "//", 2)) {
251 fileName[0] = '\0';
252 thisFileNameSize = 0;
253 isGnuIndex = 1;
254 }
255 /* Check for a file in the GNU file index */
256 else if (fileName[0] == '/') {
257 if (isdigit(fileName[1])) {
258 int i;
259
260 for (n = 2; isdigit(fileName[n]); n++);
261 fileName[n] = '\0';
262 n = atoi(fileName + 1);
263
264 if (gnuFileIndex == NULL) {
265 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
266 }
267 if (n < 0 || n > gnuFileIndexSize) {
268 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
269 }
270 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
271 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
272 }
273 for (i = n; gnuFileIndex[i] != '\n'; i++);
274 thisFileNameSize = i - n - 1;
275 if (thisFileNameSize >= fileNameSize) {
276 /* Double it to avoid potentially continually
277 increasing it by 1 */
278 fileNameSize = thisFileNameSize * 2;
279 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
280 }
281 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
282 fileName[thisFileNameSize] = '\0';
283 }
284 else if (fileName[1] == ' ') {
285 fileName[0] = '\0';
286 thisFileNameSize = 0;
287 }
288 else {
289 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
290 }
291 }
292 /* Finally, the case where the filename field actually contains
293 the filename */
294 else {
295 /* GNU ar terminates filenames with a '/', this allowing
296 spaces in filenames. So first look to see if there is a
297 terminating '/'. */
298 for (thisFileNameSize = 0;
299 thisFileNameSize < 16;
300 thisFileNameSize++) {
301 if (fileName[thisFileNameSize] == '/') {
302 fileName[thisFileNameSize] = '\0';
303 break;
304 }
305 }
306 /* If we didn't find a '/', then a space teminates the
307 filename. Note that if we don't find one, then
308 thisFileNameSize ends up as 16, and we already have the
309 '\0' at the end. */
310 if (thisFileNameSize == 16) {
311 for (thisFileNameSize = 0;
312 thisFileNameSize < 16;
313 thisFileNameSize++) {
314 if (fileName[thisFileNameSize] == ' ') {
315 fileName[thisFileNameSize] = '\0';
316 break;
317 }
318 }
319 }
320 }
321
322 IF_DEBUG(linker,
323 debugBelch("loadArchive: Found member file `%s'\n", fileName));
324
325 isObject = (thisFileNameSize >= 2 && strncmp(fileName + thisFileNameSize - 2, ".o" , 2) == 0)
326 || (thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".p_o", 4) == 0);
327
328 #if defined(OBJFORMAT_PEi386)
329 /*
330 * Note [MSVC import files (ext .lib)]
331 * MSVC compilers store the object files in
332 * the import libraries with extension .dll
333 * so on Windows we should look for those too.
334 * The PE COFF format doesn't specify any specific file name
335 * for sections. So on windows, just try to load it all.
336 *
337 * Linker members (e.g. filename / are skipped since they are not needed)
338 */
339 isImportLib = thisFileNameSize >= 4 && strncmp(fileName + thisFileNameSize - 4, ".dll", 4) == 0;
340
341 /*
342 * Note [GCC import files (ext .dll.a)]
343 * GCC stores import information in the same binary format
344 * as the object file normally has. The only difference is that
345 * all the information are put in .idata sections. The only real
346 * way to tell if we're dealing with an import lib is by looking
347 * at the file extension.
348 */
349 isImportLib = isImportLib || endsWithPath(path, WSTR(".dll.a"));
350 #endif // windows
351
352 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
353 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
354
355 if (isObject) {
356 char *archiveMemberName;
357
358 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
359
360 #if defined(mingw32_HOST_OS)
361 // TODO: We would like to use allocateExec here, but allocateExec
362 // cannot currently allocate blocks large enough.
363 image = allocateImageAndTrampolines(path, fileName, f, memberSize,
364 isThin);
365 #elif defined(darwin_HOST_OS)
366 if (RTS_LINKER_USE_MMAP)
367 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
368 else {
369 /* See loadObj() */
370 misalignment = machoGetMisalignment(f);
371 image = stgMallocBytes(memberSize + misalignment,
372 "loadArchive(image)");
373 image += misalignment;
374 }
375
376 #else // not windows or darwin
377 image = stgMallocBytes(memberSize, "loadArchive(image)");
378 #endif
379 if (isThin) {
380 FILE *member;
381 pathchar *pathCopy, *dirName, *memberPath, *objFileName;
382
383 /* Allocate and setup the dirname of the archive. We'll need
384 this to locate the thin member */
385 pathCopy = pathdup(path); // Convert the char* to a pathchar*
386 dirName = pathdir(pathCopy);
387
388 /* Append the relative member name to the dirname. This should be
389 be the full path to the actual thin member. */
390 int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
391 memberPath = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
392 objFileName = mkPath(fileName);
393 pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
394 stgFree(objFileName);
395 stgFree(dirName);
396
397 member = pathopen(memberPath, WSTR("rb"));
398 if (!member)
399 barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
400
401 n = fread ( image, 1, memberSize, member );
402 if (n != memberSize) {
403 barf("loadArchive: error whilst reading `%s'", fileName);
404 }
405
406 fclose(member);
407 stgFree(memberPath);
408 stgFree(pathCopy);
409 }
410 else
411 {
412 n = fread ( image, 1, memberSize, f );
413 if (n != memberSize) {
414 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
415 }
416 }
417
418 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
419 "loadArchive(file)");
420 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
421 path, (int)thisFileNameSize, fileName);
422
423 oc = mkOc(path, image, memberSize, rtsFalse, archiveMemberName
424 , misalignment);
425
426 stgFree(archiveMemberName);
427
428 if (0 == loadOc(oc)) {
429 stgFree(fileName);
430 fclose(f);
431 return 0;
432 } else {
433 #if defined(OBJFORMAT_PEi386)
434 if (isImportLib)
435 {
436 findAndLoadImportLibrary(oc);
437 stgFree(oc);
438 oc = NULL;
439 break;
440 } else {
441 #endif
442 oc->next = objects;
443 objects = oc;
444 #if defined(OBJFORMAT_PEi386)
445 }
446 #endif
447 }
448 }
449 else if (isGnuIndex) {
450 if (gnuFileIndex != NULL) {
451 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
452 }
453 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
454 #if RTS_LINKER_USE_MMAP
455 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1, 0);
456 #else
457 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
458 #endif
459 n = fread ( gnuFileIndex, 1, memberSize, f );
460 if (n != memberSize) {
461 barf("loadArchive: error whilst reading `%" PATH_FMT "'", path);
462 }
463 gnuFileIndex[memberSize] = '/';
464 gnuFileIndexSize = memberSize;
465 }
466 else if (isImportLib) {
467 #if defined(OBJFORMAT_PEi386)
468 if (checkAndLoadImportLibrary(path, fileName, f)) {
469 IF_DEBUG(linker, debugBelch("loadArchive: Member is an import file section... Corresponding DLL has been loaded...\n"));
470 }
471 else {
472 IF_DEBUG(linker, debugBelch("loadArchive: Member is not a valid import file section... Skipping...\n"));
473 n = fseek(f, memberSize, SEEK_CUR);
474 if (n != 0)
475 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
476 memberSize, path);
477 }
478 #endif
479 }
480 else {
481 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
482 if (!isThin || thisFileNameSize == 0) {
483 n = fseek(f, memberSize, SEEK_CUR);
484 if (n != 0)
485 barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
486 memberSize, path);
487 }
488 }
489
490 /* .ar files are 2-byte aligned */
491 if (!(isThin && thisFileNameSize > 0) && memberSize % 2) {
492 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
493 n = fread ( tmp, 1, 1, f );
494 if (n != 1) {
495 if (feof(f)) {
496 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
497 break;
498 }
499 else {
500 barf("loadArchive: Failed reading padding from `%" PATH_FMT "'", path);
501 }
502 }
503 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
504 }
505 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
506 }
507
508 fclose(f);
509
510 stgFree(fileName);
511 if (gnuFileIndex != NULL) {
512 #if RTS_LINKER_USE_MMAP
513 munmap(gnuFileIndex, gnuFileIndexSize + 1);
514 #else
515 stgFree(gnuFileIndex);
516 #endif
517 }
518
519 if (RTS_LINKER_USE_MMAP)
520 m32_allocator_flush();
521
522 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
523 return 1;
524 }
525
526 HsInt loadArchive (pathchar *path)
527 {
528 ACQUIRE_LOCK(&linker_mutex);
529 HsInt r = loadArchive_(path);
530 RELEASE_LOCK(&linker_mutex);
531 return r;
532 }