Gracefully handle error condition in Mach-O relocateSection
[ghc.git] / rts / linker / MachO.c
1 #include "Rts.h"
2
3 #if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
4
5 #if defined(ios_HOST_OS) && !RTS_LINKER_USE_MMAP
6 #error "ios must use mmap and mprotect!"
7 #endif
8
9 /* for roundUpToPage */
10 #include "sm/OSMem.h"
11
12 #include "RtsUtils.h"
13 #include "GetEnv.h"
14 #include "LinkerInternals.h"
15 #include "linker/MachO.h"
16 #include "linker/CacheFlush.h"
17 #include "linker/SymbolExtras.h"
18
19 #include <string.h>
20 #include <regex.h>
21 #include <mach/machine.h>
22 #include <mach-o/fat.h>
23 #include <mach-o/loader.h>
24 #include <mach-o/nlist.h>
25 #include <mach-o/reloc.h>
26
27 #if defined(HAVE_SYS_MMAN_H) && RTS_LINKER_USE_MMAP
28 # include <sys/mman.h>
29 #endif
30
31 #if defined(x86_64_HOST_ARCH)
32 # include <mach-o/x86_64/reloc.h>
33 #endif
34
35 #if defined(aarch64_HOST_ARCH)
36 # include <mach-o/arm64/reloc.h>
37 #endif
38
39 /*
40 Support for MachO linking on Darwin/MacOS X
41 by Wolfgang Thaller (wolfgang.thaller@gmx.net)
42
43 I hereby formally apologize for the hackish nature of this code.
44 Things that need to be done:
45 *) implement ocVerifyImage_MachO
46 *) add still more sanity checks.
47 */
#if defined(aarch64_HOST_ARCH)
/* aarch64 linker by moritz angermann <moritz@lichtzwerge.de> */

/* Often we need to sign-extend a value that is only a certain number of
 * bits wide into an int64_t, e.g. for relative offsets.
 */
int64_t signExtend(uint64_t val, uint8_t bits);

/* Helper functions to check some instruction properties */
bool isVectorOp(uint32_t *p);
bool isLoadStore(uint32_t *p);

/* aarch64 relocations may contain an addend already in the position
 * where we want to write the address offset to. Thus decoding as well
 * as encoding is needed.
 */
bool fitsBits(size_t bits, int64_t value);
int64_t decodeAddend(ObjectCode * oc, Section * section,
                     MachORelocationInfo * ri);
void encodeAddend(ObjectCode * oc, Section * section,
                  MachORelocationInfo * ri, int64_t addend);

/* Finding and making stubs. We don't need to care about the symbol they
 * represent. As long as two stubs point to the same address, they are
 * identical.
 *
 * NOTE: findStub and makeStub return EXIT_SUCCESS (0) on success and
 * EXIT_FAILURE (1) on failure, so a *true* result means failure.
 */
bool findStub(Section * section, void ** addr);
bool makeStub(Section * section, void ** addr);
void freeStubs(Section * section);

/* Global Offset Table logic */
bool isGotLoad(MachORelocationInfo * ri);
bool needGotSlot(MachONList * symbol);
bool makeGot(ObjectCode * oc);
void freeGot(ObjectCode * oc);
#endif /* aarch64_HOST_ARCH */
82
#if defined(ios_HOST_OS)
/* On iOS a page may be r+w or r+x, but never both. Hence pages are mmap'ed
 * r+w first and switched to r+x with mprotect later on.
 */
bool ocMprotect_MachO(ObjectCode *oc);
#endif /* ios_HOST_OS */
89
90 /*
91 * Initialize some common data in the object code so we don't have to
92 * continuously look up the addresses.
93 */
94 void
95 ocInit_MachO(ObjectCode * oc)
96 {
97 ocDeinit_MachO(oc);
98
99 oc->info = (struct ObjectCodeFormatInfo*)stgCallocBytes(
100 1, sizeof *oc->info,
101 "ocInit_MachO(ObjectCodeFormatInfo)");
102 oc->info->header = (MachOHeader *) oc->image;
103 oc->info->symCmd = NULL;
104 oc->info->segCmd = NULL;
105 oc->info->dsymCmd = NULL;
106
107 MachOLoadCommand *lc = (MachOLoadCommand*)(oc->image + sizeof(MachOHeader));
108 for(size_t i = 0; i < oc->info->header->ncmds; i++) {
109 if (lc->cmd == LC_SEGMENT || lc->cmd == LC_SEGMENT_64) {
110 oc->info->segCmd = (MachOSegmentCommand*) lc;
111 }
112 else if (lc->cmd == LC_SYMTAB) {
113 oc->info->symCmd = (MachOSymtabCommand*) lc;
114 }
115 else if (lc->cmd == LC_DYSYMTAB) {
116 oc->info->dsymCmd = (MachODsymtabCommand*) lc;
117 }
118 lc = (MachOLoadCommand *) ( ((char*)lc) + lc->cmdsize );
119 }
120 if (NULL == oc->info->segCmd) {
121 barf("ocGetNames_MachO: no segment load command");
122 }
123
124 oc->info->macho_sections = (MachOSection*) (oc->info->segCmd+1);
125 oc->n_sections = oc->info->segCmd->nsects;
126
127 oc->info->nlist = oc->info->symCmd == NULL
128 ? NULL
129 : (MachONList *)(oc->image + oc->info->symCmd->symoff);
130 oc->info->names = oc->info->symCmd == NULL
131 ? NULL
132 : (oc->image + oc->info->symCmd->stroff);
133
134 /* If we have symbols, allocate and fill the macho_symbols
135 * This will make relocation easier.
136 */
137 oc->info->n_macho_symbols = 0;
138 oc->info->macho_symbols = NULL;
139
140 if(NULL != oc->info->nlist) {
141 oc->info->n_macho_symbols = oc->info->symCmd->nsyms;
142 oc->info->macho_symbols = (MachOSymbol*)stgCallocBytes(
143 oc->info->symCmd->nsyms,
144 sizeof(MachOSymbol),
145 "ocInit_MachO(MachOSymbol)");
146 for(uint32_t i = 0; i < oc->info->symCmd->nsyms; i++) {
147 oc->info->macho_symbols[i].name = oc->info->names
148 + oc->info->nlist[i].n_un.n_strx;
149 oc->info->macho_symbols[i].nlist = &oc->info->nlist[i];
150 /* We don't have an address for this symbol yet; this
151 * will be populated during ocGetNames_MachO. Hence init
152 * with NULL
153 */
154 oc->info->macho_symbols[i].addr = NULL;
155 oc->info->macho_symbols[i].got_addr = NULL;
156 }
157 }
158 }
159
160 void
161 ocDeinit_MachO(ObjectCode * oc) {
162 if (oc->info != NULL) {
163 if(oc->info->n_macho_symbols > 0) {
164 stgFree(oc->info->macho_symbols);
165 }
166 #if defined(aarch64_HOST_ARCH)
167 freeGot(oc);
168 for(int i = 0; i < oc->n_sections; i++) {
169 freeStubs(&oc->sections[i]);
170 }
171 #endif
172 stgFree(oc->info);
173 oc->info = NULL;
174 }
175 }
176
177 static int
178 resolveImports(
179 ObjectCode* oc,
180 MachOSection *sect, // ptr to lazy or non-lazy symbol pointer section
181 unsigned long *indirectSyms);
182
183 #if NEED_SYMBOL_EXTRAS
184 #if defined(x86_64_HOST_ARCH) || defined(aarch64_HOST_ARCH)
185
186 int
187 ocAllocateExtras_MachO(ObjectCode* oc)
188 {
189 IF_DEBUG(linker, debugBelch("ocAllocateExtras_MachO: start\n"));
190
191 if (NULL != oc->info->symCmd) {
192 IF_DEBUG(linker,
193 debugBelch("ocAllocateExtras_MachO: allocate %d symbols\n",
194 oc->info->symCmd->nsyms));
195 IF_DEBUG(linker, debugBelch("ocAllocateExtras_MachO: done\n"));
196 return ocAllocateExtras(oc, oc->info->symCmd->nsyms, 0, 0);
197 }
198
199 IF_DEBUG(linker,
200 debugBelch("ocAllocateExtras_MachO: allocated no symbols\n"));
201 IF_DEBUG(linker, debugBelch("ocAllocateExtras_MachO: done\n"));
202 return ocAllocateExtras(oc, 0, 0, 0);
203 }
204
205 #else
206 #error Unknown MachO architecture
207 #endif /* HOST_ARCH */
208 #endif /* NEED_SYMBOL_EXTRAS */
209
210 int
211 ocVerifyImage_MachO(ObjectCode * oc)
212 {
213 char *image = (char*) oc->image;
214 MachOHeader *header = (MachOHeader*) image;
215
216 IF_DEBUG(linker, debugBelch("ocVerifyImage_MachO: start\n"));
217
218 if(header->magic != MH_MAGIC_64) {
219 errorBelch("Could not load image %s: bad magic!\n"
220 " Expected %08x (64bit), got %08x%s\n",
221 oc->fileName, MH_MAGIC_64, header->magic,
222 header->magic == MH_MAGIC ? " (32bit)." : ".");
223 return 0;
224 }
225
226 // FIXME: do some more verifying here
227 IF_DEBUG(linker, debugBelch("ocVerifyImage_MachO: done\n"));
228 return 1;
229 }
230
231 static int
232 resolveImports(
233 ObjectCode* oc,
234 MachOSection *sect, // ptr to lazy or non-lazy symbol pointer section
235 unsigned long *indirectSyms)
236 {
237 size_t itemSize = 4;
238
239 IF_DEBUG(linker, debugBelch("resolveImports: start\n"));
240
241 for(unsigned i = 0; i * itemSize < sect->size; i++)
242 {
243 // according to otool, reserved1 contains the first index into the
244 // indirect symbol table
245 unsigned long indirectSymbolIndex = indirectSyms[sect->reserved1+i];
246 MachOSymbol *symbol = &oc->info->macho_symbols[indirectSymbolIndex];
247 SymbolAddr* addr = NULL;
248
249 IF_DEBUG(linker, debugBelch("resolveImports: resolving %s\n", symbol->name));
250
251 if ((symbol->nlist->n_type & N_TYPE) == N_UNDF
252 && (symbol->nlist->n_type & N_EXT) && (symbol->nlist->n_value != 0)) {
253 addr = (SymbolAddr*) (symbol->nlist->n_value);
254 IF_DEBUG(linker, debugBelch("resolveImports: undefined external %s has value %p\n", symbol->name, addr));
255 } else {
256 addr = lookupSymbol_(symbol->name);
257 IF_DEBUG(linker, debugBelch("resolveImports: looking up %s, %p\n", symbol->name, addr));
258 }
259
260 if (addr == NULL)
261 {
262 errorBelch("\nlookupSymbol failed in resolveImports\n"
263 "%s: unknown symbol `%s'", oc->fileName, symbol->name);
264 return 0;
265 }
266 ASSERT(addr);
267
268 checkProddableBlock(oc,
269 ((void**)(oc->image + sect->offset)) + i,
270 sizeof(void *));
271 ((void**)(oc->image + sect->offset))[i] = addr;
272 }
273
274 IF_DEBUG(linker, debugBelch("resolveImports: done\n"));
275 return 1;
276 }
277
278 #if defined(aarch64_HOST_ARCH)
279 /* aarch64 linker by moritz angermann <moritz@lichtzwerge.de> */
280
/*
 * Interpret the low `bits` bits of `val` as a two's complement number and
 * widen it to 64 bits. Bits of `val` above position `bits` are ignored.
 *
 * bits == 0 yields 0; bits >= 64 returns `val` reinterpreted as signed.
 * (Shifting by 64 or more, as a naive shift-left/shift-right pair would do
 * for those widths, is undefined behaviour.)
 */
int64_t
signExtend(uint64_t val, uint8_t bits) {
    if (bits == 0) {
        return 0;
    }
    if (bits >= 64) {
        return (int64_t)val;
    }

    const uint64_t signBit = (uint64_t)1 << (bits - 1);
    const uint64_t low     = val & ((signBit << 1) - 1);

    /* flip the sign bit, then subtract it again: values with the sign bit
     * set wrap around to the corresponding negative number */
    return (int64_t)((low ^ signBit) - signBit);
}
285
/* True iff both bits selected by the mask 0x04800000 are set in the 32bit
 * word at `p`.
 */
bool
isVectorOp(uint32_t *p) {
    const uint32_t mask = 0x04800000;
    const uint32_t word = *p;
    return (word & mask) == mask;
}
290
/* True iff the 32bit word at `p`, masked with 0x3B000000, equals
 * 0x39000000.
 */
bool
isLoadStore(uint32_t *p) {
    const uint32_t selected = *p & 0x3B000000;
    if (selected == 0x39000000) {
        return true;
    }
    return false;
}
295
296 int64_t
297 decodeAddend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) {
298
299 /* the instruction. It is 32bit wide */
300 uint32_t * p = (uint32_t*)((uint8_t*)section->start + ri->r_address);
301
302 checkProddableBlock(oc, (void*)p, 1 << ri->r_length);
303
304 switch(ri->r_type) {
305 case ARM64_RELOC_UNSIGNED:
306 case ARM64_RELOC_SUBTRACTOR: {
307 switch (ri->r_length) {
308 case 0: return signExtend(*(uint8_t*)p, 8 * (1 << ri->r_length));
309 case 1: return signExtend(*(uint16_t*)p, 8 * (1 << ri->r_length));
310 case 2: return signExtend(*(uint32_t*)p, 8 * (1 << ri->r_length));
311 case 3: return signExtend(*(uint64_t*)p, 8 * (1 << ri->r_length));
312 default:
313 barf("Unsupported r_length (%d) for SUBTRACTOR relocation",
314 ri->r_length);
315 }
316 }
317 case ARM64_RELOC_BRANCH26:
318 /* take the lower 26 bits and shift them by 2. The last two are
319 * implicilty 0 (as the instructions must be aligned!) and sign
320 * extend to 64 bits.
321 */
322 return signExtend( (*p & 0x03FFFFFF) << 2, 28 );
323 case ARM64_RELOC_PAGE21:
324 case ARM64_RELOC_GOT_LOAD_PAGE21:
325 /* take the instruction bits masked with 0x6 (0110), and push them
326 * down. into the last two bits, and mask in the
327 *
328 * the 21 bits are encoded as follows in the instruction
329 *
330 * -**- ---* **** **** **** **** ***-- ----
331 * ^^
332 * ''-- these are the low two bits.
333 */
334 return signExtend( (*p & 0x60000000) >> 29
335 | ((*p & 0x01FFFFE0) >> 3) << 12, 33);
336 case ARM64_RELOC_PAGEOFF12:
337 case ARM64_RELOC_GOT_LOAD_PAGEOFF12: {
338 /* the 12 bits for the page offset are encoded from bit 11 onwards
339 *
340 * ---- ---- --** **** **** **-- ---- ----
341 */
342 int64_t a = (*p & 0x003FFC00) >> 10;
343 int shift = 0;
344 if (isLoadStore(p)) {
345 shift = (*p >> 30) & 0x3;
346 if(0 == shift && isVectorOp(p)) {
347 shift = 4;
348 }
349 }
350 return a << shift;
351 }
352 }
353 barf("unsupported relocation type: %d\n", ri->r_type);
354 }
355
/*
 * Check whether `value` can be represented once everything above bit
 * position `bits` is dropped, i.e. whether value >> bits is either 0 (all
 * upper bits clear) or -1 (all upper bits set).
 *
 * bits == 64 is always true; more than 64 bits makes no sense for a 64bit
 * value and barf()s.
 */
inline bool
fitsBits(size_t bits, int64_t value) {
    if(bits == 64) return true;
    /* `bits` is a size_t, so it has to be printed with %zu */
    if(bits > 64) barf("fits_bits with %zu bits and an 64bit integer!", bits);
    return  0 == (value >> bits)   // All bits off: 0
        || -1 == (value >> bits);  // All bits on: -1
}
363
364 void
365 encodeAddend(ObjectCode * oc, Section * section,
366 MachORelocationInfo * ri, int64_t addend) {
367 uint32_t * p = (uint32_t*)((uint8_t*)section->start + ri->r_address);
368
369 checkProddableBlock(oc, (void*)p, 1 << ri->r_length);
370
371 switch (ri->r_type) {
372 case ARM64_RELOC_UNSIGNED:
373 case ARM64_RELOC_SUBTRACTOR: {
374 if(!fitsBits(8 << ri->r_length, addend))
375 barf("Relocation out of range for UNSIGNED/SUBTRACTOR");
376 switch (ri->r_length) {
377 case 0: *(uint8_t*)p = (uint8_t)addend; break;
378 case 1: *(uint16_t*)p = (uint16_t)addend; break;
379 case 2: *(uint32_t*)p = (uint32_t)addend; break;
380 case 3: *(uint64_t*)p = (uint64_t)addend; break;
381 default:
382 barf("Unsupported r_length (%d) for SUBTRACTOR relocation",
383 ri->r_length);
384 }
385 return;
386 }
387 case ARM64_RELOC_BRANCH26: {
388 /* We can only store 26 bits in the instruction, due to alignment we
389 * do not need the last two bits of the value. If the value >> 2
390 * still exceeds 26bits, we won't be able to reach it.
391 */
392 if(!fitsBits(26, addend >> 2))
393 barf("Relocation target for BRACH26 out of range.");
394 *p = (*p & 0xFC000000) | ((uint32_t)(addend >> 2) & 0x03FFFFFF);
395 return;
396 }
397 case ARM64_RELOC_PAGE21:
398 case ARM64_RELOC_GOT_LOAD_PAGE21: {
399 /* We store 21bits, in bits 6 to 24, and bits 30 and 31.
400 * The encoded value describes a multiple of 4k pages, and together
401 * with the PAGEOFF12 relocation allows to address a relative range
402 * of +-4GB.
403 */
404 if(!fitsBits(21, addend >> 12))
405 barf("Relocation target for PAGE21 out of range.");
406 *p = (*p & 0x9F00001F) | (uint32_t)((addend << 17) & 0x60000000)
407 | (uint32_t)((addend >> 9) & 0x00FFFFE0);
408 return;
409 }
410 case ARM64_RELOC_PAGEOFF12:
411 case ARM64_RELOC_GOT_LOAD_PAGEOFF12: {
412 /* Store an offset into a page (4k). Depending on the instruction
413 * the bits are stored at slightly different positions.
414 */
415 if(!fitsBits(12, addend))
416 barf("Relocation target for PAGEOFF12 out or range.");
417
418 int shift = 0;
419 if(isLoadStore(p)) {
420 shift = (*p >> 30) & 0x3;
421 if(0 == shift && isVectorOp(p)) {
422 shift = 4;
423 }
424 }
425 *p = (*p & 0xFFC003FF)
426 | ((uint32_t)(addend >> shift << 10) & 0x003FFC00);
427 return;
428 }
429 }
430 barf("unsupported relocation type: %d\n", ri->r_type);
431 }
432
433 bool
434 isGotLoad(struct relocation_info * ri) {
435 return ri->r_type == ARM64_RELOC_GOT_LOAD_PAGE21
436 || ri->r_type == ARM64_RELOC_GOT_LOAD_PAGEOFF12;
437 }
438
439 /* This is very similar to makeSymbolExtra
440 * However, as we load sections into different
441 * pages, that may be further appart than
442 * branching allows, we'll use some extra
443 * space at the end of each section allocated
444 * for stubs.
445 */
446 bool
447 findStub(Section * section, void ** addr) {
448
449 for(Stub * s = section->info->stubs; s != NULL; s = s->next) {
450 if(s->target == *addr) {
451 *addr = s->addr;
452 return EXIT_SUCCESS;
453 }
454 }
455 return EXIT_FAILURE;
456 }
457
458 bool
459 makeStub(Section * section, void ** addr) {
460
461 Stub * s = stgCallocBytes(1, sizeof(Stub), "makeStub(Stub)");
462 s->target = *addr;
463 s->addr = (uint8_t*)section->info->stub_offset
464 + ((8+8)*section->info->nstubs) + 8;
465 s->next = NULL;
466
467 /* target address */
468 *(uint64_t*)((uint8_t*)s->addr - 8) = (uint64_t)s->target;
469 /* ldr x16, - (8 bytes) */
470 *(uint32_t*)(s->addr) = (uint32_t)0x58ffffd0;
471 /* br x16 */
472 *(uint32_t*)((uint8_t*)s->addr + 4) = (uint32_t)0xd61f0200;
473
474 if(section->info->nstubs == 0) {
475 /* no stubs yet, let's just create this one */
476 section->info->stubs = s;
477 } else {
478 Stub * tail = section->info->stubs;
479 while(tail->next != NULL) tail = tail->next;
480 tail->next = s;
481 }
482 section->info->nstubs += 1;
483 *addr = s->addr;
484 return EXIT_SUCCESS;
485 }
486 void
487 freeStubs(Section * section) {
488 if(section->info->nstubs == 0)
489 return;
490 Stub * last = section->info->stubs;
491 while(last->next != NULL) {
492 Stub * t = last;
493 last = last->next;
494 stgFree(t);
495 }
496 section->info->stubs = NULL;
497 section->info->nstubs = 0;
498 }
499
500 /*
501 * Check if we need a global offset table slot for a
502 * given symbol
503 */
504 bool
505 needGotSlot(MachONList * symbol) {
506 return (symbol->n_type & N_EXT) /* is an external symbol */
507 && (N_UNDF == (symbol->n_type & N_TYPE) /* and is undefined */
508 || NO_SECT != symbol->n_sect); /* or is defined in a
509 * different section */
510 }
511
512 bool
513 makeGot(ObjectCode * oc) {
514 size_t got_slots = 0;
515
516 for(size_t i=0; i < oc->info->n_macho_symbols; i++)
517 if(needGotSlot(oc->info->macho_symbols[i].nlist))
518 got_slots += 1;
519
520 if(got_slots > 0) {
521 oc->info->got_size = got_slots * sizeof(void*);
522 oc->info->got_start = mmap(NULL, oc->info->got_size,
523 PROT_READ | PROT_WRITE,
524 MAP_ANON | MAP_PRIVATE,
525 -1, 0);
526 if( oc->info->got_start == MAP_FAILED ) {
527 barf("MAP_FAILED. errno=%d", errno );
528 return EXIT_FAILURE;
529 }
530 /* update got_addr */
531 size_t slot = 0;
532 for(size_t i=0; i < oc->info->n_macho_symbols; i++)
533 if(needGotSlot(oc->info->macho_symbols[i].nlist))
534 oc->info->macho_symbols[i].got_addr
535 = ((uint8_t*)oc->info->got_start)
536 + (slot++ * sizeof(void *));
537 }
538 return EXIT_SUCCESS;
539 }
540
541 void
542 freeGot(ObjectCode * oc) {
543 munmap(oc->info->got_start, oc->info->got_size);
544 oc->info->got_start = NULL;
545 oc->info->got_size = 0;
546 }
547
548 static int
549 relocateSectionAarch64(ObjectCode * oc, Section * section)
550 {
551 if(section->size == 0)
552 return 1;
553 /* at this point, we have:
554 *
555 * - loaded the sections (potentially into non-contiguous memory),
556 * (in ocGetNames_MachO)
557 * - registered exported sybmols
558 * (in ocGetNames_MachO)
559 * - and fixed the nlist[i].n_value for common storage symbols (N_UNDF,
560 * N_EXT and n_value != 0) so that they point into the common storage.
561 * (in ocGetNames_MachO)
562 * - All oc->symbols however should now point at the right place.
563 */
564
565 /* we need to care about the explicit addend */
566 int64_t explicit_addend = 0;
567 size_t nreloc = section->info->macho_section->nreloc;
568
569 for(size_t i = 0; i < nreloc; i++) {
570 MachORelocationInfo * ri = &section->info->relocation_info[i];
571 switch (ri->r_type) {
572 case ARM64_RELOC_UNSIGNED: {
573 MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum];
574 int64_t addend = decodeAddend(oc, section, ri);
575 uint64_t value = 0;
576 if(symbol->nlist->n_type & N_EXT) {
577 /* external symbols should be able to be
578 * looked up via the lookupSymbol_ function.
579 * Either through the global symbol hashmap
580 * or asking the system, if not found
581 * in the symbol hashmap
582 */
583 value = (uint64_t)lookupSymbol_((char*)symbol->name);
584 if(!value)
585 barf("Could not lookup symbol: %s!", symbol->name);
586 } else {
587 value = (uint64_t)symbol->addr; // address of the symbol.
588 }
589 encodeAddend(oc, section, ri, value + addend);
590 break;
591 }
592 case ARM64_RELOC_SUBTRACTOR:
593 {
594 MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum];
595 // subtractor and unsigned are called in tandem:
596 // first pc <- pc - symbol address (SUBTRACTOR)
597 // second pc <- pc + symbol address (UNSIGNED)
598 // to achieve pc <- pc + target - base.
599 //
600 // the current implementation uses absolute addresses,
601 // which is simpler than trying to do this section
602 // relative, but could more easily lead to overflow.
603 //
604 if(!(i+1 < nreloc)
605 || !(section->info->relocation_info[i+1].r_type
606 == ARM64_RELOC_UNSIGNED))
607 barf("SUBTRACTOR relocation *must* be followed by UNSIGNED relocation.");
608
609 int64_t addend = decodeAddend(oc, section, ri);
610 int64_t value = (uint64_t)symbol->addr;
611 encodeAddend(oc, section, ri, addend - value);
612 break;
613 }
614 case ARM64_RELOC_BRANCH26: {
615 MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum];
616
617 // pre-existing addend
618 int64_t addend = decodeAddend(oc, section, ri);
619 // address of the branch (b/bl) instruction.
620 uint64_t pc = (uint64_t)section->start + ri->r_address;
621 uint64_t value = 0;
622 if(symbol->nlist->n_type & N_EXT) {
623 value = (uint64_t)lookupSymbol_((char*)symbol->name);
624 if(!value)
625 barf("Could not lookup symbol: %s!", symbol->name);
626 } else {
627 value = (uint64_t)symbol->addr; // address of the symbol.
628 }
629 if((value - pc + addend) >> (2 + 26)) {
630 /* we need a stub */
631 /* check if we already have that stub */
632 if(findStub(section, (void**)&value)) {
633 /* did not find it. Crete a new stub. */
634 if(makeStub(section, (void**)&value)) {
635 barf("could not find or make stub");
636 }
637 }
638 }
639 encodeAddend(oc, section, ri, value - pc + addend);
640 break;
641 }
642 case ARM64_RELOC_PAGE21:
643 case ARM64_RELOC_GOT_LOAD_PAGE21: {
644 MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum];
645 int64_t addend = decodeAddend(oc, section, ri);
646 if(!(explicit_addend == 0 || addend == 0))
647 barf("explicit_addend and addend can't be set at the same time.");
648 uint64_t pc = (uint64_t)section->start + ri->r_address;
649 uint64_t value = (uint64_t)(isGotLoad(ri) ? symbol->got_addr : symbol->addr);
650 encodeAddend(oc, section, ri, ((value + addend + explicit_addend) & (-4096)) - (pc & (-4096)));
651
652 // reset, just in case.
653 explicit_addend = 0;
654 break;
655 }
656 case ARM64_RELOC_PAGEOFF12:
657 case ARM64_RELOC_GOT_LOAD_PAGEOFF12: {
658 MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum];
659 int64_t addend = decodeAddend(oc, section, ri);
660 if(!(explicit_addend == 0 || addend == 0))
661 barf("explicit_addend and addend can't be set at the same time.");
662 uint64_t value = (uint64_t)(isGotLoad(ri) ? symbol->got_addr : symbol->addr);
663 encodeAddend(oc, section, ri, 0xFFF & (value + addend + explicit_addend));
664
665 // reset, just in case.
666 explicit_addend = 0;
667 break;
668 }
669 case ARM64_RELOC_ADDEND: {
670 explicit_addend = signExtend(ri->r_symbolnum, 24);
671 if(!(i+1 < nreloc)
672 || !(section->info->relocation_info[i+1].r_type == ARM64_RELOC_PAGE21
673 || section->info->relocation_info[i+1].r_type == ARM64_RELOC_PAGEOFF12))
674 barf("ADDEND relocation *must* be followed by PAGE or PAGEOFF relocation");
675 break;
676 }
677 default: {
678 barf("Relocation of type: %d not (yet) supported!\n", ri->r_type);
679 }
680 }
681 }
682 return 1;
683 }
684 #endif /* aarch64_HOST_ARCH */
685
686 #if defined(x86_64_HOST_ARCH)
687 static int
688 relocateSection(ObjectCode* oc, int curSection)
689 {
690 Section * sect = &oc->sections[curSection];
691 MachOSection * msect = sect->info->macho_section; // for access convenience
692 MachORelocationInfo * relocs = sect->info->relocation_info;
693 MachOSymbol * symbols = oc->info->macho_symbols;
694
695 IF_DEBUG(linker, debugBelch("relocateSection %d (%s, %s): start\n",
696 curSection, msect->segname, msect->sectname));
697
698 if(!strcmp(msect->sectname,"__la_symbol_ptr"))
699 return 1;
700 else if(!strcmp(msect->sectname,"__nl_symbol_ptr"))
701 return 1;
702 else if(!strcmp(msect->sectname,"__la_sym_ptr2"))
703 return 1;
704 else if(!strcmp(msect->sectname,"__la_sym_ptr3"))
705 return 1;
706
707 IF_DEBUG(linker, debugBelch("relocateSection: number of relocations: %d\n", msect->nreloc));
708
709 for(uint32_t i = 0; i < msect->nreloc; i++)
710 {
711 MachORelocationInfo *reloc = &relocs[i];
712
713 char *thingPtr = (char *) sect->start + reloc->r_address;
714 uint64_t thing;
715 /* We shouldn't need to initialise this, but gcc on OS X 64 bit
716 complains that it may be used uninitialized if we don't */
717 uint64_t value = 0;
718 uint64_t baseValue;
719 int type = reloc->r_type;
720 int relocLenBytes;
721 int nextInstrAdj = 0;
722
723 IF_DEBUG(linker, debugBelch("relocateSection: relocation %d\n", i));
724 IF_DEBUG(linker, debugBelch(" : type = %d\n", reloc->r_type));
725 IF_DEBUG(linker, debugBelch(" : address = %d\n", reloc->r_address));
726 IF_DEBUG(linker, debugBelch(" : symbolnum = %u\n", reloc->r_symbolnum));
727 IF_DEBUG(linker, debugBelch(" : pcrel = %d\n", reloc->r_pcrel));
728 IF_DEBUG(linker, debugBelch(" : length = %d\n", reloc->r_length));
729 IF_DEBUG(linker, debugBelch(" : extern = %d\n", reloc->r_extern));
730 IF_DEBUG(linker, debugBelch(" : type = %d\n", reloc->r_type));
731
732 switch(reloc->r_length)
733 {
734 case 0:
735 thing = *(uint8_t*)thingPtr;
736 relocLenBytes = 1;
737 break;
738 case 1:
739 thing = *(uint16_t*)thingPtr;
740 relocLenBytes = 2;
741 break;
742 case 2:
743 thing = *(uint32_t*)thingPtr;
744 relocLenBytes = 4;
745 break;
746 case 3:
747 thing = *(uint64_t*)thingPtr;
748 relocLenBytes = 8;
749 break;
750 default:
751 barf("Unknown size.");
752 }
753 checkProddableBlock(oc,thingPtr,relocLenBytes);
754
755 /*
756 * With SIGNED_N the relocation is not at the end of the
757 * instruction and baseValue needs to be adjusted accordingly.
758 */
759 switch (type) {
760 case X86_64_RELOC_SIGNED_1:
761 nextInstrAdj = 1;
762 break;
763 case X86_64_RELOC_SIGNED_2:
764 nextInstrAdj = 2;
765 break;
766 case X86_64_RELOC_SIGNED_4:
767 nextInstrAdj = 4;
768 break;
769 }
770 baseValue = (uint64_t)thingPtr + relocLenBytes + nextInstrAdj;
771
772
773
774 IF_DEBUG(linker,
775 debugBelch("relocateSection: length = %d, thing = %" PRId64 ", baseValue = %p\n",
776 reloc->r_length, thing, (char *)baseValue));
777
778 if (type == X86_64_RELOC_GOT
779 || type == X86_64_RELOC_GOT_LOAD)
780 {
781 MachOSymbol *symbol = &symbols[reloc->r_symbolnum];
782 SymbolName* nm = symbol->name;
783 SymbolAddr* addr = NULL;
784
785 IF_DEBUG(linker, debugBelch("relocateSection: making jump island for %s, extern = %d, X86_64_RELOC_GOT\n",
786 nm, reloc->r_extern));
787
788 if (reloc->r_extern == 0) {
789 errorBelch("\nrelocateSection: global offset table relocation for symbol with r_extern == 0\n");
790 }
791
792 if (symbol->nlist->n_type & N_EXT) {
793 // The external bit is set, meaning the symbol is exported,
794 // and therefore can be looked up in this object module's
795 // symtab, or it is undefined, meaning dlsym must be used
796 // to resolve it.
797
798 addr = lookupSymbol_(nm);
799 IF_DEBUG(linker, debugBelch("relocateSection: looked up %s, "
800 "external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n"
801 " : addr = %p\n", nm, addr));
802
803 if (addr == NULL) {
804 errorBelch("\nlookupSymbol failed in relocateSection (RELOC_GOT)\n"
805 "%s: unknown symbol `%s'", oc->fileName, nm);
806 return 0;
807 }
808 } else {
809 IF_DEBUG(linker, debugBelch("relocateSection: %s is not an exported symbol\n", nm));
810
811 // The symbol is not exported, or defined in another
812 // module, so it must be in the current object module,
813 // at the location given by the section index and
814 // symbol address (symbol->n_value)
815
816 if ((symbol->nlist->n_type & N_TYPE) == N_SECT) {
817 if (symbol->addr == NULL) {
818 errorBelch("relocateSection: address of internal symbol %s was not resolved\n", nm);
819 return 0;
820 }
821
822 addr = symbol->addr;
823
824 IF_DEBUG(linker, debugBelch("relocateSection: calculated relocation of "
825 "non-external X86_64_RELOC_GOT or X86_64_RELOC_GOT_LOAD\n"));
826 IF_DEBUG(linker, debugBelch(" : addr = %p\n", addr));
827 } else {
828 errorBelch("\nrelocateSection: %s is not exported,"
829 " and should be defined in a section, but isn't!\n", nm);
830 return 0;
831 }
832 }
833
834 // creates a jump island for every relocation entry for a symbol
835 // TODO (AP): use got_addr to store the loc. of a jump island to reuse later
836 value = (uint64_t) &makeSymbolExtra(oc, reloc->r_symbolnum, (unsigned long)addr)->addr;
837
838 type = X86_64_RELOC_SIGNED;
839 }
840 else if (reloc->r_extern)
841 {
842 MachOSymbol *symbol = &symbols[reloc->r_symbolnum];
843 SymbolName* nm = symbol->name;
844 SymbolAddr* addr = NULL;
845
846 IF_DEBUG(linker, debugBelch("relocateSection: looking up external symbol %s\n", nm));
847 IF_DEBUG(linker, debugBelch(" : type = %d\n", symbol->nlist->n_type));
848 IF_DEBUG(linker, debugBelch(" : sect = %d\n", symbol->nlist->n_sect));
849 IF_DEBUG(linker, debugBelch(" : desc = %d\n", symbol->nlist->n_desc));
850 IF_DEBUG(linker, debugBelch(" : value = %p\n", (void *)symbol->nlist->n_value));
851
852 if ((symbol->nlist->n_type & N_TYPE) == N_SECT) {
853 ASSERT(symbol->addr != NULL);
854 value = (uint64_t) symbol->addr;
855 IF_DEBUG(linker, debugBelch("relocateSection, defined external symbol %s, relocated address %p\n",
856 nm, (void *)value));
857 }
858 else {
859 addr = lookupSymbol_(nm);
860 if (addr == NULL)
861 {
862 errorBelch("\nlookupSymbol failed in relocateSection (relocate external)\n"
863 "%s: unknown symbol `%s'", oc->fileName, nm);
864 return 0;
865 }
866
867 value = (uint64_t) addr;
868 IF_DEBUG(linker, debugBelch("relocateSection: external symbol %s, address %p\n", nm, (void *)value));
869 }
870 }
871 else
872 {
873 /* Since the relocation is internal, r_symbolnum contains a section
874 * number relative to which the relocation is. Depending on whether
875 * the relocation is unsigned or signed, the given displacement is
876 * relative to the image or the section respectively.
877 *
878 * For instance, in a signed case:
879 * thing = <displ. to to section r_symbolnum *in the image*> (1)
880 * + <offset within r_symbolnum section>
881 * (1) needs to be updated due to different section placement in memory.
882 */
883
884 CHECKM(reloc->r_symbolnum > 0,
885 "relocateSection: unsupported r_symbolnum = %" PRIu32 " < 1 for internal relocation",
886 reloc->r_symbolnum);
887
888 int targetSecNum = reloc->r_symbolnum - 1; // sec numbers start with 1
889 Section * targetSec = &oc->sections[targetSecNum];
890 MachOSection * targetMacho = targetSec->info->macho_section;
891
892 IF_DEBUG(linker,
893 debugBelch("relocateSection: internal relocation relative to section %d (%s, %s)\n",
894 targetSecNum, targetMacho->segname, targetMacho->sectname));
895
896 switch (type) {
897 case X86_64_RELOC_UNSIGNED: {
898 CHECKM(thing >= targetMacho->addr,
899 "relocateSection: unsigned displacement %" PRIx64 "before target section start address %" PRIx64 "\n",
900 thing, (uint64_t) targetMacho->addr);
901
902 uint64_t thingRelativeOffset = thing - targetMacho->addr;
903 IF_DEBUG(linker, debugBelch(" "
904 "unsigned displacement %" PRIx64 " with section relative offset %" PRIx64 "\n",
905 thing, thingRelativeOffset));
906
907 thing = (uint64_t) targetSec->start + thingRelativeOffset;
908 IF_DEBUG(linker, debugBelch(" "
909 "relocated address is %p\n", (void *) thing));
910
911 /* Compared to external relocation we don't need to adjust value
912 * any further since thing already has absolute address.
913 */
914 value = 0;
915 break;
916 }
917 case X86_64_RELOC_SIGNED:
918 case X86_64_RELOC_SIGNED_1:
919 case X86_64_RELOC_SIGNED_2:
920 case X86_64_RELOC_SIGNED_4: {
921 uint32_t baseValueOffset = reloc->r_address + relocLenBytes + nextInstrAdj;
922 uint64_t imThingLoc = msect->addr + baseValueOffset + (int64_t) thing;
923
924 CHECKM(imThingLoc >= targetMacho->addr,
925 "relocateSection: target location %p in image before target section start address %p\n",
926 (void *) imThingLoc, (void *) targetMacho->addr);
927
928 int64_t thingRelativeOffset = imThingLoc - targetMacho->addr;
929 IF_DEBUG(linker,
930 debugBelch(" "
931 "original displacement %" PRId64 " to %p with section relative offset %" PRIu64 "\n",
932 thing, (void *) imThingLoc, thingRelativeOffset));
933
934 thing = (int64_t) ((uint64_t) targetSec->start + thingRelativeOffset)
935 - ((uint64_t) sect->start + baseValueOffset);
936 value = baseValue; // so that it further cancels out with baseValue
937 IF_DEBUG(linker,
938 debugBelch(" "
939 "relocated displacement %" PRId64 " to %p\n",
940 (int64_t) thing, (void *) (baseValue + thing)));
941 break;
942 }
943 default:
944 barf("relocateSection: unexpected internal relocation type %d\n", type);
945 return 0;
946 }
947 }
948
949 IF_DEBUG(linker, debugBelch("relocateSection: value = %p\n", (void *) value));
950
951 if (type == X86_64_RELOC_BRANCH)
952 {
953 if((int32_t)(value - baseValue) != (int64_t)(value - baseValue))
954 {
955 ASSERT(reloc->r_extern);
956 value = (uint64_t) &makeSymbolExtra(oc, reloc->r_symbolnum, value)
957 -> jumpIsland;
958 }
959 ASSERT((int32_t)(value - baseValue) == (int64_t)(value - baseValue));
960 type = X86_64_RELOC_SIGNED;
961 }
962
963 switch(type)
964 {
965 case X86_64_RELOC_UNSIGNED:
966 ASSERT(!reloc->r_pcrel);
967 thing += value;
968 break;
969 case X86_64_RELOC_SIGNED:
970 case X86_64_RELOC_SIGNED_1:
971 case X86_64_RELOC_SIGNED_2:
972 case X86_64_RELOC_SIGNED_4:
973 ASSERT(reloc->r_pcrel);
974 thing += value - baseValue;
975 break;
976 case X86_64_RELOC_SUBTRACTOR:
977 ASSERT(!reloc->r_pcrel);
978 thing -= value;
979 break;
980 default:
981 barf("unknown relocation");
982 }
983
984 IF_DEBUG(linker, debugBelch("relocateSection: thing = %p\n", (void *) thing));
985
986 /* Thing points to memory within one of the relocated sections. We can
987 * probe the first byte to sanity check internal relocations.
988 */
989 if (0 == reloc->r_extern) {
990 if (reloc->r_pcrel) {
991 checkProddableBlock(oc, (void *)((char *)thing + baseValue), 1);
992 } else {
993 checkProddableBlock(oc, (void *)thing, 1);
994 }
995 }
996
997 switch(reloc->r_length)
998 {
999 case 0:
1000 *(uint8_t*)thingPtr = thing;
1001 break;
1002 case 1:
1003 *(uint16_t*)thingPtr = thing;
1004 break;
1005 case 2:
1006 *(uint32_t*)thingPtr = thing;
1007 break;
1008 case 3:
1009 *(uint64_t*)thingPtr = thing;
1010 break;
1011 }
1012 }
1013
1014 IF_DEBUG(linker, debugBelch("relocateSection: done\n"));
1015 return 1;
1016 }
1017 #endif /* x86_64_HOST_ARCH */
1018
/* Note [mmap r+w+x]
 * ~~~~~~~~~~~~~~~~~
 *
 * iOS does not permit mmap'ing r+w+x, hence we only mmap r+w, and later change
 * to r+x via mprotect. While this would be nice to have for all hosts
 * and not just for iOS, it entails that the rest of the linker code supports
 * that; this includes:
 *
 * - mmap and mprotect need to be available.
 * - text and data sections need to be mapped into different pages. Ideally
 *   the text and data sections would be aggregated, to prevent using a single
 *   page for every section, however tiny.
 * - the relocation code for each object file format / architecture needs to
 *   respect the (now) non-contiguousness of the sections.
 * - with sections being mapped potentially far apart from each other, it must
 *   be made sure that the pages are reachable within the architecture's
 *   addressability for relative or absolute access.
 */
1037
1038 SectionKind
1039 getSectionKind_MachO(MachOSection *section)
1040 {
1041 SectionKind kind;
1042
1043 /* todo: Use section flags instead */
1044 if (0==strcmp(section->sectname,"__text")) {
1045 kind = SECTIONKIND_CODE_OR_RODATA;
1046 } else if (0==strcmp(section->sectname,"__const") ||
1047 0==strcmp(section->sectname,"__data") ||
1048 0==strcmp(section->sectname,"__bss") ||
1049 0==strcmp(section->sectname,"__common") ||
1050 0==strcmp(section->sectname,"__mod_init_func")) {
1051 kind = SECTIONKIND_RWDATA;
1052 } else {
1053 kind = SECTIONKIND_OTHER;
1054 }
1055
1056 return kind;
1057 }
1058
1059 /* Calculate the # of active segments and their sizes based on section
1060 * sizes and alignments. This is done in 2 passes over sections:
1061 * 1. Calculate how many sections is going to be in each segment and
1062 * the total segment size.
1063 * 2. Fill in segment's sections_idx arrays.
1064 *
1065 * gbZerofillSegment is there because of this comment in mach-o/loader.h:
1066 * The gigabyte zero fill sections, those with the section type
1067 * S_GB_ZEROFILL, can only be in a segment with sections of this
1068 * type. These segments are then placed after all other segments.
1069 */
1070 int
1071 ocBuildSegments_MachO(ObjectCode *oc)
1072 {
1073 int n_rxSections = 0;
1074 size_t size_rxSegment = 0;
1075 Segment *rxSegment = NULL;
1076
1077 int n_rwSections = 0;
1078 size_t size_rwSegment = 0;
1079 Segment *rwSegment = NULL;
1080
1081 int n_gbZerofills = 0;
1082 size_t size_gbZerofillSegment = 0;
1083 Segment *gbZerofillSegment = NULL;
1084
1085 int n_activeSegments = 0;
1086 int curSegment = 0;
1087 size_t size_compound;
1088
1089 Segment *segments = NULL;
1090 void *mem = NULL, *curMem = NULL;
1091
1092 for (int i = 0; i < oc->n_sections; i++) {
1093 MachOSection *macho = &oc->info->macho_sections[i];
1094 size_t alignment = 1 << macho->align;
1095
1096 if (S_GB_ZEROFILL == (macho->flags & SECTION_TYPE)) {
1097 size_gbZerofillSegment = roundUpToAlign(size_gbZerofillSegment, alignment);
1098 size_gbZerofillSegment += macho->size;
1099 n_gbZerofills++;
1100 } else if (getSectionKind_MachO(macho) == SECTIONKIND_CODE_OR_RODATA) {
1101 size_rxSegment = roundUpToAlign(size_rxSegment, alignment);
1102 size_rxSegment += macho->size;
1103 n_rxSections++;
1104 } else {
1105 size_rwSegment = roundUpToAlign(size_rwSegment, alignment);
1106 size_rwSegment += macho->size;
1107 n_rwSections++;
1108 }
1109 }
1110
1111 size_compound = roundUpToPage(size_rxSegment) +
1112 roundUpToPage(size_rwSegment) +
1113 roundUpToPage(size_gbZerofillSegment);
1114
1115 if (n_rxSections > 0) {
1116 n_activeSegments++;
1117 }
1118 if (n_rwSections > 0) {
1119 n_activeSegments++;
1120 }
1121 if (n_gbZerofills >0) {
1122 n_activeSegments++;
1123 }
1124
1125 mem = mmapForLinker(size_compound, MAP_ANON, -1, 0);
1126 if (NULL == mem) return 0;
1127
1128 IF_DEBUG(linker, debugBelch("ocBuildSegments: allocating %d segments\n", n_activeSegments));
1129 segments = (Segment*)stgCallocBytes(n_activeSegments, sizeof(Segment),
1130 "ocBuildSegments_MachO(segments)");
1131 curMem = mem;
1132
1133 /* Allocate space for RX segment */
1134 if (n_rxSections > 0) {
1135 rxSegment = &segments[curSegment];
1136 initSegment(rxSegment,
1137 curMem,
1138 roundUpToPage(size_rxSegment),
1139 SEGMENT_PROT_RX,
1140 n_rxSections);
1141 IF_DEBUG(linker, debugBelch("ocBuildSegments_MachO: init segment %d (RX) at %p size %zu\n",
1142 curSegment, rxSegment->start, rxSegment->size));
1143 curMem = (char *)curMem + rxSegment->size;
1144 curSegment++;
1145 }
1146
1147 /* Allocate space for RW segment */
1148 if (n_rwSections > 0) {
1149 rwSegment = &segments[curSegment];
1150 initSegment(rwSegment,
1151 curMem,
1152 roundUpToPage(size_rwSegment),
1153 SEGMENT_PROT_RWO,
1154 n_rwSections);
1155 IF_DEBUG(linker, debugBelch("ocBuildSegments_MachO: init segment %d (RWO) at %p size %zu\n",
1156 curSegment, rwSegment->start, rwSegment->size));
1157 curMem = (char *)curMem + rwSegment->size;
1158 curSegment++;
1159 }
1160
1161 /* Allocate space for GB_ZEROFILL segment */
1162 if (n_gbZerofills > 0) {
1163 gbZerofillSegment = &segments[curSegment];
1164 initSegment(gbZerofillSegment,
1165 curMem,
1166 roundUpToPage(size_gbZerofillSegment),
1167 SEGMENT_PROT_RWO,
1168 n_gbZerofills);
1169 IF_DEBUG(linker, debugBelch("ocBuildSegments_MachO: init segment %d (GB_ZEROFILL) at %p size %zu\n",
1170 curSegment, gbZerofillSegment->start, gbZerofillSegment->size));
1171 curMem = (char *)curMem + gbZerofillSegment->size;
1172 curSegment++;
1173 }
1174
1175 /* Second pass over sections to fill in sections_idx arrays */
1176 for (int i = 0, rx = 0, rw = 0, gb = 0;
1177 i < oc->n_sections;
1178 i++)
1179 {
1180 MachOSection *macho = &oc->info->macho_sections[i];
1181
1182 if (S_GB_ZEROFILL == (macho->flags & SECTION_TYPE)) {
1183 gbZerofillSegment->sections_idx[gb++] = i;
1184 } else if (getSectionKind_MachO(macho) == SECTIONKIND_CODE_OR_RODATA) {
1185 rxSegment->sections_idx[rx++] = i;
1186 } else {
1187 rwSegment->sections_idx[rw++] = i;
1188 }
1189 }
1190
1191 oc->segments = segments;
1192 oc->n_segments = n_activeSegments;
1193
1194 return 1;
1195 }
1196
1197 int
1198 ocGetNames_MachO(ObjectCode* oc)
1199 {
1200 unsigned curSymbol = 0;
1201
1202 unsigned long commonSize = 0;
1203 SymbolAddr* commonStorage = NULL;
1204 unsigned long commonCounter;
1205
1206 IF_DEBUG(linker,debugBelch("ocGetNames_MachO: start\n"));
1207
1208 Section *secArray;
1209 secArray = (Section*)stgCallocBytes(
1210 oc->info->segCmd->nsects,
1211 sizeof(Section),
1212 "ocGetNames_MachO(sections)");
1213
1214 oc->sections = secArray;
1215
1216 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: will load %d sections\n",
1217 oc->n_sections));
1218
1219 #if defined (ios_HOST_OS)
1220 for(int i=0; i < oc->n_sections; i++)
1221 {
1222 MachOSection * section = &oc->info->macho_sections[i];
1223
1224 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: section %d\n", i));
1225
1226 if (section->size == 0) {
1227 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: found a zero length section, skipping\n"));
1228 continue;
1229 }
1230
1231 SectionKind kind = getSectionKind_MachO(section);
1232
1233 switch(section->flags & SECTION_TYPE) {
1234 case S_ZEROFILL:
1235 case S_GB_ZEROFILL: {
1236 // See Note [mmap r+w+x]
1237 void * mem = mmap(NULL, section->size,
1238 PROT_READ | PROT_WRITE,
1239 MAP_ANON | MAP_PRIVATE,
1240 -1, 0);
1241 if( mem == MAP_FAILED ) {
1242 barf("failed to mmap allocate memory for zerofill section %d of size %d. errno = %d",
1243 i, section->size, errno);
1244 }
1245 addSection(&secArray[i], kind, SECTION_MMAP, mem, section->size,
1246 0, mem, roundUpToPage(section->size));
1247 addProddableBlock(oc, mem, (int)section->size);
1248
1249 secArray[i].info->nstubs = 0;
1250 secArray[i].info->stub_offset = NULL;
1251 secArray[i].info->stub_size = 0;
1252 secArray[i].info->stubs = NULL;
1253
1254 secArray[i].info->macho_section = section;
1255 secArray[i].info->relocation_info
1256 = (MachORelocationInfo*)(oc->image + section->reloff);
1257 break;
1258 }
1259 default: {
1260 // The secion should have a non-zero offset. As the offset is
1261 // relativ to the image, and must be somewhere after the header.
1262 if(section->offset == 0) barf("section with zero offset!");
1263 /* on iOS, we must allocate the code in r+x sections and
1264 * the data in r+w sections, as the system does not allow
1265 * for r+w+x, we must allocate each section in a new page
1266 * range.
1267 *
1268 * copy the sections's memory to some page-aligned place via
1269 * mmap and memcpy. This will later allow us to selectively
1270 * use mprotect on pages with data (r+w) and pages text (r+x).
1271 * We initially start with r+w, so that we can modify the
1272 * pages during relocations, prior to setting it r+x.
1273 */
1274
1275 /* We also need space for stubs. As pages can be assigned
1276 * randomly in the addressable space, we need to keep the
1277 * stubs close to the section. The strategy we are going
1278 * to use is to allocate them right after the section. And
1279 * we are going to be generous and allocare a stub slot
1280 * for each relocation to keep it simple.
1281 */
1282 size_t n_ext_sec_sym = section->nreloc; /* number of relocations
1283 * for this section. Should
1284 * be a good upper bound
1285 */
1286 size_t stub_space = /* eight bytes for the 64 bit address,
1287 * and another eight bytes for the two
1288 * instructions (ldr, br) for each relocation.
1289 */ 16 * n_ext_sec_sym;
1290 // See Note [mmap r+w+x]
1291 void * mem = mmap(NULL, section->size+stub_space,
1292 PROT_READ | PROT_WRITE,
1293 MAP_ANON | MAP_PRIVATE,
1294 -1, 0);
1295 if( mem == MAP_FAILED ) {
1296 barf("failed to mmap allocate memory to load section %d. errno = %d", i, errno );
1297 }
1298 memcpy( mem, oc->image + section->offset, section->size);
1299
1300 addSection(&secArray[i], kind, SECTION_MMAP,
1301 mem, section->size,
1302 0, mem, roundUpToPage(section->size+stub_space));
1303 addProddableBlock(oc, mem, (int)section->size);
1304
1305 secArray[i].info->nstubs = 0;
1306 secArray[i].info->stub_offset = ((uint8_t*)mem) + section->size;
1307 secArray[i].info->stub_size = stub_space;
1308 secArray[i].info->stubs = NULL;
1309
1310 secArray[i].info->macho_section = section;
1311 secArray[i].info->relocation_info
1312 = (MachORelocationInfo*)(oc->image + section->reloff);
1313 break;
1314 }
1315 }
1316 }
1317 #else /* !ios_HOST_OS */
1318 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: building segments\n"));
1319
1320 CHECKM(ocBuildSegments_MachO(oc), "ocGetNames_MachO: failed to build segments\n");
1321
1322 for (int seg_n = 0; seg_n < oc->n_segments; seg_n++) {
1323 Segment *segment = &oc->segments[seg_n];
1324 void *curMem = segment->start;
1325
1326 IF_DEBUG(linker,
1327 debugBelch("ocGetNames_MachO: loading segment %d "
1328 "(address = %p, size = %zu) "
1329 "with %d sections\n",
1330 seg_n, segment->start, segment->size, segment->n_sections));
1331
1332 for (int sec_n = 0; sec_n < segment->n_sections; sec_n++) {
1333 int sec_idx = segment->sections_idx[sec_n];
1334 MachOSection *section = &oc->info->macho_sections[sec_idx];
1335
1336 size_t alignment = 1 << section->align;
1337 SectionKind kind = getSectionKind_MachO(section);
1338
1339 void *secMem = (void *)roundUpToAlign((size_t)curMem, alignment);
1340
1341 IF_DEBUG(linker,
1342 debugBelch("ocGetNames_MachO: loading section %d in segment %d "
1343 "(#%d, %s %s)\n"
1344 " skipped %zu bytes due to alignment of %zu\n",
1345 sec_n, seg_n, sec_idx, section->segname, section->sectname,
1346 (char *)secMem - (char *)curMem, alignment));
1347
1348 switch (section->flags & SECTION_TYPE) {
1349 case S_ZEROFILL:
1350 case S_GB_ZEROFILL:
1351 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: memset to 0 a ZEROFILL section\n"));
1352 memset(secMem, 0, section->size);
1353 break;
1354 default:
1355 IF_DEBUG(linker,
1356 debugBelch("ocGetNames_MachO: copying from %p to %p"
1357 " a block of %" PRIu64 " bytes\n",
1358 (void *) (oc->image + section->offset), secMem, section->size));
1359
1360 memcpy(secMem, oc->image + section->offset, section->size);
1361 }
1362
1363 /* SECTION_NOMEM since memory is already allocated in segments */
1364 addSection(&secArray[sec_idx], kind, SECTION_NOMEM,
1365 secMem, section->size,
1366 0, 0, 0);
1367 addProddableBlock(oc, secMem, section->size);
1368
1369 curMem = (char*) secMem + section->size;
1370
1371 secArray[sec_idx].info->nstubs = 0;
1372 secArray[sec_idx].info->stub_offset = NULL;
1373 secArray[sec_idx].info->stub_size = 0;
1374 secArray[sec_idx].info->stubs = NULL;
1375
1376 secArray[sec_idx].info->macho_section = section;
1377 secArray[sec_idx].info->relocation_info
1378 = (MachORelocationInfo*)(oc->image + section->reloff);
1379
1380 }
1381
1382 }
1383 #endif
1384
1385 /* now, as all sections have been loaded, we can resolve the absolute
1386 * address of symbols defined in those sections.
1387 */
1388 for(size_t i=0; i < oc->info->n_macho_symbols; i++) {
1389 MachOSymbol * s = &oc->info->macho_symbols[i];
1390 if( N_SECT == (s->nlist->n_type & N_TYPE) ) {
1391 if( NO_SECT == s->nlist->n_sect )
1392 barf("Symbol with N_SECT type, but no section.");
1393
1394 /* section is given, and n_sect is >0 */
1395 uint8_t n = s->nlist->n_sect - 1;
1396 if(0 == oc->info->macho_sections[n].size) {
1397 continue;
1398 }
1399
1400 /* addr <- address in memory where the relocated section resides | (a)
1401 * - section's address in the image | (b)
1402 * + symbol's address in the image | (c)
1403 * (c) - (b) gives symbol's offset relative to section start
1404 * (a) - (b) + (c) gives symbol's address for the relocated section
1405 *
1406 * (c) and (b) are not _real_ addresses and not equal
1407 * to file offsets in the image.
1408 * Rather they are (virtual) aligned addresses within
1409 * a single segment of MH_OBJECT object file.
1410 */
1411 s->addr = (uint8_t*)oc->sections[n].start
1412 - oc->info->macho_sections[n].addr
1413 + s->nlist->n_value;
1414 if(NULL == s->addr)
1415 barf("Failed to compute address for symbol %s", s->name);
1416 }
1417 }
1418
1419 // count external symbols defined here
1420 oc->n_symbols = 0;
1421 if (oc->info->symCmd) {
1422 for (size_t i = 0; i < oc->info->n_macho_symbols; i++) {
1423 if (oc->info->nlist[i].n_type & N_STAB) {
1424 ;
1425 }
1426 else if(oc->info->nlist[i].n_type & N_EXT)
1427 {
1428 if((oc->info->nlist[i].n_type & N_TYPE) == N_UNDF
1429 && (oc->info->nlist[i].n_value != 0))
1430 {
1431 commonSize += oc->info->nlist[i].n_value;
1432 oc->n_symbols++;
1433 }
1434 else if((oc->info->nlist[i].n_type & N_TYPE) == N_SECT)
1435 oc->n_symbols++;
1436 }
1437 }
1438 }
1439 /* allocate space for the exported symbols
1440 * in the object code. This is used to track
1441 * which symbols will have to be removed when
1442 * this object code is unloaded
1443 */
1444 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: %d external symbols\n",
1445 oc->n_symbols));
1446 oc->symbols = stgMallocBytes(oc->n_symbols * sizeof(Symbol_t),
1447 "ocGetNames_MachO(oc->symbols)");
1448
1449 if (oc->info->symCmd) {
1450 for (size_t i = 0; i < oc->info->n_macho_symbols; i++) {
1451 SymbolName* nm = oc->info->macho_symbols[i].name;
1452 if (oc->info->nlist[i].n_type & N_STAB)
1453 {
1454 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: Skip STAB: %s\n", nm));
1455 }
1456 else if ((oc->info->nlist[i].n_type & N_TYPE) == N_SECT)
1457 {
1458 if (oc->info->nlist[i].n_type & N_EXT)
1459 {
1460 if ( (oc->info->nlist[i].n_desc & N_WEAK_DEF)
1461 && lookupSymbol_(nm)) {
1462 // weak definition, and we already have a definition
1463 IF_DEBUG(linker, debugBelch(" weak: %s\n", nm));
1464 }
1465 else
1466 {
1467 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: inserting %s\n", nm));
1468 SymbolAddr* addr = oc->info->macho_symbols[i].addr;
1469
1470 ghciInsertSymbolTable( oc->fileName
1471 , symhash
1472 , nm
1473 , addr
1474 , HS_BOOL_FALSE
1475 , oc);
1476
1477 oc->symbols[curSymbol].name = nm;
1478 oc->symbols[curSymbol].addr = addr;
1479 curSymbol++;
1480 }
1481 }
1482 else
1483 {
1484 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: \t...not external, skipping %s\n", nm));
1485 }
1486 }
1487 else
1488 {
1489 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: \t...not defined in this section, skipping %s\n", nm));
1490 }
1491 }
1492 }
1493
1494 /* setup the common storage */
1495 commonStorage = stgCallocBytes(1,commonSize,"ocGetNames_MachO(common symbols)");
1496 commonCounter = (unsigned long)commonStorage;
1497
1498 if (oc->info->symCmd) {
1499 for (size_t i = 0; i < oc->info->n_macho_symbols; i++) {
1500 SymbolName* nm = oc->info->macho_symbols[i].name;
1501 MachONList *nlist = &oc->info->nlist[i];
1502 if((nlist->n_type & N_TYPE) == N_UNDF
1503 && (nlist->n_type & N_EXT)
1504 && (nlist->n_value != 0)) {
1505 unsigned long sz = nlist->n_value;
1506
1507 nlist->n_value = commonCounter;
1508
1509 /* also set the final address to the macho_symbol */
1510 oc->info->macho_symbols[i].addr = (void*)commonCounter;
1511
1512 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: inserting common symbol: %s\n", nm));
1513 ghciInsertSymbolTable(oc->fileName, symhash, nm,
1514 (void*)commonCounter, HS_BOOL_FALSE, oc);
1515 oc->symbols[curSymbol].name = nm;
1516 oc->symbols[curSymbol].addr = oc->info->macho_symbols[i].addr;
1517 curSymbol++;
1518
1519 commonCounter += sz;
1520 }
1521 }
1522 }
1523 #if defined(aarch64_HOST_ARCH)
1524 /* Setup the global offset table
1525 * This is for symbols that are external, and not defined here.
1526 * So that we can load their address indirectly.
1527 *
1528 * We will get GOT request for any symbol that is
1529 * - EXT and UNDF
1530 * - EXT and not in the same section.
1531 *
1532 * As sections are not necessarily contiguous and can live
1533 * anywhere in the addressable space. This obviously makes
1534 * sense. However it took me a while to figure this out.
1535 */
1536 makeGot(oc);
1537
1538 /* at this point, macho_symbols, should know the addresses for
1539 * all symbols defined by this object code.
1540 * - those that are defined in sections.
1541 * - those that are undefined, but have a value (common storage).
1542 */
1543 #endif
1544 IF_DEBUG(linker, debugBelch("ocGetNames_MachO: done\n"));
1545 return 1;
1546 }
1547
1548 #if defined(ios_HOST_OS)
1549 bool
1550 ocMprotect_MachO( ObjectCode *oc ) {
1551 for(int i=0; i < oc->n_sections; i++) {
1552 Section * section = &oc->sections[i];
1553 if(section->size == 0) continue;
1554 if( (section->info->macho_section->flags & SECTION_ATTRIBUTES_USR)
1555 == S_ATTR_PURE_INSTRUCTIONS) {
1556 if( 0 != mprotect(section->start,
1557 section->size + section->info->stub_size,
1558 PROT_READ | PROT_EXEC) ) {
1559 barf("mprotect failed! errno = %d", errno);
1560 return false;
1561 }
1562 }
1563 }
1564 return true;
1565 }
1566 #endif
1567
1568 int
1569 ocResolve_MachO(ObjectCode* oc)
1570 {
1571 IF_DEBUG(linker, debugBelch("ocResolve_MachO: start\n"));
1572
1573 if(NULL != oc->info->dsymCmd)
1574 {
1575 unsigned long *indirectSyms
1576 = (unsigned long*) (oc->image + oc->info->dsymCmd->indirectsymoff);
1577
1578 IF_DEBUG(linker, debugBelch("ocResolve_MachO: resolving dsymLC\n"));
1579 for (int i = 0; i < oc->n_sections; i++)
1580 {
1581 const char * sectionName = oc->info->macho_sections[i].sectname;
1582 if( !strcmp(sectionName,"__la_symbol_ptr")
1583 || !strcmp(sectionName,"__la_sym_ptr2")
1584 || !strcmp(sectionName,"__la_sym_ptr3"))
1585 {
1586 if(!resolveImports(oc,&oc->info->macho_sections[i],
1587 indirectSyms))
1588 return 0;
1589 }
1590 else if(!strcmp(sectionName,"__nl_symbol_ptr")
1591 || !strcmp(sectionName,"__pointers"))
1592 {
1593 if(!resolveImports(oc,&oc->info->macho_sections[i],
1594 indirectSyms))
1595 return 0;
1596 }
1597 else if(!strcmp(sectionName,"__jump_table"))
1598 {
1599 if(!resolveImports(oc,&oc->info->macho_sections[i],
1600 indirectSyms))
1601 return 0;
1602 }
1603 else
1604 {
1605 IF_DEBUG(linker, debugBelch("ocResolve_MachO: unknown section\n"));
1606 }
1607 }
1608 }
1609 #if defined(aarch64_HOST_ARCH)
1610 /* fill the GOT table */
1611 for(size_t i = 0; i < oc->info->n_macho_symbols; i++) {
1612 MachOSymbol * symbol = &oc->info->macho_symbols[i];
1613 if(needGotSlot(symbol->nlist)) {
1614 if(N_UNDF == (symbol->nlist->n_type & N_TYPE)) {
1615 /* an undefined symbol. So we need to ensure we
1616 * have the address.
1617 */
1618 if(NULL == symbol->addr) {
1619 symbol->addr = lookupSymbol_((char*)symbol->name);
1620 if(NULL == symbol->addr)
1621 barf("Failed to lookup symbol: %s", symbol->name);
1622 } else {
1623 // we already have the address.
1624 }
1625 } /* else it was defined in the same object,
1626 * just a different section. We should have
1627 * the address as well already
1628 */
1629 if(NULL == symbol->addr) {
1630 barf("Something went wrong!");
1631 }
1632 if(NULL == symbol->got_addr) {
1633 barf("Not good either!");
1634 }
1635 *(uint64_t*)symbol->got_addr = (uint64_t)symbol->addr;
1636 }
1637 }
1638 #endif
1639
1640 for(int i = 0; i < oc->n_sections; i++)
1641 {
1642 IF_DEBUG(linker, debugBelch("ocResolve_MachO: relocating section %d\n", i));
1643
1644 #if defined aarch64_HOST_ARCH
1645 if (!relocateSectionAarch64(oc, &oc->sections[i]))
1646 return 0;
1647 #else
1648 if (!relocateSection(oc, i))
1649 return 0;
1650 #endif
1651 }
1652 #if defined(ios_HOST_OS)
1653 if(!ocMprotect_MachO ( oc ))
1654 return 0;
1655 #endif
1656
1657 return 1;
1658 }
1659
1660 int
1661 ocRunInit_MachO ( ObjectCode *oc )
1662 {
1663 if (NULL == oc->info->segCmd) {
1664 barf("ocRunInit_MachO: no segment load command");
1665 }
1666
1667 int argc, envc;
1668 char **argv, **envv;
1669
1670 getProgArgv(&argc, &argv);
1671 getProgEnvv(&envc, &envv);
1672
1673 for (int i = 0; i < oc->n_sections; i++) {
1674 IF_DEBUG(linker, debugBelch("ocRunInit_MachO: checking section %d\n", i));
1675
1676 // ToDo: replace this with a proper check for the S_MOD_INIT_FUNC_POINTERS
1677 // flag. We should do this elsewhere in the Mach-O linker code
1678 // too. Note that the system linker will *refuse* to honor
1679 // sections which don't have this flag, so this could cause
1680 // weird behavior divergence (albeit reproducible).
1681 if (0 == strcmp(oc->info->macho_sections[i].sectname, "__mod_init_func")) {
1682 IF_DEBUG(linker, debugBelch("ocRunInit_MachO: running mod init functions\n"));
1683
1684 void *init_startC = oc->sections[i].start;
1685 init_t *init = (init_t*)init_startC;
1686 init_t *init_end = (init_t*)((uint8_t*)init_startC
1687 + oc->sections[i].info->macho_section->size);
1688
1689 for (int pn = 0; init < init_end; init++, pn++) {
1690 IF_DEBUG(linker, debugBelch("ocRunInit_MachO: function pointer %d at %p to %p\n",
1691 pn, (void *) init, (void *) *init));
1692 (*init)(argc, argv, envv);
1693 }
1694 }
1695 }
1696
1697 freeProgEnvv(envc, envv);
1698 return 1;
1699 }
1700
1701 /*
1702 * Figure out by how much to shift the entire Mach-O file in memory
1703 * when loading so that its single segment ends up 16-byte-aligned
1704 */
1705 int
1706 machoGetMisalignment( FILE * f )
1707 {
1708 MachOHeader header;
1709 int misalignment;
1710
1711 {
1712 size_t n = fread(&header, sizeof(header), 1, f);
1713 if (n != 1) {
1714 barf("machoGetMisalignment: can't read the Mach-O header");
1715 }
1716 }
1717 fseek(f, -sizeof(header), SEEK_CUR);
1718
1719 if(header.magic != MH_MAGIC_64) {
1720 barf("Bad magic. Expected: %08x, got: %08x.",
1721 MH_MAGIC_64, header.magic);
1722 }
1723
1724 misalignment = (header.sizeofcmds + sizeof(header))
1725 & 0xF;
1726
1727 IF_DEBUG(linker, debugBelch("mach-o misalignment %d\n", misalignment));
1728 return misalignment ? (16 - misalignment) : 0;
1729 }
1730
1731 #endif /* darwin_HOST_OS || ios_HOST_OS */