[linker] fix armv7 & add aarch64
authorMoritz Angermann <moritz.angermann@gmail.com>
Thu, 8 Jun 2017 18:58:38 +0000 (14:58 -0400)
committerBen Gamari <ben@smart-cactus.org>
Thu, 8 Jun 2017 19:35:58 +0000 (15:35 -0400)
This adds Global Offset Table logic, as well as PLT like logic for armv7
and aarch64; which replaces the preexisting symbolExtras logic, by
placing the PLT tables next to the separtely loaded sections. This is
needed to ensure that the symbol stubs are in range.

Reviewers: bgamari, austin, erikd, simonmar

Reviewed By: bgamari

Subscribers: Ericson2314, ryantrinkle, rwbarton, thomie

Differential Revision: https://phabricator.haskell.org/D3448

22 files changed:
rts/LinkerInternals.h
rts/linker/CacheFlush.c
rts/linker/Elf.c
rts/linker/Elf.h
rts/linker/ElfTypes.h
rts/linker/MachO.c
rts/linker/SymbolExtras.c
rts/linker/elf_got.c [new file with mode: 0644]
rts/linker/elf_got.h [new file with mode: 0644]
rts/linker/elf_plt.c [new file with mode: 0644]
rts/linker/elf_plt.h [new file with mode: 0644]
rts/linker/elf_plt_aarch64.c [new file with mode: 0644]
rts/linker/elf_plt_aarch64.h [new file with mode: 0644]
rts/linker/elf_plt_arm.c [new file with mode: 0644]
rts/linker/elf_plt_arm.h [new file with mode: 0644]
rts/linker/elf_reloc.c [new file with mode: 0644]
rts/linker/elf_reloc.h [new file with mode: 0644]
rts/linker/elf_reloc_aarch64.c [new file with mode: 0644]
rts/linker/elf_reloc_aarch64.h [new file with mode: 0644]
rts/linker/elf_util.c
rts/linker/elf_util.h
rts/linker/util.h

index 48c43eb..05fa770 100644 (file)
@@ -111,16 +111,7 @@ typedef struct ForeignExportStablePtr_ {
     struct ForeignExportStablePtr_ *next;
 } ForeignExportStablePtr;
 
-#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) \
-    || defined(arm_HOST_ARCH)
-/* ios currently uses adjacent got tables, and no symbol extras */
-#if !defined(ios_HOST_OS)
-#define NEED_SYMBOL_EXTRAS 1
-#endif /* ios_HOST_OS */
-#endif
-
-/* iOS Simulator however, needs symbol extras for now (#13678) */
-#if defined(ios_HOST_OS) && defined(x86_64_HOST_ARCH)
+#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
 #define NEED_SYMBOL_EXTRAS 1
 #endif
 
index 206b2ef..d14af15 100644 (file)
@@ -3,28 +3,7 @@
 #include "Rts.h"
 #include "linker/CacheFlush.h"
 
-#if defined(arm_HOST_ARCH)
-
-void
-ocFlushInstructionCache( ObjectCode *oc )
-{
-    int i;
-    // Object code
-    for (i=0; i < oc->n_sections; i++) {
-        Section *s = &oc->sections[i];
-        // This is a bit too broad but we don't have any way to determine what
-        // is certainly code
-        if (s->kind == SECTIONKIND_CODE_OR_RODATA)
-            __clear_cache(s->start, (void*) ((uintptr_t) s->start + s->size));
-    }
-
-    // Jump islands
-    // Note the (+1) to ensure that the last symbol extra is covered by the
-    // flush.
-    __clear_cache(oc->symbol_extras, &oc->symbol_extras[oc->n_symbol_extras+1]);
-}
-
-#elif defined(powerpc_HOST_ARCH)
+#if defined(powerpc_HOST_ARCH)
 /*
    ocFlushInstructionCache
 
index da3e7c6..2ae731b 100644 (file)
 #  include <elf_abi.h>
 #endif
 
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#  define NEED_GOT
+#  define NEED_PLT
+#  include "elf_got.h"
+#  include "elf_plt.h"
+#  include "elf_reloc.h"
+#endif
+
 /*
 
    Note [Many ELF Sections]
@@ -170,6 +178,8 @@ ocInit_ELF(ObjectCode * oc)
                                             + oc->info->elfHeader->e_phoff);
     oc->info->sectionHeader = (Elf_Shdr *) ((uint8_t*)oc->image
                                             + oc->info->elfHeader->e_shoff);
+    oc->info->sectionHeaderStrtab = (char*)((uint8_t*)oc->image +
+            oc->info->sectionHeader[oc->info->elfHeader->e_shstrndx].sh_offset);
 
     oc->n_sections = elf_shnum(oc->info->elfHeader);
 
@@ -277,6 +287,9 @@ ocDeinit_ELF(ObjectCode * oc)
      * ElfSymbols
      */
     if(oc->info != NULL) {
+#if defined(NEED_GOT)
+        freeGot(oc);
+#endif
         ElfSymbolTable * last = oc->info->symbolTables;
 
         while(last != NULL) {
@@ -378,7 +391,10 @@ ocVerifyImage_ELF ( ObjectCode* oc )
 #elif defined(EM_AMD64)
       case EM_AMD64: IF_DEBUG(linker,debugBelch( "amd64" )); break;
 #endif
-      default:       IF_DEBUG(linker,debugBelch( "unknown" ));
+#if defined(EM_AARCH64)
+      case EM_AARCH64: IF_DEBUG(linker,debugBelch( "aarch64" )); break;
+#endif
+       default:       IF_DEBUG(linker,debugBelch( "unknown" ));
                      errorBelch("%s: unknown architecture (e_machine == %d)"
                                 , oc->fileName, ehdr->e_machine);
                      return 0;
@@ -598,6 +614,8 @@ static int getSectionKind_ELF( Elf_Shdr *hdr, int *is_bss )
     return SECTIONKIND_OTHER;
 }
 
+#if !defined(NEED_PLT)
+
 static void *
 mapObjectFileSection (int fd, Elf_Word offset, Elf_Word size,
                       void **mapped_start, StgWord *mapped_size,
@@ -615,17 +633,17 @@ mapObjectFileSection (int fd, Elf_Word offset, Elf_Word size,
     *mapped_start = p;
     return (void*)((StgWord)p + offset - pageOffset);
 }
+#endif
 
 int
 ocGetNames_ELF ( ObjectCode* oc )
 {
    Elf_Word i;
-   int j, nent, result, fd = -1;
-   Elf_Sym* stab;
+   int result, fd = -1;
 
    char*     ehdrC    = (char*)(oc->image);
    Elf_Ehdr* ehdr     = (Elf_Ehdr*)ehdrC;
-   char*     strtab;
+
    Elf_Shdr* shdr     = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
    Section * sections;
 #if defined(SHN_XINDEX)
@@ -640,7 +658,6 @@ ocGetNames_ELF ( ObjectCode* oc )
    oc->sections = sections;
    oc->n_sections = shnum;
 
-
    if (oc->imageMapped) {
 #if defined(openbsd_HOST_OS)
        fd = open(oc->fileName, O_RDONLY, S_IRUSR);
@@ -666,16 +683,77 @@ ocGetNames_ELF ( ObjectCode* oc )
          /* This is a non-empty .bss section.  Allocate zeroed space for
             it, and set its .sh_offset field such that
             ehdrC + .sh_offset == addr_of_zeroed_space.  */
+#if defined(NEED_GOT)
+          /* always use mmap if we use GOT slots.  Otherwise the malloced
+           * address might be out of range for sections that are mmaped.
+           */
+          alloc = SECTION_MMAP;
+          start = mmap(NULL, size,
+                       PROT_READ | PROT_WRITE | PROT_EXEC,
+                       MAP_ANON | MAP_PRIVATE,
+                       -1, 0);
+          mapped_start = start;
+          mapped_offset = 0;
+          mapped_size = roundUpToPage(size);
+#else
           alloc = SECTION_MALLOC;
           start = stgCallocBytes(1, size, "ocGetNames_ELF(BSS)");
           mapped_start = start;
+#endif
          /*
          debugBelch("BSS section at 0x%x, size %d\n",
                          zspace, shdr[i].sh_size);
          */
-      }
+          addSection(&sections[i], kind, alloc, start, size,
+                     mapped_offset, mapped_start, mapped_size);
+
+          oc->sections[i].info->nstubs = 0;
+          oc->sections[i].info->stub_offset = NULL;
+          oc->sections[i].info->stub_size = 0;
+          oc->sections[i].info->stubs = NULL;
+      } else if (kind != SECTIONKIND_OTHER && size > 0) {
+
+#if defined(NEED_PLT)
+          /* To support stubs next to sections, we will use the following
+           * layout:
+           *
+           * .--------------.
+           * | Section data |
+           * |--------------|
+           * | Stub space   |
+           * '--------------'
+           *
+           * This ensures that the plt stubs are in range for the section data,
+           * Unless the section data exceeds the size for relative jump, in
+           * which case I wouldn't know how to solve this, without starting to
+           * break up the section itself.
+           */
+
+          unsigned nstubs = numberOfStubsForSection(oc, i);
+          unsigned stub_space = STUB_SIZE * nstubs;
+
+          void * mem = mmap(NULL, size+stub_space,
+                            PROT_READ | PROT_WRITE | PROT_EXEC,
+                            MAP_ANON | MAP_PRIVATE,
+                            -1, 0);
 
-      else if (kind != SECTIONKIND_OTHER && size > 0) {
+          if( mem == MAP_FAILED ) {
+              barf("failed to mmap allocated memory to load section %d. "
+                   "errno = %d", i, errno);
+          }
+
+          /* copy only the image part over; we don't want to copy data
+           * into the stub part.
+           */
+          memcpy( mem, oc->image + offset, size );
+
+          alloc = SECTION_MMAP;
+
+          mapped_offset = 0;
+          mapped_size = roundUpToPage(size+stub_space);
+          start = mem;
+          mapped_start = mem;
+#else
           if (USE_CONTIGUOUS_MMAP) {
               // already mapped.
               start = oc->image + offset;
@@ -696,155 +774,189 @@ ocGetNames_ELF ( ObjectCode* oc )
               if (start == NULL) goto fail;
               alloc = SECTION_MMAP;
           }
+#endif
+          addSection(&sections[i], kind, alloc, start, size,
+                     mapped_offset, mapped_start, mapped_size);
+
+#if defined(NEED_PLT)
+          oc->sections[i].info->nstubs = 0;
+          oc->sections[i].info->stub_offset = (uint8_t*)mem + size;
+          oc->sections[i].info->stub_size = stub_space;
+          oc->sections[i].info->stubs = NULL;
+#else
+          oc->sections[i].info->nstubs = 0;
+          oc->sections[i].info->stub_offset = NULL;
+          oc->sections[i].info->stub_size = 0;
+          oc->sections[i].info->stubs = NULL;
+#endif
+
           addProddableBlock(oc, start, size);
+      } else {
+          addSection(&oc->sections[i], kind, alloc, oc->image+offset, size,
+                     0, 0, 0);
+          oc->sections[i].info->nstubs = 0;
+          oc->sections[i].info->stub_offset = NULL;
+          oc->sections[i].info->stub_size = 0;
+          oc->sections[i].info->stubs = NULL;
       }
+      oc->sections[i].info->name          = oc->info->sectionHeaderStrtab
+                                            + shdr[i].sh_name;
+      oc->sections[i].info->sectionHeader = &shdr[i];
+
+
 
-      addSection(&sections[i], kind, alloc, start, size,
-                 mapped_offset, mapped_start, mapped_size);
 
       if (shdr[i].sh_type != SHT_SYMTAB) continue;
 
       /* copy stuff into this module's object symbol table */
-      stab = (Elf_Sym*) (ehdrC + offset);
-      strtab = ehdrC + shdr[shdr[i].sh_link].sh_offset;
-      nent = shdr[i].sh_size / sizeof(Elf_Sym);
 
-      oc->n_symbols = nent;
+      oc->n_symbols = 0;
+      for(ElfSymbolTable *symTab = oc->info->symbolTables;
+          symTab != NULL; symTab = symTab->next) {
+          oc->n_symbols += symTab->n_symbols;
+      }
+
       oc->symbols = stgCallocBytes(oc->n_symbols, sizeof(SymbolName*),
                                    "ocGetNames_ELF(oc->symbols)");
       // Note calloc: if we fail partway through initializing symbols, we need
       // to undo the additions to the symbol table so far. We know which ones
       // have been added by whether the entry is NULL or not.
 
+      unsigned curSymbol = 0;
+
       //TODO: we ignore local symbols anyway right? So we can use the
       //      shdr[i].sh_info to get the index of the first non-local symbol
       // ie we should use j = shdr[i].sh_info
-      for (j = 0; j < nent; j++) {
+       for(ElfSymbolTable *symTab = oc->info->symbolTables;
+           symTab != NULL; symTab = symTab->next) {
+           for (size_t j = 0; j < symTab->n_symbols; j++) {
 
-         char  isLocal  = false; /* avoids uninit-var warning */
-         HsBool isWeak  = HS_BOOL_FALSE;
-         SymbolAddr* ad  = NULL;
-         SymbolName* nm  = strtab + stab[j].st_name;
-         unsigned short shndx = stab[j].st_shndx;
-         Elf_Word secno;
+               char isLocal = false; /* avoids uninit-var warning */
+               HsBool isWeak = HS_BOOL_FALSE;
+               SymbolName *nm = symTab->symbols[j].name;
+               unsigned short shndx = symTab->symbols[j].elf_sym->st_shndx;
 
-         /* See Note [Many ELF Sections] */
-         /* Note that future checks for special SHN_* numbers should check the
-          * shndx variable, not the section number in secno. Sections with the
-          * real number in the SHN_LORESERVE..HIRESERVE range will have shndx
-          * SHN_XINDEX and a secno with one of the reserved values. */
-         secno = shndx;
+               ElfSymbol *symbol = &symTab->symbols[j];
+
+               Elf_Word secno;
+
+
+               /* See Note [Many ELF Sections] */
+               /* Note that future checks for special SHN_* numbers should check
+                * the shndx variable, not the section number in secno. Sections
+                * with the real number in the SHN_LORESERVE..HIRESERVE range
+                * will have shndx SHN_XINDEX and a secno with one of the
+                * reserved values. */
+               secno = shndx;
 #if defined(SHN_XINDEX)
-         if (shndx == SHN_XINDEX) {
-            ASSERT(shndxTable);
-            secno = shndxTable[j];
-         }
+               if (shndx == SHN_XINDEX) {
+                  ASSERT(shndxTable);
+                  secno = shndxTable[j];
+               }
 #endif
-         /* Figure out if we want to add it; if so, set ad to its
-            address.  Otherwise leave ad == NULL. */
-
-         if (shndx == SHN_COMMON) {
-            isLocal = false;
-            ad = stgCallocBytes(1, stab[j].st_size, "ocGetNames_ELF(COMMON)");
-            /*
-            debugBelch("COMMON symbol, size %d name %s\n",
-                            stab[j].st_size, nm);
-            */
-            /* Pointless to do addProddableBlock() for this area,
-               since the linker should never poke around in it. */
-         }
-         else
-         if ( ( ELF_ST_BIND(stab[j].st_info)==STB_GLOBAL
-                || ELF_ST_BIND(stab[j].st_info)==STB_LOCAL
-                || ELF_ST_BIND(stab[j].st_info)==STB_WEAK
-              )
-              /* and not an undefined symbol */
-              && shndx != SHN_UNDEF
-              /* and not in a "special section" */
-              && (shndx < SHN_LORESERVE
+               /* Figure out if we want to add it; if so, set ad to its
+                  address.  Otherwise leave ad == NULL. */
+
+               if (shndx == SHN_COMMON) {
+                   isLocal = false;
+                   symbol->addr = stgCallocBytes(1, symbol->elf_sym->st_size,
+                                       "ocGetNames_ELF(COMMON)");
+                   /*
+                   debugBelch("COMMON symbol, size %d name %s\n",
+                                   stab[j].st_size, nm);
+                   */
+                   /* Pointless to do addProddableBlock() for this area,
+                      since the linker should never poke around in it. */
+               } else if ((ELF_ST_BIND(symbol->elf_sym->st_info) == STB_GLOBAL
+                           || ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL
+                           || ELF_ST_BIND(symbol->elf_sym->st_info) == STB_WEAK
+                                                                  )
+                          /* and not an undefined symbol */
+                          && shndx != SHN_UNDEF
+                          /* and not in a "special section" */
+                          && (shndx < SHN_LORESERVE
 #if defined(SHN_XINDEX)
-                  || shndx == SHN_XINDEX
+                                  || shndx == SHN_XINDEX
 #endif
-                 )
-              &&
-              /* and it's a not a section or string table or anything silly */
-              ( ELF_ST_TYPE(stab[j].st_info)==STT_FUNC ||
-                ELF_ST_TYPE(stab[j].st_info)==STT_OBJECT ||
-                ELF_ST_TYPE(stab[j].st_info)==STT_NOTYPE
-              )
-            ) {
-            /* Section 0 is the undefined section, hence > and not >=. */
-            ASSERT(secno > 0 && secno < shnum);
-            /*
-            if (shdr[secno].sh_type == SHT_NOBITS) {
-               debugBelch("   BSS symbol, size %d off %d name %s\n",
-                               stab[j].st_size, stab[j].st_value, nm);
-            }
-            */
-            ad = (SymbolAddr*)((intptr_t)sections[secno].start +
-                         (intptr_t)stab[j].st_value);
-            if (ELF_ST_BIND(stab[j].st_info)==STB_LOCAL) {
-               isLocal = true;
-               isWeak = false;
-            } else { /* STB_GLOBAL or STB_WEAK */
-#if defined(ELF_FUNCTION_DESC)
-               /* dlsym() and the initialisation table both give us function
-                * descriptors, so to be consistent we store function descriptors
-                * in the symbol table */
-               if (ELF_ST_TYPE(stab[j].st_info) == STT_FUNC)
-                   ad = (SymbolAddr*)allocateFunctionDesc((Elf_Addr)ad);
-#endif
-               IF_DEBUG(linker,debugBelch( "addOTabName(GLOB): %10p  %s %s\n",
-                                      ad, oc->fileName, nm ));
-               isLocal = false;
-               isWeak = (ELF_ST_BIND(stab[j].st_info)==STB_WEAK);
-            }
-         }
-
-         /* And the decision is ... */
-
-         oc->symbols[j] = nm;
-
-         if (ad != NULL) {
-            ASSERT(nm != NULL);
-            /* Acquire! */
-            if (isLocal) {
-                /* Ignore entirely. */
-                oc->symbols[j] = NULL;
-            } else {
-
-                if (isWeak == HS_BOOL_TRUE) {
-                    setWeakSymbol(oc, nm);
-                }
+                          )
+                          &&
+                          /* and it's a not a section or string table or
+                           * anything silly */
+                          (ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_FUNC
+                          || ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_OBJECT
+                          || ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_NOTYPE
+                          )
+                       ) {
+                   /* Section 0 is the undefined section, hence > and not >=. */
+                   ASSERT(secno > 0 && secno < shnum);
+                   /*
+                   if (shdr[secno].sh_type == SHT_NOBITS) {
+                      debugBelch("   BSS symbol, size %d off %d name %s\n",
+                                      stab[j].st_size, stab[j].st_value, nm);
+                   }
+                   */
+                   symbol->addr = (SymbolAddr*)(
+                           (intptr_t) oc->sections[secno].start +
+                           (intptr_t) symbol->elf_sym->st_value);
+
+                   if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL) {
+                       isLocal = true;
+                       isWeak = false;
+                   } else { /* STB_GLOBAL or STB_WEAK */
+                       IF_DEBUG(linker,
+                                debugBelch("addOTabName(GLOB): %10p  %s %s\n",
+                                           symbol->addr, oc->fileName, nm));
+                       isLocal = false;
+                       isWeak = ELF_ST_BIND(symbol->elf_sym->st_info)
+                                == STB_WEAK;
+                   }
+               }
 
-                if (! ghciInsertSymbolTable(oc->fileName, symhash,
-                                            nm, ad, isWeak, oc)) {
-                    goto fail;
-                }
-            }
-         } else {
-            /* Skip. */
-            IF_DEBUG(linker,debugBelch( "skipping `%s'\n",
-                                   nm ));
-
-            /* We're skipping the symbol, but if we ever load this
-               object file we'll want to skip it then too. */
-            oc->symbols[j] = NULL;
-
-            /*
-            debugBelch(
-                    "skipping   bind = %d,  type = %d,  secno = %d   `%s'\n",
-                    (int)ELF_ST_BIND(stab[j].st_info),
-                    (int)ELF_ST_TYPE(stab[j].st_info),
-                    (int)secno,
-                    nm
+               /* And the decision is ... */
+
+               if (symbol->addr != NULL) {
+                   ASSERT(nm != NULL);
+                   /* Acquire! */
+                   if (!isLocal) {
+
+                       if (isWeak == HS_BOOL_TRUE) {
+                           setWeakSymbol(oc, nm);
+                       }
+                       if (!ghciInsertSymbolTable(oc->fileName, symhash,
+                                                  nm, symbol->addr, isWeak, oc)
+                           ) {
+                           goto fail;
+                       }
+                       oc->symbols[curSymbol++] = nm;
+                   }
+               } else {
+                   /* Skip. */
+                   IF_DEBUG(linker,
+                            debugBelch("skipping `%s'\n",
+                                               nm)
                    );
-            */
-         }
 
+                   /*
+                   debugBelch(
+                      "skipping   bind = %d,  type = %d,  secno = %d   `%s'\n",
+                      (int)ELF_ST_BIND(stab[j].st_info),
+                      (int)ELF_ST_TYPE(stab[j].st_info),
+                      (int)secno,
+                      nm
+                   );
+                   */
+               }
+           }
       }
    }
 
+#if defined(NEED_GOT)
+   if(makeGot( oc ))
+       errorBelch("Failed to create GOT for %s",
+                  oc->archiveMemberName
+                  ? oc->archiveMemberName
+                  : oc->fileName);
+#endif
    result = 1;
    goto end;
 
@@ -857,6 +969,10 @@ end:
    return result;
 }
 
+// the aarch64 linker uses relocacteObjectCodeAarch64,
+// see elf_reloc_aarch64.{h,c}
+#if !defined(aarch64_HOST_ARCH)
+
 /* Do ELF relocations which lack an explicit addend.  All x86-linux
    and arm-linux relocations appear to be of this form. */
 static int
@@ -864,347 +980,381 @@ do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
                          Elf_Shdr* shdr, int shnum )
 {
    int j;
-   SymbolName* symbol;
+
    Elf_Word* targ;
    Elf_Rel*  rtab = (Elf_Rel*) (ehdrC + shdr[shnum].sh_offset);
-   Elf_Sym*  stab;
-   char*     strtab;
+
    int         nent = shdr[shnum].sh_size / sizeof(Elf_Rel);
    int target_shndx = shdr[shnum].sh_info;
    int symtab_shndx = shdr[shnum].sh_link;
-   int strtab_shndx = shdr[symtab_shndx].sh_link;
-#if defined(SHN_XINDEX)
-   Elf_Word* shndx_table = get_shndx_table((Elf_Ehdr*)ehdrC);
-#endif
 
-   stab  = (Elf_Sym*) (ehdrC + shdr[ symtab_shndx ].sh_offset);
-   strtab= (char*)    (ehdrC + shdr[ strtab_shndx ].sh_offset);
+   ElfSymbolTable *stab = NULL;
+   for(ElfSymbolTable * st = oc->info->symbolTables;
+       st != NULL; st = st->next) {
+       if((int)st->index == symtab_shndx) {
+           stab = st;
+           break;
+       }
+   }
+   ASSERT(stab != NULL);
+
    targ  = (Elf_Word*)oc->sections[target_shndx].start;
-   IF_DEBUG(linker,debugBelch( "relocations for section %d using symtab %d and strtab %d\n",
-                          target_shndx, symtab_shndx, strtab_shndx ));
+   IF_DEBUG(linker,debugBelch(
+                "relocations for section %d using symtab %d\n",
+                target_shndx, symtab_shndx));
 
    /* Skip sections that we're not interested in. */
    if (oc->sections[target_shndx].kind == SECTIONKIND_OTHER) {
-           IF_DEBUG(linker,debugBelch( "skipping (target section not loaded)"));
-           return 1;
+       IF_DEBUG(linker,debugBelch( "skipping (target section not loaded)"));
+       return 1;
    }
 
    for (j = 0; j < nent; j++) {
-      Elf_Addr offset = rtab[j].r_offset;
-      Elf_Addr info   = rtab[j].r_info;
+       Elf_Addr offset = rtab[j].r_offset;
+       Elf_Addr info   = rtab[j].r_info;
 
-      Elf_Addr  P  = ((Elf_Addr)targ) + offset;
-      Elf_Word* pP = (Elf_Word*)P;
+       Elf_Addr  P  = ((Elf_Addr)targ) + offset;
+       Elf_Word* pP = (Elf_Word*)P;
 #if defined(i386_HOST_ARCH) || defined(DEBUG)
-      Elf_Addr  A  = *pP;
+       Elf_Addr  A  = *pP;
 #endif
-      Elf_Addr  S;
-      void*     S_tmp;
+       Elf_Addr  S;
+       void*     S_tmp;
 #if defined(i386_HOST_ARCH)
-      Elf_Addr  value;
+       Elf_Addr  value;
 #endif
 #if defined(arm_HOST_ARCH)
-      int is_target_thm=0, T=0;
+       int is_target_thm=0, T=0;
 #endif
 
-      IF_DEBUG(linker,debugBelch( "Rel entry %3d is raw(%6p %6p): ",
-                             j, (void*)offset, (void*)info ));
-      if (!info) {
-         IF_DEBUG(linker,debugBelch( " ZERO" ));
-         S = 0;
-      } else {
-         Elf_Sym sym = stab[ELF_R_SYM(info)];
-         /* First see if it is a local symbol. */
-         if (ELF_ST_BIND(sym.st_info) == STB_LOCAL) {
-            /* Yes, so we can get the address directly from the ELF symbol
-               table. */
-            symbol = sym.st_name==0 ? "(noname)" : strtab+sym.st_name;
-            /* See Note [Many ELF Sections] */
-            Elf_Word secno = sym.st_shndx;
-#if defined(SHN_XINDEX)
-            if (secno == SHN_XINDEX) {
-               ASSERT(shndx_table);
-               secno = shndx_table[ELF_R_SYM(info)];
-            }
-#endif
-            S = (Elf_Addr)oc->sections[ secno ].start +
-                stab[ELF_R_SYM(info)].st_value;
-         } else {
-            symbol = strtab + sym.st_name;
-            S_tmp = lookupSymbol_( symbol );
-            S = (Elf_Addr)S_tmp;
-         }
-         if (!S) {
-            errorBelch("%s: unknown symbol `%s'", oc->fileName, symbol);
-            return 0;
-         }
-         IF_DEBUG(linker,debugBelch( "`%s' resolves to %p\n", symbol, (void*)S ));
+       ElfSymbol * symbol = NULL;
+
+       IF_DEBUG(linker,debugBelch( "Rel entry %3d is raw(%6p %6p): ",
+                                   j, (void*)offset, (void*)info ));
+       if (!info) {
+           IF_DEBUG(linker,debugBelch( " ZERO" ));
+           S = 0;
+       } else {
+           symbol = &stab->symbols[ELF_R_SYM(info)];
+           /* First see if it is a local symbol. */
+           if (ELF_ST_BIND(symbol->elf_sym->st_info) == STB_LOCAL) {
+               S = (Elf_Addr)symbol->addr;
+           } else {
+               S_tmp = lookupSymbol_( symbol->name );
+               S = (Elf_Addr)S_tmp;
+           }
+           if (!S) {
+               errorBelch("%s: unknown symbol `%s'",
+                          oc->fileName, symbol->name);
+               return 0;
+           }
+           IF_DEBUG(linker,debugBelch( "`%s' resolves to %p\n", symbol->name,
+                                       (void*)S ));
 
 #if defined(arm_HOST_ARCH)
-          /*
-           * 4.5.3 Symbol Values
-           *
-           * In addition to the normal rules for symbol values the following
-           * rules shall also apply to symbols of type STT_FUNC:
-           * - If the symbol addresses an ARM instruction, its value is the
-           *   address of the instruction (in a relocatable object, the
-           *   offset of the instruction from the start of the section
-           *   containing it).
-           * - If the symbol addresses a Thumb instruction, its value is the
-           *   address of the instruction with bit zero set (in a relocatable
-           *   object, the section offset with bit zero set).
-           * - For the purposes of relocation the value used shall be the
-           *   address of the instruction (st_value & ~1).
-           *
-           *  Note: This allows a linker to distinguish ARM and Thumb code
-           *        symbols without having to refer to the map. An ARM symbol
-           *        will always have an even value, while a Thumb symbol will
-           *        always have an odd value. However, a linker should strip
-           *        the discriminating bit from the value before using it for
-           *        relocation.
-           *
-           * (source: ELF for the ARM Architecture
-           *          ARM IHI 0044F, current through ABI release 2.10
-           *          24th November 2015)
-           */
-          if(ELF_ST_TYPE(sym.st_info) == STT_FUNC) {
-              is_target_thm = S & 0x1;
-              T = is_target_thm;
-              S &= ~1;
-          }
+           /*
+            * 4.5.3 Symbol Values
+            *
+            * In addition to the normal rules for symbol values the following
+            * rules shall also apply to symbols of type STT_FUNC:
+            * - If the symbol addresses an ARM instruction, its value is the
+            *   address of the instruction (in a relocatable object, the
+            *   offset of the instruction from the start of the section
+            *   containing it).
+            * - If the symbol addresses a Thumb instruction, its value is the
+            *   address of the instruction with bit zero set (in a relocatable
+            *   object, the section offset with bit zero set).
+            * - For the purposes of relocation the value used shall be the
+            *   address of the instruction (st_value & ~1).
+            *
+            *  Note: This allows a linker to distinguish ARM and Thumb code
+            *        symbols without having to refer to the map. An ARM symbol
+            *        will always have an even value, while a Thumb symbol will
+            *        always have an odd value. However, a linker should strip
+            *        the discriminating bit from the value before using it for
+            *        relocation.
+            *
+            * (source: ELF for the ARM Architecture
+            *          ARM IHI 0044F, current through ABI release 2.10
+            *          24th November 2015)
+            */
+           if(ELF_ST_TYPE(symbol->elf_sym->st_info) == STT_FUNC) {
+               is_target_thm = S & 0x1;
+               T = is_target_thm;
+               S &= ~1;
+           }
 #endif
-      }
+       }
 
-      int reloc_type = ELF_R_TYPE(info);
-      IF_DEBUG(linker,debugBelch( "Reloc: P = %p   S = %p   A = %p   type=%d\n",
-                             (void*)P, (void*)S, (void*)A, reloc_type ));
-      checkProddableBlock ( oc, pP, sizeof(Elf_Word) );
+       int reloc_type = ELF_R_TYPE(info);
+       IF_DEBUG(linker,debugBelch("Reloc: P = %p   S = %p   A = %p   type=%d\n",
+                                  (void*)P, (void*)S, (void*)A, reloc_type ));
+       checkProddableBlock ( oc, pP, sizeof(Elf_Word) );
 
 #if defined(i386_HOST_ARCH)
-      value = S + A;
+       value = S + A;
 #endif
 
-      switch (reloc_type) {
+       switch (reloc_type) {
 #        ifdef i386_HOST_ARCH
-         case COMPAT_R_386_32:   *pP = value;     break;
-         case COMPAT_R_386_PC32: *pP = value - P; break;
+       case COMPAT_R_386_32:   *pP = value;     break;
+       case COMPAT_R_386_PC32: *pP = value - P; break;
 #        endif
 
 #        ifdef arm_HOST_ARCH
-         case COMPAT_R_ARM_ABS32:
-         // Specified by Linux ARM ABI to be equivalent to ABS32
-         case COMPAT_R_ARM_TARGET1:
-            *(Elf32_Word *)P += S;
-            *(Elf32_Word *)P |= T;
-            break;
-
-         case COMPAT_R_ARM_REL32:
-            *(Elf32_Word *)P += S;
-            *(Elf32_Word *)P |= T;
-            *(Elf32_Word *)P -= P;
-            break;
-
-         case COMPAT_R_ARM_CALL:
-         case COMPAT_R_ARM_JUMP24:
-         {
-            // N.B. LLVM's LLD linker's relocation implement is a fantastic
-            // resource
-            StgWord32 *word = (StgWord32 *)P;
-            StgInt32 imm = (*word & ((1<<24)-1)) << 2;
-
-            const StgBool is_blx = (*word & 0xf0000000) == 0xf0000000;
-            const StgWord32 hBit = is_blx ? ((*word >> 24) & 1) : 0;
-            imm |= hBit << 1;
+       case COMPAT_R_ARM_ABS32:
+           // Specified by Linux ARM ABI to be equivalent to ABS32
+       case COMPAT_R_ARM_TARGET1:
+           *(Elf32_Word *)P += S;
+           *(Elf32_Word *)P |= T;
+           break;
+
+       case COMPAT_R_ARM_REL32:
+           *(Elf32_Word *)P += S;
+           *(Elf32_Word *)P |= T;
+           *(Elf32_Word *)P -= P;
+           break;
+
+       case COMPAT_R_ARM_CALL:
+       case COMPAT_R_ARM_JUMP24:
+       {
+           // N.B. LLVM's LLD linker's relocation implement is a fantastic
+           // resource
+           StgWord32 *word = (StgWord32 *)P;
+           StgInt32 imm = (*word & ((1<<24)-1)) << 2;
+
+           const StgBool is_blx = (*word & 0xf0000000) == 0xf0000000;
+           const StgWord32 hBit = is_blx ? ((*word >> 24) & 1) : 0;
+           imm |= hBit << 1;
+
+           // Sign extend to 32 bits
+           // I would have thought this would be 24 bits but LLD uses 26 here.
+           // Hmm.
+           int32_t A = signExtend32(26, imm);
+
+           S = S + A; A = 0;
+
+           StgWord32 result = ((S + A) | T) - P;
+
+           const StgBool overflow = !isInt(26, (StgInt32) result);
+           // Handle overflow and Thumb interworking
+           const StgBool needs_veneer =
+               (is_target_thm && ELF_R_TYPE(info) == COMPAT_R_ARM_JUMP24)
+               || overflow;
+
+           if(needs_veneer) { /* overflow or thum interworking */
+               // Note [PC bias]
+               // From the ELF for the ARM Architecture documentation:
+               // > 4.6.1.1 Addends and PC-bias compensation
+               // > A binary file may use REL or RELA relocations or a mixture
+               // > of the two (but multiple relocations for the same address
+               // > must use only one type).
+               // > If the relocation is pc-relative then compensation for the
+               // > PC bias (the PC value is 8 bytes ahead of the executing
+               // > instruction in ARM state and 4 bytes in Thumb state) must
+               // > be encoded in the relocation by the object producer.
+               int32_t bias = 8;
+
+               S += bias;
+               /* try to locate an existing stub for this target */
+               if(findStub(&oc->sections[target_shndx], (void**)&S, 0)) {
+                   /* didn't find any. Need to create one */
+                   if(makeStub(&oc->sections[target_shndx], (void**)&S, 0)) {
+                       errorBelch("Unable to create veneer for ARM_CALL\n");
+                       return 0;
+                   }
+               }
+               S -= bias;
 
-            // Sign extend to 32 bits
-            // I would have thought this would be 24 bits but LLD uses 26 here.
-            // Hmm.
-            imm = sign_extend32(26, imm);
+               result = ((S + A) | T) - P;
+               result &= ~1; // Clear thumb indicator bit
 
-            StgWord32 result = ((S + imm) | T) - P;
+               ASSERT(isInt(26, result)); /* X in range */
+           }
 
-            const StgBool overflow = !is_int(26, (StgInt32) result);
-            // Handle overflow and Thumb interworking
-            const StgBool needs_veneer =
-                (is_target_thm && ELF_R_TYPE(info) == COMPAT_R_ARM_JUMP24)
-                || overflow;
+           // Update the branch target
+           const StgWord32 imm24 = (result & 0x03fffffc) >> 2;
+           *word = (*word & ~0x00ffffff)
+                 | (imm24 & 0x00ffffff);
 
-            if (needs_veneer) {
-               // Generate veneer
-               // The +8 below is to undo the PC-bias compensation done by the
-               // object producer
-               SymbolExtra *extra = makeArmSymbolExtra(oc, ELF_R_SYM(info),
-                                                       S+imm+8, 0,
-                                                       is_target_thm);
-               // The -8 below is to compensate for PC bias
-               result = (StgWord32) ((StgInt32) extra->jumpIsland - P - 8);
-               result &= ~1; // Clear thumb indicator bit
-               if (!is_int(26, (StgInt32) result)) {
-                  errorBelch("Unable to fixup overflow'd R_ARM_CALL: "
-                             "jump island=%p, reloc=%p\n",
-                             (void*) extra->jumpIsland, (void*) P);
-                  return 0;
-               }
-            }
-            // Update the branch target
-            const StgWord32 imm24 = (result & 0x03fffffc) >> 2;
-            *word = (*word & ~0x00ffffff)
-                  | (imm24 & 0x00ffffff);
-
-            // Change the relocated branch into a BLX if necessary
-            const StgBool switch_mode =
-                is_target_thm && (reloc_type == COMPAT_R_ARM_CALL);
-            if (!needs_veneer && switch_mode) {
+           // Change the relocated branch into a BLX if necessary
+           const StgBool switch_mode =
+               is_target_thm && (reloc_type == COMPAT_R_ARM_CALL);
+           if (!needs_veneer && switch_mode) {
                const StgWord32 hBit = (result & 0x2) >> 1;
                // Change instruction to BLX
                *word = (*word & ~0xFF000000) | ((0xfa | hBit) << 24);
                IF_DEBUG(linker, debugBelch("Changed BL to BLX at %p\n", word));
-            }
-            break;
-         }
+           }
+           break;
+       }
 
-         case COMPAT_R_ARM_MOVT_ABS:
-         case COMPAT_R_ARM_MOVW_ABS_NC:
-         {
-            StgWord32 *word = (StgWord32 *)P;
-            StgWord32 imm12 = *word & 0xfff;
-            StgWord32 imm4 = (*word >> 16) & 0xf;
-            StgInt32 offset = imm4 << 12 | imm12;
-            StgWord32 result = (S + offset) | T;
-
-            if (reloc_type == COMPAT_R_ARM_MOVT_ABS)
-                result = (result & 0xffff0000) >> 16;
-
-            StgWord32 result12 = result & 0xfff;
-            StgWord32 result4 = (result >> 12) & 0xf;
-            *word = (*word & ~0xf0fff) | (result4 << 16) | result12;
-            break;
-         }
+       case COMPAT_R_ARM_MOVT_ABS:
+       case COMPAT_R_ARM_MOVW_ABS_NC:
+       {
+           StgWord32 *word = (StgWord32 *)P;
+           StgWord32 imm12 = *word & 0xfff;
+           StgWord32 imm4 = (*word >> 16) & 0xf;
+           StgInt32 offset = imm4 << 12 | imm12;
+           StgWord32 result = (S + offset) | T;
+
+           if (reloc_type == COMPAT_R_ARM_MOVT_ABS)
+               result = (result & 0xffff0000) >> 16;
+
+           StgWord32 result12 = result & 0xfff;
+           StgWord32 result4 = (result >> 12) & 0xf;
+           *word = (*word & ~0xf0fff) | (result4 << 16) | result12;
+           break;
+       }
 
-         case COMPAT_R_ARM_THM_CALL:
-         case COMPAT_R_ARM_THM_JUMP24:
-         {
-            StgWord16 *upper = (StgWord16 *)P;
-            StgWord16 *lower = (StgWord16 *)(P + 2);
-
-            int overflow;
-            int to_thm = (*lower >> 12) & 1;
-            int sign = (*upper >> 10) & 1;
-            int j1, j2, i1, i2;
-
-            // Decode immediate value
-            j1 = (*lower >> 13) & 1; i1 = ~(j1 ^ sign) & 1;
-            j2 = (*lower >> 11) & 1; i2 = ~(j2 ^ sign) & 1;
-            StgInt32 imm = (sign << 24)
-                         | (i1 << 23)
-                         | (i2 << 22)
-                         | ((*upper & 0x03ff) << 12)
-                         | ((*lower & 0x07ff) << 1);
+       case COMPAT_R_ARM_THM_CALL:
+       case COMPAT_R_ARM_THM_JUMP24:
+       {
+           StgWord16 *upper = (StgWord16 *)P;
+           StgWord16 *lower = (StgWord16 *)(P + 2);
+
+           int overflow;
+           int to_thm = (*lower >> 12) & 1;
+           int sign = (*upper >> 10) & 1;
+           int j1, j2, i1, i2;
+
+           // Decode immediate value
+           j1 = (*lower >> 13) & 1; i1 = ~(j1 ^ sign) & 1;
+           j2 = (*lower >> 11) & 1; i2 = ~(j2 ^ sign) & 1;
+
+           StgInt32 A = (sign << 24)
+                        | (i1 << 23)
+                        | (i2 << 22)
+                        | ((*upper & 0x03ff) << 12)
+                        | ((*lower & 0x07ff) << 1);
 
             // Sign extend 25 to 32 bits
-            if (imm & 0x01000000)
-               imm -= 0x02000000;
+           if (A & 0x01000000)
+               A -= 0x02000000;
 
-            offset = ((imm + S) | T) - P;
-            overflow = offset <= (StgWord32)0xff000000
-                    || offset >= (StgWord32)0x01000000;
+           S = S + A; A = 0;
 
-            if ((!is_target_thm && ELF_R_TYPE(info) == COMPAT_R_ARM_THM_JUMP24)
-                || overflow) {
+           offset = ((S + A) | T) - P;
+           overflow = offset <= (StgWord32)0xff000000
+                   || offset >= (StgWord32)0x01000000;
+
+           if ((!is_target_thm && ELF_R_TYPE(info) == COMPAT_R_ARM_THM_JUMP24)
+               || overflow) {
                // Generate veneer
-               SymbolExtra *extra = makeArmSymbolExtra(oc, ELF_R_SYM(info),
-                                                       S+imm+4, 1,
-                                                       is_target_thm);
-               offset = (StgWord32) &extra->jumpIsland - P - 4;
+
+               // see [PC bias] above.
+               int32_t bias = 4;
+               S += bias;
+               // set the Thumb indicator to S, the final address should
+               // carry the correct thumb indicator.
+               S |= T;
+               /* try to locate an existing stub for this target */
+               if(findStub(&oc->sections[target_shndx], (void**)&S, 1)) {
+                   /* didn't find any. Need to create one */
+                   if(makeStub(&oc->sections[target_shndx], (void**)&S, 1)) {
+                       errorBelch("Unable to create veneer for ARM_THM_CALL\n");
+                       return 0;
+                   }
+               }
+               S -= bias;
+
+               offset = ((S + A) | T) - P;
+
                sign = offset >> 31;
                to_thm = 1;
-            } else if (!is_target_thm
-                       && ELF_R_TYPE(info) == COMPAT_R_ARM_THM_CALL) {
+           } else if (!is_target_thm
+                      && ELF_R_TYPE(info) == COMPAT_R_ARM_THM_CALL) {
                offset &= ~0x3;
                to_thm = 0;
-            }
-
-            // Reencode instruction
-            i1 = ~(offset >> 23) & 1; j1 = sign ^ i1;
-            i2 = ~(offset >> 22) & 1; j2 = sign ^ i2;
-            *upper = ( (*upper & 0xf800)
-                   | (sign << 10)
-                   | ((offset >> 12) & 0x03ff) );
-            *lower = ( (*lower & 0xd000)
-                   | (j1 << 13)
-                   | (to_thm << 12)
-                   | (j2 << 11)
-                   | ((offset >> 1) & 0x07ff) );
-            break;
-         }
+           }
+
+           // Reencode instruction
+           i1 = ~(offset >> 23) & 1; j1 = sign ^ i1;
+           i2 = ~(offset >> 22) & 1; j2 = sign ^ i2;
+           *upper = ( (*upper & 0xf800)
+                  | (sign << 10)
+                  | ((offset >> 12) & 0x03ff) );
+           *lower = ( (*lower & 0xd000)
+                  | (j1 << 13)
+                  | (to_thm << 12)
+                  | (j2 << 11)
+                  | ((offset >> 1) & 0x07ff) );
+           break;
+       }
 
-         case COMPAT_R_ARM_THM_MOVT_ABS:
-         case COMPAT_R_ARM_THM_MOVW_ABS_NC:
-         {
-            StgWord16 *upper = (StgWord16 *)P;
-            StgWord16 *lower = (StgWord16 *)(P + 2);
-            StgInt32 offset = ((*upper & 0x000f) << 12)
-                            | ((*upper & 0x0400) << 1)
-                            | ((*lower & 0x7000) >> 4)
-                            | (*lower & 0x00ff);
-
-            offset = (offset ^ 0x8000) - 0x8000; // Sign extend
-            offset += S;
-            if (ELF_R_TYPE(info) == COMPAT_R_ARM_THM_MOVW_ABS_NC)
-                   offset |= T;
-            else if (ELF_R_TYPE(info) == COMPAT_R_ARM_THM_MOVT_ABS)
-                   offset >>= 16;
-
-            *upper = ( (*upper & 0xfbf0)
-                   | ((offset & 0xf000) >> 12)
-                   | ((offset & 0x0800) >> 1) );
-            *lower = ( (*lower & 0x8f00)
-                   | ((offset & 0x0700) << 4)
-                   | (offset & 0x00ff) );
-            break;
-         }
+       case COMPAT_R_ARM_THM_MOVT_ABS:
+       case COMPAT_R_ARM_THM_MOVW_ABS_NC:
+       {
+           StgWord16 *upper = (StgWord16 *)P;
+           StgWord16 *lower = (StgWord16 *)(P + 2);
+           StgInt32 offset = ((*upper & 0x000f) << 12)
+                           | ((*upper & 0x0400) << 1)
+                           | ((*lower & 0x7000) >> 4)
+                           | (*lower & 0x00ff);
+
+           offset = (offset ^ 0x8000) - 0x8000; // Sign extend
+           offset += S;
+           if (ELF_R_TYPE(info) == COMPAT_R_ARM_THM_MOVW_ABS_NC)
+               offset |= T;
+           else if (ELF_R_TYPE(info) == COMPAT_R_ARM_THM_MOVT_ABS)
+               offset >>= 16;
+
+           *upper = ( (*upper & 0xfbf0)
+                  | ((offset & 0xf000) >> 12)
+                  | ((offset & 0x0800) >> 1) );
+           *lower = ( (*lower & 0x8f00)
+                  | ((offset & 0x0700) << 4)
+                  | (offset & 0x00ff) );
+           break;
+       }
 
-         case COMPAT_R_ARM_THM_JUMP8:
-         {
-            StgWord16 *word = (StgWord16 *)P;
-            StgWord offset = *word & 0x01fe;
-            offset += S - P;
-            if (!is_target_thm) {
+       case COMPAT_R_ARM_THM_JUMP8:
+       {
+           StgWord16 *word = (StgWord16 *)P;
+           StgWord offset = *word & 0x01fe;
+           offset += S - P;
+           if (!is_target_thm) {
                errorBelch("%s: Thumb to ARM transition with JUMP8 relocation "
                           "not supported\n",
-                     oc->fileName);
+                          oc->fileName);
                return 0;
-            }
+           }
 
-            *word = (*word & ~0x01fe)
-                  | (offset & 0x01fe);
-            break;
-         }
+           *word = (*word & ~0x01fe)
+                 | (offset & 0x01fe);
+           break;
+       }
 
-         case COMPAT_R_ARM_THM_JUMP11:
-         {
-            StgWord16 *word = (StgWord16 *)P;
-            StgWord offset = *word & 0x0ffe;
-            offset += S - P;
-            if (!is_target_thm) {
+       case COMPAT_R_ARM_THM_JUMP11:
+       {
+           StgWord16 *word = (StgWord16 *)P;
+           StgWord offset = *word & 0x0ffe;
+           offset += S - P;
+           if (!is_target_thm) {
                errorBelch("%s: Thumb to ARM transition with JUMP11 relocation "
                           "not supported\n",
-                     oc->fileName);
+                          oc->fileName);
                return 0;
-            }
-
-            *word = (*word & ~0x0ffe)
-                  | (offset & 0x0ffe);
-            break;
-         }
+           }
 
+           *word = (*word & ~0x0ffe)
+                 | (offset & 0x0ffe);
+           break;
+       }
+       case COMPAT_R_ARM_GOT_PREL: {
+              int32_t A = *pP;
+              void* GOT_S = symbol->got_addr;
+              ASSERT(GOT_S);
+              *(uint32_t *)P = (uint32_t) GOT_S + A - P;
+              break;
+       }
 #        endif // arm_HOST_ARCH
 
-         default:
-            errorBelch("%s: unhandled ELF relocation(Rel) type %" FMT_Word "\n",
-                  oc->fileName, (W_)ELF_R_TYPE(info));
-            return 0;
-      }
+       default:
+           errorBelch("%s: unhandled ELF relocation(Rel) type %" FMT_Word "\n",
+                      oc->fileName, (W_)ELF_R_TYPE(info));
+           return 0;
+       }
 
    }
    return 1;
@@ -1300,7 +1450,7 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
            errorBelch("%s: unknown symbol `%s'", oc->fileName, symbol);
            return 0;
          }
-         IF_DEBUG(linker,debugBelch( "`%s' resolves to %p\n", symbol, (void*)S ));
+         IF_DEBUG(linker,debugBelch("`%s' resolves to %p\n", symbol, (void*)S));
       }
 
 #if defined(DEBUG) || defined(sparc_HOST_ARCH) || defined(powerpc_HOST_ARCH) \
@@ -1391,7 +1541,7 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
 
             if( delta << 6 >> 6 != delta )
             {
-               value = (Elf_Addr) (&makeSymbolExtra( oc, ELF_R_SYM(info), value )
+               value = (Elf_Addr)(&makeSymbolExtra( oc, ELF_R_SYM(info), value )
                                         ->jumpIsland);
                delta = value - P;
 
@@ -1552,6 +1702,8 @@ do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
    }
    return 1;
 }
+#endif /* !aarch64_HOST_ARCH */
+
 
 int
 ocResolve_ELF ( ObjectCode* oc )
@@ -1563,20 +1715,78 @@ ocResolve_ELF ( ObjectCode* oc )
    Elf_Shdr* shdr  = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
    const Elf_Word shnum = elf_shnum(ehdr);
 
+#if defined(SHN_XINDEX)
+   Elf_Word* shndxTable = get_shndx_table(ehdr);
+#endif
+
+    /* resolve section symbols
+     * these are special symbols that point to sections, and have no name.
+     * Usually there should be one symbol for each text and data section.
+     *
+     * We need to resolve (assign addresses) to them, to be able to use them
+     * during relocation.
+     */
+    for(ElfSymbolTable *symTab = oc->info->symbolTables;
+        symTab != NULL; symTab = symTab->next) {
+        for (size_t i = 0; i < symTab->n_symbols; i++) {
+            ElfSymbol *symbol = &symTab->symbols[i];
+            if(STT_SECTION == ELF_ST_TYPE(symbol->elf_sym->st_info)) {
+                /* NOTE: We assume that oc->sections corresponds to the
+                 *       sections in the object file.  This is currently true,
+                 *       and will stay true, unless we start to compress
+                 *       oc->sections by not having an entry for sections we
+                 *       are not interested in.
+                 */
+
+
+                /* See Note [Many ELF Sections] */
+                /* Note that future checks for special SHN_* numbers should
+                 * check the shndx variable, not the section number in secno.
+                 * Sections with the real number in the SHN_LORESERVE..HIRESERVE
+                 * range will have shndx SHN_XINDEX and a secno with one of the
+                 * reserved values.
+                 */
+                Elf_Word secno = symbol->elf_sym->st_shndx;
+#if defined(SHN_XINDEX)
+                if (secno == SHN_XINDEX) {
+                 ASSERT(shndxTable);
+                 secno = shndxTable[i];
+              }
+#endif
+                ASSERT(symbol->elf_sym->st_name == 0);
+                ASSERT(symbol->elf_sym->st_value == 0);
+                symbol->addr = oc->sections[ secno ].start;
+            }
+        }
+    }
+
+#if defined(NEED_GOT)
+    if(fillGot( oc ))
+        return 0;
+#endif /* NEED_GOT */
+
+#if defined(aarch64_HOST_ARCH)
+    /* use new relocation design */
+    if(relocateObjectCode( oc ))
+        return 0;
+#else
    /* Process the relocation sections. */
    for (i = 0; i < shnum; i++) {
       if (shdr[i].sh_type == SHT_REL) {
          ok = do_Elf_Rel_relocations ( oc, ehdrC, shdr, i );
-         if (!ok) return ok;
+         if (!ok)
+             return ok;
       }
       else
       if (shdr[i].sh_type == SHT_RELA) {
          ok = do_Elf_Rela_relocations ( oc, ehdrC, shdr, i );
-         if (!ok) return ok;
+         if (!ok)
+             return ok;
       }
    }
+#endif
 
-#if defined(powerpc_HOST_ARCH) || defined(arm_HOST_ARCH)
+#if defined(powerpc_HOST_ARCH)
    ocFlushInstructionCache( oc );
 #endif
 
index 42548a0..b0d6638 100644 (file)
@@ -5,7 +5,7 @@
 
 #include "BeginPrivate.h"
 
-#include "ElfTypes.h"
+#include <linker/ElfTypes.h>
 
 void ocInit_ELF          ( ObjectCode* oc );
 void ocDeinit_ELF        ( ObjectCode* oc );
index ca5bc58..9e2e42f 100644 (file)
@@ -147,6 +147,11 @@ typedef
 struct _Stub {
     void * addr;
     void * target;
+    /* flags can hold architecture specific information they are used during
+     * lookup of stubs as well. Thus two stubs for the same target with
+     * different flags are considerd unequal.
+    */
+    uint8_t flags;
     struct _Stub * next;
 } Stub;
 
index 8895482..5812e89 100644 (file)
 /* often times we need to extend some value of certain number of bits
  * int an int64_t for e.g. relative offsets.
  */
-int64_t sign_extend(uint64_t val, uint8_t bits);
+int64_t signExtend(uint64_t val, uint8_t bits);
 /* Helper functions to check some instruction properties */
-bool is_vector_op(uint32_t *p);
-bool is_load_store(uint32_t *p);
+bool isVectorPp(uint32_t *p);
+bool isLoadStore(uint32_t *p);
 
 /* aarch64 relocations may contain an addend alreay in the position
  * where we want to write the address offset to. Thus decoding as well
  * as encoding is needed.
  */
-bool fits_bits(size_t bits, int64_t value);
-int64_t decode_addend(ObjectCode * oc, Section * section,
-                      MachORelocationInfo * ri);
-void encode_addend(ObjectCode * oc, Section * section,
-                   MachORelocationInfo * ri, int64_t addend);
+bool fitsBits(size_t bits, int64_t value);
+int64_t decodeAddend(ObjectCode * oc, Section * section,
+                     MachORelocationInfo * ri);
+void encodeAddend(ObjectCode * oc, Section * section,
+                  MachORelocationInfo * ri, int64_t addend);
 
 /* finding and making stubs. We don't need to care about the symbol they
  * represent. As long as two stubs point to the same address, they are identical
  */
-bool find_stub(Section * section, void ** addr);
-bool make_stub(Section * section, void ** addr);
-void free_stubs(Section * section);
+bool findStub(Section * section, void ** addr);
+bool makeStub(Section * section, void ** addr);
+void freeStubs(Section * section);
 
 /* Global Offset Table logic */
-bool is_got_load(MachORelocationInfo * ri);
-bool need_got_slot(MachONList * symbol);
-bool make_got(ObjectCode * oc);
-void free_got(ObjectCode * oc);
+bool isGotLoad(MachORelocationInfo * ri);
+bool needGotSlot(MachONList * symbol);
+bool makeGot(ObjectCode * oc);
+void freeGot(ObjectCode * oc);
 #endif /* aarch64_HOST_ARCH */
 
 #if defined(ios_HOST_OS)
@@ -164,9 +164,9 @@ ocDeinit_MachO(ObjectCode * oc) {
         stgFree(oc->info->macho_symbols);
     }
 #if defined(aarch64_HOST_ARCH)
-    free_got(oc);
+    freeGot(oc);
     for(int i = 0; i < oc->n_sections; i++) {
-        free_stubs(&oc->sections[i]);
+        freeStubs(&oc->sections[i]);
     }
 #endif
     stgFree(oc->info);
@@ -348,22 +348,22 @@ resolveImports(
 /* aarch64 linker by moritz angermann <moritz@lichtzwerge.de> */
 
 int64_t
-sign_extend(uint64_t val, uint8_t bits) {
+signExtend(uint64_t val, uint8_t bits) {
     return (int64_t)(val << (64-bits)) >> (64-bits);
 }
 
 bool
-is_vector_op(uint32_t *p) {
+isVectorOp(uint32_t *p) {
     return (*p & 0x04800000) == 0x04800000;
 }
 
 bool
-is_load_store(uint32_t *p) {
+isLoadStore(uint32_t *p) {
     return (*p & 0x3B000000) == 0x39000000;
 }
 
 int64_t
-decode_addend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) {
+decodeAddend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) {
 
     /* the instruction. It is 32bit wide */
     uint32_t * p = (uint32_t*)((uint8_t*)section->start + ri->r_address);
@@ -374,10 +374,10 @@ decode_addend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) {
         case ARM64_RELOC_UNSIGNED:
         case ARM64_RELOC_SUBTRACTOR: {
             switch (ri->r_length) {
-                case 0: return sign_extend(*(uint8_t*)p,  8 * (1 << ri->r_length));
-                case 1: return sign_extend(*(uint16_t*)p, 8 * (1 << ri->r_length));
-                case 2: return sign_extend(*(uint32_t*)p, 8 * (1 << ri->r_length));
-                case 3: return sign_extend(*(uint64_t*)p, 8 * (1 << ri->r_length));
+                case 0: return signExtend(*(uint8_t*)p,  8 * (1 << ri->r_length));
+                case 1: return signExtend(*(uint16_t*)p, 8 * (1 << ri->r_length));
+                case 2: return signExtend(*(uint32_t*)p, 8 * (1 << ri->r_length));
+                case 3: return signExtend(*(uint64_t*)p, 8 * (1 << ri->r_length));
                 default:
                     barf("Unsupported r_length (%d) for SUBTRACTOR relocation",
                          ri->r_length);
@@ -388,7 +388,7 @@ decode_addend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) {
              * implicilty 0 (as the instructions must be aligned!) and sign
              * extend to 64 bits.
              */
-            return sign_extend( (*p & 0x03FFFFFF) << 2, 28 );
+            return signExtend( (*p & 0x03FFFFFF) << 2, 28 );
         case ARM64_RELOC_PAGE21:
         case ARM64_RELOC_GOT_LOAD_PAGE21:
             /* take the instruction bits masked with 0x6 (0110), and push them
@@ -400,7 +400,7 @@ decode_addend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) {
              *  ^^
              *  ''-- these are the low two bits.
              */
-            return sign_extend(   (*p & 0x60000000) >> 29
+            return signExtend(   (*p & 0x60000000) >> 29
                                | ((*p & 0x01FFFFE0) >> 3) << 12, 33);
         case ARM64_RELOC_PAGEOFF12:
         case ARM64_RELOC_GOT_LOAD_PAGEOFF12: {
@@ -410,9 +410,9 @@ decode_addend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) {
              */
             int64_t a = (*p & 0x003FFC00) >> 10;
             int shift = 0;
-            if (is_load_store(p)) {
+            if (isLoadStore(p)) {
                 shift = (*p >> 30) & 0x3;
-                if(0 == shift && is_vector_op(p)) {
+                if(0 == shift && isVectorOp(p)) {
                     shift = 4;
                 }
             }
@@ -423,7 +423,7 @@ decode_addend(ObjectCode * oc, Section * section, MachORelocationInfo * ri) {
 }
 
 inline bool
-fits_bits(size_t bits, int64_t value) {
+fitsBits(size_t bits, int64_t value) {
     if(bits == 64) return true;
     if(bits > 64) barf("fits_bits with %d bits and an 64bit integer!", bits);
     return  0 == (value >> bits)   // All bits off: 0
@@ -431,8 +431,8 @@ fits_bits(size_t bits, int64_t value) {
 }
 
 void
-encode_addend(ObjectCode * oc, Section * section,
-              MachORelocationInfo * ri, int64_t addend) {
+encodeAddend(ObjectCode * oc, Section * section,
+             MachORelocationInfo * ri, int64_t addend) {
     uint32_t * p = (uint32_t*)((uint8_t*)section->start + ri->r_address);
 
     checkProddableBlock(oc, (void*)p, 1 << ri->r_length);
@@ -440,7 +440,7 @@ encode_addend(ObjectCode * oc, Section * section,
     switch (ri->r_type) {
         case ARM64_RELOC_UNSIGNED:
         case ARM64_RELOC_SUBTRACTOR: {
-            if(!fits_bits(8 << ri->r_length, addend))
+            if(!fitsBits(8 << ri->r_length, addend))
                 barf("Relocation out of range for UNSIGNED/SUBTRACTOR");
             switch (ri->r_length) {
                 case 0: *(uint8_t*)p  = (uint8_t)addend; break;
@@ -458,7 +458,7 @@ encode_addend(ObjectCode * oc, Section * section,
              * do not need the last two bits of the value. If the value >> 2
              * still exceeds 26bits, we won't be able to reach it.
              */
-            if(!fits_bits(26, addend >> 2))
+            if(!fitsBits(26, addend >> 2))
                 barf("Relocation target for BRACH26 out of range.");
             *p = (*p & 0xFC000000) | ((uint32_t)(addend >> 2) & 0x03FFFFFF);
             return;
@@ -470,7 +470,7 @@ encode_addend(ObjectCode * oc, Section * section,
              * with the PAGEOFF12 relocation allows to address a relative range
              * of +-4GB.
              */
-            if(!fits_bits(21, addend >> 12))
+            if(!fitsBits(21, addend >> 12))
                 barf("Relocation target for PAGE21 out of range.");
             *p = (*p & 0x9F00001F) | (uint32_t)((addend << 17) & 0x60000000)
                                    | (uint32_t)((addend >> 9)  & 0x00FFFFE0);
@@ -481,13 +481,13 @@ encode_addend(ObjectCode * oc, Section * section,
             /* Store an offset into a page (4k). Depending on the instruction
              * the bits are stored at slightly different positions.
              */
-            if(!fits_bits(12, addend))
+            if(!fitsBits(12, addend))
                 barf("Relocation target for PAGEOFF12 out or range.");
 
             int shift = 0;
-            if(is_load_store(p)) {
+            if(isLoadStore(p)) {
                 shift = (*p >> 30) & 0x3;
-                if(0 == shift && is_vector_op(p)) {
+                if(0 == shift && isVectorOp(p)) {
                     shift = 4;
                 }
             }
@@ -500,7 +500,7 @@ encode_addend(ObjectCode * oc, Section * section,
 }
 
 bool
-is_got_load(struct relocation_info * ri) {
+isGotLoad(struct relocation_info * ri) {
     return ri->r_type == ARM64_RELOC_GOT_LOAD_PAGE21
     ||  ri->r_type == ARM64_RELOC_GOT_LOAD_PAGEOFF12;
 }
@@ -513,7 +513,7 @@ is_got_load(struct relocation_info * ri) {
  * for stubs.
  */
 bool
-find_stub(Section * section, void ** addr) {
+findStub(Section * section, void ** addr) {
 
     for(Stub * s = section->info->stubs; s != NULL; s = s->next) {
         if(s->target == *addr) {
@@ -525,9 +525,9 @@ find_stub(Section * section, void ** addr) {
 }
 
 bool
-make_stub(Section * section, void ** addr) {
+makeStub(Section * section, void ** addr) {
 
-    Stub * s = stgCallocBytes(1, sizeof(Stub), "make_stub(Stub)");
+    Stub * s = stgCallocBytes(1, sizeof(Stub), "makeStub(Stub)");
     s->target = *addr;
     s->addr = (uint8_t*)section->info->stub_offset
             + ((8+8)*section->info->nstubs) + 8;
@@ -553,7 +553,7 @@ make_stub(Section * section, void ** addr) {
     return EXIT_SUCCESS;
 }
 void
-free_stubs(Section * section) {
+freeStubs(Section * section) {
     if(section->info->nstubs == 0)
         return;
     Stub * last = section->info->stubs;
@@ -571,7 +571,7 @@ free_stubs(Section * section) {
  * given symbol
  */
 bool
-need_got_slot(MachONList * symbol) {
+needGotSlot(MachONList * symbol) {
     return (symbol->n_type & N_EXT)             /* is an external symbol      */
         && (N_UNDF == (symbol->n_type & N_TYPE) /* and is undefined           */
             || NO_SECT != symbol->n_sect);      /*     or is defined in a
@@ -579,11 +579,11 @@ need_got_slot(MachONList * symbol) {
 }
 
 bool
-make_got(ObjectCode * oc) {
+makeGot(ObjectCode * oc) {
     size_t got_slots = 0;
 
     for(size_t i=0; i < oc->info->n_macho_symbols; i++)
-        if(need_got_slot(oc->info->macho_symbols[i].nlist))
+        if(needGotSlot(oc->info->macho_symbols[i].nlist))
             got_slots += 1;
 
     if(got_slots > 0) {
@@ -599,7 +599,7 @@ make_got(ObjectCode * oc) {
         /* update got_addr */
         size_t slot = 0;
         for(size_t i=0; i < oc->info->n_macho_symbols; i++)
-            if(need_got_slot(oc->info->macho_symbols[i].nlist))
+            if(needGotSlot(oc->info->macho_symbols[i].nlist))
                 oc->info->macho_symbols[i].got_addr
                     = ((uint8_t*)oc->info->got_start)
                     + (slot++ * sizeof(void *));
@@ -608,14 +608,14 @@ make_got(ObjectCode * oc) {
 }
 
 void
-free_got(ObjectCode * oc) {
+freeGot(ObjectCode * oc) {
     munmap(oc->info->got_start, oc->info->got_size);
     oc->info->got_start = NULL;
     oc->info->got_size = 0;
 }
 
 static int
-relocateSection_aarch64(ObjectCode * oc, Section * section)
+relocateSectionAarch64(ObjectCode * oc, Section * section)
 {
     if(section->size == 0)
         return 1;
@@ -640,7 +640,7 @@ relocateSection_aarch64(ObjectCode * oc, Section * section)
         switch (ri->r_type) {
             case ARM64_RELOC_UNSIGNED: {
                 MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum];
-                int64_t addend = decode_addend(oc, section, ri);
+                int64_t addend = decodeAddend(oc, section, ri);
                 uint64_t value = 0;
                 if(symbol->nlist->n_type & N_EXT) {
                     /* external symbols should be able to be
@@ -655,7 +655,7 @@ relocateSection_aarch64(ObjectCode * oc, Section * section)
                 } else {
                     value = (uint64_t)symbol->addr;    // address of the symbol.
                 }
-                encode_addend(oc, section, ri, value + addend);
+                encodeAddend(oc, section, ri, value + addend);
                 break;
             }
             case ARM64_RELOC_SUBTRACTOR:
@@ -675,16 +675,16 @@ relocateSection_aarch64(ObjectCode * oc, Section * section)
                           == ARM64_RELOC_UNSIGNED))
                     barf("SUBTRACTOR relocation *must* be followed by UNSIGNED relocation.");
 
-                int64_t addend = decode_addend(oc, section, ri);
+                int64_t addend = decodeAddend(oc, section, ri);
                 int64_t value = (uint64_t)symbol->addr;
-                encode_addend(oc, section, ri, addend - value);
+                encodeAddend(oc, section, ri, addend - value);
                 break;
             }
             case ARM64_RELOC_BRANCH26: {
                 MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum];
 
                 // pre-existing addend
-                int64_t addend = decode_addend(oc, section, ri);
+                int64_t addend = decodeAddend(oc, section, ri);
                 // address of the branch (b/bl) instruction.
                 uint64_t pc = (uint64_t)section->start + ri->r_address;
                 uint64_t value = 0;
@@ -698,25 +698,25 @@ relocateSection_aarch64(ObjectCode * oc, Section * section)
                 if((value - pc + addend) >> (2 + 26)) {
                     /* we need a stub */
                     /* check if we already have that stub */
-                    if(find_stub(section, (void**)&value)) {
+                    if(findStub(section, (void**)&value)) {
                         /* did not find it. Crete a new stub. */
-                        if(make_stub(section, (void**)&value)) {
+                        if(makeStub(section, (void**)&value)) {
                             barf("could not find or make stub");
                         }
                     }
                 }
-                encode_addend(oc, section, ri, value - pc + addend);
+                encodeAddend(oc, section, ri, value - pc + addend);
                 break;
             }
             case ARM64_RELOC_PAGE21:
             case ARM64_RELOC_GOT_LOAD_PAGE21: {
                 MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum];
-                int64_t addend = decode_addend(oc, section, ri);
+                int64_t addend = decodeAddend(oc, section, ri);
                 if(!(explicit_addend == 0 || addend == 0))
                     barf("explicit_addend and addend can't be set at the same time.");
                 uint64_t pc = (uint64_t)section->start + ri->r_address;
-                uint64_t value = (uint64_t)(is_got_load(ri) ? symbol->got_addr : symbol->addr);
-                encode_addend(oc, section, ri, ((value + addend + explicit_addend) & (-4096)) - (pc & (-4096)));
+                uint64_t value = (uint64_t)(isGotLoad(ri) ? symbol->got_addr : symbol->addr);
+                encodeAddend(oc, section, ri, ((value + addend + explicit_addend) & (-4096)) - (pc & (-4096)));
 
                 // reset, just in case.
                 explicit_addend = 0;
@@ -725,18 +725,18 @@ relocateSection_aarch64(ObjectCode * oc, Section * section)
             case ARM64_RELOC_PAGEOFF12:
             case ARM64_RELOC_GOT_LOAD_PAGEOFF12: {
                 MachOSymbol* symbol = &oc->info->macho_symbols[ri->r_symbolnum];
-                int64_t addend = decode_addend(oc, section, ri);
+                int64_t addend = decodeAddend(oc, section, ri);
                 if(!(explicit_addend == 0 || addend == 0))
                     barf("explicit_addend and addend can't be set at the same time.");
-                uint64_t value = (uint64_t)(is_got_load(ri) ? symbol->got_addr : symbol->addr);
-                encode_addend(oc, section, ri, 0xFFF & (value + addend + explicit_addend));
+                uint64_t value = (uint64_t)(isGotLoad(ri) ? symbol->got_addr : symbol->addr);
+                encodeAddend(oc, section, ri, 0xFFF & (value + addend + explicit_addend));
 
                 // reset, just in case.
                 explicit_addend = 0;
                 break;
             }
             case ARM64_RELOC_ADDEND: {
-                explicit_addend = sign_extend(ri->r_symbolnum, 24);
+                explicit_addend = signExtend(ri->r_symbolnum, 24);
                 if(!(i+1 < nreloc)
                    || !(section->info->relocation_info[i+1].r_type == ARM64_RELOC_PAGE21
                         || section->info->relocation_info[i+1].r_type == ARM64_RELOC_PAGEOFF12))
@@ -1694,7 +1694,7 @@ ocGetNames_MachO(ObjectCode* oc)
      * anywhere in the addressable space. This obviously makes
      * sense.  However it took me a while to figure this out.
      */
-    make_got(oc);
+    makeGot(oc);
 
     /* at this point, macho_symbols, should know the addresses for
      * all symbols defined by this object code.
@@ -1771,7 +1771,7 @@ ocResolve_MachO(ObjectCode* oc)
     /* fill the GOT table */
     for(size_t i = 0; i < oc->info->n_macho_symbols; i++) {
         MachOSymbol * symbol = &oc->info->macho_symbols[i];
-        if(need_got_slot(symbol->nlist)) {
+        if(needGotSlot(symbol->nlist)) {
             if(N_UNDF == (symbol->nlist->n_type & N_TYPE)) {
                 /* an undefined symbol. So we need to ensure we
                  * have the address.
@@ -1803,7 +1803,7 @@ ocResolve_MachO(ObjectCode* oc)
         IF_DEBUG(linker, debugBelch("ocResolve_MachO: relocating section %d\n", i));
 
 #if defined aarch64_HOST_ARCH
-        if (!relocateSection_aarch64(oc, &oc->sections[i]))
+        if (!relocateSectionAarch64(oc, &oc->sections[i]))
             return 0;
 #else
         if (!relocateSection(oc,oc->image,oc->info->symCmd,oc->info->nlist,
index 20591fe..486fa4a 100644 (file)
@@ -138,98 +138,6 @@ SymbolExtra* makeSymbolExtra( ObjectCode const* oc,
 
     return extra;
 }
-#endif
-
-#if defined(arm_HOST_ARCH)
-/*
-  Note [The ARM/Thumb Story]
-  ~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-  Support for the ARM architecture is complicated by the fact that ARM has not
-  one but several instruction encodings. The two relevant ones here are the original
-  ARM encoding and Thumb, a more dense variant of ARM supporting only a subset
-  of the instruction set.
-
-  How the CPU decodes a particular instruction is determined by a mode bit. This
-  mode bit is set on jump instructions, the value being determined by the low
-  bit of the target address: An odd address means the target is a procedure
-  encoded in the Thumb encoding whereas an even address means it's a traditional
-  ARM procedure (the actual address jumped to is even regardless of the encoding bit).
-
-  Interoperation between Thumb- and ARM-encoded object code (known as "interworking")
-  is tricky. If the linker needs to link a call by an ARM object into Thumb code
-  (or vice-versa) it will produce a jump island using makeArmSymbolExtra. This,
-  however, is incompatible with GHC's tables-next-to-code since pointers
-  fixed-up in this way will point to a bit of generated code, not a info
-  table/Haskell closure like TNTC expects. For this reason, it is critical that
-  GHC emit exclusively ARM or Thumb objects for all Haskell code.
-
-  We still do, however, need to worry about calls to foreign code, hence the
-  need for makeArmSymbolExtra.
-*/
-
-/* Produce a jump island for ARM/Thumb interworking */
-SymbolExtra* makeArmSymbolExtra( ObjectCode const* oc,
-                                 unsigned long symbolNumber,
-                                 unsigned long target,
-                                 bool fromThumb,
-                                 bool toThumb )
-{
-  ASSERT( symbolNumber >= oc->first_symbol_extra
-        && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
-
-  SymbolExtra *extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
-
-  // Make sure instruction mode bit is set properly
-  if (toThumb)
-    target |= 1;
-  else
-    target &= ~1;
-
-  if (!fromThumb) {
-    // In ARM encoding:
-    //   movw r12, #0
-    //   movt r12, #0
-    //   bx r12
-    uint32_t code[] = { 0xe300c000, 0xe340c000, 0xe12fff1c };
-
-    // Patch lower half-word into movw
-    code[0] |= ((target>>12) & 0xf) << 16;
-    code[0] |= target & 0xfff;
-    // Patch upper half-word into movt
-    target >>= 16;
-    code[1] |= ((target>>12) & 0xf) << 16;
-    code[1] |= target & 0xfff;
-
-    memcpy(extra->jumpIsland, code, 12);
-
-  } else {
-    // In Thumb encoding:
-    //   movw r12, #0
-    //   movt r12, #0
-    //   bx r12
-    uint16_t code[] = { 0xf240,  0x0c00,
-                        0xf2c0,  0x0c00,
-                        0x4760 };
-
-    // Patch lower half-word into movw
-    code[0] |= (target>>12) & 0xf;
-    code[0] |= ((target>>11) & 0x1) << 10;
-    code[1] |= ((target>>8) & 0x7) << 12;
-    code[1] |= target & 0xff;
-    // Patch upper half-word into movt
-    target >>= 16;
-    code[2] |= (target>>12) & 0xf;
-    code[2] |= ((target>>11) & 0x1) << 10;
-    code[3] |= ((target>>8) & 0x7) << 12;
-    code[3] |= target & 0xff;
-
-    memcpy(extra->jumpIsland, code, 10);
-  }
-
-  return extra;
-}
-#endif // arm_HOST_ARCH
-
-#endif
+#endif /* !arm_HOST_ARCH */
+#endif /* !x86_64_HOST_ARCH) || !mingw32_HOST_OS */
 #endif // NEED_SYMBOL_EXTRAS
diff --git a/rts/linker/elf_got.c b/rts/linker/elf_got.c
new file mode 100644 (file)
index 0000000..41a7bd1
--- /dev/null
@@ -0,0 +1,131 @@
+#include "elf_got.h"
+#if defined(OBJFORMAT_ELF)
+/*
+ * Check if we need a global offset table slot for a
+ * given symbol
+ */
+bool
+needGotSlot(Elf_Sym * symbol) {
+    /* using global here should give an upper bound */
+    /* I don't believe we need to relocate STB_LOCAL
+     * symbols via the GOT; however I'm unsure about
+     * STB_WEAK.
+     *
+     * Any more restrictive filter here would result
+     * in a smaller GOT, which is preferrable.
+     */
+    return ELF_ST_BIND(symbol->st_info) == STB_GLOBAL
+        || ELF_ST_BIND(symbol->st_info) == STB_WEAK;
+}
+
+bool
+makeGot(ObjectCode * oc) {
+    size_t got_slots = 0;
+
+    /* we need to find all symbol tables (elf can have multiple)
+     * and need to iterate over all symbols, to check how many
+     * got slots we need at most
+     */
+    ASSERT( oc->info != NULL );
+    ASSERT( oc->info->sectionHeader != NULL );
+    for(int i=0; i < oc->n_sections; i++) {
+        if(SHT_SYMTAB == oc->info->sectionHeader[i].sh_type) {
+            Elf_Sym *symTab =
+                (Elf_Sym*)((uint8_t*)oc->info->elfHeader
+                                   + oc->info->sectionHeader[i].sh_offset);
+            size_t n_symbols = oc->info->sectionHeader[i].sh_size
+                               / sizeof(Elf_Sym);
+            for(size_t j=0; j < n_symbols; j++) {
+                if(needGotSlot(&symTab[j])) {
+                    got_slots += 1;
+                }
+            }
+        }
+    }
+    if(got_slots > 0) {
+        oc->info->got_size = got_slots * sizeof(void *);
+         void * mem = mmap(NULL, oc->info->got_size,
+                           PROT_READ | PROT_WRITE,
+                           MAP_ANON | MAP_PRIVATE,
+                           -1, 0);
+        if (mem == MAP_FAILED) {
+            errorBelch("MAP_FAILED. errno=%d", errno);
+            return EXIT_FAILURE;
+        }
+        oc->info->got_start = (void*)mem;
+        /* update got_addr */
+        size_t slot = 0;
+        for(ElfSymbolTable *symTab = oc->info->symbolTables;
+            symTab != NULL; symTab = symTab->next)
+            for(size_t i=0; i < symTab->n_symbols; i++)
+                if(needGotSlot(symTab->symbols[i].elf_sym))
+                    symTab->symbols[i].got_addr
+                            = (uint8_t *)oc->info->got_start
+                              + (slot++ * sizeof(void*));
+    }
+    return EXIT_SUCCESS;
+}
+
+bool
+fillGot(ObjectCode * oc) {
+    /* fill the GOT table */
+    for(ElfSymbolTable *symTab = oc->info->symbolTables;
+        symTab != NULL; symTab = symTab->next) {
+        for(size_t i=0; i < symTab->n_symbols; i++) {
+            ElfSymbol * symbol = &symTab->symbols[i];
+            if(needGotSlot(symbol->elf_sym)) {
+                /* no type are undefined symbols */
+                if(   STT_NOTYPE == ELF_ST_TYPE(symbol->elf_sym->st_info)
+                   || STB_WEAK   == ELF_ST_BIND(symbol->elf_sym->st_info)) {
+                    if(0x0 == symbol->addr) {
+                        symbol->addr = lookupSymbol_(symbol->name);
+                        if(0x0 == symbol->addr) {
+                            errorBelch("Failed to lookup symbol: %s\n",
+                                       symbol->name);
+                            return EXIT_FAILURE;
+                        }
+                    } else {
+                        // we already have the address.
+                    }
+                } /* else it was defined somewhere in the same object, and
+                  * we should have the address already.
+                  */
+                if(0x0 == symbol->addr) {
+                    errorBelch(
+                        "Something went wrong! Symbol %s has null address.\n",
+                            symbol->name);
+                    return EXIT_FAILURE;
+                }
+                if(0x0 == symbol->got_addr) {
+                    errorBelch("Not good either!");
+                    return EXIT_FAILURE;
+                }
+                *(void**)symbol->got_addr = symbol->addr;
+            }
+        }
+    }
+    return EXIT_SUCCESS;
+}
+bool
+verifyGot(ObjectCode * oc) {
+    for(ElfSymbolTable *symTab = oc->info->symbolTables;
+        symTab != NULL; symTab = symTab->next) {
+        for(size_t i=0; i < symTab->n_symbols; i++) {
+            ElfSymbol * symbol = &symTab->symbols[i];
+            if(symbol->got_addr) {
+                ASSERT((void*)(*(void**)symbol->got_addr)
+                       == (void*)symbol->addr);
+            }
+            ASSERT(0 == ((uint64_t)symbol->addr & 0xffff000000000000));
+        }
+    }
+    return EXIT_SUCCESS;
+}
+
+void
+freeGot(ObjectCode * oc) {
+//    munmap(oc->info->got_start, oc->info->got_size);
+    oc->info->got_start = 0x0;
+    oc->info->got_size = 0;
+}
+#endif
diff --git a/rts/linker/elf_got.h b/rts/linker/elf_got.h
new file mode 100644 (file)
index 0000000..4653030
--- /dev/null
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "LinkerInternals.h"
+#include "ghcplatform.h"
+
+#include <stdbool.h>
+#include <linker/ElfTypes.h>
+
+#if defined(OBJFORMAT_ELF)
+bool needGotSlot(Elf_Sym * symbol);
+bool makeGot(ObjectCode * oc);
+bool fillGot(ObjectCode * oc);
+bool verifyGot(ObjectCode * oc);
+void freeGot(ObjectCode * oc);
+#endif
diff --git a/rts/linker/elf_plt.c b/rts/linker/elf_plt.c
new file mode 100644 (file)
index 0000000..69a9f07
--- /dev/null
@@ -0,0 +1,92 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "elf_plt.h"
+
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+#if defined(OBJFORMAT_ELF)
+
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+
+#define _makeStub       ADD_SUFFIX(makeStub)
+#define needStubForRel  ADD_SUFFIX(needStubForRel)
+#define needStubForRela ADD_SUFFIX(needStubForRela)
+
+unsigned
+numberOfStubsForSection( ObjectCode *oc, unsigned sectionIndex) {
+    unsigned n = 0;
+    for(ElfRelocationTable *t = oc->info->relTable; t != NULL; t = t->next)
+        if(t->targetSectionIndex == sectionIndex)
+            for(size_t i=0; i < t->n_relocations; i++)
+                if(needStubForRel(&t->relocations[i]))
+                    n += 1;
+
+    for(ElfRelocationATable *t = oc->info->relaTable; t != NULL; t = t->next)
+        if(t->targetSectionIndex == sectionIndex)
+            for(size_t i=0; i < t->n_relocations; i++)
+                if(needStubForRela(&t->relocations[i]))
+                    n += 1;
+    return n;
+}
+
+bool
+findStub(Section * section,
+          void* * addr,
+          uint8_t flags) {
+    for(Stub * s = section->info->stubs; s != NULL; s = s->next) {
+        if(   s->target == *addr
+           && s->flags  == flags) {
+            *addr = s->addr;
+            return EXIT_SUCCESS;
+        }
+    }
+    return EXIT_FAILURE;
+}
+
+bool
+makeStub(Section * section,
+          void* * addr,
+          uint8_t flags) {
+
+    Stub * s = calloc(1, sizeof(Stub));
+    ASSERT(s != NULL);
+    s->target = *addr;
+    s->flags  = flags;
+    s->next = NULL;
+    s->addr = (uint8_t *)section->info->stub_offset + 8
+            + STUB_SIZE * section->info->nstubs;
+
+    if((*_makeStub)(s))
+        return EXIT_FAILURE;
+
+    if(section->info->stubs == NULL) {
+        ASSERT(section->info->nstubs == 0);
+        /* no stubs yet, let's just create this one */
+        section->info->stubs = s;
+    } else {
+        Stub * tail = section->info->stubs;
+        while(tail->next != NULL) tail = tail->next;
+        tail->next = s;
+    }
+    section->info->nstubs += 1;
+    *addr = s->addr;
+    return EXIT_SUCCESS;
+}
+
+void
+freeStubs(Section * section) {
+    if(section->info->nstubs == 0)
+        return;
+    Stub * last = section->info->stubs;
+    while(last->next != NULL) {
+        Stub * t = last;
+        last = last->next;
+        free(t);
+    }
+    section->info->stubs = NULL;
+    section->info->nstubs = 0;
+}
+
+#endif // OBJECTFORMAT_ELF
+#endif // arm/aarch64_HOST_ARCH
diff --git a/rts/linker/elf_plt.h b/rts/linker/elf_plt.h
new file mode 100644 (file)
index 0000000..1995d51
--- /dev/null
@@ -0,0 +1,44 @@
+#pragma once
+
+#include "ghcplatform.h"
+#include <LinkerInternals.h>
+
+#if defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+
+#include "elf_plt_arm.h"
+#include "elf_plt_aarch64.h"
+
+#if defined(OBJFORMAT_ELF)
+
+#if defined(__x86_64__)
+#define __suffix__ X86_64
+#elif defined(__aarch64__)
+#define __suffix__ Aarch64
+#elif defined(__mips64__)
+#define __suffix__ Mips64
+#elif defined(__i386__)
+#define __suffix__ X86
+#elif defined(__arm__)
+#define __suffix__ Arm
+#elif defined(__mips__)
+#define __suffix__ Mips
+#else
+#error "unknown architecture"
+#endif
+
+#define PASTE(x,y) x ## y
+#define EVAL(x,y) PASTE(x,y)
+#define ADD_SUFFIX(x) EVAL(PASTE(x,),__suffix__)
+
+unsigned  numberOfStubsForSection( ObjectCode *oc, unsigned sectionIndex);
+
+#define STUB_SIZE          ADD_SUFFIX(stubSize)
+
+bool findStub(Section * section, void* * addr, uint8_t flags);
+bool makeStub(Section * section, void* * addr, uint8_t flags);
+
+void freeStubs(Section * section);
+
+#endif // OBJECTFORMAT_ELF
+
+#endif // arm/aarch64_HOST_ARCH
diff --git a/rts/linker/elf_plt_aarch64.c b/rts/linker/elf_plt_aarch64.c
new file mode 100644 (file)
index 0000000..dc528bc
--- /dev/null
@@ -0,0 +1,120 @@
+#include <stdlib.h>
+#include "elf_compat.h"
+#include "ghcplatform.h"
+
+#if defined(aarch64_HOST_ARCH)
+
+#include "elf_plt_aarch64.h"
+
+#if defined(OBJFORMAT_ELF)
+
+/* five 4 byte instructions */
+const size_t instSizeAarch64 = 4;
+const size_t stubSizeAarch64 = 5 * 4;
+
+/*
+ * Compute the number of stub (PLT entries) for a given section by iterating
+ * over the relocations and relocations with explicit addend and counting those
+ * relocations that might require a PLT relocation.
+ *
+ * This will be an upper bound, and we might not use all stubs.  However by
+ * calculating the number of potential stubs beforehand, we can allocate enough
+ * space adjacent to the section, such that the PLT is rather close to the
+ * section, and the risk of the stubs being out of reach for the instruction to
+ * be relocated is minimal.
+ */
+bool needStubForRelAarch64(Elf_Rel * rel) {
+    switch(ELF64_R_TYPE(rel->r_info)) {
+        case COMPAT_R_AARCH64_CALL26:
+        case COMPAT_R_AARCH64_JUMP26:
+            return true;
+        default:
+            return false;
+    }
+}
+bool needStubForRelaAarch64(Elf_Rela * rela) {
+    switch(ELF64_R_TYPE(rela->r_info)) {
+        case COMPAT_R_AARCH64_CALL26:
+        case COMPAT_R_AARCH64_JUMP26:
+            return true;
+        default:
+            return false;
+    }
+}
+
+
+bool
+makeStubAarch64(Stub * s) {
+    // We (the linker) may corrupt registers x16 (IP0) and x17 (IP1) [AAPCS64]
+    // and the condition flags, according to the "ELF for the ARM64
+    // Architecture".
+    //
+    // [Special purpose regs]
+    // X16 and X17 are IP0 and IP1, intra-procedure-call temporary registers.
+    // These can be used by call veneers and similar code, or as temporary
+    // registers for intermediate values between subroutine calls. They are
+    // corruptible by a function. Veneers are small pieces of code which are
+    // automatically inserted by the linker, for example when the branch target
+    // is out of range of the branch instruction.
+    // (Sect 9.9.1 of ARM Cortex-A Series Programmer's Guide for ARMv8-A, V1.0)
+
+    // Move wide
+    // mov <Wd>, #<imm16> (sf == 0)
+    // mov <Xd>, #<imm16> (sf == 1) looks like:
+    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+    // sf  1  0  1  0  0  1  0  1 [hw ] [   imm16 ...
+    //
+    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+    // ...        imm16               ] [     Rd     ]
+    // hw is the half word shift.
+
+    // Move keep
+    // movk <Wd>, #<imm16> (sf == 0)
+    // movk <Xd>, #<imm16> (sf == 1) looks like:
+    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+    // sf  1  1  1  0  0  1  0  1 [hw ] [   imm16 ...
+    //
+    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+    // ...        imm16               ] [     Rd     ]
+    // hw is the half word shift.
+
+    // br <Xn> (Encoding A1) looks like:
+    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+    //  1  1  0  1  0  1  1  0  0  0  0  1  1  1  1  1
+    //
+    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+    //  0  0  0  0  0  0 [    Rd      ]  0  0  0  0  0
+    //
+    // We'll use 0b1110 for the condition.
+
+
+    uint32_t mov__hw0_x16 = 0xd2800000 | 16;
+    uint32_t movk_hw0_x16 = mov__hw0_x16 | (1 << 29);
+
+    uint32_t mov__hw3_x16 = mov__hw0_x16 | (3 << 21);
+    uint32_t movk_hw2_x16 = movk_hw0_x16 | (2 << 21);
+    uint32_t movk_hw1_x16 = movk_hw0_x16 | (1 << 21);
+
+
+    uint32_t br_x16 = 0xd61f0000 | 16 << 5;
+
+    uint32_t *P = (uint32_t*)s->addr;
+
+    /* target address */
+    uint64_t addr = (uint64_t)s->target;
+    uint16_t  addr_hw0 = (uint16_t)(addr >>  0);
+    uint16_t  addr_hw1 = (uint16_t)(addr >> 16);
+    uint16_t  addr_hw2 = (uint16_t)(addr >> 32);
+    uint16_t  addr_hw3 = (uint16_t)(addr >> 48);
+
+    P[0] = mov__hw3_x16 | ((uint32_t)addr_hw3 << 5);
+    P[1] = movk_hw2_x16 | ((uint32_t)addr_hw2 << 5);
+    P[2] = movk_hw1_x16 | ((uint32_t)addr_hw1 << 5);
+    P[3] = movk_hw0_x16 | ((uint32_t)addr_hw0 << 5);
+    P[4] = br_x16;
+
+    return EXIT_SUCCESS;
+}
+#endif // OBJECTFORMAT_ELF
+
+#endif // aarch64_HOST_ARCH
diff --git a/rts/linker/elf_plt_aarch64.h b/rts/linker/elf_plt_aarch64.h
new file mode 100644 (file)
index 0000000..15f2f76
--- /dev/null
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "LinkerInternals.h"
+
+#if defined(OBJFORMAT_ELF)
+
+extern const size_t stubSizeAarch64;
+bool needStubForRelAarch64(Elf_Rel * rel);
+bool needStubForRelaAarch64(Elf_Rela * rel);
+bool makeStubAarch64(Stub * s);
+
+#endif
diff --git a/rts/linker/elf_plt_arm.c b/rts/linker/elf_plt_arm.c
new file mode 100644 (file)
index 0000000..4ef50c6
--- /dev/null
@@ -0,0 +1,183 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "elf_compat.h"
+#include "ghcplatform.h"
+#if defined(arm_HOST_ARCH)
+
+#include "Elf.h"
+#include "elf_plt.h"
+
+#if defined(OBJFORMAT_ELF)
+
+/* three 4 byte instructions */
+const size_t stubSizeArm = 12;
+
+/*
+ * Compute the number of stub (PLT entries) for a given section by iterating
+ * over the relocations and relocations with explicit addend and counting those
+ * relocations that might require a PLT relocation.
+ *
+ * This will be an upper bound, and we might not use all stubs.  However by
+ * calculating the number of potential stubs beforehand, we can allocate enough
+ * space adjacent to the section, such that the PLT is rather close to the
+ * section, and the risk of the stubs being out of reach for the instruction to
+ * be relocated is minimal.
+ */
+bool needStubForRelArm(Elf_Rel * rel) {
+    switch(ELF32_R_TYPE(rel->r_info)) {
+        case COMPAT_R_ARM_PC24:
+        case COMPAT_R_ARM_CALL:
+        case COMPAT_R_ARM_JUMP24:
+        case COMPAT_R_ARM_THM_CALL:
+        case COMPAT_R_ARM_THM_JUMP24:
+        case COMPAT_R_ARM_THM_JUMP19:
+            return true;
+        default:
+            return false;
+    }
+}
+bool needStubForRelaArm(Elf_Rela * rela) {
+    switch(ELF32_R_TYPE(rela->r_info)) {
+        case COMPAT_R_ARM_PC24:
+        case COMPAT_R_ARM_CALL:
+        case COMPAT_R_ARM_JUMP24:
+        case COMPAT_R_ARM_THM_CALL:
+        case COMPAT_R_ARM_THM_JUMP24:
+        case COMPAT_R_ARM_THM_JUMP19:
+            return true;
+        default:
+            return false;
+    }
+}
+
+bool makeStubArmArm(Stub * s);
+bool makeStubArmThm(Stub * s);
+/*
+  Note [The ARM/Thumb Story]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+  Support for the ARM architecture is complicated by the fact that ARM has not
+  one but several instruction encodings. The two relevant ones here are the
+  original ARM encoding and Thumb, a more dense variant of ARM supporting only
+  a subset of the instruction set.
+
+  How the CPU decodes a particular instruction is determined by a mode bit. This
+  mode bit is set on jump instructions, the value being determined by the low
+  bit of the target address: An odd address means the target is a procedure
+  encoded in the Thumb encoding whereas an even address means it's a traditional
+  ARM procedure (the actual address jumped to is even regardless of the encoding
+  bit).
+
+  Interoperation between Thumb- and ARM-encoded object code (known as
+  "interworking") is tricky. If the linker needs to link a call by an ARM object
+  into Thumb code (or vice-versa) it will produce a jump island using stubs.
+  This, however, is incompatible with GHC's tables-next-to-code since pointers
+  fixed-up in this way will point to a bit of generated code, not a info
+  table/Haskell closure like TNTC expects. For this reason, it is critical that
+  GHC emit exclusively ARM or Thumb objects for all Haskell code.
+
+  We still do, however, need to worry about calls to foreign code, hence the
+  need for makeArmSymbolExtra.
+*/
+
+bool
+makeStubArmArm(Stub * s) {
+
+    // We (the linker) may corrupt r12 (ip) according to the "ELF for the ARM
+    // Architecture" reference.
+
+    // movw<c> <Rd>, #<imm16> (Encoding A2) looks like:
+    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+    // [   cond  ]  0  0  1  1  0  0  0  0 [   imm4  ]
+    //
+    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+    // [    Rd   ] [              imm12              ]
+    //
+    // movt<c> <Rd>, #<imm16> (Encoding A1) looks like:
+    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+    // [   cond  ]  0  0  1  1  0  1  0  0 [   imm4  ]
+    //
+    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+    // [    Rd   ] [              imm12              ]
+    //
+    // bx<c> <Rd> (Encoding A1) looks like:
+    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+    // [   cond  ]  0  0  0  1  0  0  1  0  1  1  1  1
+    //
+    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+    //  1  1  1  1  1  1  1  1  0  0  0  1  [    Rd   ]
+    //
+    // The difference for the movw and movt is only bit 22.
+    // We'll use 0b1110 for the condition.
+
+    uint32_t movw_r12 = 0xe300c000;
+    uint32_t movt_r12 = 0xe340c000;
+    uint32_t bx_r12   = 0xe12fff1c;
+
+    *((uint32_t*)s->addr+0) = movw_r12
+                              | (((uint32_t )s->target & 0xf000) << 4)
+                              |  ((uint32_t )s->target & 0x0fff);
+    *((uint32_t*)s->addr+1) = movt_r12
+                              | ((((uint32_t )s->target >> 16) & 0xf000) << 4)
+                              |  (((uint32_t )s->target >> 16) & 0x0fff);
+    *((uint32_t*)s->addr+2) = bx_r12;
+
+    return EXIT_SUCCESS;
+}
+
+bool
+makeStubArmThm(Stub * s) {
+
+    // movw<c> <Rd>, #<imm16> (Encoding T3) looks like:
+    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+    //  1  1  1  1  0  i  1  0  0  1  0  0 [  imm4   ]
+    //
+    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+    //  0 [ imm3 ] [   Rd    ] [        imm8         ]
+    //
+    // imm32 = zero_extend(imm4:i:imm3:imm8,32)
+    //
+    // movt<c> <Rd>, #<imm16> (Encoding T1) looks like:
+    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+    //  1  1  1  1  0  i  1  0  1  1  0  0 [  imm4   ]
+    //
+    // imm16 = imm4:i:imm3:imm8
+    //
+    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+    //  0 [ imm3 ] [    Rd   ] [        imm8         ]
+    //
+    // bx<c> <Rd> (Encoding T1) looks like:
+    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+    //  0  1  0  0  0  1  1  1  0 [    Rd   ]  0  0  0
+
+    uint32_t movw_r12 = 0xf2400c00;
+    uint32_t movt_r12 = 0xf2c00c00;
+    uint32_t bx_r12   = 0x47600000;
+
+    *((uint32_t*)s->addr+0) = movw_r12
+                              | (((uint32_t )s->target & 0xf000) << 4)
+                              | (((uint32_t )s->target & 0x0800) << 16)
+                              | (((uint32_t )s->target & 0x0700) << 4)
+                              |  ((uint32_t )s->target & 0x00ff);
+    *((uint32_t*)s->addr+1) = movt_r12
+                              | ((((uint32_t )s->target >> 16) & 0xf000) << 4)
+                              | ((((uint32_t )s->target >> 16) & 0x0800) << 16)
+                              | ((((uint32_t )s->target >> 16) & 0x0700) << 4)
+                              |  (((uint32_t )s->target >> 16) & 0x00ff);
+    *((uint32_t*)s->addr+2) = bx_r12;
+
+    return EXIT_SUCCESS;
+}
+
+bool
+makeStubArm(Stub * s) {
+    if((s->flags & 1) == 0)
+        return makeStubArmArm(s);
+    else
+        return makeStubArmThm(s);
+}
+
+#endif // OBJECTFORMAT_ELF
+
+#endif // arm_HOST_ARCH
diff --git a/rts/linker/elf_plt_arm.h b/rts/linker/elf_plt_arm.h
new file mode 100644 (file)
index 0000000..a495cef
--- /dev/null
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "LinkerInternals.h"
+
+#if defined(OBJFORMAT_ELF)
+
+extern const size_t stubSizeArm;
+bool needStubForRelArm(Elf_Rel * rel);
+bool needStubForRelaArm(Elf_Rela * rel);
+bool makeStubArm(Stub * s);
+
+#endif
diff --git a/rts/linker/elf_reloc.c b/rts/linker/elf_reloc.c
new file mode 100644 (file)
index 0000000..954512d
--- /dev/null
@@ -0,0 +1,12 @@
+#include "elf_reloc.h"
+#include "elf_plt.h"
+
+#if defined(OBJFORMAT_ELF)
+/* we currently only use this abstraction for elf/arm64 */
+#if defined(aarch64_HOST_ARCH)
+bool
+relocateObjectCode(ObjectCode * oc) {
+    return ADD_SUFFIX(relocateObjectCode)(oc);
+}
+#endif
+#endif
diff --git a/rts/linker/elf_reloc.h b/rts/linker/elf_reloc.h
new file mode 100644 (file)
index 0000000..3b52bc7
--- /dev/null
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "LinkerInternals.h"
+
+#if defined(OBJFORMAT_ELF)
+
+#include "elf_reloc_aarch64.h"
+
+bool
+relocateObjectCode(ObjectCode * oc);
+
+
+#endif /* OBJETFORMAT_ELF */
diff --git a/rts/linker/elf_reloc_aarch64.c b/rts/linker/elf_reloc_aarch64.c
new file mode 100644 (file)
index 0000000..1d8f9e8
--- /dev/null
@@ -0,0 +1,330 @@
+#include <stdlib.h>
+#include <assert.h>
+#include "elf_compat.h"
+#include "elf_reloc_aarch64.h"
+#include "util.h"
+#include "elf_util.h"
+#include "elf_plt.h"
+
+#if defined(aarch64_HOST_ARCH)
+
+#if defined(OBJFORMAT_ELF)
+
+#define Page(x) ((x) & ~0xFFF)
+
+typedef uint64_t addr_t;
+
+bool isBranch(addr_t p);
+bool isBranchLink(addr_t p);
+bool isAdrp(addr_t p);
+bool isLoadStore(addr_t p);
+bool isAddSub(addr_t p);
+bool isVectorOp(addr_t p);
+int64_t decodeAddendAarch64(Section * section, Elf_Rel * rel);
+bool encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend);
+
+bool isBranch(addr_t p) {
+    return (*(addr_t*)p & 0xFC000000) == 0x14000000;
+}
+
+bool isBranchLink(addr_t p) {
+    return (*(addr_t*)p & 0xFC000000) == 0x94000000;
+}
+
+bool isAdrp(addr_t p) {
+    return (*(addr_t*)p & 0x9F000000) == 0x90000000;
+}
+
+bool isLoadStore(addr_t p) {
+    return (*(addr_t*)p & 0x3B000000) == 0x39000000;
+}
+bool isAddSub(addr_t p) {
+    return (*(addr_t*)p & 0x11C00000) == 0x11000000;
+}
+bool isVectorOp(addr_t p) {
+    return (*(addr_t*)p & 0x04800000) == 0x04800000;
+}
+
+/* instructions are 32bit */
+typedef uint32_t inst_t;
+
+int64_t
+decodeAddendAarch64(Section * section __attribute__((unused)),
+                    Elf_Rel * rel __attribute__((unused)))
+{
+    abort(/* we don't support Rel locations yet. */);
+}
+
+bool
+encodeAddendAarch64(Section * section, Elf_Rel * rel, int64_t addend) {
+    /* instructions are 32bit! */
+    addr_t P = (addr_t)((uint8_t*)section->start + rel->r_offset);
+    int exp_shift = -1;
+    switch(ELF64_R_TYPE(rel->r_info)) {
+        /* static misc relocations */
+        /* static data relocations */
+        case COMPAT_R_AARCH64_ABS64:
+        case COMPAT_R_AARCH64_PREL64:
+            *(uint64_t*)P = (uint64_t)addend;
+            break;
+        case COMPAT_R_AARCH64_ABS32:
+            assert(isInt64(32, addend));
+        case COMPAT_R_AARCH64_PREL32:
+            assert(isInt64(32, addend));
+            *(uint32_t*)P = (uint32_t)addend;
+            break;
+        case COMPAT_R_AARCH64_ABS16:
+            assert(isInt64(16, addend));
+        case COMPAT_R_AARCH64_PREL16:
+            assert(isInt64(16, addend));
+            *(uint16_t*)P = (uint16_t)addend;
+            break;
+        /* static aarch64 relocations */
+        /* - pc relative relocations */
+        case COMPAT_R_AARCH64_ADR_PREL_PG_HI21: {
+            // adrp <Xd>, <label> looks like:
+            // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+            //  1 [ lo]  1  0  0  0 [            hi        ...
+            //
+            // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+            // ...              hi            ] [     Rd     ]
+            //
+            // imm64 = SignExtend(hi:lo:0x000,64)
+            assert(isInt64(32, addend));
+            assert((addend & 0xfff) == 0); /* page relative */
+
+            *(inst_t *)P = (*(inst_t *)P & 0x9f00001f)
+                           | (inst_t) (((uint64_t) addend << 17) & 0x60000000)
+                           | (inst_t) (((uint64_t) addend >> 9) & 0x00ffffe0);
+            break;
+        }
+        /* - control flow relocations */
+        case COMPAT_R_AARCH64_JUMP26:   /* relocate b ... */
+        case COMPAT_R_AARCH64_CALL26: { /* relocate bl ... */
+            assert(isInt64(26+2, addend)); /* X in range */
+            *(inst_t *)P = (*(inst_t *)P & 0xfc000000) /* keep upper 6 (32-6)
+ * bits */
+                         | ((uint32_t)(addend >> 2) & 0x03ffffff);
+            break;
+        }
+        case COMPAT_R_AARCH64_ADR_GOT_PAGE: {
+
+            assert(isInt64(32, addend)); /* X in range */
+            assert((addend & 0xfff) == 0); /* page relative */
+
+            *(inst_t *)P = (*(inst_t *)P & 0x9f00001f)
+               | (inst_t)(((uint64_t)addend << 17) & 0x60000000)  // lo
+               | (inst_t)(((uint64_t)addend >> 9)  & 0x00ffffe0); // hi
+            break;
+        }
+        case COMPAT_R_AARCH64_ADD_ABS_LO12_NC: {
+            // add <Xd>, <Xn>, #imm looks like:
+            // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
+            // sf  0  0  1  0  0  0  1 [ sh] [    imm12    ...
+            //
+            // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
+            // ...   imm12     ] [     Rn     ] [    Rd      ]
+
+            /* fall through */
+        }
+        case COMPAT_R_AARCH64_LDST8_ABS_LO12_NC:
+            if(exp_shift == -1) exp_shift = 0;
+        case COMPAT_R_AARCH64_LDST16_ABS_LO12_NC:
+            if(exp_shift == -1) exp_shift = 1;
+        case COMPAT_R_AARCH64_LDST32_ABS_LO12_NC:
+            if(exp_shift == -1) exp_shift = 2;
+        case COMPAT_R_AARCH64_LDST64_ABS_LO12_NC:
+            if(exp_shift == -1) exp_shift = 3;
+        case COMPAT_R_AARCH64_LDST128_ABS_LO12_NC:
+            if(exp_shift == -1) exp_shift = 4;
+        case COMPAT_R_AARCH64_LD64_GOT_LO12_NC: {
+            if(exp_shift == -1) {
+                assert( (addend & 7) == 0 );
+                exp_shift = 3;
+            }
+            assert((addend & 0xfff) == addend);
+            int shift = 0;
+            if(isLoadStore(P)) {
+                /* bits 31, 30 encode the size. */
+                shift = (*(inst_t*)P >> 30) & 0x3;
+                if(0 == shift && isVectorOp(P)) {
+                    shift = 4;
+                }
+            }
+            assert(addend == 0 || exp_shift == shift);
+            *(inst_t *)P = (*(inst_t *)P & 0xffc003ff)
+               | ((inst_t)(addend >> shift << 10) & 0x003ffc00);
+            break;
+        }
+        default:
+            abort();
+    }
+    return EXIT_SUCCESS;
+}
+
+
+/**
+ * Compute the *new* addend for a relocation, given a pre-existing addend.
+ * @param section The section the relocation is in.
+ * @param rel     The Relocation struct.
+ * @param symbol  The target symbol.
+ * @param addend  The existing addend. Either explicit or implicit.
+ * @return The new computed addend.
+ */
+static int64_t
+computeAddend(Section * section, Elf_Rel * rel,
+              ElfSymbol * symbol, int64_t addend) {
+
+    /* Position where something is relocated */
+    addr_t P = (addr_t)((uint8_t*)section->start + rel->r_offset);
+
+    assert(0x0 != P);
+    assert((uint64_t)section->start <= P);
+    assert(P <= (uint64_t)section->start + section->size);
+    /* Address of the symbol */
+    addr_t S = (addr_t) symbol->addr;
+    assert(0x0 != S);
+    /* GOT slot for the symbol */
+    addr_t GOT_S = (addr_t) symbol->got_addr;
+
+    int64_t A = addend;
+
+    switch(ELF64_R_TYPE(rel->r_info)) {
+        case COMPAT_R_AARCH64_ABS64:
+            /* type: static, class: data, op: S + A; overflow: none */
+        case COMPAT_R_AARCH64_ABS32:
+            /* type: static, class: data, op: S + A; overflow: int32 */
+        case COMPAT_R_AARCH64_ABS16:
+            /* type: static, class: data, op: S + A; overflow: int16 */
+            return S + A;
+        case COMPAT_R_AARCH64_PREL64:
+            /* type: static, class: data, op: S + A - P; overflow: none */
+        case COMPAT_R_AARCH64_PREL32:
+            /* type: static, class: data, op: S + A - P; overflow: int32 */
+        case COMPAT_R_AARCH64_PREL16:
+            /* type: static, class: data, op: S + A - P; overflow: int16 */
+            return S + A - P;
+        case COMPAT_R_AARCH64_ADR_PREL_PG_HI21:
+            /* type: static, class: aarch64, op: Page(S + A) - Page(P);
+             * overflow: int32 */
+            return Page(S + A) - Page(P);
+        case COMPAT_R_AARCH64_ADD_ABS_LO12_NC:
+            /* type: static, class: aarch64, op: S + A */
+            return (S + A) & 0xfff;
+        case COMPAT_R_AARCH64_JUMP26:
+        case COMPAT_R_AARCH64_CALL26: {
+            // S+A-P
+            int64_t V = S + A - P;
+            /* note: we are encoding bits [27:2] */
+            if(!isInt64(26+2, V)) {
+                // Note [PC bias aarch64]
+                // There is no PC bias to accommodate in the
+                // relocation of a place containing an instruction
+                // that formulates a PC-relative address. The program
+                // counter reflects the address of the currently
+                // executing instruction.
+
+                /* need a stub */
+                /* check if we already have that stub */
+                if(findStub(section, (void**)&S, 0)) {
+                    /* did not find it. Crete a new stub. */
+                    if(makeStub(section, (void**)&S, 0)) {
+                        abort(/* could not find or make stub */);
+                    }
+                }
+
+                assert(0 == (0xffff000000000000 & S));
+                V = S + A - P;
+                assert(isInt64(26+2, V)); /* X in range */
+            }
+            return V;
+        }
+        case COMPAT_R_AARCH64_LDST128_ABS_LO12_NC: assert(0 == ((S+A) & 0x0f));
+        case COMPAT_R_AARCH64_LDST64_ABS_LO12_NC:  assert(0 == ((S+A) & 0x07));
+        case COMPAT_R_AARCH64_LDST32_ABS_LO12_NC:  assert(0 == ((S+A) & 0x03));
+        case COMPAT_R_AARCH64_LDST16_ABS_LO12_NC:  assert(0 == ((S+A) & 0x01));
+        case COMPAT_R_AARCH64_LDST8_ABS_LO12_NC:
+            /* type: static, class: aarch64, op: S + A */
+            return (S + A) & 0xfff;
+
+        case COMPAT_R_AARCH64_ADR_GOT_PAGE: {
+            // Page(G(GDAT(S+A))) - Page(P)
+            // Set the immediate value of an ADRP to bits [32:12] of X;
+            // check that -2^32 <= X < 2^32
+            // NOTE: we'll do what seemingly everyone else does, and
+            //       reduce this to Page(GOT(S)+A) - Page(P)
+            // TODO: fix this story proper, so that the transformation
+            //       makes sense without resorting to: everyone else
+            //       does it like this as well.
+            assert(0x0 != GOT_S);
+            return Page(GOT_S+A) - Page(P);
+        }
+        case COMPAT_R_AARCH64_LD64_GOT_LO12_NC: {
+            // G(GDAT(S+A))
+            assert(0x0 != GOT_S);
+            return (GOT_S + A) & 0xfff;
+        }
+        default:
+            abort(/* unhandled rel */);
+    }
+}
+
+bool
+relocateObjectCodeAarch64(ObjectCode * oc) {
+    for(ElfRelocationTable *relTab = oc->info->relTable;
+        relTab != NULL; relTab = relTab->next) {
+        /* only relocate interesting sections */
+        if (SECTIONKIND_OTHER == oc->sections[relTab->targetSectionIndex].kind)
+            continue;
+
+        Section *targetSection = &oc->sections[relTab->targetSectionIndex];
+
+        for (unsigned i = 0; i < relTab->n_relocations; i++) {
+            Elf_Rel *rel = &relTab->relocations[i];
+
+            ElfSymbol *symbol =
+                    findSymbol(oc,
+                               relTab->sectionHeader->sh_link,
+                               ELF64_R_SYM((Elf64_Xword)rel->r_info));
+
+            assert(symbol != NULL);
+
+            /* decode implicit addend */
+            int64_t addend = decodeAddendAarch64(targetSection, rel);
+
+            addend = computeAddend(targetSection, rel, symbol, addend);
+            encodeAddendAarch64(targetSection, rel, addend);
+        }
+    }
+    for(ElfRelocationATable *relaTab = oc->info->relaTable;
+        relaTab != NULL; relaTab = relaTab->next) {
+        /* only relocate interesting sections */
+        if (SECTIONKIND_OTHER == oc->sections[relaTab->targetSectionIndex].kind)
+            continue;
+
+        Section *targetSection = &oc->sections[relaTab->targetSectionIndex];
+
+        for(unsigned i=0; i < relaTab->n_relocations; i++) {
+
+            Elf_Rela *rel = &relaTab->relocations[i];
+
+            ElfSymbol *symbol =
+                    findSymbol(oc,
+                               relaTab->sectionHeader->sh_link,
+                               ELF64_R_SYM((Elf64_Xword)rel->r_info));
+
+            assert(0x0 != symbol);
+
+            /* take explicit addend */
+            int64_t addend = rel->r_addend;
+
+            addend = computeAddend(targetSection, (Elf_Rel*)rel,
+                                   symbol, addend);
+            encodeAddendAarch64(targetSection, (Elf_Rel*)rel, addend);
+        }
+    }
+    return EXIT_SUCCESS;
+}
+
+#endif /* OBJECTFORMAT_ELF */
+#endif /* aarch64_HOST_ARCH */
diff --git a/rts/linker/elf_reloc_aarch64.h b/rts/linker/elf_reloc_aarch64.h
new file mode 100644 (file)
index 0000000..ac7a90e
--- /dev/null
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "LinkerInternals.h"
+
+#if defined(OBJFORMAT_ELF)
+
+bool
+relocateObjectCodeAarch64(ObjectCode * oc);
+
+#endif /* OBJETFORMAT_ELF */
index bdfab22..9ff9d62 100644 (file)
@@ -1,10 +1,9 @@
 #include "linker/elf_util.h"
-#include "ElfTypes.h"
 
 #if defined(OBJFORMAT_ELF)
 
 ElfSymbolTable *
-find_symbol_table(ObjectCode * oc, unsigned symolTableIndex) {
+findSymbolTable(ObjectCode * oc, unsigned symolTableIndex) {
     for(ElfSymbolTable * t=oc->info->symbolTables; t != NULL; t = t->next)
         if(t->index == symolTableIndex)
             return t;
@@ -12,9 +11,9 @@ find_symbol_table(ObjectCode * oc, unsigned symolTableIndex) {
 }
 
 ElfSymbol *
-find_symbol(ObjectCode * oc, unsigned symbolTableIndex, unsigned long
-symbolIndex) {
-    ElfSymbolTable * t = find_symbol_table(oc, symbolTableIndex);
+findSymbol(ObjectCode * oc, unsigned symbolTableIndex,
+           unsigned long symbolIndex) {
+    ElfSymbolTable * t = findSymbolTable(oc, symbolTableIndex);
     if(NULL != t && symbolIndex < t->n_symbols) {
         return &t->symbols[symbolIndex];
     }
index cae84a9..d94eb69 100644 (file)
@@ -1,18 +1,16 @@
 #ifndef RTS_LINKER_ELF_UTIL_H
 #define RTS_LINKER_ELF_UTIL_H
 
-#include <stdint.h>
-#include <stdbool.h>
 #include "LinkerInternals.h"
 
 #if defined(OBJFORMAT_ELF)
 
-ElfSymbolTable * find_symbol_table(ObjectCode * oc,
-                                   unsigned symbolTableIndex);
+ElfSymbolTable * findSymbolTable(ObjectCode * oc,
+                                 unsigned symbolTableIndex);
 
-ElfSymbol * find_symbol(ObjectCode * oc,
-                        unsigned symbolTableIndex,
-                        unsigned long symbolIndex);
+ElfSymbol * findSymbol(ObjectCode * oc,
+                       unsigned symbolTableIndex,
+                       unsigned long symbolIndex);
 
 #endif
 #endif //RTS_LINKER_ELF_UTIL_H
index 650e7f4..f2aa506 100644 (file)
@@ -8,21 +8,21 @@
 // Signed extend a number to a 32-bit int.
 // Does the given signed integer fit into the given bit width?
 static inline int32_t
-sign_extend32(uint32_t bits, uint32_t x)
+signExtend32(uint32_t bits, uint32_t x)
 {
     return ((int32_t) (x << (32 - bits))) >> (32 - bits);
 }
 
 // Does the given signed integer fit into the given bit width?
 static inline bool
-is_int(uint32_t bits, int32_t x)
+isInt(uint32_t bits, int32_t x)
 {
     return bits > 32 || (-(1 << (bits-1)) <= x
                          && x < (1 << (bits-1)));
 }
 
 static inline bool
-is_int64(uint32_t bits, int64_t x) {
+isInt64(uint32_t bits, int64_t x) {
     return bits > 64 || (-((int64_t)1 << (bits-1)) <= x
                          && x < ((int64_t)1 << (bits-1)));
 }