rts: enable parallel GC scan of large (32M+) allocation area
[ghc.git] / rts / Linker.c
index a1f72e5..b41bc1a 100644 (file)
@@ -241,7 +241,6 @@ static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
 #define open wopen
 #define WSTR(s) L##s
 #define pathprintf swprintf
-#define pathsplit _wsplitpath_s
 #define pathsize sizeof(wchar_t)
 #else
 #define pathcmp strcmp
@@ -251,7 +250,6 @@ static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
 #define struct_stat struct stat
 #define WSTR(s) s
 #define pathprintf snprintf
-#define pathsplit _splitpath_s
 #define pathsize sizeof(char)
 #endif
 
@@ -268,6 +266,30 @@ static pathchar* pathdup(pathchar *path)
     return ret;
 }
 
+static pathchar* pathdir(pathchar *path)
+{
+    pathchar *ret;
+#if defined(mingw32_HOST_OS)
+    pathchar *drive, *dirName;
+    size_t memberLen = pathlen(path) + 1;
+    dirName = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
+    ret     = stgMallocBytes(pathsize * memberLen, "pathdir(path)");
+    drive   = stgMallocBytes(pathsize * _MAX_DRIVE, "pathdir(path)");
+    _wsplitpath_s(path, drive, _MAX_DRIVE, dirName, pathsize * pathlen(path), NULL, 0, NULL, 0);
+    pathprintf(ret, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), drive, dirName);
+    stgFree(drive);
+    stgFree(dirName);
+#else
+    pathchar* dirName = dirname(path);
+    size_t memberLen  = pathlen(dirName);
+    ret = stgMallocBytes(pathsize * (memberLen + 2), "pathdir(path)");
+    strcpy(ret, dirName);
+    ret[memberLen  ] = '/';
+    ret[memberLen+1] = '\0';
+#endif
+    return ret;
+}
+
 static pathchar* mkPath(char* path)
 {
 #if defined(mingw32_HOST_OS)
@@ -305,12 +327,18 @@ static void *lookupSymbolInDLLs ( unsigned char *lbl );
 #ifndef x86_64_HOST_ARCH
  static void zapTrailingAtSign   ( unsigned char *sym );
 #endif
+
+#if defined(x86_64_HOST_ARCH)
+#define USED_IF_x86_64_HOST_ARCH    /* Nothing */
+#else
+#define USED_IF_x86_64_HOST_ARCH    STG_UNUSED
+#endif
+
 static char *allocateImageAndTrampolines (
    pathchar* arch_name, char* member_name,
-#if defined(x86_64_HOST_ARCH)
    FILE* f,
-#endif
-   int size );
+   int size,
+   int isThin);
 #if defined(x86_64_HOST_ARCH)
 static int ocAllocateSymbolExtras_PEi386 ( ObjectCode* oc );
 static size_t makeSymbolExtra_PEi386( ObjectCode* oc, size_t, char* symbol );
@@ -1675,9 +1703,7 @@ void freeObjectCode (ObjectCode *oc)
 * Sets the initial status of a fresh ObjectCode
 */
 static void setOcInitialStatus(ObjectCode* oc) {
-    if (oc->isImportLib == HS_BOOL_TRUE) {
-        oc->status = OBJECT_DONT_RESOLVE;
-    } else if (oc->archiveMemberName == NULL) {
+    if (oc->archiveMemberName == NULL) {
         oc->status = OBJECT_NEEDED;
     } else {
         oc->status = OBJECT_LOADED;
@@ -1839,12 +1865,24 @@ static HsInt loadArchive_ (pathchar *path)
     if (n != 8)
         barf("loadArchive: Failed reading header from `%" PATH_FMT "'", path);
     if (strncmp(tmp, "!<arch>\n", 8) == 0) {}
-#if !defined(mingw32_HOST_OS)
-    /* See Note [thin archives on Windows] */
+    /* Check if this is a thin archive by looking for the magic string "!<thin>\n"
+     *
+     * ar thin libraries have the exact same format as normal archives except they
+     * have a different magic string and they don't copy the object files into the
+     * archive.
+     *
+     * Instead each header entry points to the location of the object file on disk.
+     * This is useful when a library is only created to satisfy a compile time dependency
+     * instead of to be distributed. This saves the time required for copying.
+     *
+     * Thin archives are always flattened. They always only contain simple headers
+     * pointing to the object file and so we need not allocate more memory than needed
+     * to find the object file.
+     *
+     */
     else if (strncmp(tmp, "!<thin>\n", 8) == 0) {
         isThin = 1;
     }
-#endif
 #if defined(darwin_HOST_OS)
     /* Not a standard archive, look for a fat archive magic number: */
     else if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
@@ -2086,11 +2124,8 @@ static HsInt loadArchive_ (pathchar *path)
 #if defined(mingw32_HOST_OS)
             // TODO: We would like to use allocateExec here, but allocateExec
             //       cannot currently allocate blocks large enough.
-            image = allocateImageAndTrampolines(path, fileName,
-#if defined(x86_64_HOST_ARCH)
-               f,
-#endif
-               memberSize);
+            image = allocateImageAndTrampolines(path, fileName, f, memberSize,
+                                                isThin);
 #elif defined(darwin_HOST_OS)
             if (RTS_LINKER_USE_MMAP)
                 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1, 0);
@@ -2105,36 +2140,27 @@ static HsInt loadArchive_ (pathchar *path)
 #else // not windows or darwin
             image = stgMallocBytes(memberSize, "loadArchive(image)");
 #endif
-
-#if !defined(mingw32_HOST_OS)
-            /*
-             * Note [thin archives on Windows]
-             * This doesn't compile on Windows because it assumes
-             * char* pathnames, and we use wchar_t* on Windows.  It's
-             * not trivial to fix, so I'm leaving it disabled on
-             * Windows for now --SDM
-             */
             if (isThin) {
                 FILE *member;
-                char *pathCopy, *dirName, *memberPath;
+                pathchar *pathCopy, *dirName, *memberPath, *objFileName;
 
                 /* Allocate and setup the dirname of the archive.  We'll need
-                   this to locate the thin member */
-                pathCopy = stgMallocBytes(strlen(path) + 1, "loadArchive(file)");
-                strcpy(pathCopy, path);
-                dirName = dirname(pathCopy);
+                    this to locate the thin member */
+                pathCopy = pathdup(path); // Convert the char* to a pathchar*
+                dirName  = pathdir(pathCopy);
 
                 /* Append the relative member name to the dirname.  This should be
                    be the full path to the actual thin member. */
-                memberPath = stgMallocBytes(
-                    strlen(path) + 1 + strlen(fileName) + 1, "loadArchive(file)");
-                strcpy(memberPath, dirName);
-                memberPath[strlen(dirName)] = '/';
-                strcpy(memberPath + strlen(dirName) + 1, fileName);
+                int memberLen = pathlen(dirName) + 1 + strlen(fileName) + 1;
+                memberPath    = stgMallocBytes(pathsize * memberLen, "loadArchive(file)");
+                objFileName   = mkPath(fileName);
+                pathprintf(memberPath, memberLen, WSTR("%" PATH_FMT "%" PATH_FMT), dirName, objFileName);
+                stgFree(objFileName);
+                stgFree(dirName);
 
                 member = pathopen(memberPath, WSTR("rb"));
                 if (!member)
-                    barf("loadObj: can't read `%s'", path);
+                    barf("loadObj: can't read thin archive `%" PATH_FMT "'", memberPath);
 
                 n = fread ( image, 1, memberSize, member );
                 if (n != memberSize) {
@@ -2146,7 +2172,6 @@ static HsInt loadArchive_ (pathchar *path)
                 stgFree(pathCopy);
             }
             else
-#endif
             {
                 n = fread ( image, 1, memberSize, f );
                 if (n != memberSize) {
@@ -2221,7 +2246,7 @@ static HsInt loadArchive_ (pathchar *path)
             if (!isThin || thisFileNameSize == 0) {
                 n = fseek(f, memberSize, SEEK_CUR);
                 if (n != 0)
-                    barf("loadArchive: error whilst seeking by %d in `%s'",
+                    barf("loadArchive: error whilst seeking by %d in `%" PATH_FMT "'",
                          memberSize, path);
             }
         }
@@ -2321,7 +2346,7 @@ preloadObjectFile (pathchar *path)
    /* coverity[toctou] */
    f = pathopen(path, WSTR("rb"));
    if (!f) {
-       errorBelch("loadObj: can't read `%" PATH_FMT "'", path);
+       errorBelch("loadObj: can't preload `%" PATH_FMT "'", path);
        return NULL;
    }
 
@@ -2329,11 +2354,8 @@ preloadObjectFile (pathchar *path)
 
         // TODO: We would like to use allocateExec here, but allocateExec
         //       cannot currently allocate blocks large enough.
-    image = allocateImageAndTrampolines(path, "itself",
-#if defined(x86_64_HOST_ARCH)
-       f,
-#endif
-       fileSize);
+    image = allocateImageAndTrampolines(path, "itself", f, fileSize,
+                                        HS_BOOL_FALSE);
     if (image == NULL) {
         fclose(f);
         return NULL;
@@ -3049,41 +3071,43 @@ static int verifyCOFFHeader ( COFF_header *hdr, pathchar *filename);
 static char *
 allocateImageAndTrampolines (
    pathchar* arch_name, char* member_name,
-#if defined(x86_64_HOST_ARCH)
-   FILE* f,
-#endif
-   int size )
+   FILE* f USED_IF_x86_64_HOST_ARCH,
+   int size,
+   int isThin USED_IF_x86_64_HOST_ARCH)
 {
    char* image;
 #if defined(x86_64_HOST_ARCH)
-   /* PeCoff contains number of symbols right in it's header, so
-      we can reserve the room for symbolExtras right here. */
-   COFF_header hdr;
-   size_t n;
-
-   n = fread ( &hdr, 1, sizeof_COFF_header, f );
-   if (n != sizeof( COFF_header )) {
-       errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%S'",
-                  member_name, arch_name);
-       return NULL;
-   }
-   fseek( f, -sizeof_COFF_header, SEEK_CUR );
+   if (!isThin)
+   {
+       /* PeCoff contains number of symbols right in it's header, so
+          we can reserve the room for symbolExtras right here. */
+       COFF_header hdr;
+       size_t n;
+
+       n = fread(&hdr, 1, sizeof_COFF_header, f);
+       if (n != sizeof(COFF_header)) {
+           errorBelch("getNumberOfSymbols: error whilst reading `%s' header in `%S'",
+               member_name, arch_name);
+           return NULL;
+       }
+       fseek(f, -sizeof_COFF_header, SEEK_CUR);
 
-   if (!verifyCOFFHeader(&hdr, arch_name)) {
-       return 0;
-   }
+       if (!verifyCOFFHeader(&hdr, arch_name)) {
+           return 0;
+       }
 
-   /* We get back 8-byte aligned memory (is that guaranteed?), but
-      the offsets to the sections within the file are all 4 mod 8
-      (is that guaranteed?). We therefore need to offset the image
-      by 4, so that all the pointers are 8-byte aligned, so that
-      pointer tagging works. */
-   /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
-      which equals to 4 for 64-bit case and 0 for 32-bit case. */
-   /* We allocate trampolines area for all symbols right behind
-      image data, aligned on 8. */
-   size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
-              + hdr.NumberOfSymbols * sizeof(SymbolExtra);
+       /* We get back 8-byte aligned memory (is that guaranteed?), but
+          the offsets to the sections within the file are all 4 mod 8
+          (is that guaranteed?). We therefore need to offset the image
+          by 4, so that all the pointers are 8-byte aligned, so that
+          pointer tagging works. */
+       /* For 32-bit case we don't need this, hence we use macro PEi386_IMAGE_OFFSET,
+          which equals to 4 for 64-bit case and 0 for 32-bit case. */
+       /* We allocate trampolines area for all symbols right behind
+          image data, aligned on 8. */
+       size = ((PEi386_IMAGE_OFFSET + size + 0x7) & ~0x7)
+           + hdr.NumberOfSymbols * sizeof(SymbolExtra);
+   }
 #endif
    image = VirtualAlloc(NULL, size,
                         MEM_RESERVE | MEM_COMMIT,
@@ -3133,9 +3157,9 @@ static int findAndLoadImportLibrary(ObjectCode* oc)
             /* First load the containing DLL if not loaded. */
             Section section = oc->sections[i];
 
-            pathchar* dirName = stgMallocBytes(pathsize * pathlen(oc->fileName), "findAndLoadImportLibrary(oc)");
-            pathsplit(oc->fileName, NULL, 0, dirName, pathsize * pathlen(oc->fileName), NULL, 0, NULL, 0);
-            HsPtr token = addLibrarySearchPath(dirName);
+            pathchar* dirName = pathdir(oc->fileName);
+            HsPtr token       = addLibrarySearchPath(dirName);
+            stgFree(dirName);
             char* dllName = (char*)section.start;
 
             if (strlen(dllName) == 0 || dllName[0] == ' ')
@@ -3497,6 +3521,26 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
    }
 #endif
 
+   /* .BSS Section is initialized in ocGetNames_PEi386
+      but we need the Sections array initialized here already. */
+   Section *sections;
+   sections = (Section*)stgCallocBytes(
+       sizeof(Section),
+       hdr->NumberOfSections + 1, /* +1 for the global BSS section see ocGetNames_PEi386 */
+       "ocVerifyImage_PEi386(sections)");
+   oc->sections = sections;
+   oc->n_sections = hdr->NumberOfSections + 1;
+
+   /* Initialize the Sections */
+   for (i = 0; i < hdr->NumberOfSections; i++) {
+       COFF_section* sectab_i
+           = (COFF_section*)
+           myindex(sizeof_COFF_section, sectab, i);
+
+       /* Calculate the start of the data section */
+       sections[i].start = oc->image + sectab_i->PointerToRawData;
+   }
+
    /* No further verification after this point; only debug printing. */
    i = 0;
    IF_DEBUG(linker, i=1);
@@ -3525,6 +3569,7 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
       COFF_section* sectab_i
          = (COFF_section*)
            myindex ( sizeof_COFF_section, sectab, i );
+      Section section = sections[i];
       debugBelch(
                 "\n"
                 "section %d\n"
@@ -3537,14 +3582,14 @@ ocVerifyImage_PEi386 ( ObjectCode* oc )
                 "    vsize %d\n"
                 "    vaddr %d\n"
                 "  data sz %d\n"
-                " data off %d\n"
+                " data off 0x%p\n"
                 "  num rel %d\n"
                 "  off rel %d\n"
                 "  ptr raw 0x%x\n",
                 sectab_i->VirtualSize,
                 sectab_i->VirtualAddress,
                 sectab_i->SizeOfRawData,
-                sectab_i->PointerToRawData,
+                section.start,
                 sectab_i->NumberOfRelocations,
                 sectab_i->PointerToRelocations,
                 sectab_i->PointerToRawData
@@ -3690,25 +3735,16 @@ ocGetNames_PEi386 ( ObjectCode* oc )
        * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
        */
       if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
-      /* This is a non-empty .bss section.  Allocate zeroed space for
-         it, and set its PointerToRawData field such that oc->image +
-         PointerToRawData == addr_of_zeroed_space.  */
+      /* This is a non-empty .bss section.
+         Allocate zeroed space for it */
       bss_sz = sectab_i->VirtualSize;
       if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
       zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
-      sectab_i->PointerToRawData = ((UChar*)zspace) - ((UChar*)(oc->image));
+      oc->sections[i].start = zspace;
       addProddableBlock(oc, zspace, bss_sz);
       /* debugBelch("BSS anon section at 0x%x\n", zspace); */
    }
 
-   Section *sections;
-   sections = (Section*)stgCallocBytes(
-       sizeof(Section),
-       hdr->NumberOfSections + 1, /* +1 for the global BSS section see below */
-       "ocGetNames_PEi386(sections)");
-   oc->sections = sections;
-   oc->n_sections = hdr->NumberOfSections + 1;
-
    /* Copy section information into the ObjectCode. */
 
    for (i = 0; i < hdr->NumberOfSections; i++) {
@@ -3722,6 +3758,7 @@ ocGetNames_PEi386 ( ObjectCode* oc )
       COFF_section* sectab_i
          = (COFF_section*)
            myindex ( sizeof_COFF_section, sectab, i );
+      Section section = oc->sections[i];
 
       char *secname = cstring_from_section_name(sectab_i->Name, strtab);
 
@@ -3748,11 +3785,11 @@ ocGetNames_PEi386 ( ObjectCode* oc )
       sz = sectab_i->SizeOfRawData;
       if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;
 
-      start = ((UChar*)(oc->image)) + sectab_i->PointerToRawData;
+      start = section.start;
       end   = start + sz - 1;
 
       if (kind != SECTIONKIND_OTHER && end >= start) {
-          addSection(&sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
+          addSection(&oc->sections[i], kind, SECTION_NOMEM, start, sz, 0, 0, 0);
           addProddableBlock(oc, start, sz);
       }
 
@@ -3784,13 +3821,13 @@ ocGetNames_PEi386 ( ObjectCode* oc )
    if (globalBssSize > 0) {
        bss = stgCallocBytes(1, globalBssSize,
                             "ocGetNames_PEi386(non-anonymous bss)");
-       addSection(&sections[oc->n_sections-1],
+       addSection(&oc->sections[oc->n_sections-1],
                   SECTIONKIND_RWDATA, SECTION_MALLOC,
                   bss, globalBssSize, 0, 0, 0);
        IF_DEBUG(linker, debugBelch("bss @ %p %" FMT_Word "\n", bss, globalBssSize));
        addProddableBlock(oc, bss, globalBssSize);
    } else {
-       addSection(&sections[oc->n_sections-1],
+       addSection(&oc->sections[oc->n_sections-1],
                   SECTIONKIND_OTHER, SECTION_NOMEM, NULL, 0, 0, 0, 0);
    }
 
@@ -3817,9 +3854,8 @@ ocGetNames_PEi386 ( ObjectCode* oc )
             || (   symtab_i->StorageClass == MYIMAGE_SYM_CLASS_STATIC
                 && sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT)
             ) {
-                 addr = ((UChar*)(oc->image))
-                        + (sectabent->PointerToRawData
-                           + symtab_i->Value);
+                 addr = (void*)((size_t)oc->sections[symtab_i->SectionNumber-1].start
+                      + symtab_i->Value);
                  if (sectabent->Characteristics & MYIMAGE_SCN_LNK_COMDAT) {
                     isWeak = HS_BOOL_TRUE;
               }
@@ -3969,6 +4005,7 @@ ocResolve_PEi386 ( ObjectCode* oc )
          = (COFF_reloc*) (
               ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
            );
+      Section section = oc->sections[i];
 
       char *secname = cstring_from_section_name(sectab_i->Name, strtab);
 
@@ -4020,11 +4057,10 @@ ocResolve_PEi386 ( ObjectCode* oc )
               myindex ( sizeof_COFF_reloc, reltab, j );
 
          /* the location to patch */
-         pP = (
-                 ((UChar*)(oc->image))
-                 + (sectab_i->PointerToRawData
-                    + reltab_j->VirtualAddress
-                    - sectab_i->VirtualAddress )
+         pP = (void*)(
+                   (size_t)section.start
+                 + reltab_j->VirtualAddress
+                 - sectab_i->VirtualAddress
               );
          /* the existing contents of pP */
          A = *(UInt32*)pP;
@@ -4043,10 +4079,8 @@ ocResolve_PEi386 ( ObjectCode* oc )
                             debugBelch("'\n" ));
 
          if (sym->StorageClass == MYIMAGE_SYM_CLASS_STATIC) {
-            COFF_section* section_sym
-              = (COFF_section*) myindex ( sizeof_COFF_section, sectab, sym->SectionNumber-1 );
-            S = ((size_t)(oc->image))
-              + ((size_t)(section_sym->PointerToRawData))
+            Section section = oc->sections[sym->SectionNumber-1];
+            S = ((size_t)(section.start))
               + ((size_t)(sym->Value));
          } else {
             copyName ( sym->Name, strtab, symbol, 1000-1 );
@@ -4201,9 +4235,10 @@ ocRunInit_PEi386 ( ObjectCode *oc )
         COFF_section* sectab_i
             = (COFF_section*)
                 myindex ( sizeof_COFF_section, sectab, i );
+        Section section = oc->sections[i];
         char *secname = cstring_from_section_name(sectab_i->Name, strtab);
         if (0 == strcmp(".ctors", (char*)secname)) {
-            UChar *init_startC = (UChar*)(oc->image) + sectab_i->PointerToRawData;
+            UChar *init_startC = section.start;
             init_t *init_start, *init_end, *init;
             init_start = (init_t*)init_startC;
             init_end = (init_t*)(init_startC + sectab_i->SizeOfRawData);