Use binary search to speedup checkUnload
authorPhuong Trinh <lolotp@fb.com>
Mon, 1 Apr 2019 19:05:47 +0000 (20:05 +0100)
committerMarge Bot <ben+marge-bot@smart-cactus.org>
Thu, 30 May 2019 20:43:31 +0000 (16:43 -0400)
We are iterating through all object code for each heap objects when
checking whether object code can be unloaded. For large projects in
GHCi, this can be very expensive due to the large number of object code
that needs to be loaded/unloaded. To speed it up, this arrangess all
mapped sections of unloaded object code in a sorted array and use binary
search to check if an address location fall on them.

rts/CheckUnload.c
testsuite/tests/rts/linker/unload_multiple_objs/A.hs [new file with mode: 0644]
testsuite/tests/rts/linker/unload_multiple_objs/B.hs [new file with mode: 0644]
testsuite/tests/rts/linker/unload_multiple_objs/C.hs [new file with mode: 0644]
testsuite/tests/rts/linker/unload_multiple_objs/D.hs [new file with mode: 0644]
testsuite/tests/rts/linker/unload_multiple_objs/Makefile [new file with mode: 0644]
testsuite/tests/rts/linker/unload_multiple_objs/all.T [new file with mode: 0644]
testsuite/tests/rts/linker/unload_multiple_objs/linker_unload_multiple_objs.c [new file with mode: 0644]
testsuite/tests/rts/linker/unload_multiple_objs/linker_unload_multiple_objs.stdout [new file with mode: 0644]

index fa4843d..de81805 100644 (file)
 // object as referenced so that it won't get unloaded in this round.
 //
 
-static void checkAddress (HashTable *addrs, const void *addr)
+//
+// In certain circumstances, there may be a lot of unloaded ObjectCode structs
+// chained in `unloaded_objects` (such as when users `:load` a module in a very
+// big repo in GHCi). To speed up checking whether an address lies within any of
+// these objects, we populate the addresses of the their mapped sections in
+// an array sorted by their `start` address and do binary search for our address
+// on that array. Note that this works because the sections are mapped to mutual
+// exclusive memory regions, so we can simply find the largest lower bound among
+// the `start` addresses of the sections and then check if our address is inside
+// that section. In particular, we store the start address and end address of
+// each mapped section in a OCSectionIndex, arrange them all on a contiguous
+// memory range and then sort by start address. We then put this array in an
+// OCSectionIndices struct to be passed into `checkAddress` to do binary search
+// on.
+//
+
+typedef struct {
+    W_ start;
+    W_ end;
+    ObjectCode *oc;
+} OCSectionIndex;
+
+typedef struct {
+    int n_sections;
+    OCSectionIndex *indices;
+} OCSectionIndices;
+
+static OCSectionIndices *createOCSectionIndices(int n_sections)
+{
+    OCSectionIndices *s_indices;
+    s_indices = stgMallocBytes(sizeof(OCSectionIndices), "OCSectionIndices");
+    s_indices->n_sections = n_sections;
+    s_indices->indices = stgMallocBytes(n_sections*sizeof(OCSectionIndex),
+        "OCSectionIndices::indices");
+    return s_indices;
+}
+
+static int cmpSectionIndex(const void* indexa, const void *indexb)
+{
+    W_ s1 = ((OCSectionIndex*)indexa)->start;
+    W_ s2 = ((OCSectionIndex*)indexb)->start;
+    if (s1 < s2) {
+        return -1;
+    } else if (s1 > s2) {
+        return 1;
+    }
+    return 0;
+}
+
+static OCSectionIndices* buildOCSectionIndices(ObjectCode *ocs)
+{
+    int cnt_sections = 0;
+    ObjectCode *oc;
+    for (oc = ocs; oc; oc = oc->next) {
+        cnt_sections += oc->n_sections;
+    }
+    OCSectionIndices* s_indices = createOCSectionIndices(cnt_sections);
+    int s_i = 0, i;
+    for (oc = ocs; oc; oc = oc->next) {
+        for (i = 0; i < oc->n_sections; i++) {
+            if (oc->sections[i].kind != SECTIONKIND_OTHER) {
+                s_indices->indices[s_i].start = (W_)oc->sections[i].start;
+                s_indices->indices[s_i].end = (W_)oc->sections[i].start
+                    + oc->sections[i].size;
+                s_indices->indices[s_i].oc = oc;
+                s_i++;
+            }
+        }
+    }
+    s_indices->n_sections = s_i;
+    qsort(s_indices->indices,
+        s_indices->n_sections,
+        sizeof(OCSectionIndex),
+        cmpSectionIndex);
+    return s_indices;
+}
+
+static void freeOCSectionIndices(OCSectionIndices *section_indices)
+{
+    free(section_indices->indices);
+    free(section_indices);
+}
+
+static ObjectCode *findOC(OCSectionIndices *s_indices, const void *addr) {
+    W_ w_addr = (W_)addr;
+    if (s_indices->n_sections <= 0) return NULL;
+    if (w_addr < s_indices->indices[0].start) return NULL;
+
+    int left = 0, right = s_indices->n_sections;
+    while (left + 1 < right) {
+        int mid = (left + right)/2;
+        W_ w_mid = s_indices->indices[mid].start;
+        if (w_mid <= w_addr) {
+            left = mid;
+        } else {
+            right = mid;
+        }
+    }
+    ASSERT(w_addr >= s_indices->indices[left].start);
+    if (w_addr < s_indices->indices[left].end) {
+        return s_indices->indices[left].oc;
+    }
+    return NULL;
+}
+
+static void checkAddress (HashTable *addrs, const void *addr,
+        OCSectionIndices *s_indices)
 {
     ObjectCode *oc;
-    int i;
 
     if (!lookupHashTable(addrs, (W_)addr)) {
         insertHashTable(addrs, (W_)addr, addr);
 
-        for (oc = unloaded_objects; oc; oc = oc->next) {
-            for (i = 0; i < oc->n_sections; i++) {
-                if (oc->sections[i].kind != SECTIONKIND_OTHER) {
-                    if ((W_)addr >= (W_)oc->sections[i].start &&
-                        (W_)addr <  (W_)oc->sections[i].start
-                                    + oc->sections[i].size) {
-                        oc->referenced = 1;
-                        return;
-                    }
-                }
-            }
+        oc = findOC(s_indices, addr);
+        if (oc != NULL) {
+            oc->referenced = 1;
+            return;
         }
     }
 }
 
-static void searchStackChunk (HashTable *addrs, StgPtr sp, StgPtr stack_end)
+static void searchStackChunk (HashTable *addrs, StgPtr sp, StgPtr stack_end,
+        OCSectionIndices *s_indices)
 {
     StgPtr p;
     const StgRetInfoTable *info;
@@ -73,7 +172,7 @@ static void searchStackChunk (HashTable *addrs, StgPtr sp, StgPtr stack_end)
         switch (info->i.type) {
         case RET_SMALL:
         case RET_BIG:
-            checkAddress(addrs, (const void*)info);
+            checkAddress(addrs, (const void*)info, s_indices);
             break;
 
         default:
@@ -85,7 +184,8 @@ static void searchStackChunk (HashTable *addrs, StgPtr sp, StgPtr stack_end)
 }
 
 
-static void searchHeapBlocks (HashTable *addrs, bdescr *bd)
+static void searchHeapBlocks (HashTable *addrs, bdescr *bd,
+        OCSectionIndices *s_indices)
 {
     StgPtr p;
     const StgInfoTable *info;
@@ -189,7 +289,7 @@ static void searchHeapBlocks (HashTable *addrs, bdescr *bd)
                 prim = true;
                 size = ap_stack_sizeW(ap);
                 searchStackChunk(addrs, (StgPtr)ap->payload,
-                                 (StgPtr)ap->payload + ap->size);
+                                 (StgPtr)ap->payload + ap->size, s_indices);
                 break;
             }
 
@@ -223,7 +323,7 @@ static void searchHeapBlocks (HashTable *addrs, bdescr *bd)
                 StgStack *stack = (StgStack*)p;
                 prim = true;
                 searchStackChunk(addrs, stack->sp,
-                                 stack->stack + stack->stack_size);
+                                 stack->stack + stack->stack_size, s_indices);
                 size = stack_sizeW(stack);
                 break;
             }
@@ -238,7 +338,7 @@ static void searchHeapBlocks (HashTable *addrs, bdescr *bd)
             }
 
             if (!prim) {
-                checkAddress(addrs,info);
+                checkAddress(addrs,info, s_indices);
             }
 
             p += size;
@@ -251,15 +351,16 @@ static void searchHeapBlocks (HashTable *addrs, bdescr *bd)
 // Do not unload the object if the CCS tree refers to a CCS or CC which
 // originates in the object.
 //
-static void searchCostCentres (HashTable *addrs, CostCentreStack *ccs)
+static void searchCostCentres (HashTable *addrs, CostCentreStack *ccs,
+        OCSectionIndices* s_indices)
 {
     IndexTable *i;
 
-    checkAddress(addrs, ccs);
-    checkAddress(addrs, ccs->cc);
+    checkAddress(addrs, ccs, s_indices);
+    checkAddress(addrs, ccs->cc, s_indices);
     for (i = ccs->indexTable; i != NULL; i = i->next) {
         if (!i->back_edge) {
-            searchCostCentres(addrs, i->ccs);
+            searchCostCentres(addrs, i->ccs, s_indices);
         }
     }
 }
@@ -288,6 +389,7 @@ void checkUnload (StgClosure *static_objects)
 
   ACQUIRE_LOCK(&linker_unloaded_mutex);
 
+  OCSectionIndices *s_indices = buildOCSectionIndices(unloaded_objects);
   // Mark every unloadable object as unreferenced initially
   for (oc = unloaded_objects; oc; oc = oc->next) {
       IF_DEBUG(linker, debugBelch("Checking whether to unload %" PATH_FMT "\n",
@@ -299,7 +401,7 @@ void checkUnload (StgClosure *static_objects)
 
   for (p = static_objects; p != END_OF_STATIC_OBJECT_LIST; p = link) {
       p = UNTAG_STATIC_LIST_PTR(p);
-      checkAddress(addrs, p);
+      checkAddress(addrs, p, s_indices);
       info = get_itbl(p);
       link = *STATIC_LINK(info, p);
   }
@@ -309,32 +411,33 @@ void checkUnload (StgClosure *static_objects)
        p != END_OF_CAF_LIST;
        p = ((StgIndStatic *)p)->static_link) {
       p = UNTAG_STATIC_LIST_PTR(p);
-      checkAddress(addrs, p);
+      checkAddress(addrs, p, s_indices);
   }
 
   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-      searchHeapBlocks (addrs, generations[g].blocks);
-      searchHeapBlocks (addrs, generations[g].large_objects);
+      searchHeapBlocks (addrs, generations[g].blocks, s_indices);
+      searchHeapBlocks (addrs, generations[g].large_objects, s_indices);
 
       for (n = 0; n < n_capabilities; n++) {
           ws = &gc_threads[n]->gens[g];
-          searchHeapBlocks(addrs, ws->todo_bd);
-          searchHeapBlocks(addrs, ws->part_list);
-          searchHeapBlocks(addrs, ws->scavd_list);
+          searchHeapBlocks(addrs, ws->todo_bd, s_indices);
+          searchHeapBlocks(addrs, ws->part_list, s_indices);
+          searchHeapBlocks(addrs, ws->scavd_list, s_indices);
       }
   }
 
 #if defined(PROFILING)
   /* Traverse the cost centre tree, calling checkAddress on each CCS/CC */
-  searchCostCentres(addrs, CCS_MAIN);
+  searchCostCentres(addrs, CCS_MAIN, s_indices);
 
   /* Also check each cost centre in the CC_LIST */
   CostCentre *cc;
   for (cc = CC_LIST; cc != NULL; cc = cc->link) {
-      checkAddress(addrs, cc);
+      checkAddress(addrs, cc, s_indices);
   }
 #endif /* PROFILING */
 
+  freeOCSectionIndices(s_indices);
   // Look through the unloadable objects, and any object that is still
   // marked as unreferenced can be physically unloaded, because we
   // have no references to it.
diff --git a/testsuite/tests/rts/linker/unload_multiple_objs/A.hs b/testsuite/tests/rts/linker/unload_multiple_objs/A.hs
new file mode 100644 (file)
index 0000000..5ae1954
--- /dev/null
@@ -0,0 +1,16 @@
+module A where
+
+import Foreign.StablePtr
+
+id1 :: Int
+id1 = 1
+
+createHeapObjectA :: IO (StablePtr [Int])
+createHeapObjectA = do
+  newStablePtr [2+id1]
+
+freeHeapObjectA :: StablePtr [Int] -> IO ()
+freeHeapObjectA obj = freeStablePtr obj
+
+foreign export ccall createHeapObjectA :: IO (StablePtr [Int])
+foreign export ccall freeHeapObjectA   :: StablePtr [Int] -> IO ()
diff --git a/testsuite/tests/rts/linker/unload_multiple_objs/B.hs b/testsuite/tests/rts/linker/unload_multiple_objs/B.hs
new file mode 100644 (file)
index 0000000..be271c1
--- /dev/null
@@ -0,0 +1,16 @@
+module B where
+
+import Foreign.StablePtr
+
+id2 :: Int
+id2 = 2
+
+createHeapObjectB :: IO (StablePtr [Int])
+createHeapObjectB = do
+  newStablePtr [2+id2]
+
+freeHeapObjectB :: StablePtr [Int] -> IO ()
+freeHeapObjectB obj = freeStablePtr obj
+
+foreign export ccall createHeapObjectB :: IO (StablePtr [Int])
+foreign export ccall freeHeapObjectB   :: StablePtr [Int] -> IO ()
diff --git a/testsuite/tests/rts/linker/unload_multiple_objs/C.hs b/testsuite/tests/rts/linker/unload_multiple_objs/C.hs
new file mode 100644 (file)
index 0000000..89cf413
--- /dev/null
@@ -0,0 +1,16 @@
+module C where
+
+import Foreign.StablePtr
+
+id3 :: Int
+id3 = 3
+
+createHeapObjectC :: IO (StablePtr [Int])
+createHeapObjectC = do
+  newStablePtr [2+id3]
+
+freeHeapObjectC :: StablePtr [Int] -> IO ()
+freeHeapObjectC obj = freeStablePtr obj
+
+foreign export ccall createHeapObjectC :: IO (StablePtr [Int])
+foreign export ccall freeHeapObjectC   :: StablePtr [Int] -> IO ()
diff --git a/testsuite/tests/rts/linker/unload_multiple_objs/D.hs b/testsuite/tests/rts/linker/unload_multiple_objs/D.hs
new file mode 100644 (file)
index 0000000..39a20f6
--- /dev/null
@@ -0,0 +1,16 @@
+module D where
+
+import Foreign.StablePtr
+
+id4 :: Int
+id4 = 4
+
+createHeapObjectD :: IO (StablePtr [Int])
+createHeapObjectD = do
+  newStablePtr [2+id4]
+
+freeHeapObjectD :: StablePtr [Int] -> IO ()
+freeHeapObjectD obj = freeStablePtr obj
+
+foreign export ccall createHeapObjectD :: IO (StablePtr [Int])
+foreign export ccall freeHeapObjectD   :: StablePtr [Int] -> IO ()
diff --git a/testsuite/tests/rts/linker/unload_multiple_objs/Makefile b/testsuite/tests/rts/linker/unload_multiple_objs/Makefile
new file mode 100644 (file)
index 0000000..70046c0
--- /dev/null
@@ -0,0 +1,17 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+.PHONY: linker_unload_multiple_objs
+linker_unload_multiple_objs:
+       $(RM) A.o  B.o  C.o  D.o
+       $(RM) A.hi B.hi C.hi D.hi
+       "$(TEST_HC)" $(TEST_HC_OPTS) -c A.hs -v0
+       "$(TEST_HC)" $(TEST_HC_OPTS) -c B.hs -v0
+       "$(TEST_HC)" $(TEST_HC_OPTS) -c C.hs -v0
+       "$(TEST_HC)" $(TEST_HC_OPTS) -c D.hs -v0
+       # -rtsopts causes a warning
+       "$(TEST_HC)" LinkerUnload.hs -package ghc $(filter-out -rtsopts, $(TEST_HC_OPTS)) linker_unload_multiple_objs.c -o linker_unload_multiple_objs -no-hs-main -optc-Werror
+       ./linker_unload_multiple_objs "`'$(TEST_HC)' $(TEST_HC_OPTS) --print-libdir | tr -d '\r'`"
+
+
diff --git a/testsuite/tests/rts/linker/unload_multiple_objs/all.T b/testsuite/tests/rts/linker/unload_multiple_objs/all.T
new file mode 100644 (file)
index 0000000..e7b1e2f
--- /dev/null
@@ -0,0 +1,4 @@
+test('linker_unload_multiple_objs',
+     [extra_files(['../LinkerUnload.hs', 'A.hs', 'B.hs', 'C.hs', 'D.hs',]),
+      when(arch('powerpc64') or arch('powerpc64le'), expect_broken(11259))],
+     run_command, ['$MAKE -s --no-print-directory linker_unload_multiple_objs'])
diff --git a/testsuite/tests/rts/linker/unload_multiple_objs/linker_unload_multiple_objs.c b/testsuite/tests/rts/linker/unload_multiple_objs/linker_unload_multiple_objs.c
new file mode 100644 (file)
index 0000000..d64246e
--- /dev/null
@@ -0,0 +1,147 @@
+#include "ghcconfig.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include "Rts.h"
+#include <string.h>
+#include "HsFFI.h"
+
+extern void loadPackages(void);
+
+#define NUM_OBJS 4
+
+static char *objs[NUM_OBJS] = {"A.o", "B.o", "C.o", "D.o"};
+
+pathchar* toPathchar(char* path)
+{
+#if defined(mingw32_HOST_OS)
+    size_t required = strlen(path);
+    pathchar *ret = (pathchar*)malloc(sizeof(pathchar) * (required + 1));
+    if (mbstowcs(ret, path, required) == (size_t)-1)
+    {
+        errorBelch("toPathchar failed converting char* to wchar_t*: %s", path);
+        exit(1);
+    }
+    ret[required] = '\0';
+    return ret;
+#else
+    return path;
+#endif
+}
+
+void load_and_resolve_all_objects() {
+    int i, r;
+    for (i = 0; i < NUM_OBJS; i++) {
+        r = loadObj(toPathchar(objs[i]));
+        if (!r) {
+            errorBelch("loadObj(%s) failed", objs[i]);
+            exit(1);
+        }
+    }
+
+    r = resolveObjs();
+    if (!r) {
+        errorBelch("resolveObjs failed");
+        exit(1);
+    }
+
+    for (i = 0; i < NUM_OBJS; i++) {
+        char sym_name[138] = {0};
+#if LEADING_UNDERSCORE
+        sprintf(sym_name, "_createHeapObject%c", 'A'+i);
+#else
+        sprintf(sym_name, "createHeapObject%c", 'A'+i);
+#endif
+        void *sym_addr = lookupSymbol(sym_name);
+        if (!sym_addr) {
+            errorBelch("lookupSymbol(%s) failed", sym_name);
+            exit(1);
+        }
+    }
+}
+
+void check_object_freed(char *obj_path) {
+    OStatus st;
+    st = getObjectLoadStatus(toPathchar(obj_path));
+    if (st != OBJECT_NOT_LOADED) {
+        errorBelch("object %s status != OBJECT_NOT_LOADED", obj_path);
+        exit(1);
+    }
+}
+
+void check_object_unloaded_but_not_freed(char *obj_path) {
+    OStatus st;
+    st = getObjectLoadStatus(toPathchar(obj_path));
+    if (st != OBJECT_UNLOADED) {
+        errorBelch("object %s status != OBJECT_UNLOADED, is %d instead", obj_path, st);
+        exit(1);
+    }
+}
+
+void test_no_dangling_references_to_unloaded_objects()
+{
+    load_and_resolve_all_objects();
+
+    unloadObj(toPathchar("A.o"));
+    unloadObj(toPathchar("B.o"));
+    unloadObj(toPathchar("C.o"));
+    unloadObj(toPathchar("D.o"));
+    performMajorGC();
+
+    check_object_freed("A.o");
+    check_object_freed("B.o");
+    check_object_freed("C.o");
+    check_object_freed("D.o");
+
+}
+
+typedef HsStablePtr stableptrfun_t(void);
+typedef void freeptrfun_t(HsStablePtr);
+
+void test_still_has_references_to_unloaded_objects()
+{
+    load_and_resolve_all_objects();
+#if LEADING_UNDERSCORE
+    stableptrfun_t *createHeapObject = lookupSymbol("_createHeapObjectD");
+    freeptrfun_t *freeHeapObject = lookupSymbol("_freeHeapObjectD");
+#else
+    stableptrfun_t *createHeapObject = lookupSymbol("createHeapObjectD");
+    freeptrfun_t *freeHeapObject = lookupSymbol("freeHeapObjectD");
+#endif
+    HsStablePtr ptr = createHeapObject();
+
+    unloadObj(toPathchar("A.o"));
+    unloadObj(toPathchar("B.o"));
+    unloadObj(toPathchar("C.o"));
+    unloadObj(toPathchar("D.o"));
+    performMajorGC();
+
+    check_object_freed("A.o");
+    check_object_freed("B.o");
+    check_object_freed("C.o");
+    check_object_unloaded_but_not_freed("D.o");
+
+
+    freeHeapObject(ptr);
+    performMajorGC();
+
+    check_object_freed("A.o");
+    check_object_freed("B.o");
+    check_object_freed("C.o");
+    check_object_freed("D.o");
+}
+
+int main (int argc, char *argv[])
+{
+    RtsConfig conf = defaultRtsConfig;
+    conf.rts_opts_enabled = RtsOptsAll;
+    hs_init_ghc(&argc, &argv, conf);
+
+    initLinker_(0);
+    loadPackages();
+
+    test_still_has_references_to_unloaded_objects();
+    test_no_dangling_references_to_unloaded_objects();
+
+    hs_exit();
+    exit(0);
+}
diff --git a/testsuite/tests/rts/linker/unload_multiple_objs/linker_unload_multiple_objs.stdout b/testsuite/tests/rts/linker/unload_multiple_objs/linker_unload_multiple_objs.stdout
new file mode 100644 (file)
index 0000000..82f7a2f
--- /dev/null
@@ -0,0 +1,2 @@
+[1 of 1] Compiling LinkerUnload     ( LinkerUnload.hs, LinkerUnload.o )
+Linking linker_unload_multiple_objs ...