Use binary search to speed up checkUnload
rts/CheckUnload.c
/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2013-
 *
 * Check whether dynamically-loaded object code can be safely
 * unloaded, by searching for references to it from the heap and RTS
 * data structures.
 *
 * --------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "RtsUtils.h"
#include "Hash.h"
#include "LinkerInternals.h"
#include "CheckUnload.h"
#include "sm/Storage.h"
#include "sm/GCThread.h"

//
// Code that we unload may be referenced from:
//   - info pointers in heap objects and stack frames
//   - pointers to static objects from the heap
//   - StablePtrs to static objects
//   - pointers to cost centres from the cost centre tree
//
// We can find live static objects after a major GC, so we don't have
// to look at every closure pointer in the heap.  However, we do have
// to look at every info pointer.  So this is like a heap census
// traversal: we look at the header of every object, but not its
// contents.
//
// On the assumption that there aren't many different info pointers in
// a typical heap, we insert addresses into a hash table.  The
// first time we see an address, we check it against the pending
// unloadable objects and if it lies within any of them, we mark that
// object as referenced so that it won't get unloaded in this round.
//

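//
// For instance, if the heap contains thousands of closures built from the
// same constructor, they all share one info pointer, so we only ever have to
// decide once whether that pointer lands inside unloadable code.
//
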
//
// In certain circumstances, there may be a lot of unloaded ObjectCode structs
// chained in `unloaded_objects` (such as when users `:load` a module in a very
// big repo in GHCi).  To speed up checking whether an address lies within any
// of these objects, we populate the addresses of their mapped sections in an
// array sorted by `start` address and binary-search that array for the
// address.  This works because the sections are mapped to mutually exclusive
// memory regions, so we can simply find the greatest lower bound among the
// `start` addresses of the sections and then check whether our address falls
// inside that section.  Concretely, we store the start and end address of
// each mapped section in an OCSectionIndex, lay them out in one contiguous
// array, and sort by start address.  The array is wrapped in an
// OCSectionIndices struct, which is passed to `checkAddress` to do the
// binary search on.
//

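// For example (with made-up addresses), suppose the sorted index contains
// the sections [0x1000, 0x2000) -> ocA and [0x5000, 0x8000) -> ocB.  Looking
// up 0x6010, the greatest `start` not exceeding it is 0x5000, and since
// 0x6010 < 0x8000 the address belongs to ocB.  Looking up 0x3000 also finds
// a lower bound (0x1000), but 0x3000 >= 0x2000, so it lies in no section and
// keeps nothing alive.
//
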
typedef struct {
    W_ start;
    W_ end;
    ObjectCode *oc;
} OCSectionIndex;

typedef struct {
    int n_sections;
    OCSectionIndex *indices;
} OCSectionIndices;

static OCSectionIndices *createOCSectionIndices(int n_sections)
{
    OCSectionIndices *s_indices;
    s_indices = stgMallocBytes(sizeof(OCSectionIndices), "OCSectionIndices");
    s_indices->n_sections = n_sections;
    s_indices->indices = stgMallocBytes(n_sections*sizeof(OCSectionIndex),
        "OCSectionIndices::indices");
    return s_indices;
}

// qsort(3) comparator: order OCSectionIndex records by their start address.
static int cmpSectionIndex(const void* indexa, const void *indexb)
{
    W_ s1 = ((OCSectionIndex*)indexa)->start;
    W_ s2 = ((OCSectionIndex*)indexb)->start;
    if (s1 < s2) {
        return -1;
    } else if (s1 > s2) {
        return 1;
    }
    return 0;
}

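// Build the sorted section index for every ObjectCode on the list `ocs`:
// one OCSectionIndex per mapped section, sorted by start address.  Sections
// of kind SECTIONKIND_OTHER are skipped, so the array may end up shorter
// than the total section count (n_sections is adjusted accordingly).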
static OCSectionIndices* buildOCSectionIndices(ObjectCode *ocs)
{
    int cnt_sections = 0;
    ObjectCode *oc;
    for (oc = ocs; oc; oc = oc->next) {
        cnt_sections += oc->n_sections;
    }
    OCSectionIndices* s_indices = createOCSectionIndices(cnt_sections);
    int s_i = 0, i;
    for (oc = ocs; oc; oc = oc->next) {
        for (i = 0; i < oc->n_sections; i++) {
            if (oc->sections[i].kind != SECTIONKIND_OTHER) {
                s_indices->indices[s_i].start = (W_)oc->sections[i].start;
                s_indices->indices[s_i].end = (W_)oc->sections[i].start
                    + oc->sections[i].size;
                s_indices->indices[s_i].oc = oc;
                s_i++;
            }
        }
    }
    s_indices->n_sections = s_i;
    qsort(s_indices->indices,
          s_indices->n_sections,
          sizeof(OCSectionIndex),
          cmpSectionIndex);
    return s_indices;
}

static void freeOCSectionIndices(OCSectionIndices *section_indices)
{
    free(section_indices->indices);
    free(section_indices);
}

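// Binary-search the sorted section index for the ObjectCode (if any) whose
// mapped section contains `addr`.  Returns NULL when the address falls
// outside every recorded section.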
static ObjectCode *findOC(OCSectionIndices *s_indices, const void *addr) {
    W_ w_addr = (W_)addr;
    if (s_indices->n_sections <= 0) return NULL;
    if (w_addr < s_indices->indices[0].start) return NULL;

    int left = 0, right = s_indices->n_sections;
    // Invariant: indices[left].start <= w_addr < indices[right].start,
    // treating indices[n_sections].start as +infinity.
    while (left + 1 < right) {
        int mid = (left + right)/2;
        W_ w_mid = s_indices->indices[mid].start;
        if (w_mid <= w_addr) {
            left = mid;
        } else {
            right = mid;
        }
    }
    ASSERT(w_addr >= s_indices->indices[left].start);
    if (w_addr < s_indices->indices[left].end) {
        return s_indices->indices[left].oc;
    }
    return NULL;
}
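
// A minimal usage sketch (illustrative only; `some_addr` stands in for any
// address pulled out of the heap or RTS data structures):
//
//     OCSectionIndices *ix = buildOCSectionIndices(unloaded_objects);
//     ObjectCode *owner = findOC(ix, some_addr);
//     if (owner != NULL) owner->referenced = 1;
//     freeOCSectionIndices(ix);
//
// checkUnload below follows this pattern, with the hash-table filtering in
// checkAddress layered on top.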

// Check a single address against the unloadable objects.  The first time we
// see an address we record it in `addrs`; if some ObjectCode's section
// contains it, that object is marked as referenced.  Addresses we have
// already seen are skipped, so each distinct address is searched at most once.
static void checkAddress (HashTable *addrs, const void *addr,
                          OCSectionIndices *s_indices)
{
    ObjectCode *oc;

    if (!lookupHashTable(addrs, (W_)addr)) {
        insertHashTable(addrs, (W_)addr, addr);

        oc = findOC(s_indices, addr);
        if (oc != NULL) {
            oc->referenced = 1;
            return;
        }
    }
}

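// Walk one chunk of stack between `sp` and `stack_end`, checking the return
// address (frame info pointer) of each RET_SMALL/RET_BIG frame against the
// unloadable sections.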
static void searchStackChunk (HashTable *addrs, StgPtr sp, StgPtr stack_end,
                              OCSectionIndices *s_indices)
{
    StgPtr p;
    const StgRetInfoTable *info;

    p = sp;
    while (p < stack_end) {
        info = get_ret_itbl((StgClosure *)p);

        switch (info->i.type) {
        case RET_SMALL:
        case RET_BIG:
            checkAddress(addrs, (const void*)info, s_indices);
            break;

        default:
            break;
        }

        p += stack_frame_sizeW((StgClosure*)p);
    }
}


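// Census-style walk over a chain of heap blocks: look at the header of every
// object to find its size, and for non-primitive objects check whether the
// info pointer lies in unloadable code.  STACK and AP_STACK objects also get
// their stack frames searched via searchStackChunk.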
static void searchHeapBlocks (HashTable *addrs, bdescr *bd,
                              OCSectionIndices *s_indices)
{
    StgPtr p;
    const StgInfoTable *info;
    uint32_t size;
    bool prim;

    for (; bd != NULL; bd = bd->link) {

        if (bd->flags & BF_PINNED) {
            // Assume that objects in PINNED blocks cannot refer to
            // unloadable code.
            continue;
        }

        p = bd->start;
        while (p < bd->free) {
            info = get_itbl((StgClosure *)p);
            prim = false;

            switch (info->type) {

            case THUNK:
                size = thunk_sizeW_fromITBL(info);
                break;

            case THUNK_1_1:
            case THUNK_0_2:
            case THUNK_2_0:
                size = sizeofW(StgThunkHeader) + 2;
                break;

            case THUNK_1_0:
            case THUNK_0_1:
            case THUNK_SELECTOR:
                size = sizeofW(StgThunkHeader) + 1;
                break;

            case FUN:
            case FUN_1_0:
            case FUN_0_1:
            case FUN_1_1:
            case FUN_0_2:
            case FUN_2_0:
            case CONSTR:
            case CONSTR_NOCAF:
            case CONSTR_1_0:
            case CONSTR_0_1:
            case CONSTR_1_1:
            case CONSTR_0_2:
            case CONSTR_2_0:
                size = sizeW_fromITBL(info);
                break;

            case BLACKHOLE:
            case BLOCKING_QUEUE:
                prim = true;
                size = sizeW_fromITBL(info);
                break;

            case IND:
                // Special case/Delicate Hack: INDs don't normally
                // appear, since we're doing this heap census right
                // after GC.  However, GarbageCollect() also does
                // resurrectThreads(), which can update some
                // blackholes when it calls raiseAsync() on the
                // resurrected threads.  So we know that any IND will
                // be the size of a BLACKHOLE.
                prim = true;
                size = BLACKHOLE_sizeW();
                break;

            case BCO:
                prim = true;
                size = bco_sizeW((StgBCO *)p);
                break;

            case MVAR_CLEAN:
            case MVAR_DIRTY:
            case TVAR:
            case WEAK:
            case PRIM:
            case MUT_PRIM:
            case MUT_VAR_CLEAN:
            case MUT_VAR_DIRTY:
                prim = true;
                size = sizeW_fromITBL(info);
                break;

            case AP:
                prim = true;
                size = ap_sizeW((StgAP *)p);
                break;

            case PAP:
                prim = true;
                size = pap_sizeW((StgPAP *)p);
                break;

            case AP_STACK:
            {
                StgAP_STACK *ap = (StgAP_STACK *)p;
                prim = true;
                size = ap_stack_sizeW(ap);
                searchStackChunk(addrs, (StgPtr)ap->payload,
                                 (StgPtr)ap->payload + ap->size, s_indices);
                break;
            }

            case ARR_WORDS:
                prim = true;
                size = arr_words_sizeW((StgArrBytes*)p);
                break;

            case MUT_ARR_PTRS_CLEAN:
            case MUT_ARR_PTRS_DIRTY:
            case MUT_ARR_PTRS_FROZEN_CLEAN:
            case MUT_ARR_PTRS_FROZEN_DIRTY:
                prim = true;
                size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
                break;

            case SMALL_MUT_ARR_PTRS_CLEAN:
            case SMALL_MUT_ARR_PTRS_DIRTY:
            case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
            case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
                prim = true;
                size = small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs *)p);
                break;

            case TSO:
                prim = true;
                size = sizeofW(StgTSO);
                break;

            case STACK: {
                StgStack *stack = (StgStack*)p;
                prim = true;
                searchStackChunk(addrs, stack->sp,
                                 stack->stack + stack->stack_size, s_indices);
                size = stack_sizeW(stack);
                break;
            }

            case TREC_CHUNK:
                prim = true;
                size = sizeofW(StgTRecChunk);
                break;

            default:
                barf("searchHeapBlocks, unknown object: %d", info->type);
            }

            if (!prim) {
                checkAddress(addrs, info, s_indices);
            }

            p += size;
        }
    }
}

#if defined(PROFILING)
//
// Do not unload the object if the CCS tree refers to a CCS or CC which
// originates in the object.
//
static void searchCostCentres (HashTable *addrs, CostCentreStack *ccs,
                               OCSectionIndices* s_indices)
{
    IndexTable *i;

    checkAddress(addrs, ccs, s_indices);
    checkAddress(addrs, ccs->cc, s_indices);
    for (i = ccs->indexTable; i != NULL; i = i->next) {
        if (!i->back_edge) {
            searchCostCentres(addrs, i->ccs, s_indices);
        }
    }
}
#endif

//
// Check whether we can unload any object code.  This is called at the
// appropriate point during a GC, where all the heap data is nice and
// packed together and we have a linked list of the static objects.
//
// The check involves a complete heap traversal, but you only pay for
// this (a) when you have called unloadObj(), and (b) at a major GC,
// which is much more expensive than the traversal we're doing here.
//
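// The traversal below proceeds in phases: build the sorted section index and
// mark every unloadable object as unreferenced; scan the static objects and
// the revertible CAF list; scan the heap blocks (and GC workspaces) of every
// generation; with profiling, scan the cost-centre tree and CC_LIST; finally,
// free every object that is still unreferenced and keep the rest on
// `unloaded_objects` for the next round.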
void checkUnload (StgClosure *static_objects)
{
    uint32_t g, n;
    HashTable *addrs;
    StgClosure* p;
    const StgInfoTable *info;
    ObjectCode *oc, *prev, *next;
    gen_workspace *ws;
    StgClosure* link;

    if (unloaded_objects == NULL) return;

    ACQUIRE_LOCK(&linker_unloaded_mutex);

    OCSectionIndices *s_indices = buildOCSectionIndices(unloaded_objects);
    // Mark every unloadable object as unreferenced initially
    for (oc = unloaded_objects; oc; oc = oc->next) {
        IF_DEBUG(linker, debugBelch("Checking whether to unload %" PATH_FMT "\n",
                                    oc->fileName));
        oc->referenced = false;
    }

    addrs = allocHashTable();

    for (p = static_objects; p != END_OF_STATIC_OBJECT_LIST; p = link) {
        p = UNTAG_STATIC_LIST_PTR(p);
        checkAddress(addrs, p, s_indices);
        info = get_itbl(p);
        link = *STATIC_LINK(info, p);
    }

    // CAFs on revertible_caf_list are not on static_objects
    for (p = (StgClosure*)revertible_caf_list;
         p != END_OF_CAF_LIST;
         p = ((StgIndStatic *)p)->static_link) {
        p = UNTAG_STATIC_LIST_PTR(p);
        checkAddress(addrs, p, s_indices);
    }

    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        searchHeapBlocks (addrs, generations[g].blocks, s_indices);
        searchHeapBlocks (addrs, generations[g].large_objects, s_indices);

        for (n = 0; n < n_capabilities; n++) {
            ws = &gc_threads[n]->gens[g];
            searchHeapBlocks(addrs, ws->todo_bd, s_indices);
            searchHeapBlocks(addrs, ws->part_list, s_indices);
            searchHeapBlocks(addrs, ws->scavd_list, s_indices);
        }
    }

#if defined(PROFILING)
    /* Traverse the cost centre tree, calling checkAddress on each CCS/CC */
    searchCostCentres(addrs, CCS_MAIN, s_indices);

    /* Also check each cost centre in the CC_LIST */
    CostCentre *cc;
    for (cc = CC_LIST; cc != NULL; cc = cc->link) {
        checkAddress(addrs, cc, s_indices);
    }
#endif /* PROFILING */

    freeOCSectionIndices(s_indices);
    // Look through the unloadable objects, and any object that is still
    // marked as unreferenced can be physically unloaded, because we
    // have no references to it.
    prev = NULL;
    for (oc = unloaded_objects; oc; oc = next) {
        next = oc->next;
        if (oc->referenced == 0) {
            if (prev == NULL) {
                unloaded_objects = oc->next;
            } else {
                prev->next = oc->next;
            }
            IF_DEBUG(linker, debugBelch("Unloading object file %" PATH_FMT "\n",
                                        oc->fileName));
            freeObjectCode(oc);
        } else {
            IF_DEBUG(linker, debugBelch("Object file still in use: %"
                                        PATH_FMT "\n", oc->fileName));
            prev = oc;
        }
    }

    freeHashTable(addrs, NULL);

    RELEASE_LOCK(&linker_unloaded_mutex);
}