Detect overly long GC sync
[ghc.git] / rts / sm / GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Storage.h"
32 #include "RtsUtils.h"
33 #include "Apply.h"
34 #include "Updates.h"
35 #include "Stats.h"
36 #include "Schedule.h"
37 #include "Sanity.h"
38 #include "BlockAlloc.h"
39 #include "ProfHeap.h"
40 #include "Weak.h"
41 #include "Prelude.h"
42 #include "RtsSignals.h"
43 #include "STM.h"
44 #include "Trace.h"
45 #include "RetainerProfile.h"
46 #include "LdvProfile.h"
47 #include "RaiseAsync.h"
48 #include "Stable.h"
49 #include "CheckUnload.h"
50 #include "CNF.h"
51 #include "RtsFlags.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
93 */
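/* Editorial illustration of the list discipline described above; this is a
 * self-contained sketch with hypothetical names, not RTS code.  The real
 * implementation links closures through their static link fields and the
 * STATIC_FLAG_A/B machinery used later in this file.
 */
#if 0
#include <stddef.h>

typedef struct StaticObj_ {
    struct StaticObj_ *static_link;   /* NULL = not on any list yet */
    /* ... payload ... */
} StaticObj;

#define END_OF_LIST ((StaticObj *)1)  /* non-NULL end marker, cf. '1' above */

static StaticObj *static_objects = END_OF_LIST;

static void push_static (StaticObj *p)
{
    if (p->static_link == NULL) {     /* not yet on the list: breaks loops */
        p->static_link = static_objects;
        static_objects = p;           /* consed onto the front (LIFO) */
    }
}
#endif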
94
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
100 uint32_t N;
101 bool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #if defined(DEBUG)
109 uint32_t mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_TVAR,
113 mutlist_TVAR_WATCH_QUEUE,
114 mutlist_TREC_CHUNK,
115 mutlist_TREC_HEADER,
116 mutlist_ATOMIC_INVARIANT,
117 mutlist_INVARIANT_CHECK_QUEUE,
118 mutlist_OTHERS;
119 #endif
120
121 /* Thread-local data for each GC thread
122 */
123 gc_thread **gc_threads = NULL;
124
125 #if !defined(THREADED_RTS)
126 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
127 #endif
128
129 // Number of threads running in *this* GC. Affects how many
130 // per-generation workspace todo lists we have to look in to find work.
131 uint32_t n_gc_threads;
132
133 // For stats:
134 static long copied; // *words* copied & scavenged during this GC
135
136 bool work_stealing;
137
138 uint32_t static_flag = STATIC_FLAG_B;
139 uint32_t prev_static_flag = STATIC_FLAG_A;
140
141 DECLARE_GCT
142
143 /* -----------------------------------------------------------------------------
144 Static function declarations
145 -------------------------------------------------------------------------- */
146
147 static void mark_root (void *user, StgClosure **root);
148 static void prepare_collected_gen (generation *gen);
149 static void prepare_uncollected_gen (generation *gen);
150 static void init_gc_thread (gc_thread *t);
151 static void resize_generations (void);
152 static void resize_nursery (void);
153 static void start_gc_threads (void);
154 static void scavenge_until_all_done (void);
155 static StgWord inc_running (void);
156 static StgWord dec_running (void);
157 static void wakeup_gc_threads (uint32_t me, bool idle_cap[]);
158 static void shutdown_gc_threads (uint32_t me, bool idle_cap[]);
159 static void collect_gct_blocks (void);
160 static void collect_pinned_object_blocks (void);
161 static void heapOverflow (void);
162
163 #if defined(DEBUG)
164 static void gcCAFs (void);
165 #endif
166
167 /* -----------------------------------------------------------------------------
168 The mark stack.
169 -------------------------------------------------------------------------- */
170
171 bdescr *mark_stack_top_bd; // topmost block in the mark stack
172 bdescr *mark_stack_bd; // current block in the mark stack
173 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
174
175 /* -----------------------------------------------------------------------------
176 GarbageCollect: the main entry point to the garbage collector.
177
178    The collect_gen parameter is obtained by calling calcNeeded().
179
180 Locks held: all capabilities are held throughout GarbageCollect().
181 -------------------------------------------------------------------------- */
182
183 void
184 GarbageCollect (uint32_t collect_gen,
185 bool do_heap_census,
186 uint32_t gc_type USED_IF_THREADS,
187 Capability *cap,
188 bool idle_cap[])
189 {
190 bdescr *bd;
191 generation *gen;
192 StgWord live_blocks, live_words, par_max_copied, par_balanced_copied;
193 #if defined(THREADED_RTS)
194 gc_thread *saved_gct;
195 #endif
196 uint32_t g, n;
197
198 // necessary if we stole a callee-saves register for gct:
199 #if defined(THREADED_RTS)
200 saved_gct = gct;
201 #endif
202
203 #if defined(PROFILING)
204 CostCentreStack *save_CCS[n_capabilities];
205 #endif
206
207 ACQUIRE_SM_LOCK;
208
209 #if defined(RTS_USER_SIGNALS)
210 if (RtsFlags.MiscFlags.install_signal_handlers) {
211 // block signals
212 blockUserSignals();
213 }
214 #endif
215
216 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
217 // otherwise adjust the padding in gen_workspace.
218
219 // this is the main thread
220 SET_GCT(gc_threads[cap->no]);
221
222 // tell the stats department that we've started a GC
223 stat_startGC(cap, gct);
224
225 // lock the StablePtr table
226 stableLock();
227
228 #if defined(DEBUG)
229 mutlist_MUTVARS = 0;
230 mutlist_MUTARRS = 0;
231 mutlist_MVARS = 0;
232 mutlist_TVAR = 0;
233 mutlist_TVAR_WATCH_QUEUE = 0;
234 mutlist_TREC_CHUNK = 0;
235 mutlist_TREC_HEADER = 0;
236 mutlist_ATOMIC_INVARIANT = 0;
237 mutlist_INVARIANT_CHECK_QUEUE = 0;
238 mutlist_OTHERS = 0;
239 #endif
240
241 // attribute any costs to CCS_GC
242 #if defined(PROFILING)
243 for (n = 0; n < n_capabilities; n++) {
244 save_CCS[n] = capabilities[n]->r.rCCCS;
245 capabilities[n]->r.rCCCS = CCS_GC;
246 }
247 #endif
248
249 /* Figure out which generation to collect
250 */
251 N = collect_gen;
252 major_gc = (N == RtsFlags.GcFlags.generations-1);
253
254 if (major_gc) {
255 prev_static_flag = static_flag;
256 static_flag =
257 static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
258 }
259
260 #if defined(THREADED_RTS)
261 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
262 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
263 // It's not always a good idea to do load balancing in parallel
264 // GC. In particular, for a parallel program we don't want to
265 // lose locality by moving cached data into another CPU's cache
266 // (this effect can be quite significant).
267 //
268      // We could have a more complex way to determine whether to do
269 // work stealing or not, e.g. it might be a good idea to do it
270 // if the heap is big. For now, we just turn it on or off with
271 // a flag.
272 #endif
273
274 /* Start threads, so they can be spinning up while we finish initialisation.
275 */
276 start_gc_threads();
277
278 #if defined(THREADED_RTS)
279 /* How many threads will be participating in this GC?
280 * We don't try to parallelise minor GCs (unless the user asks for
281 * it with +RTS -gn0), or mark/compact/sweep GC.
282 */
283 if (gc_type == SYNC_GC_PAR) {
284 n_gc_threads = n_capabilities;
285 } else {
286 n_gc_threads = 1;
287 }
288 #else
289 n_gc_threads = 1;
290 #endif
291
292 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
293 N, n_gc_threads);
294
295 #if defined(DEBUG)
296 // check for memory leaks if DEBUG is on
297 memInventory(DEBUG_gc);
298 #endif
299
300 // do this *before* we start scavenging
301 collectFreshWeakPtrs();
302
303 // check sanity *before* GC
304 IF_DEBUG(sanity, checkSanity(false /* before GC */, major_gc));
305
306 // gather blocks allocated using allocatePinned() from each capability
307 // and put them on the g0->large_object list.
308 collect_pinned_object_blocks();
309
310 // Initialise all the generations that we're collecting.
311 for (g = 0; g <= N; g++) {
312 prepare_collected_gen(&generations[g]);
313 }
314 // Initialise all the generations that we're *not* collecting.
315 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
316 prepare_uncollected_gen(&generations[g]);
317 }
318
319 // Prepare this gc_thread
320 init_gc_thread(gct);
321
322 /* Allocate a mark stack if we're doing a major collection.
323 */
324 if (major_gc && oldest_gen->mark) {
325 mark_stack_bd = allocBlock();
326 mark_stack_top_bd = mark_stack_bd;
327 mark_stack_bd->link = NULL;
328 mark_stack_bd->u.back = NULL;
329 mark_sp = mark_stack_bd->start;
330 } else {
331 mark_stack_bd = NULL;
332 mark_stack_top_bd = NULL;
333 mark_sp = NULL;
334 }
335
336 /* -----------------------------------------------------------------------
337 * follow all the roots that we know about:
338 */
339
340 // the main thread is running: this prevents any other threads from
341 // exiting prematurely, so we can start them now.
342 // NB. do this after the mutable lists have been saved above, otherwise
343 // the other GC threads will be writing into the old mutable lists.
344 inc_running();
345 wakeup_gc_threads(gct->thread_index, idle_cap);
346
347 traceEventGcWork(gct->cap);
348
349 // scavenge the capability-private mutable lists. This isn't part
350     // of markCapability() because markCapability() can only
351 // call back into the GC via mark_root() (due to the gct register
352 // variable).
353 if (n_gc_threads == 1) {
354 for (n = 0; n < n_capabilities; n++) {
355 #if defined(THREADED_RTS)
356 scavenge_capability_mut_Lists1(capabilities[n]);
357 #else
358 scavenge_capability_mut_lists(capabilities[n]);
359 #endif
360 }
361 } else {
362 scavenge_capability_mut_lists(gct->cap);
363 for (n = 0; n < n_capabilities; n++) {
364 if (idle_cap[n]) {
365 markCapability(mark_root, gct, capabilities[n],
366 true/*don't mark sparks*/);
367 scavenge_capability_mut_lists(capabilities[n]);
368 }
369 }
370 }
371
372 // follow roots from the CAF list (used by GHCi)
373 gct->evac_gen_no = 0;
374 markCAFs(mark_root, gct);
375
376 // follow all the roots that the application knows about.
377 gct->evac_gen_no = 0;
378 if (n_gc_threads == 1) {
379 for (n = 0; n < n_capabilities; n++) {
380 markCapability(mark_root, gct, capabilities[n],
381 true/*don't mark sparks*/);
382 }
383 } else {
384 markCapability(mark_root, gct, cap, true/*don't mark sparks*/);
385 }
386
387 markScheduler(mark_root, gct);
388
389 #if defined(RTS_USER_SIGNALS)
390 // mark the signal handlers (signals should be already blocked)
391 markSignalHandlers(mark_root, gct);
392 #endif
393
394 // Mark the weak pointer list, and prepare to detect dead weak pointers.
395 markWeakPtrList();
396 initWeakForGC();
397
398 // Mark the stable pointer table.
399 markStableTables(mark_root, gct);
400
401 /* -------------------------------------------------------------------------
402 * Repeatedly scavenge all the areas we know about until there's no
403 * more scavenging to be done.
404 */
405 for (;;)
406 {
407 scavenge_until_all_done();
408 // The other threads are now stopped. We might recurse back to
409 // here, but from now on this is the only thread.
410
411 // must be last... invariant is that everything is fully
412 // scavenged at this point.
413 if (traverseWeakPtrList()) { // returns true if evaced something
414 inc_running();
415 continue;
416 }
417
418 // If we get to here, there's really nothing left to do.
419 break;
420 }
421
422 shutdown_gc_threads(gct->thread_index, idle_cap);
423
424 // Now see which stable names are still alive.
425 gcStableTables();
426
427 #if defined(THREADED_RTS)
428 if (n_gc_threads == 1) {
429 for (n = 0; n < n_capabilities; n++) {
430 pruneSparkQueue(capabilities[n]);
431 }
432 } else {
433 for (n = 0; n < n_capabilities; n++) {
434 if (n == cap->no || idle_cap[n]) {
435 pruneSparkQueue(capabilities[n]);
436 }
437 }
438 }
439 #endif
440
441 #if defined(PROFILING)
442 // We call processHeapClosureForDead() on every closure destroyed during
443 // the current garbage collection, so we invoke LdvCensusForDead().
444 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
445 || RtsFlags.ProfFlags.bioSelector != NULL) {
446 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
447 LdvCensusForDead(N);
448 ACQUIRE_SM_LOCK;
449 }
450 #endif
451
452 // NO MORE EVACUATION AFTER THIS POINT!
453
454 // Finally: compact or sweep the oldest generation.
455 if (major_gc && oldest_gen->mark) {
456 if (oldest_gen->compact)
457 compact(gct->scavenged_static_objects);
458 else
459 sweep(oldest_gen);
460 }
461
462 copied = 0;
463 par_max_copied = 0;
464 par_balanced_copied = 0;
465 {
466 uint32_t i;
467 uint64_t par_balanced_copied_acc = 0;
468
469 for (i=0; i < n_gc_threads; i++) {
470 copied += gc_threads[i]->copied;
471 }
472 for (i=0; i < n_gc_threads; i++) {
473 if (n_gc_threads > 1) {
474 debugTrace(DEBUG_gc,"thread %d:", i);
475 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
476 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
477 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
478 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
479 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
480 }
481 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
482 par_balanced_copied_acc +=
483 stg_min(n_gc_threads * gc_threads[i]->copied, copied);
484 }
485 if (n_gc_threads == 1) {
486 par_max_copied = 0;
487 par_balanced_copied = 0;
488 }
489 else
490 {
491 // See Note [Work Balance] for an explanation of this computation
492 par_balanced_copied =
493 (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
494 (n_gc_threads - 1);
495 }
496 }
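    /* Editorial worked example of the formula above: with 2 GC threads that
     * copied 60 and 40 words respectively, copied = 100 and
     * par_balanced_copied_acc = min(2*60,100) + min(2*40,100) = 180, so
     * par_balanced_copied = (180 - 100 + 0) / 1 = 80.  A perfect 50/50 split
     * gives 100 (= copied) and a completely serial GC gives 0, so the value
     * measures how evenly the copying work was spread across threads.
     */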
497
498 // Run through all the generations and tidy up.
499 // We're going to:
500 // - count the amount of "live" data (live_words, live_blocks)
501 // - count the amount of "copied" data in this GC (copied)
502 // - free from-space
503 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
504 //
505 live_words = 0;
506 live_blocks = 0;
507
508 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
509
510 if (g == N) {
511 generations[g].collections++; // for stats
512 if (n_gc_threads > 1) generations[g].par_collections++;
513 }
514
515 // Count the mutable list as bytes "copied" for the purposes of
516 // stats. Every mutable list is copied during every GC.
517 if (g > 0) {
518 W_ mut_list_size = 0;
519 for (n = 0; n < n_capabilities; n++) {
520 mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
521 }
522 copied += mut_list_size;
523
524 debugTrace(DEBUG_gc,
525 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
526 (unsigned long)(mut_list_size * sizeof(W_)),
527 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
528 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
529 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
530 mutlist_ATOMIC_INVARIANT,
531 mutlist_INVARIANT_CHECK_QUEUE,
532 mutlist_OTHERS);
533 }
534
535 bdescr *next, *prev;
536 gen = &generations[g];
537
538 // for generations we collected...
539 if (g <= N) {
540
541 /* free old memory and shift to-space into from-space for all
542 * the collected generations (except the allocation area). These
543              * freed blocks will probably be quickly recycled.
544 */
545 if (gen->mark)
546 {
547 // tack the new blocks on the end of the existing blocks
548 if (gen->old_blocks != NULL) {
549
550 prev = NULL;
551 for (bd = gen->old_blocks; bd != NULL; bd = next) {
552
553 next = bd->link;
554
555 if (!(bd->flags & BF_MARKED))
556 {
557 if (prev == NULL) {
558 gen->old_blocks = next;
559 } else {
560 prev->link = next;
561 }
562 freeGroup(bd);
563 gen->n_old_blocks--;
564 }
565 else
566 {
567 gen->n_words += bd->free - bd->start;
568
569 // NB. this step might not be compacted next
570 // time, so reset the BF_MARKED flags.
571 // They are set before GC if we're going to
572 // compact. (search for BF_MARKED above).
573 bd->flags &= ~BF_MARKED;
574
575 // between GCs, all blocks in the heap except
576 // for the nursery have the BF_EVACUATED flag set.
577 bd->flags |= BF_EVACUATED;
578
579 prev = bd;
580 }
581 }
582
583 if (prev != NULL) {
584 prev->link = gen->blocks;
585 gen->blocks = gen->old_blocks;
586 }
587 }
588 // add the new blocks to the block tally
589 gen->n_blocks += gen->n_old_blocks;
590 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
591 ASSERT(countOccupied(gen->blocks) == gen->n_words);
592 }
593         else // not compacted
594 {
595 freeChain(gen->old_blocks);
596 }
597
598 gen->old_blocks = NULL;
599 gen->n_old_blocks = 0;
600
601           /* LARGE OBJECTS. The currently live large objects are chained on
602            * scavenged_large_objects, having been moved during garbage
603 * collection from large_objects. Any objects left on the
604 * large_objects list are therefore dead, so we free them here.
605 */
606 freeChain(gen->large_objects);
607 gen->large_objects = gen->scavenged_large_objects;
608 gen->n_large_blocks = gen->n_scavenged_large_blocks;
609 gen->n_large_words = countOccupied(gen->large_objects);
610 gen->n_new_large_words = 0;
611
612 /* COMPACT_NFDATA. The currently live compacts are chained
613            * to live_compact_objects, much like large objects. Any
614            * objects left on the compact_objects list are therefore dead.
615            *
616            * We don't run a simple freeChain because we want to give the
617            * CNF module a chance to free memory that freeChain would
618            * not see (namely blocks appended to a CNF through a compactResize).
619 *
620 * See Note [Compact Normal Forms] for details.
621 */
622 for (bd = gen->compact_objects; bd; bd = next) {
623 next = bd->link;
624 compactFree(((StgCompactNFDataBlock*)bd->start)->owner);
625 }
626 gen->compact_objects = gen->live_compact_objects;
627 gen->n_compact_blocks = gen->n_live_compact_blocks;
628 }
629 else // for generations > N
630 {
631 /* For older generations, we need to append the
632            * scavenged_large_objects list (i.e. large objects that have been
633            * promoted during this GC) to the large_objects list for that generation.
634 */
635 for (bd = gen->scavenged_large_objects; bd; bd = next) {
636 next = bd->link;
637 dbl_link_onto(bd, &gen->large_objects);
638 gen->n_large_words += bd->free - bd->start;
639 }
640
641 // And same for compacts
642 for (bd = gen->live_compact_objects; bd; bd = next) {
643 next = bd->link;
644 dbl_link_onto(bd, &gen->compact_objects);
645 }
646
647 // add the new blocks we promoted during this GC
648 gen->n_large_blocks += gen->n_scavenged_large_blocks;
649 gen->n_compact_blocks += gen->n_live_compact_blocks;
650 }
651
652 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
653 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
654         // We cannot run the same assertions on compact objects, because there
655         // is memory "the GC doesn't see" (directly) which is nevertheless
656         // accounted for in gen->n_compact_blocks
657
658 gen->scavenged_large_objects = NULL;
659 gen->n_scavenged_large_blocks = 0;
660 gen->live_compact_objects = NULL;
661 gen->n_live_compact_blocks = 0;
662
663 // Count "live" data
664 live_words += genLiveWords(gen);
665 live_blocks += genLiveBlocks(gen);
666
667 // add in the partial blocks in the gen_workspaces
668 {
669 uint32_t i;
670 for (i = 0; i < n_capabilities; i++) {
671 live_words += gcThreadLiveWords(i, gen->no);
672 live_blocks += gcThreadLiveBlocks(i, gen->no);
673 }
674 }
675 } // for all generations
676
677 // update the max size of older generations after a major GC
678 resize_generations();
679
680 // Free the mark stack.
681 if (mark_stack_top_bd != NULL) {
682 debugTrace(DEBUG_gc, "mark stack: %d blocks",
683 countBlocks(mark_stack_top_bd));
684 freeChain(mark_stack_top_bd);
685 }
686
687 // Free any bitmaps.
688 for (g = 0; g <= N; g++) {
689 gen = &generations[g];
690 if (gen->bitmap != NULL) {
691 freeGroup(gen->bitmap);
692 gen->bitmap = NULL;
693 }
694 }
695
696 resize_nursery();
697
698 resetNurseries();
699
700 // mark the garbage collected CAFs as dead
701 #if defined(DEBUG)
702 if (major_gc) { gcCAFs(); }
703 #endif
704
705 // Update the stable pointer hash table.
706 updateStableTables(major_gc);
707
708 // unlock the StablePtr table. Must be before scheduleFinalizers(),
709 // because a finalizer may call hs_free_fun_ptr() or
710 // hs_free_stable_ptr(), both of which access the StablePtr table.
711 stableUnlock();
712
713 // Must be after stableUnlock(), because it might free stable ptrs.
714 if (major_gc) {
715 checkUnload (gct->scavenged_static_objects);
716 }
717
718 #if defined(PROFILING)
719 // resetStaticObjectForRetainerProfiling() must be called before
720 // zeroing below.
721
722 // ToDo: fix the gct->scavenged_static_objects below
723 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
724 #endif
725
726 // Start any pending finalizers. Must be after
727 // updateStableTables() and stableUnlock() (see #4221).
728 RELEASE_SM_LOCK;
729 scheduleFinalizers(cap, dead_weak_ptr_list);
730 ACQUIRE_SM_LOCK;
731
732 // check sanity after GC
733 // before resurrectThreads(), because that might overwrite some
734 // closures, which will cause problems with THREADED where we don't
735 // fill slop.
736 IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc));
737
738 // If a heap census is due, we need to do it before
739 // resurrectThreads(), for the same reason as checkSanity above:
740 // resurrectThreads() will overwrite some closures and leave slop
741 // behind.
742 if (do_heap_census) {
743 debugTrace(DEBUG_sched, "performing heap census");
744 RELEASE_SM_LOCK;
745 heapCensus(gct->gc_start_cpu);
746 ACQUIRE_SM_LOCK;
747 }
748
749 // send exceptions to any threads which were about to die
750 RELEASE_SM_LOCK;
751 resurrectThreads(resurrected_threads);
752 ACQUIRE_SM_LOCK;
753
754 if (major_gc) {
755 W_ need, got;
756 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
757 got = mblocks_allocated;
758 /* If the amount of data remains constant, next major GC we'll
759 require (F+1)*need. We leave (F+2)*need in order to reduce
760 repeated deallocation and reallocation. */
761 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
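        /* Editorial example: with the default oldGenFactor F = 2 and a heap
         * currently occupying 100 megablocks, 'need' becomes (2+2)*100 = 400
         * megablocks; anything mapped beyond that (subject to maxHeapSize
         * below) is handed back via returnMemoryToOS(). */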
762 /* But with a large nursery, the above estimate might exceed
763 * maxHeapSize. A large resident set size might make the OS
764 * kill this process, or swap unnecessarily. Therefore we
765 * ensure that our estimate does not exceed maxHeapSize.
766 */
767 if (RtsFlags.GcFlags.maxHeapSize != 0) {
768 W_ max = BLOCKS_TO_MBLOCKS(RtsFlags.GcFlags.maxHeapSize);
769 if (need > max) {
770 need = max;
771 }
772 }
773 if (got > need) {
774 returnMemoryToOS(got - need);
775 }
776 }
777
778 // extra GC trace info
779 IF_DEBUG(gc, statDescribeGens());
780
781 #if defined(DEBUG)
782 // symbol-table based profiling
783 /* heapCensus(to_blocks); */ /* ToDo */
784 #endif
785
786 // restore enclosing cost centre
787 #if defined(PROFILING)
788 for (n = 0; n < n_capabilities; n++) {
789 capabilities[n]->r.rCCCS = save_CCS[n];
790 }
791 #endif
792
793 #if defined(DEBUG)
794 // check for memory leaks if DEBUG is on
795 memInventory(DEBUG_gc);
796 #endif
797
798 // ok, GC over: tell the stats department what happened.
799 stat_endGC(cap, gct, live_words, copied,
800 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
801 N, n_gc_threads, par_max_copied, par_balanced_copied);
802
803 #if defined(RTS_USER_SIGNALS)
804 if (RtsFlags.MiscFlags.install_signal_handlers) {
805 // unblock signals again
806 unblockUserSignals();
807 }
808 #endif
809
810 RELEASE_SM_LOCK;
811
812 SET_GCT(saved_gct);
813 }
814
815 /* -----------------------------------------------------------------------------
816 Heap overflow is indicated by setting a flag that the caller of
817 GarbageCollect can check. (not ideal, TODO: better)
818 -------------------------------------------------------------------------- */
819
820 static void heapOverflow(void)
821 {
822 heap_overflow = true;
823 }
824
825 /* -----------------------------------------------------------------------------
826 Initialise the gc_thread structures.
827 -------------------------------------------------------------------------- */
828
829 static void
830 new_gc_thread (uint32_t n, gc_thread *t)
831 {
832 uint32_t g;
833 gen_workspace *ws;
834
835 t->cap = capabilities[n];
836
837 #if defined(THREADED_RTS)
838 t->id = 0;
839 initSpinLock(&t->gc_spin);
840 initSpinLock(&t->mut_spin);
841 ACQUIRE_SPIN_LOCK(&t->gc_spin);
842 ACQUIRE_SPIN_LOCK(&t->mut_spin);
843     t->wakeup = GC_THREAD_INACTIVE; // starts inactive, so we can wait for the
844                                     // thread to start up; see wakeup_gc_threads
845 #endif
846
847 t->thread_index = n;
848 t->free_blocks = NULL;
849 t->gc_count = 0;
850
851 init_gc_thread(t);
852
853 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
854 {
855 ws = &t->gens[g];
856 ws->gen = &generations[g];
857 ASSERT(g == ws->gen->no);
858 ws->my_gct = t;
859
860 // We want to call
861 // alloc_todo_block(ws,0);
862 // but can't, because it uses gct which isn't set up at this point.
863 // Hence, allocate a block for todo_bd manually:
864 {
865 bdescr *bd = allocBlockOnNode(capNoToNumaNode(n));
866 // no lock, locks aren't initialised yet
867 initBdescr(bd, ws->gen, ws->gen->to);
868 bd->flags = BF_EVACUATED;
869 bd->u.scan = bd->free = bd->start;
870
871 ws->todo_bd = bd;
872 ws->todo_free = bd->free;
873 ws->todo_lim = bd->start + BLOCK_SIZE_W;
874 }
875
876 ws->todo_q = newWSDeque(128);
877 ws->todo_overflow = NULL;
878 ws->n_todo_overflow = 0;
879 ws->todo_large_objects = NULL;
880
881 ws->part_list = NULL;
882 ws->n_part_blocks = 0;
883 ws->n_part_words = 0;
884
885 ws->scavd_list = NULL;
886 ws->n_scavd_blocks = 0;
887 ws->n_scavd_words = 0;
888 }
889 }
890
891
892 void
893 initGcThreads (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
894 {
895 #if defined(THREADED_RTS)
896 uint32_t i;
897
898 if (from > 0) {
899 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
900 "initGcThreads");
901 } else {
902 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
903 "initGcThreads");
904 }
905
906 for (i = from; i < to; i++) {
907 gc_threads[i] =
908 stgMallocBytes(sizeof(gc_thread) +
909 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
910 "alloc_gc_threads");
911
912 new_gc_thread(i, gc_threads[i]);
913 }
914 #else
915 ASSERT(from == 0 && to == 1);
916 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
917 gc_threads[0] = gct;
918 new_gc_thread(0,gc_threads[0]);
919 #endif
920 }
921
922 void
923 freeGcThreads (void)
924 {
925 uint32_t g;
926 if (gc_threads != NULL) {
927 #if defined(THREADED_RTS)
928 uint32_t i;
929 for (i = 0; i < n_capabilities; i++) {
930 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
931 {
932 freeWSDeque(gc_threads[i]->gens[g].todo_q);
933 }
934 stgFree (gc_threads[i]);
935 }
936 stgFree (gc_threads);
937 #else
938 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
939 {
940 freeWSDeque(gc_threads[0]->gens[g].todo_q);
941 }
942 stgFree (gc_threads);
943 #endif
944 gc_threads = NULL;
945 }
946 }
947
948 /* ----------------------------------------------------------------------------
949 Start GC threads
950 ------------------------------------------------------------------------- */
951
952 static volatile StgWord gc_running_threads;
953
954 static StgWord
955 inc_running (void)
956 {
957 StgWord new;
958 new = atomic_inc(&gc_running_threads, 1);
959 ASSERT(new <= n_gc_threads);
960 return new;
961 }
962
963 static StgWord
964 dec_running (void)
965 {
966 ASSERT(gc_running_threads != 0);
967 return atomic_dec(&gc_running_threads);
968 }
969
970 static bool
971 any_work (void)
972 {
973 int g;
974 gen_workspace *ws;
975
976 gct->any_work++;
977
978 write_barrier();
979
980 // scavenge objects in compacted generation
981 if (mark_stack_bd != NULL && !mark_stack_empty()) {
982 return true;
983 }
984
985 // Check for global work in any gen. We don't need to check for
986 // local work, because we have already exited scavenge_loop(),
987 // which means there is no local work for this thread.
988 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
989 ws = &gct->gens[g];
990 if (ws->todo_large_objects) return true;
991 if (!looksEmptyWSDeque(ws->todo_q)) return true;
992 if (ws->todo_overflow) return true;
993 }
994
995 #if defined(THREADED_RTS)
996 if (work_stealing) {
997 uint32_t n;
998 // look for work to steal
999 for (n = 0; n < n_gc_threads; n++) {
1000 if (n == gct->thread_index) continue;
1001 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
1002 ws = &gc_threads[n]->gens[g];
1003 if (!looksEmptyWSDeque(ws->todo_q)) return true;
1004 }
1005 }
1006 }
1007 #endif
1008
1009 gct->no_work++;
1010 #if defined(THREADED_RTS)
1011 yieldThread();
1012 #endif
1013
1014 return false;
1015 }
1016
1017 static void
1018 scavenge_until_all_done (void)
1019 {
1020 DEBUG_ONLY( uint32_t r );
1021
1022
1023 loop:
1024 #if defined(THREADED_RTS)
1025 if (n_gc_threads > 1) {
1026 scavenge_loop();
1027 } else {
1028 scavenge_loop1();
1029 }
1030 #else
1031 scavenge_loop();
1032 #endif
1033
1034 collect_gct_blocks();
1035
1036 // scavenge_loop() only exits when there's no work to do
1037
1038 #if defined(DEBUG)
1039 r = dec_running();
1040 #else
1041 dec_running();
1042 #endif
1043
1044 traceEventGcIdle(gct->cap);
1045
1046 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1047
1048 while (gc_running_threads != 0) {
1049 // usleep(1);
1050 if (any_work()) {
1051 inc_running();
1052 traceEventGcWork(gct->cap);
1053 goto loop;
1054 }
1055 // any_work() does not remove the work from the queue, it
1056 // just checks for the presence of work. If we find any,
1057 // then we increment gc_running_threads and go back to
1058 // scavenge_loop() to perform any pending work.
1059 }
1060
1061 traceEventGcDone(gct->cap);
1062 }
1063
1064 #if defined(THREADED_RTS)
1065
1066 void
1067 gcWorkerThread (Capability *cap)
1068 {
1069 gc_thread *saved_gct;
1070
1071 // necessary if we stole a callee-saves register for gct:
1072 saved_gct = gct;
1073
1074 SET_GCT(gc_threads[cap->no]);
1075 gct->id = osThreadId();
1076
1077 // Wait until we're told to wake up
1078 RELEASE_SPIN_LOCK(&gct->mut_spin);
1079 // yieldThread();
1080 // Strangely, adding a yieldThread() here makes the CPU time
1081 // measurements more accurate on Linux, perhaps because it syncs
1082 // the CPU time across the multiple cores. Without this, CPU time
1083 // is heavily skewed towards GC rather than MUT.
1084 gct->wakeup = GC_THREAD_STANDING_BY;
1085 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1086 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1087
1088 init_gc_thread(gct);
1089
1090 traceEventGcWork(gct->cap);
1091
1092 // Every thread evacuates some roots.
1093 gct->evac_gen_no = 0;
1094 markCapability(mark_root, gct, cap, true/*prune sparks*/);
1095 scavenge_capability_mut_lists(cap);
1096
1097 scavenge_until_all_done();
1098
1099 #if defined(THREADED_RTS)
1100 // Now that the whole heap is marked, we discard any sparks that
1101 // were found to be unreachable. The main GC thread is currently
1102 // marking heap reachable via weak pointers, so it is
1103 // non-deterministic whether a spark will be retained if it is
1104 // only reachable via weak pointers. To fix this problem would
1105 // require another GC barrier, which is too high a price.
1106 pruneSparkQueue(cap);
1107 #endif
1108
1109 // Wait until we're told to continue
1110 RELEASE_SPIN_LOCK(&gct->gc_spin);
1111 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1112 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1113 gct->thread_index);
1114 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1115 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1116
1117 SET_GCT(saved_gct);
1118 }
1119
1120 #endif
1121
1122 #if defined(THREADED_RTS)
1123
1124 void
1125 waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1126 {
1127 const uint32_t n_threads = n_capabilities;
1128 const uint32_t me = cap->no;
1129 uint32_t i, j;
1130 bool retry = true;
1131 Time t0, t1, t2;
1132
1133 t0 = t1 = t2 = getProcessElapsedTime();
1134
1135 while(retry) {
1136 for (i=0; i < n_threads; i++) {
1137 if (i == me || idle_cap[i]) continue;
1138 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1139 prodCapability(capabilities[i], cap->running_task);
1140 }
1141 }
1142 for (j=0; j < 10; j++) {
1143 retry = false;
1144 for (i=0; i < n_threads; i++) {
1145 if (i == me || idle_cap[i]) continue;
1146 write_barrier();
1147 interruptCapability(capabilities[i]);
1148 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1149 retry = true;
1150 }
1151 }
1152 if (!retry) break;
1153 yieldThread();
1154 }
1155
1156 t2 = getProcessElapsedTime();
1157 if (RtsFlags.GcFlags.longGCSync != 0 &&
1158 t2 - t1 > RtsFlags.GcFlags.longGCSync) {
1159 /* call this every longGCSync of delay */
1160 rtsConfig.longGCSync(cap->no, t2 - t0);
1161 t1 = t2;
1162 }
1163 }
1164
1165 if (RtsFlags.GcFlags.longGCSync != 0 &&
1166 t2 - t0 > RtsFlags.GcFlags.longGCSync) {
1167 rtsConfig.longGCSyncEnd(t2 - t0);
1168 }
1169 }
1170
1171 #endif // THREADED_RTS
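/* Editorial sketch of how an embedding program might consume the
 * longGCSync / longGCSyncEnd hooks invoked in waitForGcThreads() above.
 * The field names are taken from the rtsConfig calls in this file; the
 * callback signatures are inferred from those call sites, and the reporting
 * functions and messages below are hypothetical -- check RtsAPI.h for the
 * authoritative RtsConfig layout.
 */
#if 0
#include <stdio.h>
#include "Rts.h"   /* RtsConfig, defaultRtsConfig, hs_init_ghc(), Time */

static void report_long_sync (uint32_t this_cap, Time elapsed)
{
    /* Called repeatedly (every longGCSync of delay) while capability
     * this_cap is still waiting for the other capabilities to stop. */
    fprintf(stderr, "cap %u: GC sync still pending after %lld RTS time units\n",
            (unsigned)this_cap, (long long)elapsed);
}

static void report_long_sync_end (Time elapsed)
{
    fprintf(stderr, "GC sync completed after %lld RTS time units\n",
            (long long)elapsed);
}

int main (int argc, char *argv[])
{
    RtsConfig conf = defaultRtsConfig;
    conf.longGCSync    = report_long_sync;      /* fires once the sync has taken
                                                   longer than GcFlags.longGCSync */
    conf.longGCSyncEnd = report_long_sync_end;
    hs_init_ghc(&argc, &argv, conf);
    /* ... call into compiled Haskell code here ... */
    hs_exit();
    return 0;
}
#endif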
1172
1173 static void
1174 start_gc_threads (void)
1175 {
1176 #if defined(THREADED_RTS)
1177 gc_running_threads = 0;
1178 #endif
1179 }
1180
1181 static void
1182 wakeup_gc_threads (uint32_t me USED_IF_THREADS,
1183 bool idle_cap[] USED_IF_THREADS)
1184 {
1185 #if defined(THREADED_RTS)
1186 uint32_t i;
1187
1188 if (n_gc_threads == 1) return;
1189
1190 for (i=0; i < n_gc_threads; i++) {
1191 if (i == me || idle_cap[i]) continue;
1192 inc_running();
1193 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1194 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY)
1195 barf("wakeup_gc_threads");
1196
1197 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1198 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1199 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1200 }
1201 #endif
1202 }
1203
1204 // After GC is complete, we must wait for all GC threads to reach the
1205 // WAITING_TO_CONTINUE state, otherwise they may still be executing inside
1206 // any_work(), and may even remain awake until the next GC starts.
1207 static void
1208 shutdown_gc_threads (uint32_t me USED_IF_THREADS,
1209 bool idle_cap[] USED_IF_THREADS)
1210 {
1211 #if defined(THREADED_RTS)
1212 uint32_t i;
1213
1214 if (n_gc_threads == 1) return;
1215
1216 for (i=0; i < n_gc_threads; i++) {
1217 if (i == me || idle_cap[i]) continue;
1218 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
1219 busy_wait_nop();
1220 write_barrier();
1221 }
1222 }
1223 #endif
1224 }
1225
1226 #if defined(THREADED_RTS)
1227 void
1228 releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1229 {
1230 const uint32_t n_threads = n_capabilities;
1231 const uint32_t me = cap->no;
1232 uint32_t i;
1233 for (i=0; i < n_threads; i++) {
1234 if (i == me || idle_cap[i]) continue;
1235 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1236 barf("releaseGCThreads");
1237
1238 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1239 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1240 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1241 }
1242 }
1243 #endif
1244
1245 /* ----------------------------------------------------------------------------
1246 Initialise a generation that is to be collected
1247 ------------------------------------------------------------------------- */
1248
1249 static void
1250 prepare_collected_gen (generation *gen)
1251 {
1252 uint32_t i, g, n;
1253 gen_workspace *ws;
1254 bdescr *bd, *next;
1255
1256 // Throw away the current mutable list. Invariant: the mutable
1257 // list always has at least one block; this means we can avoid a
1258 // check for NULL in recordMutable().
1259 g = gen->no;
1260 if (g != 0) {
1261 for (i = 0; i < n_capabilities; i++) {
1262 freeChain(capabilities[i]->mut_lists[g]);
1263 capabilities[i]->mut_lists[g] =
1264 allocBlockOnNode(capNoToNumaNode(i));
1265 }
1266 }
1267
1268 gen = &generations[g];
1269 ASSERT(gen->no == g);
1270
1271     // we'll construct a new list of threads in this generation
1272     // during GC; throw away the current list.
1273 gen->old_threads = gen->threads;
1274 gen->threads = END_TSO_QUEUE;
1275
1276 // deprecate the existing blocks
1277 gen->old_blocks = gen->blocks;
1278 gen->n_old_blocks = gen->n_blocks;
1279 gen->blocks = NULL;
1280 gen->n_blocks = 0;
1281 gen->n_words = 0;
1282 gen->live_estimate = 0;
1283
1284 // initialise the large object queues.
1285 ASSERT(gen->scavenged_large_objects == NULL);
1286 ASSERT(gen->n_scavenged_large_blocks == 0);
1287 ASSERT(gen->live_compact_objects == NULL);
1288 ASSERT(gen->n_live_compact_blocks == 0);
1289
1290 // grab all the partial blocks stashed in the gc_thread workspaces and
1291 // move them to the old_blocks list of this gen.
1292 for (n = 0; n < n_capabilities; n++) {
1293 ws = &gc_threads[n]->gens[gen->no];
1294
1295 for (bd = ws->part_list; bd != NULL; bd = next) {
1296 next = bd->link;
1297 bd->link = gen->old_blocks;
1298 gen->old_blocks = bd;
1299 gen->n_old_blocks += bd->blocks;
1300 }
1301 ws->part_list = NULL;
1302 ws->n_part_blocks = 0;
1303 ws->n_part_words = 0;
1304
1305 ASSERT(ws->scavd_list == NULL);
1306 ASSERT(ws->n_scavd_blocks == 0);
1307 ASSERT(ws->n_scavd_words == 0);
1308
1309 if (ws->todo_free != ws->todo_bd->start) {
1310 ws->todo_bd->free = ws->todo_free;
1311 ws->todo_bd->link = gen->old_blocks;
1312 gen->old_blocks = ws->todo_bd;
1313 gen->n_old_blocks += ws->todo_bd->blocks;
1314 alloc_todo_block(ws,0); // always has one block.
1315 }
1316 }
1317
1318 // mark the small objects as from-space
1319 for (bd = gen->old_blocks; bd; bd = bd->link) {
1320 bd->flags &= ~BF_EVACUATED;
1321 }
1322
1323 // mark the large objects as from-space
1324 for (bd = gen->large_objects; bd; bd = bd->link) {
1325 bd->flags &= ~BF_EVACUATED;
1326 }
1327
1328 // mark the compact objects as from-space
1329 for (bd = gen->compact_objects; bd; bd = bd->link) {
1330 bd->flags &= ~BF_EVACUATED;
1331 }
1332
1333 // for a compacted generation, we need to allocate the bitmap
1334 if (gen->mark) {
1335 StgWord bitmap_size; // in bytes
1336 bdescr *bitmap_bdescr;
1337 StgWord *bitmap;
1338
1339 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
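        // (editorial note) i.e. one mark bit per word of from-space:
        // n_old_blocks * BLOCK_SIZE / sizeof(W_) words, divided by
        // BITS_PER_BYTE to convert bits to bytes.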
1340
1341 if (bitmap_size > 0) {
1342 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1343 / BLOCK_SIZE);
1344 gen->bitmap = bitmap_bdescr;
1345 bitmap = bitmap_bdescr->start;
1346
1347 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1348 bitmap_size, bitmap);
1349
1350 // don't forget to fill it with zeros!
1351 memset(bitmap, 0, bitmap_size);
1352
1353             // For each block in this generation, point to its bitmap from the
1354             // block descriptor.
1355 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1356 bd->u.bitmap = bitmap;
1357 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1358
1359 // Also at this point we set the BF_MARKED flag
1360 // for this block. The invariant is that
1361 // BF_MARKED is always unset, except during GC
1362 // when it is set on those blocks which will be
1363 // compacted.
1364 if (!(bd->flags & BF_FRAGMENTED)) {
1365 bd->flags |= BF_MARKED;
1366 }
1367
1368 // BF_SWEPT should be marked only for blocks that are being
1369 // collected in sweep()
1370 bd->flags &= ~BF_SWEPT;
1371 }
1372 }
1373 }
1374 }
1375
1376
1377 /* ----------------------------------------------------------------------------
1378 Save the mutable lists in saved_mut_lists
1379 ------------------------------------------------------------------------- */
1380
1381 static void
1382 stash_mut_list (Capability *cap, uint32_t gen_no)
1383 {
1384 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1385 cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
1386 }
1387
1388 /* ----------------------------------------------------------------------------
1389 Initialise a generation that is *not* to be collected
1390 ------------------------------------------------------------------------- */
1391
1392 static void
1393 prepare_uncollected_gen (generation *gen)
1394 {
1395 uint32_t i;
1396
1397
1398 ASSERT(gen->no > 0);
1399
1400 // save the current mutable lists for this generation, and
1401 // allocate a fresh block for each one. We'll traverse these
1402 // mutable lists as roots early on in the GC.
1403 for (i = 0; i < n_capabilities; i++) {
1404 stash_mut_list(capabilities[i], gen->no);
1405 }
1406
1407 ASSERT(gen->scavenged_large_objects == NULL);
1408 ASSERT(gen->n_scavenged_large_blocks == 0);
1409 }
1410
1411 /* -----------------------------------------------------------------------------
1412 Collect the completed blocks from a GC thread and attach them to
1413 the generation.
1414 -------------------------------------------------------------------------- */
1415
1416 static void
1417 collect_gct_blocks (void)
1418 {
1419 uint32_t g;
1420 gen_workspace *ws;
1421 bdescr *bd, *prev;
1422
1423 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1424 ws = &gct->gens[g];
1425
1426 // there may still be a block attached to ws->todo_bd;
1427 // leave it there to use next time.
1428
1429 if (ws->scavd_list != NULL) {
1430 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1431
1432 ASSERT(gct->scan_bd == NULL);
1433 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1434
1435 prev = NULL;
1436 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1437 prev = bd;
1438 }
1439 if (prev != NULL) {
1440 prev->link = ws->gen->blocks;
1441 ws->gen->blocks = ws->scavd_list;
1442 }
1443 ws->gen->n_blocks += ws->n_scavd_blocks;
1444 ws->gen->n_words += ws->n_scavd_words;
1445
1446 ws->scavd_list = NULL;
1447 ws->n_scavd_blocks = 0;
1448 ws->n_scavd_words = 0;
1449
1450 RELEASE_SPIN_LOCK(&ws->gen->sync);
1451 }
1452 }
1453 }
1454
1455 /* -----------------------------------------------------------------------------
1456 During mutation, any blocks that are filled by allocatePinned() are
1457 stashed on the local pinned_object_blocks list, to avoid needing to
1458 take a global lock. Here we collect those blocks from the
1459 cap->pinned_object_blocks lists and put them on the
1460    main g0->large_objects list.
1464 -------------------------------------------------------------------------- */
1465
1466 static void
1467 collect_pinned_object_blocks (void)
1468 {
1469 uint32_t n;
1470 bdescr *bd, *prev;
1471
1472 for (n = 0; n < n_capabilities; n++) {
1473 prev = NULL;
1474 for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1475 prev = bd;
1476 }
1477 if (prev != NULL) {
1478 prev->link = g0->large_objects;
1479 if (g0->large_objects != NULL) {
1480 g0->large_objects->u.back = prev;
1481 }
1482 g0->large_objects = capabilities[n]->pinned_object_blocks;
1483 capabilities[n]->pinned_object_blocks = 0;
1484 }
1485 }
1486 }
1487
1488 /* -----------------------------------------------------------------------------
1489 Initialise a gc_thread before GC
1490 -------------------------------------------------------------------------- */
1491
1492 static void
1493 init_gc_thread (gc_thread *t)
1494 {
1495 t->static_objects = END_OF_STATIC_OBJECT_LIST;
1496 t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST;
1497 t->scan_bd = NULL;
1498 t->mut_lists = t->cap->mut_lists;
1499 t->evac_gen_no = 0;
1500 t->failed_to_evac = false;
1501 t->eager_promotion = true;
1502 t->thunk_selector_depth = 0;
1503 t->copied = 0;
1504 t->scanned = 0;
1505 t->any_work = 0;
1506 t->no_work = 0;
1507 t->scav_find_work = 0;
1508 }
1509
1510 /* -----------------------------------------------------------------------------
1511 Function we pass to evacuate roots.
1512 -------------------------------------------------------------------------- */
1513
1514 static void
1515 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1516 {
1517 // we stole a register for gct, but this function is called from
1518 // *outside* the GC where the register variable is not in effect,
1519 // so we need to save and restore it here. NB. only call
1520 // mark_root() from the main GC thread, otherwise gct will be
1521 // incorrect.
1522 #if defined(THREADED_RTS)
1523 gc_thread *saved_gct;
1524 saved_gct = gct;
1525 #endif
1526 SET_GCT(user);
1527
1528 evacuate(root);
1529
1530 SET_GCT(saved_gct);
1531 }
1532
1533 /* ----------------------------------------------------------------------------
1534 Reset the sizes of the older generations when we do a major
1535 collection.
1536
1537 CURRENT STRATEGY: make all generations except zero the same size.
1538 We have to stay within the maximum heap size, and leave a certain
1539 percentage of the maximum heap size available to allocate into.
1540 ------------------------------------------------------------------------- */
1541
1542 static void
1543 resize_generations (void)
1544 {
1545 uint32_t g;
1546
1547 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1548 W_ live, size, min_alloc, words;
1549 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1550 const W_ gens = RtsFlags.GcFlags.generations;
1551
1552         // live in the oldest generation
1553 if (oldest_gen->live_estimate != 0) {
1554 words = oldest_gen->live_estimate;
1555 } else {
1556 words = oldest_gen->n_words;
1557 }
1558 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1559 oldest_gen->n_large_blocks +
1560 oldest_gen->n_compact_blocks;
1561
1562 // default max size for all generations except zero
1563 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1564 RtsFlags.GcFlags.minOldGenSize);
1565
1566 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1567 if (max > 0) {
1568 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1569 } else {
1570 RtsFlags.GcFlags.heapSizeSuggestion = size;
1571 }
1572 }
1573
1574 // minimum size for generation zero
1575 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1576 RtsFlags.GcFlags.minAllocAreaSize
1577 * (W_)n_capabilities);
1578
1579 // Auto-enable compaction when the residency reaches a
1580 // certain percentage of the maximum heap size (default: 30%).
1581 if (RtsFlags.GcFlags.compact ||
1582 (max > 0 &&
1583 oldest_gen->n_blocks >
1584 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1585 oldest_gen->mark = 1;
1586 oldest_gen->compact = 1;
1587 // debugBelch("compaction: on\n", live);
1588 } else {
1589 oldest_gen->mark = 0;
1590 oldest_gen->compact = 0;
1591 // debugBelch("compaction: off\n", live);
1592 }
1593
1594 if (RtsFlags.GcFlags.sweep) {
1595 oldest_gen->mark = 1;
1596 }
1597
1598 // if we're going to go over the maximum heap size, reduce the
1599 // size of the generations accordingly. The calculation is
1600 // different if compaction is turned on, because we don't need
1601 // to double the space required to collect the old generation.
1602 if (max != 0) {
1603
1604 // this test is necessary to ensure that the calculations
1605 // below don't have any negative results - we're working
1606 // with unsigned values here.
1607 if (max < min_alloc) {
1608 heapOverflow();
1609 }
1610
1611 if (oldest_gen->compact) {
1612 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1613 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1614 }
1615 } else {
1616 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1617 size = (max - min_alloc) / ((gens - 1) * 2);
1618 }
1619 }
1620
1621 if (size < live) {
1622 heapOverflow();
1623 }
1624 }
1625
1626 #if 0
1627 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1628 min_alloc, size, max);
1629 #endif
1630
1631 for (g = 0; g < gens; g++) {
1632 generations[g].max_blocks = size;
1633 }
1634 }
1635 }
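/* Editorial worked example for the sizing logic above (copying case): with
 * gens = 2, max = 1000 blocks and min_alloc = 200 blocks, the constraint
 * size * (gens-1) * 2 + min_alloc <= max gives size = (1000 - 200) / 2 = 400
 * blocks for the old generation -- the factor 2 being from-space plus
 * to-space while it is being copied.  With compaction enabled the oldest
 * generation does not need to be doubled, so the divisor shrinks to
 * (gens-1)*2 - 1 and the same numbers give size = 800 blocks.
 */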
1636
1637 /* -----------------------------------------------------------------------------
1638 Calculate the new size of the nursery, and resize it.
1639 -------------------------------------------------------------------------- */
1640
1641 static void
1642 resize_nursery (void)
1643 {
1644 const StgWord min_nursery =
1645 RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;
1646
1647 if (RtsFlags.GcFlags.generations == 1)
1648 { // Two-space collector:
1649 W_ blocks;
1650
1651 /* set up a new nursery. Allocate a nursery size based on a
1652 * function of the amount of live data (by default a factor of 2)
1653 * Use the blocks from the old nursery if possible, freeing up any
1654 * left over blocks.
1655 *
1656 * If we get near the maximum heap size, then adjust our nursery
1657 * size accordingly. If the nursery is the same size as the live
1658 * data (L), then we need 3L bytes. We can reduce the size of the
1659 * nursery to bring the required memory down near 2L bytes.
1660 *
1661 * A normal 2-space collector would need 4L bytes to give the same
1662 * performance we get from 3L bytes, reducing to the same
1663 * performance at 2L bytes.
1664 */
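        /* Editorial note: with a nursery of N blocks and L blocks of live
         * data, a collection needs roughly L (live) + L (to-space copy) + N
         * (nursery) blocks; keeping N close to L gives the ~3L figure above,
         * and shrinking N is what brings the total back towards 2L.
         */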
1665 blocks = generations[0].n_blocks;
1666
1667 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1668 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1669 RtsFlags.GcFlags.maxHeapSize )
1670 {
1671 long adjusted_blocks; // signed on purpose
1672 int pc_free;
1673
1674 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1675
1676 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1677 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1678
1679 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1680             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1681 {
1682 heapOverflow();
1683 }
1684 blocks = adjusted_blocks;
1685 }
1686 else
1687 {
1688 blocks *= RtsFlags.GcFlags.oldGenFactor;
1689 if (blocks < min_nursery)
1690 {
1691 blocks = min_nursery;
1692 }
1693 }
1694 resizeNurseries(blocks);
1695 }
1696 else // Generational collector
1697 {
1698 /*
1699 * If the user has given us a suggested heap size, adjust our
1700 * allocation area to make best use of the memory available.
1701 */
1702 if (RtsFlags.GcFlags.heapSizeSuggestion)
1703 {
1704 long blocks;
1705 StgWord needed;
1706
1707 calcNeeded(false, &needed); // approx blocks needed at next GC
1708
1709 /* Guess how much will be live in generation 0 step 0 next time.
1710 * A good approximation is obtained by finding the
1711 * percentage of g0 that was live at the last minor GC.
1712 *
1713 * We have an accurate figure for the amount of copied data in
1714 * 'copied', but we must convert this to a number of blocks, with
1715 * a small adjustment for estimated slop at the end of a block
1716 * (- 10 words).
1717 */
1718 if (N == 0)
1719 {
1720 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1721 / countNurseryBlocks();
1722 }
1723
1724 /* Estimate a size for the allocation area based on the
1725 * information available. We might end up going slightly under
1726 * or over the suggested heap size, but we should be pretty
1727 * close on average.
1728 *
1729 * Formula: suggested - needed
1730 * ----------------------------
1731 * 1 + g0_pcnt_kept/100
1732 *
1733 * where 'needed' is the amount of memory needed at the next
1734 * collection for collecting all gens except g0.
1735 */
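            /* Editorial worked example: heapSizeSuggestion = 10000 blocks,
             * needed = 4000 blocks and g0_pcnt_kept = 25 give
             * blocks = (10000 - 4000) * 100 / 125 = 4800 blocks for the
             * allocation area.
             */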
1736 blocks =
1737 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1738 (100 + (long)g0_pcnt_kept);
1739
1740 if (blocks < (long)min_nursery) {
1741 blocks = min_nursery;
1742 }
1743
1744 resizeNurseries((W_)blocks);
1745 }
1746 else
1747 {
1748 // we might have added extra blocks to the nursery, so
1749 // resize back to the original size again.
1750 resizeNurseriesFixed();
1751 }
1752 }
1753 }
1754
1755 /* -----------------------------------------------------------------------------
1756 Sanity code for CAF garbage collection.
1757
1758 With DEBUG turned on, we manage a CAF list in addition to the SRT
1759 mechanism. After GC, we run down the CAF list and blackhole any
1760 CAFs which have been garbage collected. This means we get an error
1761 whenever the program tries to enter a garbage collected CAF.
1762
1763 Any garbage collected CAFs are taken off the CAF list at the same
1764 time.
1765 -------------------------------------------------------------------------- */
1766
1767 #if defined(DEBUG)
1768
1769 static void gcCAFs(void)
1770 {
1771 StgIndStatic *p, *prev;
1772
1773 const StgInfoTable *info;
1774 uint32_t i;
1775
1776 i = 0;
1777 p = debug_caf_list;
1778 prev = NULL;
1779
1780 for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST;
1781 p = (StgIndStatic*)p->saved_info) {
1782
1783 info = get_itbl((StgClosure*)p);
1784 ASSERT(info->type == IND_STATIC);
1785
1786 if (p->static_link == NULL) {
1787 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
1788 SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
1789 if (prev == NULL) {
1790 debug_caf_list = (StgIndStatic*)p->saved_info;
1791 } else {
1792 prev->saved_info = p->saved_info;
1793 }
1794 } else {
1795 prev = p;
1796 i++;
1797 }
1798 }
1799
1800 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1801 }
1802 #endif