Fix Work Balance computation in RTS stats
rts/sm/GC.c (ghc.git)
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Storage.h"
32 #include "RtsUtils.h"
33 #include "Apply.h"
34 #include "Updates.h"
35 #include "Stats.h"
36 #include "Schedule.h"
37 #include "Sanity.h"
38 #include "BlockAlloc.h"
39 #include "ProfHeap.h"
40 #include "Weak.h"
41 #include "Prelude.h"
42 #include "RtsSignals.h"
43 #include "STM.h"
44 #include "Trace.h"
45 #include "RetainerProfile.h"
46 #include "LdvProfile.h"
47 #include "RaiseAsync.h"
48 #include "Stable.h"
49 #include "CheckUnload.h"
50 #include "CNF.h"
51
52 #include <string.h> // for memset()
53 #include <unistd.h>
54
55 /* -----------------------------------------------------------------------------
56 Global variables
57 -------------------------------------------------------------------------- */
58
59 /* STATIC OBJECT LIST.
60 *
61 * During GC:
62 * We maintain a linked list of static objects that are still live.
63 * The requirements for this list are:
64 *
65 * - we need to scan the list while adding to it, in order to
66 * scavenge all the static objects (in the same way that
67 * breadth-first scavenging works for dynamic objects).
68 *
69 * - we need to be able to tell whether an object is already on
70 * the list, to break loops.
71 *
72 * Each static object has a "static link field", which we use for
73 * linking objects on to the list. We use a stack-type list, consing
74 * objects on the front as they are added (this means that the
75 * scavenge phase is depth-first, not breadth-first, but that
76 * shouldn't matter).
77 *
78 * A separate list is kept for objects that have been scavenged
79 * already - this is so that we can zero all the marks afterwards.
80 *
81 * An object is on the list if its static link field is non-zero; this
82 * means that we have to mark the end of the list with '1', not NULL.
83 *
84 * Extra notes for generational GC:
85 *
86 * Each generation has a static object list associated with it. When
87 * collecting generations up to N, we treat the static object lists
88 * from generations > N as roots.
89 *
90 * We build up a static object list while collecting generations 0..N,
91 * which is then appended to the static object list of generation N+1.
92 */
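/* An illustrative sketch only (not the actual RTS code) of the consing
 * pattern described above, assuming a static_link field used as the
 * intrusive "next" pointer:
 *
 *     if (obj->static_link == NULL) {           // not yet on the list
 *         obj->static_link = static_objects;    // cons onto the front
 *         static_objects   = obj;               // new list head
 *     }
 *
 * Because membership is detected via a non-NULL link field, the end of
 * the list must be marked with a distinguished non-NULL value rather
 * than NULL.
 */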
93
94 /* N is the oldest generation being collected, where the generations
95 * are numbered starting at 0. A major GC (indicated by the major_gc
96 * flag) is when we're collecting all generations. We only attempt to
97 * deal with static objects and GC CAFs when doing a major GC.
98 */
99 uint32_t N;
100 bool major_gc;
101
102 /* Data used for allocation area sizing.
103 */
104 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
105
106 /* Mut-list stats */
107 #if defined(DEBUG)
108 uint32_t mutlist_MUTVARS,
109 mutlist_MUTARRS,
110 mutlist_MVARS,
111 mutlist_TVAR,
112 mutlist_TVAR_WATCH_QUEUE,
113 mutlist_TREC_CHUNK,
114 mutlist_TREC_HEADER,
115 mutlist_ATOMIC_INVARIANT,
116 mutlist_INVARIANT_CHECK_QUEUE,
117 mutlist_OTHERS;
118 #endif
119
120 /* Thread-local data for each GC thread
121 */
122 gc_thread **gc_threads = NULL;
123
124 #if !defined(THREADED_RTS)
125 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
126 #endif
127
128 // Number of threads running in *this* GC. Affects how many
129 // step->todos[] lists we have to look in to find work.
130 uint32_t n_gc_threads;
131
132 // For stats:
133 static long copied; // *words* copied & scavenged during this GC
134
135 bool work_stealing;
136
137 uint32_t static_flag = STATIC_FLAG_B;
138 uint32_t prev_static_flag = STATIC_FLAG_A;
139
140 DECLARE_GCT
141
142 /* -----------------------------------------------------------------------------
143 Static function declarations
144 -------------------------------------------------------------------------- */
145
146 static void mark_root (void *user, StgClosure **root);
147 static void prepare_collected_gen (generation *gen);
148 static void prepare_uncollected_gen (generation *gen);
149 static void init_gc_thread (gc_thread *t);
150 static void resize_generations (void);
151 static void resize_nursery (void);
152 static void start_gc_threads (void);
153 static void scavenge_until_all_done (void);
154 static StgWord inc_running (void);
155 static StgWord dec_running (void);
156 static void wakeup_gc_threads (uint32_t me, bool idle_cap[]);
157 static void shutdown_gc_threads (uint32_t me, bool idle_cap[]);
158 static void collect_gct_blocks (void);
159 static void collect_pinned_object_blocks (void);
160 static void heapOverflow (void);
161
162 #if defined(DEBUG)
163 static void gcCAFs (void);
164 #endif
165
166 /* -----------------------------------------------------------------------------
167 The mark stack.
168 -------------------------------------------------------------------------- */
169
170 bdescr *mark_stack_top_bd; // topmost block in the mark stack
171 bdescr *mark_stack_bd; // current block in the mark stack
172 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
173
174 /* -----------------------------------------------------------------------------
175 GarbageCollect: the main entry point to the garbage collector.
176
177    The collect_gen parameter is obtained by calling calcNeeded().
178
179 Locks held: all capabilities are held throughout GarbageCollect().
180 -------------------------------------------------------------------------- */
181
182 void
183 GarbageCollect (uint32_t collect_gen,
184 bool do_heap_census,
185 uint32_t gc_type USED_IF_THREADS,
186 Capability *cap,
187 bool idle_cap[])
188 {
189 bdescr *bd;
190 generation *gen;
191 StgWord live_blocks, live_words, par_max_copied, par_balanced_copied;
192 #if defined(THREADED_RTS)
193 gc_thread *saved_gct;
194 #endif
195 uint32_t g, n;
196
197 // necessary if we stole a callee-saves register for gct:
198 #if defined(THREADED_RTS)
199 saved_gct = gct;
200 #endif
201
202 #if defined(PROFILING)
203 CostCentreStack *save_CCS[n_capabilities];
204 #endif
205
206 ACQUIRE_SM_LOCK;
207
208 #if defined(RTS_USER_SIGNALS)
209 if (RtsFlags.MiscFlags.install_signal_handlers) {
210 // block signals
211 blockUserSignals();
212 }
213 #endif
214
215 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
216 // otherwise adjust the padding in gen_workspace.
217
218 // this is the main thread
219 SET_GCT(gc_threads[cap->no]);
220
221 // tell the stats department that we've started a GC
222 stat_startGC(cap, gct);
223
224 // lock the StablePtr table
225 stableLock();
226
227 #if defined(DEBUG)
228 mutlist_MUTVARS = 0;
229 mutlist_MUTARRS = 0;
230 mutlist_MVARS = 0;
231 mutlist_TVAR = 0;
232 mutlist_TVAR_WATCH_QUEUE = 0;
233 mutlist_TREC_CHUNK = 0;
234 mutlist_TREC_HEADER = 0;
235 mutlist_ATOMIC_INVARIANT = 0;
236 mutlist_INVARIANT_CHECK_QUEUE = 0;
237 mutlist_OTHERS = 0;
238 #endif
239
240 // attribute any costs to CCS_GC
241 #if defined(PROFILING)
242 for (n = 0; n < n_capabilities; n++) {
243 save_CCS[n] = capabilities[n]->r.rCCCS;
244 capabilities[n]->r.rCCCS = CCS_GC;
245 }
246 #endif
247
248 /* Figure out which generation to collect
249 */
250 N = collect_gen;
251 major_gc = (N == RtsFlags.GcFlags.generations-1);
252
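  // (Roughly: static_flag flips between STATIC_FLAG_A and STATIC_FLAG_B on
  // each major GC.  A static object records the flag of the collection in
  // which it was last visited in the low bits of its static link field, so
  // flipping the flag lets this GC distinguish "visited in this collection"
  // from "visited in an earlier one" without clearing every link field.)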
253 if (major_gc) {
254 prev_static_flag = static_flag;
255 static_flag =
256 static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
257 }
258
259 #if defined(THREADED_RTS)
260 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
261 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
262 // It's not always a good idea to do load balancing in parallel
263 // GC. In particular, for a parallel program we don't want to
264 // lose locality by moving cached data into another CPU's cache
265 // (this effect can be quite significant).
266 //
267    // We could have a more complex way to determine whether to do
268 // work stealing or not, e.g. it might be a good idea to do it
269 // if the heap is big. For now, we just turn it on or off with
270 // a flag.
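    //
    // (The flag in question is ParFlags.parGcLoadBalancingEnabled together
    // with parGcLoadBalancingGen, used just above; as far as I can tell
    // these correspond to the +RTS -qb[<gen>] option.)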
271 #endif
272
273 /* Start threads, so they can be spinning up while we finish initialisation.
274 */
275 start_gc_threads();
276
277 #if defined(THREADED_RTS)
278 /* How many threads will be participating in this GC?
279 * We don't try to parallelise minor GCs (unless the user asks for
280     * it with +RTS -qg0), or mark/compact/sweep GC.
281 */
282 if (gc_type == SYNC_GC_PAR) {
283 n_gc_threads = n_capabilities;
284 } else {
285 n_gc_threads = 1;
286 }
287 #else
288 n_gc_threads = 1;
289 #endif
290
291 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
292 N, n_gc_threads);
293
294 #if defined(DEBUG)
295 // check for memory leaks if DEBUG is on
296 memInventory(DEBUG_gc);
297 #endif
298
299 // do this *before* we start scavenging
300 collectFreshWeakPtrs();
301
302 // check sanity *before* GC
303 IF_DEBUG(sanity, checkSanity(false /* before GC */, major_gc));
304
305 // gather blocks allocated using allocatePinned() from each capability
306 // and put them on the g0->large_object list.
307 collect_pinned_object_blocks();
308
309 // Initialise all the generations that we're collecting.
310 for (g = 0; g <= N; g++) {
311 prepare_collected_gen(&generations[g]);
312 }
313 // Initialise all the generations that we're *not* collecting.
314 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
315 prepare_uncollected_gen(&generations[g]);
316 }
317
318 // Prepare this gc_thread
319 init_gc_thread(gct);
320
321 /* Allocate a mark stack if we're doing a major collection.
322 */
323 if (major_gc && oldest_gen->mark) {
324 mark_stack_bd = allocBlock();
325 mark_stack_top_bd = mark_stack_bd;
326 mark_stack_bd->link = NULL;
327 mark_stack_bd->u.back = NULL;
328 mark_sp = mark_stack_bd->start;
329 } else {
330 mark_stack_bd = NULL;
331 mark_stack_top_bd = NULL;
332 mark_sp = NULL;
333 }
334
335 /* -----------------------------------------------------------------------
336 * follow all the roots that we know about:
337 */
338
339 // the main thread is running: this prevents any other threads from
340 // exiting prematurely, so we can start them now.
341 // NB. do this after the mutable lists have been saved above, otherwise
342 // the other GC threads will be writing into the old mutable lists.
343 inc_running();
344 wakeup_gc_threads(gct->thread_index, idle_cap);
345
346 traceEventGcWork(gct->cap);
347
348 // scavenge the capability-private mutable lists. This isn't part
349 // of markSomeCapabilities() because markSomeCapabilities() can only
350 // call back into the GC via mark_root() (due to the gct register
351 // variable).
352 if (n_gc_threads == 1) {
353 for (n = 0; n < n_capabilities; n++) {
354 #if defined(THREADED_RTS)
355 scavenge_capability_mut_Lists1(capabilities[n]);
356 #else
357 scavenge_capability_mut_lists(capabilities[n]);
358 #endif
359 }
360 } else {
361 scavenge_capability_mut_lists(gct->cap);
362 for (n = 0; n < n_capabilities; n++) {
363 if (idle_cap[n]) {
364 markCapability(mark_root, gct, capabilities[n],
365 true/*don't mark sparks*/);
366 scavenge_capability_mut_lists(capabilities[n]);
367 }
368 }
369 }
370
371 // follow roots from the CAF list (used by GHCi)
372 gct->evac_gen_no = 0;
373 markCAFs(mark_root, gct);
374
375 // follow all the roots that the application knows about.
376 gct->evac_gen_no = 0;
377 if (n_gc_threads == 1) {
378 for (n = 0; n < n_capabilities; n++) {
379 markCapability(mark_root, gct, capabilities[n],
380 true/*don't mark sparks*/);
381 }
382 } else {
383 markCapability(mark_root, gct, cap, true/*don't mark sparks*/);
384 }
385
386 markScheduler(mark_root, gct);
387
388 #if defined(RTS_USER_SIGNALS)
389 // mark the signal handlers (signals should be already blocked)
390 markSignalHandlers(mark_root, gct);
391 #endif
392
393 // Mark the weak pointer list, and prepare to detect dead weak pointers.
394 markWeakPtrList();
395 initWeakForGC();
396
397 // Mark the stable pointer table.
398 markStableTables(mark_root, gct);
399
400 /* -------------------------------------------------------------------------
401 * Repeatedly scavenge all the areas we know about until there's no
402 * more scavenging to be done.
403 */
404 for (;;)
405 {
406 scavenge_until_all_done();
407 // The other threads are now stopped. We might recurse back to
408 // here, but from now on this is the only thread.
409
410 // must be last... invariant is that everything is fully
411 // scavenged at this point.
412 if (traverseWeakPtrList()) { // returns true if evaced something
413 inc_running();
414 continue;
415 }
416
417 // If we get to here, there's really nothing left to do.
418 break;
419 }
420
421 shutdown_gc_threads(gct->thread_index, idle_cap);
422
423 // Now see which stable names are still alive.
424 gcStableTables();
425
426 #if defined(THREADED_RTS)
427 if (n_gc_threads == 1) {
428 for (n = 0; n < n_capabilities; n++) {
429 pruneSparkQueue(capabilities[n]);
430 }
431 } else {
432 for (n = 0; n < n_capabilities; n++) {
433 if (n == cap->no || idle_cap[n]) {
434 pruneSparkQueue(capabilities[n]);
435 }
436 }
437 }
438 #endif
439
440 #if defined(PROFILING)
441 // We call processHeapClosureForDead() on every closure destroyed during
442 // the current garbage collection, so we invoke LdvCensusForDead().
443 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
444 || RtsFlags.ProfFlags.bioSelector != NULL) {
445 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
446 LdvCensusForDead(N);
447 ACQUIRE_SM_LOCK;
448 }
449 #endif
450
451 // NO MORE EVACUATION AFTER THIS POINT!
452
453 // Finally: compact or sweep the oldest generation.
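    // (Which path we take is controlled by oldest_gen->mark and
    // oldest_gen->compact, set in resize_generations(): mark together with
    // compact selects the mark-compact collector, while mark without
    // compact selects the mark-sweep path via sweep().)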
454 if (major_gc && oldest_gen->mark) {
455 if (oldest_gen->compact)
456 compact(gct->scavenged_static_objects);
457 else
458 sweep(oldest_gen);
459 }
460
461 copied = 0;
462 par_max_copied = 0;
463 par_balanced_copied = 0;
464 {
465 uint32_t i;
466 uint64_t par_balanced_copied_acc = 0;
467
468 for (i=0; i < n_gc_threads; i++) {
469 copied += gc_threads[i]->copied;
470 }
471 for (i=0; i < n_gc_threads; i++) {
472 if (n_gc_threads > 1) {
473 debugTrace(DEBUG_gc,"thread %d:", i);
474 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
475 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
476 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
477 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
478 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
479 }
480 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
481 par_balanced_copied_acc +=
482 stg_min(n_gc_threads * gc_threads[i]->copied, copied);
483 }
484 if (n_gc_threads == 1) {
485 par_max_copied = 0;
486 par_balanced_copied = 0;
487 }
488 else
489 {
490 // See Note [Work Balance] for an explanation of this computation
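        // Worked example (illustrative numbers only): with n_gc_threads == 2
        // and each thread copying 50 words, copied == 100 and
        // par_balanced_copied_acc == min(2*50,100) + min(2*50,100) == 200,
        // so par_balanced_copied == (200 - 100) / 1 == 100 == copied,
        // i.e. perfectly balanced work.  If instead one thread copies all
        // 100 words, the accumulator is min(2*100,100) + min(0,100) == 100
        // and par_balanced_copied == 0.  The "+ (n_gc_threads - 1) / 2"
        // term merely rounds the division to the nearest integer.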
491 par_balanced_copied =
492 (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
493 (n_gc_threads - 1);
494 }
495 }
496
497 // Run through all the generations and tidy up.
498 // We're going to:
499 // - count the amount of "live" data (live_words, live_blocks)
500 // - count the amount of "copied" data in this GC (copied)
501 // - free from-space
502 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
503 //
504 live_words = 0;
505 live_blocks = 0;
506
507 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
508
509 if (g == N) {
510 generations[g].collections++; // for stats
511 if (n_gc_threads > 1) generations[g].par_collections++;
512 }
513
514 // Count the mutable list as bytes "copied" for the purposes of
515 // stats. Every mutable list is copied during every GC.
516 if (g > 0) {
517 W_ mut_list_size = 0;
518 for (n = 0; n < n_capabilities; n++) {
519 mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
520 }
521 copied += mut_list_size;
522
523 debugTrace(DEBUG_gc,
524 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
525 (unsigned long)(mut_list_size * sizeof(W_)),
526 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
527 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
528 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
529 mutlist_ATOMIC_INVARIANT,
530 mutlist_INVARIANT_CHECK_QUEUE,
531 mutlist_OTHERS);
532 }
533
534 bdescr *next, *prev;
535 gen = &generations[g];
536
537 // for generations we collected...
538 if (g <= N) {
539
540 /* free old memory and shift to-space into from-space for all
541 * the collected generations (except the allocation area). These
542          * freed blocks will probably be quickly recycled.
543 */
544 if (gen->mark)
545 {
546 // tack the new blocks on the end of the existing blocks
547 if (gen->old_blocks != NULL) {
548
549 prev = NULL;
550 for (bd = gen->old_blocks; bd != NULL; bd = next) {
551
552 next = bd->link;
553
554 if (!(bd->flags & BF_MARKED))
555 {
556 if (prev == NULL) {
557 gen->old_blocks = next;
558 } else {
559 prev->link = next;
560 }
561 freeGroup(bd);
562 gen->n_old_blocks--;
563 }
564 else
565 {
566 gen->n_words += bd->free - bd->start;
567
568 // NB. this step might not be compacted next
569 // time, so reset the BF_MARKED flags.
570 // They are set before GC if we're going to
571 // compact. (search for BF_MARKED above).
572 bd->flags &= ~BF_MARKED;
573
574 // between GCs, all blocks in the heap except
575 // for the nursery have the BF_EVACUATED flag set.
576 bd->flags |= BF_EVACUATED;
577
578 prev = bd;
579 }
580 }
581
582 if (prev != NULL) {
583 prev->link = gen->blocks;
584 gen->blocks = gen->old_blocks;
585 }
586 }
587 // add the new blocks to the block tally
588 gen->n_blocks += gen->n_old_blocks;
589 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
590 ASSERT(countOccupied(gen->blocks) == gen->n_words);
591 }
592        else // not compacted
593 {
594 freeChain(gen->old_blocks);
595 }
596
597 gen->old_blocks = NULL;
598 gen->n_old_blocks = 0;
599
600 /* LARGE OBJECTS. The current live large objects are chained on
601 * scavenged_large, having been moved during garbage
602 * collection from large_objects. Any objects left on the
603 * large_objects list are therefore dead, so we free them here.
604 */
605 freeChain(gen->large_objects);
606 gen->large_objects = gen->scavenged_large_objects;
607 gen->n_large_blocks = gen->n_scavenged_large_blocks;
608 gen->n_large_words = countOccupied(gen->large_objects);
609 gen->n_new_large_words = 0;
610
611 /* COMPACT_NFDATA. The currently live compacts are chained
612 * to live_compact_objects, quite like large objects. And
613 * objects left on the compact_objects list are dead.
614 *
615        * We don't run a simple freeChain because we want to give the
616 * CNF module some chance to free memory that freeChain would
617 * not see (namely blocks appended to a CNF through a compactResize).
618 *
619 * See Note [Compact Normal Forms] for details.
620 */
621 for (bd = gen->compact_objects; bd; bd = next) {
622 next = bd->link;
623 compactFree(((StgCompactNFDataBlock*)bd->start)->owner);
624 }
625 gen->compact_objects = gen->live_compact_objects;
626 gen->n_compact_blocks = gen->n_live_compact_blocks;
627 }
628 else // for generations > N
629 {
630 /* For older generations, we need to append the
631 * scavenged_large_object list (i.e. large objects that have been
632 * promoted during this GC) to the large_object list for that step.
633 */
634 for (bd = gen->scavenged_large_objects; bd; bd = next) {
635 next = bd->link;
636 dbl_link_onto(bd, &gen->large_objects);
637 gen->n_large_words += bd->free - bd->start;
638 }
639
640 // And same for compacts
641 for (bd = gen->live_compact_objects; bd; bd = next) {
642 next = bd->link;
643 dbl_link_onto(bd, &gen->compact_objects);
644 }
645
646 // add the new blocks we promoted during this GC
647 gen->n_large_blocks += gen->n_scavenged_large_blocks;
648 gen->n_compact_blocks += gen->n_live_compact_blocks;
649 }
650
651 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
652 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
653      // We can't run the same assertion on compact objects because there
654 // is memory "the GC doesn't see" (directly), but which is still
655 // accounted in gen->n_compact_blocks
656
657 gen->scavenged_large_objects = NULL;
658 gen->n_scavenged_large_blocks = 0;
659 gen->live_compact_objects = NULL;
660 gen->n_live_compact_blocks = 0;
661
662 // Count "live" data
663 live_words += genLiveWords(gen);
664 live_blocks += genLiveBlocks(gen);
665
666 // add in the partial blocks in the gen_workspaces
667 {
668 uint32_t i;
669 for (i = 0; i < n_capabilities; i++) {
670 live_words += gcThreadLiveWords(i, gen->no);
671 live_blocks += gcThreadLiveBlocks(i, gen->no);
672 }
673 }
674 } // for all generations
675
676 // update the max size of older generations after a major GC
677 resize_generations();
678
679 // Free the mark stack.
680 if (mark_stack_top_bd != NULL) {
681 debugTrace(DEBUG_gc, "mark stack: %d blocks",
682 countBlocks(mark_stack_top_bd));
683 freeChain(mark_stack_top_bd);
684 }
685
686 // Free any bitmaps.
687 for (g = 0; g <= N; g++) {
688 gen = &generations[g];
689 if (gen->bitmap != NULL) {
690 freeGroup(gen->bitmap);
691 gen->bitmap = NULL;
692 }
693 }
694
695 resize_nursery();
696
697 resetNurseries();
698
699 // mark the garbage collected CAFs as dead
700 #if defined(DEBUG)
701 if (major_gc) { gcCAFs(); }
702 #endif
703
704 // Update the stable pointer hash table.
705 updateStableTables(major_gc);
706
707 // unlock the StablePtr table. Must be before scheduleFinalizers(),
708 // because a finalizer may call hs_free_fun_ptr() or
709 // hs_free_stable_ptr(), both of which access the StablePtr table.
710 stableUnlock();
711
712 // Must be after stableUnlock(), because it might free stable ptrs.
713 if (major_gc) {
714 checkUnload (gct->scavenged_static_objects);
715 }
716
717 #if defined(PROFILING)
718 // resetStaticObjectForRetainerProfiling() must be called before
719 // zeroing below.
720
721 // ToDo: fix the gct->scavenged_static_objects below
722 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
723 #endif
724
725 // Start any pending finalizers. Must be after
726 // updateStableTables() and stableUnlock() (see #4221).
727 RELEASE_SM_LOCK;
728 scheduleFinalizers(cap, dead_weak_ptr_list);
729 ACQUIRE_SM_LOCK;
730
731 // check sanity after GC
732 // before resurrectThreads(), because that might overwrite some
733 // closures, which will cause problems with THREADED where we don't
734 // fill slop.
735 IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc));
736
737 // If a heap census is due, we need to do it before
738 // resurrectThreads(), for the same reason as checkSanity above:
739 // resurrectThreads() will overwrite some closures and leave slop
740 // behind.
741 if (do_heap_census) {
742 debugTrace(DEBUG_sched, "performing heap census");
743 RELEASE_SM_LOCK;
744 heapCensus(gct->gc_start_cpu);
745 ACQUIRE_SM_LOCK;
746 }
747
748 // send exceptions to any threads which were about to die
749 RELEASE_SM_LOCK;
750 resurrectThreads(resurrected_threads);
751 ACQUIRE_SM_LOCK;
752
753 if (major_gc) {
754 W_ need, got;
755 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
756 got = mblocks_allocated;
757 /* If the amount of data remains constant, next major GC we'll
758 require (F+1)*need. We leave (F+2)*need in order to reduce
759 repeated deallocation and reallocation. */
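        // For example, with the default -F2 (oldGenFactor == 2) and
        // need == 100 mblocks, we keep (2+2)*100 == 400 mblocks and give
        // anything beyond that back to the OS, subject to the maxHeapSize
        // cap applied just below.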
760 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
761 /* But with a large nursery, the above estimate might exceed
762 * maxHeapSize. A large resident set size might make the OS
763 * kill this process, or swap unnecessarily. Therefore we
764 * ensure that our estimate does not exceed maxHeapSize.
765 */
766 if (RtsFlags.GcFlags.maxHeapSize != 0) {
767 W_ max = BLOCKS_TO_MBLOCKS(RtsFlags.GcFlags.maxHeapSize);
768 if (need > max) {
769 need = max;
770 }
771 }
772 if (got > need) {
773 returnMemoryToOS(got - need);
774 }
775 }
776
777 // extra GC trace info
778 IF_DEBUG(gc, statDescribeGens());
779
780 #if defined(DEBUG)
781 // symbol-table based profiling
782 /* heapCensus(to_blocks); */ /* ToDo */
783 #endif
784
785 // restore enclosing cost centre
786 #if defined(PROFILING)
787 for (n = 0; n < n_capabilities; n++) {
788 capabilities[n]->r.rCCCS = save_CCS[n];
789 }
790 #endif
791
792 #if defined(DEBUG)
793 // check for memory leaks if DEBUG is on
794 memInventory(DEBUG_gc);
795 #endif
796
797 // ok, GC over: tell the stats department what happened.
798 stat_endGC(cap, gct, live_words, copied,
799 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
800 N, n_gc_threads, par_max_copied, par_balanced_copied);
801
802 #if defined(RTS_USER_SIGNALS)
803 if (RtsFlags.MiscFlags.install_signal_handlers) {
804 // unblock signals again
805 unblockUserSignals();
806 }
807 #endif
808
809 RELEASE_SM_LOCK;
810
811 SET_GCT(saved_gct);
812 }
813
814 /* -----------------------------------------------------------------------------
815 Heap overflow is indicated by setting a flag that the caller of
816 GarbageCollect can check. (not ideal, TODO: better)
817 -------------------------------------------------------------------------- */
818
819 static void heapOverflow(void)
820 {
821 heap_overflow = true;
822 }
823
824 /* -----------------------------------------------------------------------------
825 Initialise the gc_thread structures.
826 -------------------------------------------------------------------------- */
827
828 #define GC_THREAD_INACTIVE 0
829 #define GC_THREAD_STANDING_BY 1
830 #define GC_THREAD_RUNNING 2
831 #define GC_THREAD_WAITING_TO_CONTINUE 3
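
// How the wakeup handshake works, roughly (see new_gc_thread(),
// gcWorkerThread(), wakeup_gc_threads() and releaseGCThreads()): each
// gc_thread has two spin locks, gc_spin and mut_spin, which both start
// out locked.  A worker announces GC_THREAD_STANDING_BY by releasing
// mut_spin and then blocks trying to acquire gc_spin; the main thread
// wakes it by re-taking mut_spin and releasing gc_spin.  When the worker
// has finished its GC work it releases gc_spin, sets
// GC_THREAD_WAITING_TO_CONTINUE and blocks on mut_spin, which
// releaseGCThreads() eventually hands back to it.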
832
833 static void
834 new_gc_thread (uint32_t n, gc_thread *t)
835 {
836 uint32_t g;
837 gen_workspace *ws;
838
839 t->cap = capabilities[n];
840
841 #if defined(THREADED_RTS)
842 t->id = 0;
843 initSpinLock(&t->gc_spin);
844 initSpinLock(&t->mut_spin);
845 ACQUIRE_SPIN_LOCK(&t->gc_spin);
846 ACQUIRE_SPIN_LOCK(&t->mut_spin);
847 t->wakeup = GC_THREAD_INACTIVE; // starts true, so we can wait for the
848 // thread to start up, see wakeup_gc_threads
849 #endif
850
851 t->thread_index = n;
852 t->free_blocks = NULL;
853 t->gc_count = 0;
854
855 init_gc_thread(t);
856
857 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
858 {
859 ws = &t->gens[g];
860 ws->gen = &generations[g];
861 ASSERT(g == ws->gen->no);
862 ws->my_gct = t;
863
864 // We want to call
865 // alloc_todo_block(ws,0);
866 // but can't, because it uses gct which isn't set up at this point.
867 // Hence, allocate a block for todo_bd manually:
868 {
869 bdescr *bd = allocBlockOnNode(capNoToNumaNode(n));
870 // no lock, locks aren't initialised yet
871 initBdescr(bd, ws->gen, ws->gen->to);
872 bd->flags = BF_EVACUATED;
873 bd->u.scan = bd->free = bd->start;
874
875 ws->todo_bd = bd;
876 ws->todo_free = bd->free;
877 ws->todo_lim = bd->start + BLOCK_SIZE_W;
878 }
879
880 ws->todo_q = newWSDeque(128);
881 ws->todo_overflow = NULL;
882 ws->n_todo_overflow = 0;
883 ws->todo_large_objects = NULL;
884
885 ws->part_list = NULL;
886 ws->n_part_blocks = 0;
887 ws->n_part_words = 0;
888
889 ws->scavd_list = NULL;
890 ws->n_scavd_blocks = 0;
891 ws->n_scavd_words = 0;
892 }
893 }
894
895
896 void
897 initGcThreads (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
898 {
899 #if defined(THREADED_RTS)
900 uint32_t i;
901
902 if (from > 0) {
903 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
904 "initGcThreads");
905 } else {
906 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
907 "initGcThreads");
908 }
909
910 for (i = from; i < to; i++) {
911 gc_threads[i] =
912 stgMallocBytes(sizeof(gc_thread) +
913 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
914 "alloc_gc_threads");
915
916 new_gc_thread(i, gc_threads[i]);
917 }
918 #else
919 ASSERT(from == 0 && to == 1);
920 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
921 gc_threads[0] = gct;
922 new_gc_thread(0,gc_threads[0]);
923 #endif
924 }
925
926 void
927 freeGcThreads (void)
928 {
929 uint32_t g;
930 if (gc_threads != NULL) {
931 #if defined(THREADED_RTS)
932 uint32_t i;
933 for (i = 0; i < n_capabilities; i++) {
934 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
935 {
936 freeWSDeque(gc_threads[i]->gens[g].todo_q);
937 }
938 stgFree (gc_threads[i]);
939 }
940 stgFree (gc_threads);
941 #else
942 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
943 {
944 freeWSDeque(gc_threads[0]->gens[g].todo_q);
945 }
946 stgFree (gc_threads);
947 #endif
948 gc_threads = NULL;
949 }
950 }
951
952 /* ----------------------------------------------------------------------------
953 Start GC threads
954 ------------------------------------------------------------------------- */
955
956 static volatile StgWord gc_running_threads;
957
958 static StgWord
959 inc_running (void)
960 {
961 StgWord new;
962 new = atomic_inc(&gc_running_threads, 1);
963 ASSERT(new <= n_gc_threads);
964 return new;
965 }
966
967 static StgWord
968 dec_running (void)
969 {
970 ASSERT(gc_running_threads != 0);
971 return atomic_dec(&gc_running_threads);
972 }
973
974 static bool
975 any_work (void)
976 {
977 int g;
978 gen_workspace *ws;
979
980 gct->any_work++;
981
982 write_barrier();
983
984 // scavenge objects in compacted generation
985 if (mark_stack_bd != NULL && !mark_stack_empty()) {
986 return true;
987 }
988
989 // Check for global work in any gen. We don't need to check for
990 // local work, because we have already exited scavenge_loop(),
991 // which means there is no local work for this thread.
992 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
993 ws = &gct->gens[g];
994 if (ws->todo_large_objects) return true;
995 if (!looksEmptyWSDeque(ws->todo_q)) return true;
996 if (ws->todo_overflow) return true;
997 }
998
999 #if defined(THREADED_RTS)
1000 if (work_stealing) {
1001 uint32_t n;
1002 // look for work to steal
1003 for (n = 0; n < n_gc_threads; n++) {
1004 if (n == gct->thread_index) continue;
1005 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
1006 ws = &gc_threads[n]->gens[g];
1007 if (!looksEmptyWSDeque(ws->todo_q)) return true;
1008 }
1009 }
1010 }
1011 #endif
1012
1013 gct->no_work++;
1014 #if defined(THREADED_RTS)
1015 yieldThread();
1016 #endif
1017
1018 return false;
1019 }
1020
1021 static void
1022 scavenge_until_all_done (void)
1023 {
1024 DEBUG_ONLY( uint32_t r );
1025
1026
1027 loop:
1028 #if defined(THREADED_RTS)
1029 if (n_gc_threads > 1) {
1030 scavenge_loop();
1031 } else {
1032 scavenge_loop1();
1033 }
1034 #else
1035 scavenge_loop();
1036 #endif
1037
1038 collect_gct_blocks();
1039
1040 // scavenge_loop() only exits when there's no work to do
1041
1042 #if defined(DEBUG)
1043 r = dec_running();
1044 #else
1045 dec_running();
1046 #endif
1047
1048 traceEventGcIdle(gct->cap);
1049
1050 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1051
1052 while (gc_running_threads != 0) {
1053 // usleep(1);
1054 if (any_work()) {
1055 inc_running();
1056 traceEventGcWork(gct->cap);
1057 goto loop;
1058 }
1059 // any_work() does not remove the work from the queue, it
1060 // just checks for the presence of work. If we find any,
1061 // then we increment gc_running_threads and go back to
1062 // scavenge_loop() to perform any pending work.
1063 }
1064
1065 traceEventGcDone(gct->cap);
1066 }
1067
1068 #if defined(THREADED_RTS)
1069
1070 void
1071 gcWorkerThread (Capability *cap)
1072 {
1073 gc_thread *saved_gct;
1074
1075 // necessary if we stole a callee-saves register for gct:
1076 saved_gct = gct;
1077
1078 SET_GCT(gc_threads[cap->no]);
1079 gct->id = osThreadId();
1080
1081 // Wait until we're told to wake up
1082 RELEASE_SPIN_LOCK(&gct->mut_spin);
1083 // yieldThread();
1084 // Strangely, adding a yieldThread() here makes the CPU time
1085 // measurements more accurate on Linux, perhaps because it syncs
1086 // the CPU time across the multiple cores. Without this, CPU time
1087 // is heavily skewed towards GC rather than MUT.
1088 gct->wakeup = GC_THREAD_STANDING_BY;
1089 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1090 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1091
1092 init_gc_thread(gct);
1093
1094 traceEventGcWork(gct->cap);
1095
1096 // Every thread evacuates some roots.
1097 gct->evac_gen_no = 0;
1098 markCapability(mark_root, gct, cap, true/*prune sparks*/);
1099 scavenge_capability_mut_lists(cap);
1100
1101 scavenge_until_all_done();
1102
1103 #if defined(THREADED_RTS)
1104 // Now that the whole heap is marked, we discard any sparks that
1105 // were found to be unreachable. The main GC thread is currently
1106 // marking heap reachable via weak pointers, so it is
1107 // non-deterministic whether a spark will be retained if it is
1108 // only reachable via weak pointers. To fix this problem would
1109 // require another GC barrier, which is too high a price.
1110 pruneSparkQueue(cap);
1111 #endif
1112
1113 // Wait until we're told to continue
1114 RELEASE_SPIN_LOCK(&gct->gc_spin);
1115 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1116 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1117 gct->thread_index);
1118 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1119 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1120
1121 SET_GCT(saved_gct);
1122 }
1123
1124 #endif
1125
1126 #if defined(THREADED_RTS)
1127
1128 void
1129 waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1130 {
1131 const uint32_t n_threads = n_capabilities;
1132 const uint32_t me = cap->no;
1133 uint32_t i, j;
1134 bool retry = true;
1135
1136 while(retry) {
1137 for (i=0; i < n_threads; i++) {
1138 if (i == me || idle_cap[i]) continue;
1139 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1140 prodCapability(capabilities[i], cap->running_task);
1141 }
1142 }
1143 for (j=0; j < 10; j++) {
1144 retry = false;
1145 for (i=0; i < n_threads; i++) {
1146 if (i == me || idle_cap[i]) continue;
1147 write_barrier();
1148 interruptCapability(capabilities[i]);
1149 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1150 retry = true;
1151 }
1152 }
1153 if (!retry) break;
1154 yieldThread();
1155 }
1156 }
1157 }
1158
1159 #endif // THREADED_RTS
1160
1161 static void
1162 start_gc_threads (void)
1163 {
1164 #if defined(THREADED_RTS)
1165 gc_running_threads = 0;
1166 #endif
1167 }
1168
1169 static void
1170 wakeup_gc_threads (uint32_t me USED_IF_THREADS,
1171 bool idle_cap[] USED_IF_THREADS)
1172 {
1173 #if defined(THREADED_RTS)
1174 uint32_t i;
1175
1176 if (n_gc_threads == 1) return;
1177
1178 for (i=0; i < n_gc_threads; i++) {
1179 if (i == me || idle_cap[i]) continue;
1180 inc_running();
1181 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1182 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY)
1183 barf("wakeup_gc_threads");
1184
1185 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1186 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1187 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1188 }
1189 #endif
1190 }
1191
1192 // After GC is complete, we must wait for all GC threads to reach the
1193 // GC_THREAD_WAITING_TO_CONTINUE state, otherwise they may still be
1194 // executing inside any_work(), and may even remain awake until the next GC starts.
1195 static void
1196 shutdown_gc_threads (uint32_t me USED_IF_THREADS,
1197 bool idle_cap[] USED_IF_THREADS)
1198 {
1199 #if defined(THREADED_RTS)
1200 uint32_t i;
1201
1202 if (n_gc_threads == 1) return;
1203
1204 for (i=0; i < n_gc_threads; i++) {
1205 if (i == me || idle_cap[i]) continue;
1206 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
1207 busy_wait_nop();
1208 write_barrier();
1209 }
1210 }
1211 #endif
1212 }
1213
1214 #if defined(THREADED_RTS)
1215 void
1216 releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1217 {
1218 const uint32_t n_threads = n_capabilities;
1219 const uint32_t me = cap->no;
1220 uint32_t i;
1221 for (i=0; i < n_threads; i++) {
1222 if (i == me || idle_cap[i]) continue;
1223 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1224 barf("releaseGCThreads");
1225
1226 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1227 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1228 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1229 }
1230 }
1231 #endif
1232
1233 /* ----------------------------------------------------------------------------
1234 Initialise a generation that is to be collected
1235 ------------------------------------------------------------------------- */
1236
1237 static void
1238 prepare_collected_gen (generation *gen)
1239 {
1240 uint32_t i, g, n;
1241 gen_workspace *ws;
1242 bdescr *bd, *next;
1243
1244 // Throw away the current mutable list. Invariant: the mutable
1245 // list always has at least one block; this means we can avoid a
1246 // check for NULL in recordMutable().
1247 g = gen->no;
1248 if (g != 0) {
1249 for (i = 0; i < n_capabilities; i++) {
1250 freeChain(capabilities[i]->mut_lists[g]);
1251 capabilities[i]->mut_lists[g] =
1252 allocBlockOnNode(capNoToNumaNode(i));
1253 }
1254 }
1255
1256 gen = &generations[g];
1257 ASSERT(gen->no == g);
1258
1259 // we'll construct a new list of threads in this step
1260 // during GC, throw away the current list.
1261 gen->old_threads = gen->threads;
1262 gen->threads = END_TSO_QUEUE;
1263
1264 // deprecate the existing blocks
1265 gen->old_blocks = gen->blocks;
1266 gen->n_old_blocks = gen->n_blocks;
1267 gen->blocks = NULL;
1268 gen->n_blocks = 0;
1269 gen->n_words = 0;
1270 gen->live_estimate = 0;
1271
1272 // initialise the large object queues.
1273 ASSERT(gen->scavenged_large_objects == NULL);
1274 ASSERT(gen->n_scavenged_large_blocks == 0);
1275 ASSERT(gen->live_compact_objects == NULL);
1276 ASSERT(gen->n_live_compact_blocks == 0);
1277
1278 // grab all the partial blocks stashed in the gc_thread workspaces and
1279 // move them to the old_blocks list of this gen.
1280 for (n = 0; n < n_capabilities; n++) {
1281 ws = &gc_threads[n]->gens[gen->no];
1282
1283 for (bd = ws->part_list; bd != NULL; bd = next) {
1284 next = bd->link;
1285 bd->link = gen->old_blocks;
1286 gen->old_blocks = bd;
1287 gen->n_old_blocks += bd->blocks;
1288 }
1289 ws->part_list = NULL;
1290 ws->n_part_blocks = 0;
1291 ws->n_part_words = 0;
1292
1293 ASSERT(ws->scavd_list == NULL);
1294 ASSERT(ws->n_scavd_blocks == 0);
1295 ASSERT(ws->n_scavd_words == 0);
1296
1297 if (ws->todo_free != ws->todo_bd->start) {
1298 ws->todo_bd->free = ws->todo_free;
1299 ws->todo_bd->link = gen->old_blocks;
1300 gen->old_blocks = ws->todo_bd;
1301 gen->n_old_blocks += ws->todo_bd->blocks;
1302 alloc_todo_block(ws,0); // always has one block.
1303 }
1304 }
1305
1306 // mark the small objects as from-space
1307 for (bd = gen->old_blocks; bd; bd = bd->link) {
1308 bd->flags &= ~BF_EVACUATED;
1309 }
1310
1311 // mark the large objects as from-space
1312 for (bd = gen->large_objects; bd; bd = bd->link) {
1313 bd->flags &= ~BF_EVACUATED;
1314 }
1315
1316 // mark the compact objects as from-space
1317 for (bd = gen->compact_objects; bd; bd = bd->link) {
1318 bd->flags &= ~BF_EVACUATED;
1319 }
1320
1321 // for a compacted generation, we need to allocate the bitmap
1322 if (gen->mark) {
1323 StgWord bitmap_size; // in bytes
1324 bdescr *bitmap_bdescr;
1325 StgWord *bitmap;
1326
1327 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
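        // That is one bit per word of the blocks being compacted; e.g. with
        // 4096-byte blocks and 8-byte words (the usual defaults, an
        // assumption here) this is 512 bits == 64 bytes of bitmap per block.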
1328
1329 if (bitmap_size > 0) {
1330 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1331 / BLOCK_SIZE);
1332 gen->bitmap = bitmap_bdescr;
1333 bitmap = bitmap_bdescr->start;
1334
1335 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1336 bitmap_size, bitmap);
1337
1338 // don't forget to fill it with zeros!
1339 memset(bitmap, 0, bitmap_size);
1340
1341 // For each block in this step, point to its bitmap from the
1342 // block descriptor.
1343 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1344 bd->u.bitmap = bitmap;
1345 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1346
1347 // Also at this point we set the BF_MARKED flag
1348 // for this block. The invariant is that
1349 // BF_MARKED is always unset, except during GC
1350 // when it is set on those blocks which will be
1351 // compacted.
1352 if (!(bd->flags & BF_FRAGMENTED)) {
1353 bd->flags |= BF_MARKED;
1354 }
1355
1356 // BF_SWEPT should be marked only for blocks that are being
1357 // collected in sweep()
1358 bd->flags &= ~BF_SWEPT;
1359 }
1360 }
1361 }
1362 }
1363
1364
1365 /* ----------------------------------------------------------------------------
1366 Save the mutable lists in saved_mut_lists
1367 ------------------------------------------------------------------------- */
1368
1369 static void
1370 stash_mut_list (Capability *cap, uint32_t gen_no)
1371 {
1372 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1373 cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
1374 }
1375
1376 /* ----------------------------------------------------------------------------
1377 Initialise a generation that is *not* to be collected
1378 ------------------------------------------------------------------------- */
1379
1380 static void
1381 prepare_uncollected_gen (generation *gen)
1382 {
1383 uint32_t i;
1384
1385
1386 ASSERT(gen->no > 0);
1387
1388 // save the current mutable lists for this generation, and
1389 // allocate a fresh block for each one. We'll traverse these
1390 // mutable lists as roots early on in the GC.
1391 for (i = 0; i < n_capabilities; i++) {
1392 stash_mut_list(capabilities[i], gen->no);
1393 }
1394
1395 ASSERT(gen->scavenged_large_objects == NULL);
1396 ASSERT(gen->n_scavenged_large_blocks == 0);
1397 }
1398
1399 /* -----------------------------------------------------------------------------
1400 Collect the completed blocks from a GC thread and attach them to
1401 the generation.
1402 -------------------------------------------------------------------------- */
1403
1404 static void
1405 collect_gct_blocks (void)
1406 {
1407 uint32_t g;
1408 gen_workspace *ws;
1409 bdescr *bd, *prev;
1410
1411 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1412 ws = &gct->gens[g];
1413
1414 // there may still be a block attached to ws->todo_bd;
1415 // leave it there to use next time.
1416
1417 if (ws->scavd_list != NULL) {
1418 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1419
1420 ASSERT(gct->scan_bd == NULL);
1421 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1422
1423 prev = NULL;
1424 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1425 prev = bd;
1426 }
1427 if (prev != NULL) {
1428 prev->link = ws->gen->blocks;
1429 ws->gen->blocks = ws->scavd_list;
1430 }
1431 ws->gen->n_blocks += ws->n_scavd_blocks;
1432 ws->gen->n_words += ws->n_scavd_words;
1433
1434 ws->scavd_list = NULL;
1435 ws->n_scavd_blocks = 0;
1436 ws->n_scavd_words = 0;
1437
1438 RELEASE_SPIN_LOCK(&ws->gen->sync);
1439 }
1440 }
1441 }
1442
1443 /* -----------------------------------------------------------------------------
1444 During mutation, any blocks that are filled by allocatePinned() are
1445 stashed on the local pinned_object_blocks list, to avoid needing to
1446 take a global lock. Here we collect those blocks from the
1447 cap->pinned_object_blocks lists and put them on the
1448 main g0->large_object list.
1449
1452 -------------------------------------------------------------------------- */
1453
1454 static void
1455 collect_pinned_object_blocks (void)
1456 {
1457 uint32_t n;
1458 bdescr *bd, *prev;
1459
1460 for (n = 0; n < n_capabilities; n++) {
1461 prev = NULL;
1462 for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1463 prev = bd;
1464 }
1465 if (prev != NULL) {
1466 prev->link = g0->large_objects;
1467 if (g0->large_objects != NULL) {
1468 g0->large_objects->u.back = prev;
1469 }
1470 g0->large_objects = capabilities[n]->pinned_object_blocks;
1471 capabilities[n]->pinned_object_blocks = 0;
1472 }
1473 }
1474 }
1475
1476 /* -----------------------------------------------------------------------------
1477 Initialise a gc_thread before GC
1478 -------------------------------------------------------------------------- */
1479
1480 static void
1481 init_gc_thread (gc_thread *t)
1482 {
1483 t->static_objects = END_OF_STATIC_OBJECT_LIST;
1484 t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST;
1485 t->scan_bd = NULL;
1486 t->mut_lists = t->cap->mut_lists;
1487 t->evac_gen_no = 0;
1488 t->failed_to_evac = false;
1489 t->eager_promotion = true;
1490 t->thunk_selector_depth = 0;
1491 t->copied = 0;
1492 t->scanned = 0;
1493 t->any_work = 0;
1494 t->no_work = 0;
1495 t->scav_find_work = 0;
1496 }
1497
1498 /* -----------------------------------------------------------------------------
1499 Function we pass to evacuate roots.
1500 -------------------------------------------------------------------------- */
1501
1502 static void
1503 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1504 {
1505 // we stole a register for gct, but this function is called from
1506 // *outside* the GC where the register variable is not in effect,
1507 // so we need to save and restore it here. NB. only call
1508 // mark_root() from the main GC thread, otherwise gct will be
1509 // incorrect.
1510 #if defined(THREADED_RTS)
1511 gc_thread *saved_gct;
1512 saved_gct = gct;
1513 #endif
1514 SET_GCT(user);
1515
1516 evacuate(root);
1517
1518 SET_GCT(saved_gct);
1519 }
1520
1521 /* ----------------------------------------------------------------------------
1522 Reset the sizes of the older generations when we do a major
1523 collection.
1524
1525 CURRENT STRATEGY: make all generations except zero the same size.
1526 We have to stay within the maximum heap size, and leave a certain
1527 percentage of the maximum heap size available to allocate into.
1528 ------------------------------------------------------------------------- */
1529
1530 static void
1531 resize_generations (void)
1532 {
1533 uint32_t g;
1534
1535 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1536 W_ live, size, min_alloc, words;
1537 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1538 const W_ gens = RtsFlags.GcFlags.generations;
1539
1540 // live in the oldest generations
1541 if (oldest_gen->live_estimate != 0) {
1542 words = oldest_gen->live_estimate;
1543 } else {
1544 words = oldest_gen->n_words;
1545 }
1546 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1547 oldest_gen->n_large_blocks +
1548 oldest_gen->n_compact_blocks;
1549
1550 // default max size for all generations except zero
1551 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1552 RtsFlags.GcFlags.minOldGenSize);
1553
1554 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1555 if (max > 0) {
1556 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1557 } else {
1558 RtsFlags.GcFlags.heapSizeSuggestion = size;
1559 }
1560 }
1561
1562 // minimum size for generation zero
1563 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1564 RtsFlags.GcFlags.minAllocAreaSize
1565 * (W_)n_capabilities);
1566
1567 // Auto-enable compaction when the residency reaches a
1568 // certain percentage of the maximum heap size (default: 30%).
1569 if (RtsFlags.GcFlags.compact ||
1570 (max > 0 &&
1571 oldest_gen->n_blocks >
1572 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1573 oldest_gen->mark = 1;
1574 oldest_gen->compact = 1;
1575 // debugBelch("compaction: on\n", live);
1576 } else {
1577 oldest_gen->mark = 0;
1578 oldest_gen->compact = 0;
1579 // debugBelch("compaction: off\n", live);
1580 }
1581
1582 if (RtsFlags.GcFlags.sweep) {
1583 oldest_gen->mark = 1;
1584 }
1585
1586 // if we're going to go over the maximum heap size, reduce the
1587 // size of the generations accordingly. The calculation is
1588 // different if compaction is turned on, because we don't need
1589 // to double the space required to collect the old generation.
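    // Roughly: each copying (non-compacted) older generation can need up
    // to 2*size during collection, and there are (gens-1) generations
    // above gen 0, so we solve (gens-1)*2*size + min_alloc <= max for
    // size; when the oldest generation is compacted it only needs about
    // 1*size, which is where the ((gens-1)*2 - 1) divisor below comes from.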
1590 if (max != 0) {
1591
1592 // this test is necessary to ensure that the calculations
1593 // below don't have any negative results - we're working
1594 // with unsigned values here.
1595 if (max < min_alloc) {
1596 heapOverflow();
1597 }
1598
1599 if (oldest_gen->compact) {
1600 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1601 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1602 }
1603 } else {
1604 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1605 size = (max - min_alloc) / ((gens - 1) * 2);
1606 }
1607 }
1608
1609 if (size < live) {
1610 heapOverflow();
1611 }
1612 }
1613
1614 #if 0
1615 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1616 min_alloc, size, max);
1617 #endif
1618
1619 for (g = 0; g < gens; g++) {
1620 generations[g].max_blocks = size;
1621 }
1622 }
1623 }
1624
1625 /* -----------------------------------------------------------------------------
1626 Calculate the new size of the nursery, and resize it.
1627 -------------------------------------------------------------------------- */
1628
1629 static void
1630 resize_nursery (void)
1631 {
1632 const StgWord min_nursery =
1633 RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;
1634
1635 if (RtsFlags.GcFlags.generations == 1)
1636 { // Two-space collector:
1637 W_ blocks;
1638
1639 /* set up a new nursery. Allocate a nursery size based on a
1640 * function of the amount of live data (by default a factor of 2)
1641 * Use the blocks from the old nursery if possible, freeing up any
1642 * left over blocks.
1643 *
1644 * If we get near the maximum heap size, then adjust our nursery
1645 * size accordingly. If the nursery is the same size as the live
1646 * data (L), then we need 3L bytes. We can reduce the size of the
1647 * nursery to bring the required memory down near 2L bytes.
1648 *
1649 * A normal 2-space collector would need 4L bytes to give the same
1650 * performance we get from 3L bytes, reducing to the same
1651 * performance at 2L bytes.
1652 */
1653 blocks = generations[0].n_blocks;
1654
1655 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1656 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1657 RtsFlags.GcFlags.maxHeapSize )
1658 {
1659 long adjusted_blocks; // signed on purpose
1660 int pc_free;
1661
1662 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1663
1664 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1665 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1666
1667 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1668            if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1669 {
1670 heapOverflow();
1671 }
1672 blocks = adjusted_blocks;
1673 }
1674 else
1675 {
1676 blocks *= RtsFlags.GcFlags.oldGenFactor;
1677 if (blocks < min_nursery)
1678 {
1679 blocks = min_nursery;
1680 }
1681 }
1682 resizeNurseries(blocks);
1683 }
1684 else // Generational collector
1685 {
1686 /*
1687 * If the user has given us a suggested heap size, adjust our
1688 * allocation area to make best use of the memory available.
1689 */
1690 if (RtsFlags.GcFlags.heapSizeSuggestion)
1691 {
1692 long blocks;
1693 StgWord needed;
1694
1695 calcNeeded(false, &needed); // approx blocks needed at next GC
1696
1697 /* Guess how much will be live in generation 0 step 0 next time.
1698 * A good approximation is obtained by finding the
1699 * percentage of g0 that was live at the last minor GC.
1700 *
1701 * We have an accurate figure for the amount of copied data in
1702 * 'copied', but we must convert this to a number of blocks, with
1703 * a small adjustment for estimated slop at the end of a block
1704 * (- 10 words).
1705 */
1706 if (N == 0)
1707 {
1708 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1709 / countNurseryBlocks();
1710 }
1711
1712 /* Estimate a size for the allocation area based on the
1713 * information available. We might end up going slightly under
1714 * or over the suggested heap size, but we should be pretty
1715 * close on average.
1716 *
1717 * Formula: suggested - needed
1718 * ----------------------------
1719 * 1 + g0_pcnt_kept/100
1720 *
1721 * where 'needed' is the amount of memory needed at the next
1722 * collection for collecting all gens except g0.
1723 */
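            // Illustrative numbers only: with heapSizeSuggestion == 100000
            // blocks, needed == 20000 and g0_pcnt_kept == 30, this gives
            // (100000 - 20000) * 100 / 130 ~= 61538 blocks for the
            // allocation area.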
1724 blocks =
1725 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1726 (100 + (long)g0_pcnt_kept);
1727
1728 if (blocks < (long)min_nursery) {
1729 blocks = min_nursery;
1730 }
1731
1732 resizeNurseries((W_)blocks);
1733 }
1734 else
1735 {
1736 // we might have added extra blocks to the nursery, so
1737 // resize back to the original size again.
1738 resizeNurseriesFixed();
1739 }
1740 }
1741 }
1742
1743 /* -----------------------------------------------------------------------------
1744 Sanity code for CAF garbage collection.
1745
1746 With DEBUG turned on, we manage a CAF list in addition to the SRT
1747 mechanism. After GC, we run down the CAF list and blackhole any
1748 CAFs which have been garbage collected. This means we get an error
1749 whenever the program tries to enter a garbage collected CAF.
1750
1751 Any garbage collected CAFs are taken off the CAF list at the same
1752 time.
1753 -------------------------------------------------------------------------- */
1754
1755 #if defined(DEBUG)
1756
1757 static void gcCAFs(void)
1758 {
1759 StgIndStatic *p, *prev;
1760
1761 const StgInfoTable *info;
1762 uint32_t i;
1763
1764 i = 0;
1765 p = debug_caf_list;
1766 prev = NULL;
1767
1768 for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST;
1769 p = (StgIndStatic*)p->saved_info) {
1770
1771 info = get_itbl((StgClosure*)p);
1772 ASSERT(info->type == IND_STATIC);
1773
1774 if (p->static_link == NULL) {
1775 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
1776 SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
1777 if (prev == NULL) {
1778 debug_caf_list = (StgIndStatic*)p->saved_info;
1779 } else {
1780 prev->saved_info = p->saved_info;
1781 }
1782 } else {
1783 prev = p;
1784 i++;
1785 }
1786 }
1787
1788 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1789 }
1790 #endif