Disable aging when doing deadlock detection GC
[ghc.git] / rts / sm / GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/storage/gc
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Arena.h"
32 #include "Storage.h"
33 #include "RtsUtils.h"
34 #include "Apply.h"
35 #include "Updates.h"
36 #include "Stats.h"
37 #include "Schedule.h"
38 #include "Sanity.h"
39 #include "BlockAlloc.h"
40 #include "ProfHeap.h"
41 #include "Weak.h"
42 #include "Prelude.h"
43 #include "RtsSignals.h"
44 #include "STM.h"
45 #include "Trace.h"
46 #include "RetainerProfile.h"
47 #include "LdvProfile.h"
48 #include "RaiseAsync.h"
49 #include "StableName.h"
50 #include "StablePtr.h"
51 #include "CheckUnload.h"
52 #include "CNF.h"
53 #include "RtsFlags.h"
54 #include "NonMoving.h"
55
56 #if defined(PROFILING)
57 #include "RetainerProfile.h"
58 #endif
59
60 #include <string.h> // for memset()
61 #include <unistd.h>
62
63 /* -----------------------------------------------------------------------------
64 Global variables
65 -------------------------------------------------------------------------- */
66
67 /* STATIC OBJECT LIST.
68 *
69 * During GC:
70 * We maintain a linked list of static objects that are still live.
71 * The requirements for this list are:
72 *
73 * - we need to scan the list while adding to it, in order to
74 * scavenge all the static objects (in the same way that
75 * breadth-first scavenging works for dynamic objects).
76 *
77 * - we need to be able to tell whether an object is already on
78 * the list, to break loops.
79 *
80 * Each static object has a "static link field", which we use for
81 * linking objects on to the list. We use a stack-type list, consing
82 * objects on the front as they are added (this means that the
83 * scavenge phase is depth-first, not breadth-first, but that
84 * shouldn't matter).
85 *
86 * A separate list is kept for objects that have been scavenged
87 * already - this is so that we can zero all the marks afterwards.
88 *
89 * An object is on the list if its static link field is non-zero; this
90 * means that we have to mark the end of the list with '1', not NULL.
91 *
92 * Extra notes for generational GC:
93 *
94 * Each generation has a static object list associated with it. When
95 * collecting generations up to N, we treat the static object lists
96 * from generations > N as roots.
97 *
98 * We build up a static object list while collecting generations 0..N,
99 * which is then appended to the static object list of generation N+1.
100 *
101 * See also: Note [STATIC_LINK fields] in Storage.h.
102 */
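/* A minimal illustrative sketch of the consing step described above, using
 * hypothetical names ("link_field", "obj"); the real code lives in
 * evacuate() and goes through the STATIC_LINK macro and the static_flag
 * bits (see Note [STATIC_LINK fields] in Storage.h):
 *
 *   if (*link_field == NULL) {                   // not yet on any list
 *       *link_field = gct->static_objects;       // cons onto the front
 *       gct->static_objects = obj;
 *   }
 *
 * The list is terminated by END_OF_STATIC_OBJECT_LIST rather than NULL,
 * because a NULL link field is what marks an object as not being on any
 * list.
 */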
103
104 /* N is the oldest generation being collected, where the generations
105 * are numbered starting at 0. A major GC (indicated by the major_gc
106 * flag) is when we're collecting all generations. We only attempt to
107 * deal with static objects and GC CAFs when doing a major GC.
108 */
109 uint32_t N;
110 bool major_gc;
111 bool deadlock_detect_gc;
112
113 /* Data used for allocation area sizing.
114 */
115 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
116
117 /* Mut-list stats */
118 #if defined(DEBUG)
119 uint32_t mutlist_MUTVARS,
120 mutlist_MUTARRS,
121 mutlist_MVARS,
122 mutlist_TVAR,
123 mutlist_TVAR_WATCH_QUEUE,
124 mutlist_TREC_CHUNK,
125 mutlist_TREC_HEADER,
126 mutlist_OTHERS;
127 #endif
128
129 /* Thread-local data for each GC thread
130 */
131 gc_thread **gc_threads = NULL;
132
133 #if !defined(THREADED_RTS)
134 // Must be aligned to 64 bytes to meet the stated 64-byte alignment of gen_workspace
135 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]
136 ATTRIBUTE_ALIGNED(64);
137 #endif
138
139 // Number of threads running in *this* GC. Affects how many
140 // step->todos[] lists we have to look in to find work.
141 uint32_t n_gc_threads;
142
143 // For stats:
144 static long copied; // *words* copied & scavenged during this GC
145
146 #if defined(PROF_SPIN) && defined(THREADED_RTS)
147 // spin and yield counts for the quasi-SpinLock in waitForGcThreads
148 volatile StgWord64 waitForGcThreads_spin = 0;
149 volatile StgWord64 waitForGcThreads_yield = 0;
150 volatile StgWord64 whitehole_gc_spin = 0;
151 #endif
152
153 bool work_stealing;
154
155 uint32_t static_flag = STATIC_FLAG_B;
156 uint32_t prev_static_flag = STATIC_FLAG_A;
157
158 DECLARE_GCT
159
160 /* -----------------------------------------------------------------------------
161 Static function declarations
162 -------------------------------------------------------------------------- */
163
164 static void mark_root (void *user, StgClosure **root);
165 static void prepare_collected_gen (generation *gen);
166 static void prepare_uncollected_gen (generation *gen);
167 static void init_gc_thread (gc_thread *t);
168 static void resize_nursery (void);
169 static void start_gc_threads (void);
170 static void scavenge_until_all_done (void);
171 static StgWord inc_running (void);
172 static StgWord dec_running (void);
173 static void wakeup_gc_threads (uint32_t me, bool idle_cap[]);
174 static void shutdown_gc_threads (uint32_t me, bool idle_cap[]);
175 static void collect_gct_blocks (void);
176 static void collect_pinned_object_blocks (void);
177 static void heapOverflow (void);
178
179 #if defined(DEBUG)
180 static void gcCAFs (void);
181 #endif
182
183 /* -----------------------------------------------------------------------------
184 The mark stack.
185 -------------------------------------------------------------------------- */
186
187 bdescr *mark_stack_top_bd; // topmost block in the mark stack
188 bdescr *mark_stack_bd; // current block in the mark stack
189 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
190
191 /* -----------------------------------------------------------------------------
192 GarbageCollect: the main entry point to the garbage collector.
193
194 The collect_gen parameter is determined by calling calcNeeded().
195
196 Locks held: all capabilities are held throughout GarbageCollect().
197 -------------------------------------------------------------------------- */
198
199 void
200 GarbageCollect (uint32_t collect_gen,
201 bool do_heap_census,
202 bool deadlock_detect,
203 uint32_t gc_type USED_IF_THREADS,
204 Capability *cap,
205 bool idle_cap[])
206 {
207 bdescr *bd;
208 generation *gen;
209 StgWord live_blocks, live_words, par_max_copied, par_balanced_copied,
210 gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
211 any_work, no_work, scav_find_work;
212 #if defined(THREADED_RTS)
213 gc_thread *saved_gct;
214 #endif
215 uint32_t g, n;
216
217 // necessary if we stole a callee-saves register for gct:
218 #if defined(THREADED_RTS)
219 saved_gct = gct;
220 #endif
221
222 #if defined(PROFILING)
223 CostCentreStack *save_CCS[n_capabilities];
224 #endif
225
226 ACQUIRE_SM_LOCK;
227
228 #if defined(RTS_USER_SIGNALS)
229 if (RtsFlags.MiscFlags.install_signal_handlers) {
230 // block signals
231 blockUserSignals();
232 }
233 #endif
234
235 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
236 // otherwise adjust the padding in gen_workspace.
237
238 // this is the main thread
239 SET_GCT(gc_threads[cap->no]);
240
241 // tell the stats department that we've started a GC
242 stat_startGC(cap, gct);
243
244 // Lock the StablePtr table. This prevents FFI calls manipulating
245 // the table from occurring during GC.
246 stablePtrLock();
247
248 #if defined(DEBUG)
249 mutlist_MUTVARS = 0;
250 mutlist_MUTARRS = 0;
251 mutlist_MVARS = 0;
252 mutlist_TVAR = 0;
253 mutlist_TVAR_WATCH_QUEUE = 0;
254 mutlist_TREC_CHUNK = 0;
255 mutlist_TREC_HEADER = 0;
256 mutlist_OTHERS = 0;
257 #endif
258
259 // attribute any costs to CCS_GC
260 #if defined(PROFILING)
261 for (n = 0; n < n_capabilities; n++) {
262 save_CCS[n] = capabilities[n]->r.rCCCS;
263 capabilities[n]->r.rCCCS = CCS_GC;
264 }
265 #endif
266
267 /* Figure out which generation to collect
268 */
269 N = collect_gen;
270 major_gc = (N == RtsFlags.GcFlags.generations-1);
271
272 /* See Note [Deadlock detection under nonmoving collector]. */
273 deadlock_detect_gc = deadlock_detect;
274
275 /* N.B. The nonmoving collector works a bit differently. See
276 * Note [Static objects under the nonmoving collector].
277 */
278 if (major_gc && !RtsFlags.GcFlags.useNonmoving) {
279 prev_static_flag = static_flag;
280 static_flag =
281 static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
282 }
283
284 #if defined(THREADED_RTS)
285 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
286 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
287 // It's not always a good idea to do load balancing in parallel
288 // GC. In particular, for a parallel program we don't want to
289 // lose locality by moving cached data into another CPU's cache
290 // (this effect can be quite significant).
291 //
292 // We could have a more complex way to determine whether to do
293 // work stealing or not, e.g. it might be a good idea to do it
294 // if the heap is big. For now, we just turn it on or off with
295 // a flag.
296 #endif
297
298 /* Start threads, so they can be spinning up while we finish initialisation.
299 */
300 start_gc_threads();
301
302 #if defined(THREADED_RTS)
303 /* How many threads will be participating in this GC?
304 * We don't try to parallelise minor GCs (unless the user asks for
305 * it with +RTS -gn0), or mark/compact/sweep GC.
306 */
307 if (gc_type == SYNC_GC_PAR) {
308 n_gc_threads = n_capabilities;
309 } else {
310 n_gc_threads = 1;
311 }
312 #else
313 n_gc_threads = 1;
314 #endif
315
316 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
317 N, n_gc_threads);
318
319 #if defined(DEBUG)
320 // check for memory leaks if DEBUG is on
321 memInventory(DEBUG_gc);
322 #endif
323
324 // do this *before* we start scavenging
325 collectFreshWeakPtrs();
326
327 // check sanity *before* GC
328 IF_DEBUG(sanity, checkSanity(false /* before GC */, major_gc));
329
330 // gather blocks allocated using allocatePinned() from each capability
331 // and put them on the g0->large_object list.
332 collect_pinned_object_blocks();
333
334 // Initialise all the generations that we're collecting.
335 for (g = 0; g <= N; g++) {
336 prepare_collected_gen(&generations[g]);
337 }
338 // Initialise all the generations that we're *not* collecting.
339 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
340 prepare_uncollected_gen(&generations[g]);
341 }
342
343 // Prepare this gc_thread
344 init_gc_thread(gct);
345
346 /* Allocate a mark stack if we're doing a major collection.
347 */
348 if (major_gc && oldest_gen->mark) {
349 mark_stack_bd = allocBlock();
350 mark_stack_top_bd = mark_stack_bd;
351 mark_stack_bd->link = NULL;
352 mark_stack_bd->u.back = NULL;
353 mark_sp = mark_stack_bd->start;
354 } else {
355 mark_stack_bd = NULL;
356 mark_stack_top_bd = NULL;
357 mark_sp = NULL;
358 }
359
360 /* -----------------------------------------------------------------------
361 * follow all the roots that we know about:
362 */
363
364 // the main thread is running: this prevents any other threads from
365 // exiting prematurely, so we can start them now.
366 // NB. do this after the mutable lists have been saved above, otherwise
367 // the other GC threads will be writing into the old mutable lists.
368 inc_running();
369 wakeup_gc_threads(gct->thread_index, idle_cap);
370
371 traceEventGcWork(gct->cap);
372
373 // scavenge the capability-private mutable lists. This isn't part
374 // of markSomeCapabilities() because markSomeCapabilities() can only
375 // call back into the GC via mark_root() (due to the gct register
376 // variable).
377 if (n_gc_threads == 1) {
378 for (n = 0; n < n_capabilities; n++) {
379 #if defined(THREADED_RTS)
380 scavenge_capability_mut_Lists1(capabilities[n]);
381 #else
382 scavenge_capability_mut_lists(capabilities[n]);
383 #endif
384 }
385 } else {
386 scavenge_capability_mut_lists(gct->cap);
387 for (n = 0; n < n_capabilities; n++) {
388 if (idle_cap[n]) {
389 markCapability(mark_root, gct, capabilities[n],
390 true/*don't mark sparks*/);
391 scavenge_capability_mut_lists(capabilities[n]);
392 }
393 }
394 }
395
396 // follow roots from the CAF list (used by GHCi)
397 gct->evac_gen_no = 0;
398 markCAFs(mark_root, gct);
399
400 // follow all the roots that the application knows about.
401 gct->evac_gen_no = 0;
402 if (n_gc_threads == 1) {
403 for (n = 0; n < n_capabilities; n++) {
404 markCapability(mark_root, gct, capabilities[n],
405 true/*don't mark sparks*/);
406 }
407 } else {
408 markCapability(mark_root, gct, cap, true/*don't mark sparks*/);
409 }
410
411 markScheduler(mark_root, gct);
412
413 // Mark the weak pointer list, and prepare to detect dead weak pointers.
414 markWeakPtrList();
415 initWeakForGC();
416
417 // Mark the stable pointer table.
418 markStablePtrTable(mark_root, gct);
419
420 // Remember old stable name addresses.
421 rememberOldStableNameAddresses ();
422
423 /* -------------------------------------------------------------------------
424 * Repeatedly scavenge all the areas we know about until there's no
425 * more scavenging to be done.
426 */
427
428 StgWeak *dead_weak_ptr_list = NULL;
429 StgTSO *resurrected_threads = END_TSO_QUEUE;
430
431 for (;;)
432 {
433 scavenge_until_all_done();
434
435 // The other threads are now stopped. We might recurse back to
436 // here, but from now on this is the only thread.
437
438 // must be last... invariant is that everything is fully
439 // scavenged at this point.
440 if (traverseWeakPtrList(&dead_weak_ptr_list, &resurrected_threads)) { // returns true if evaced something
441 inc_running();
442 continue;
443 }
444
445 // If we get to here, there's really nothing left to do.
446 break;
447 }
448
449 shutdown_gc_threads(gct->thread_index, idle_cap);
450
451 // Now see which stable names are still alive.
452 gcStableNameTable();
453
454 #if defined(THREADED_RTS)
455 if (n_gc_threads == 1) {
456 for (n = 0; n < n_capabilities; n++) {
457 pruneSparkQueue(capabilities[n]);
458 }
459 } else {
460 for (n = 0; n < n_capabilities; n++) {
461 if (n == cap->no || idle_cap[n]) {
462 pruneSparkQueue(capabilities[n]);
463 }
464 }
465 }
466 #endif
467
468 #if defined(PROFILING)
469 // We call processHeapClosureForDead() on every closure destroyed during
470 // the current garbage collection, so we invoke LdvCensusForDead().
471 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
472 || RtsFlags.ProfFlags.bioSelector != NULL) {
473 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
474 LdvCensusForDead(N);
475 ACQUIRE_SM_LOCK;
476 }
477 #endif
478
479 // NO MORE EVACUATION AFTER THIS POINT!
480
481 // Finally: compact or sweep the oldest generation.
482 if (major_gc && oldest_gen->mark) {
483 if (oldest_gen->compact)
484 compact(gct->scavenged_static_objects,
485 &dead_weak_ptr_list,
486 &resurrected_threads);
487 else
488 sweep(oldest_gen);
489 }
490
491 copied = 0;
492 par_max_copied = 0;
493 par_balanced_copied = 0;
494 gc_spin_spin = 0;
495 gc_spin_yield = 0;
496 mut_spin_spin = 0;
497 mut_spin_yield = 0;
498 any_work = 0;
499 no_work = 0;
500 scav_find_work = 0;
501 {
502 uint32_t i;
503 uint64_t par_balanced_copied_acc = 0;
504 const gc_thread* thread;
505
506 for (i=0; i < n_gc_threads; i++) {
507 copied += gc_threads[i]->copied;
508 }
509 for (i=0; i < n_gc_threads; i++) {
510 thread = gc_threads[i];
511 if (n_gc_threads > 1) {
512 debugTrace(DEBUG_gc,"thread %d:", i);
513 debugTrace(DEBUG_gc," copied %ld",
514 thread->copied * sizeof(W_));
515 debugTrace(DEBUG_gc," scanned %ld",
516 thread->scanned * sizeof(W_));
517 debugTrace(DEBUG_gc," any_work %ld",
518 thread->any_work);
519 debugTrace(DEBUG_gc," no_work %ld",
520 thread->no_work);
521 debugTrace(DEBUG_gc," scav_find_work %ld",
522 thread->scav_find_work);
523
524 #if defined(THREADED_RTS) && defined(PROF_SPIN)
525 gc_spin_spin += thread->gc_spin.spin;
526 gc_spin_yield += thread->gc_spin.yield;
527 mut_spin_spin += thread->mut_spin.spin;
528 mut_spin_yield += thread->mut_spin.yield;
529 #endif
530
531 any_work += thread->any_work;
532 no_work += thread->no_work;
533 scav_find_work += thread->scav_find_work;
534
535 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
536 par_balanced_copied_acc +=
537 stg_min(n_gc_threads * gc_threads[i]->copied, copied);
538 }
539 }
540 if (n_gc_threads > 1) {
541 // See Note [Work Balance] for an explanation of this computation
542 par_balanced_copied =
543 (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
544 (n_gc_threads - 1);
545 }
546 }
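/* A worked example of the computation above, with made-up numbers (see
 * Note [Work Balance] for the full derivation). With n_gc_threads = 2 and
 * per-thread copied counts of 300 and 100 words:
 *
 *   copied                  = 300 + 100                      = 400
 *   par_balanced_copied_acc = min(2*300, 400) + min(2*100, 400)
 *                           = 400 + 200                      = 600
 *   par_balanced_copied     = (600 - 400 + 0) / (2 - 1)      = 200
 *
 * Perfectly balanced work (200 + 200) would give 400 (== copied), while a
 * single thread doing all the copying would give 0.
 */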
547
548 // Run through all the generations and tidy up.
549 // We're going to:
550 // - count the amount of "live" data (live_words, live_blocks)
551 // - count the amount of "copied" data in this GC (copied)
552 // - free from-space
553 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
554 //
555 live_words = 0;
556 live_blocks = 0;
557
558 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
559
560 if (g == N) {
561 generations[g].collections++; // for stats
562 if (n_gc_threads > 1) generations[g].par_collections++;
563 }
564
565 // Count the mutable list as bytes "copied" for the purposes of
566 // stats. Every mutable list is copied during every GC.
567 if (g > 0) {
568 W_ mut_list_size = 0;
569 for (n = 0; n < n_capabilities; n++) {
570 mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
571 }
572 copied += mut_list_size;
573
574 debugTrace(DEBUG_gc,
575 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d others)",
576 (unsigned long)(mut_list_size * sizeof(W_)),
577 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
578 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
579 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
580 mutlist_OTHERS);
581 }
582
583 bdescr *next, *prev;
584 gen = &generations[g];
585
586 // for generations we collected...
587 if (g <= N && !(RtsFlags.GcFlags.useNonmoving && gen == oldest_gen)) {
588
589 /* free old memory and shift to-space into from-space for all
590 * the collected generations (except the allocation area). These
591 * freed blocks will probably be quickly recycled.
592 */
593 if (gen->mark)
594 {
595 // tack the new blocks on the end of the existing blocks
596 if (gen->old_blocks != NULL) {
597
598 prev = NULL;
599 for (bd = gen->old_blocks; bd != NULL; bd = next) {
600
601 next = bd->link;
602
603 if (!(bd->flags & BF_MARKED))
604 {
605 if (prev == NULL) {
606 gen->old_blocks = next;
607 } else {
608 prev->link = next;
609 }
610 freeGroup(bd);
611 gen->n_old_blocks--;
612 }
613 else
614 {
615 gen->n_words += bd->free - bd->start;
616
617 // NB. this step might not be compacted next
618 // time, so reset the BF_MARKED flags.
619 // They are set before GC if we're going to
620 // compact. (search for BF_MARKED above).
621 bd->flags &= ~BF_MARKED;
622
623 // between GCs, all blocks in the heap except
624 // for the nursery have the BF_EVACUATED flag set.
625 bd->flags |= BF_EVACUATED;
626
627 prev = bd;
628 }
629 }
630
631 if (prev != NULL) {
632 prev->link = gen->blocks;
633 gen->blocks = gen->old_blocks;
634 }
635 }
636 // add the new blocks to the block tally
637 gen->n_blocks += gen->n_old_blocks;
638 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
639 ASSERT(countOccupied(gen->blocks) == gen->n_words);
640 }
641 else // not compacted
642 {
643 freeChain(gen->old_blocks);
644 }
645
646 gen->old_blocks = NULL;
647 gen->n_old_blocks = 0;
648
649 /* LARGE OBJECTS. The current live large objects are chained on
650 * scavenged_large, having been moved during garbage
651 * collection from large_objects. Any objects left on the
652 * large_objects list are therefore dead, so we free them here.
653 */
654 freeChain(gen->large_objects);
655 gen->large_objects = gen->scavenged_large_objects;
656 gen->n_large_blocks = gen->n_scavenged_large_blocks;
657 gen->n_large_words = countOccupied(gen->large_objects);
658 gen->n_new_large_words = 0;
659
660 /* COMPACT_NFDATA. The currently live compacts are chained
661 * to live_compact_objects, quite like large objects. And
662 * objects left on the compact_objects list are dead.
663 *
664 * We don't run a simple freeChain because we want to give the
665 * CNF module some chance to free memory that freeChain would
666 * not see (namely blocks appended to a CNF through a compactResize).
667 *
668 * See Note [Compact Normal Forms] for details.
669 */
670 for (bd = gen->compact_objects; bd; bd = next) {
671 next = bd->link;
672 compactFree(((StgCompactNFDataBlock*)bd->start)->owner);
673 }
674 gen->compact_objects = gen->live_compact_objects;
675 gen->n_compact_blocks = gen->n_live_compact_blocks;
676 }
677 else // for generations > N
678 {
679 /* For older generations, we need to append the
680 * scavenged_large_object list (i.e. large objects that have been
681 * promoted during this GC) to the large_object list for that step.
682 */
683 for (bd = gen->scavenged_large_objects; bd; bd = next) {
684 next = bd->link;
685 dbl_link_onto(bd, &gen->large_objects);
686 gen->n_large_words += bd->free - bd->start;
687 }
688
689 // And same for compacts
690 for (bd = gen->live_compact_objects; bd; bd = next) {
691 next = bd->link;
692 dbl_link_onto(bd, &gen->compact_objects);
693 }
694
695 // add the new blocks we promoted during this GC
696 gen->n_large_blocks += gen->n_scavenged_large_blocks;
697 gen->n_compact_blocks += gen->n_live_compact_blocks;
698 }
699
700 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
701 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
702 // We can't run the same assertion on compact objects, because there
703 // is memory "the GC doesn't see" (directly) but which is still
704 // accounted for in gen->n_compact_blocks
705
706 gen->scavenged_large_objects = NULL;
707 gen->n_scavenged_large_blocks = 0;
708 gen->live_compact_objects = NULL;
709 gen->n_live_compact_blocks = 0;
710
711 // Count "live" data
712 live_words += genLiveWords(gen);
713 live_blocks += genLiveBlocks(gen);
714
715 // add in the partial blocks in the gen_workspaces
716 {
717 uint32_t i;
718 for (i = 0; i < n_capabilities; i++) {
719 live_words += gcThreadLiveWords(i, gen->no);
720 live_blocks += gcThreadLiveBlocks(i, gen->no);
721 }
722 }
723 } // for all generations
724
725 // Mark and sweep the oldest generation.
726 // N.B. This can only happen after we've moved
727 // oldest_gen->scavenged_large_objects back to oldest_gen->large_objects.
728 ASSERT(oldest_gen->scavenged_large_objects == NULL);
729 if (RtsFlags.GcFlags.useNonmoving && major_gc) {
730 // All threads in the non-moving heap should be found to be alive, because
731 // threads in the non-moving generation's list should live in the
732 // non-moving heap, and we consider non-moving objects alive during
733 // preparation.
734 ASSERT(oldest_gen->old_threads == END_TSO_QUEUE);
735 // For weaks, remember that we evacuated all weaks to the non-moving heap
736 // in markWeakPtrList(), and then moved the weak_ptr_list list to
737 // old_weak_ptr_list. We then moved weaks with live keys to the
738 // weak_ptr_list again. Then, in collectDeadWeakPtrs() we moved weaks in
739 // old_weak_ptr_list to dead_weak_ptr_list. So at this point
740 // old_weak_ptr_list should be empty.
741 ASSERT(oldest_gen->old_weak_ptr_list == NULL);
742
743 // we may need to take the lock to allocate mark queue blocks
744 RELEASE_SM_LOCK;
745 // dead_weak_ptr_list contains weak pointers with dead keys. Those need to
746 // be kept alive because we'll use them in scheduleFinalizers(). Similarly
747 // resurrected_threads are also going to be used in resurrectThreads(),
748 // so we need to mark those too.
749 // Note that in the sequential case these lists will be extended with more
750 // weaks and threads found to be dead during the mark.
751 #if !defined(THREADED_RTS)
752 // In the non-threaded runtime this is the only time we push to the
753 // upd_rem_set
754 nonmovingAddUpdRemSetBlocks(&gct->cap->upd_rem_set.queue);
755 #endif
756 nonmovingCollect(&dead_weak_ptr_list, &resurrected_threads);
757 ACQUIRE_SM_LOCK;
758 }
759
760 // Update the max size of older generations after a major GC:
761 // We can't resize here in the case of the concurrent collector since we
762 // don't yet know how much live data we have. This will be instead done
763 // once we finish marking.
764 if (major_gc && RtsFlags.GcFlags.generations > 1 && ! RtsFlags.GcFlags.useNonmoving)
765 resizeGenerations();
766
767 // Free the mark stack.
768 if (mark_stack_top_bd != NULL) {
769 debugTrace(DEBUG_gc, "mark stack: %d blocks",
770 countBlocks(mark_stack_top_bd));
771 freeChain(mark_stack_top_bd);
772 }
773
774 // Free any bitmaps.
775 for (g = 0; g <= N; g++) {
776 gen = &generations[g];
777 if (gen->bitmap != NULL) {
778 freeGroup(gen->bitmap);
779 gen->bitmap = NULL;
780 }
781 }
782
783 resize_nursery();
784
785 resetNurseries();
786
787 // mark the garbage collected CAFs as dead
788 #if defined(DEBUG)
789 if (major_gc && !RtsFlags.GcFlags.useNonmoving) { gcCAFs(); }
790 #endif
791
792 // Update the stable name hash table
793 updateStableNameTable(major_gc);
794
795 // unlock the StablePtr table. Must be before scheduleFinalizers(),
796 // because a finalizer may call hs_free_fun_ptr() or
797 // hs_free_stable_ptr(), both of which access the StablePtr table.
798 stablePtrUnlock();
799
800 // Must be after stablePtrUnlock(), because it might free stable ptrs.
801 if (major_gc) {
802 checkUnload (gct->scavenged_static_objects);
803 }
804
805 #if defined(PROFILING)
806 // resetStaticObjectForRetainerProfiling() must be called before
807 // zeroing below.
808
809 // ToDo: fix the gct->scavenged_static_objects below
810 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
811 #endif
812
813 // Start any pending finalizers. Must be after
814 // updateStableTables() and stableUnlock() (see #4221).
815 RELEASE_SM_LOCK;
816 scheduleFinalizers(cap, dead_weak_ptr_list);
817 ACQUIRE_SM_LOCK;
818
819 // check sanity after GC
820 // before resurrectThreads(), because that might overwrite some
821 // closures, which will cause problems with THREADED where we don't
822 // fill slop. If we are using the nonmoving collector then we can't claim to
823 // be *after* the major GC; it's now running concurrently.
824 IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc && !RtsFlags.GcFlags.useNonmoving));
825
826 // If a heap census is due, we need to do it before
827 // resurrectThreads(), for the same reason as checkSanity above:
828 // resurrectThreads() will overwrite some closures and leave slop
829 // behind.
830 if (do_heap_census) {
831 debugTrace(DEBUG_sched, "performing heap census");
832 RELEASE_SM_LOCK;
833 heapCensus(gct->gc_start_cpu);
834 ACQUIRE_SM_LOCK;
835 }
836
837 // send exceptions to any threads which were about to die
838 RELEASE_SM_LOCK;
839 resurrectThreads(resurrected_threads);
840 ACQUIRE_SM_LOCK;
841
842 if (major_gc) {
843 W_ need_prealloc, need_live, need, got;
844 uint32_t i;
845
846 need_live = 0;
847 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
848 need_live += genLiveBlocks(&generations[i]);
849 }
850 need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live);
851
852 need_prealloc = 0;
853 for (i = 0; i < n_nurseries; i++) {
854 need_prealloc += nurseries[i].n_blocks;
855 }
856 need_prealloc += RtsFlags.GcFlags.largeAllocLim;
857 need_prealloc += countAllocdBlocks(exec_block);
858 need_prealloc += arenaBlocks();
859 #if defined(PROFILING)
860 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
861 need_prealloc = retainerStackBlocks();
862 }
863 #endif
864
865 /* If the amount of data remains constant, next major GC we'll
866 * require (F+1)*live + prealloc. We leave (F+2)*live + prealloc
867 * in order to reduce repeated deallocation and reallocation. #14702
868 */
869 need = need_prealloc + (RtsFlags.GcFlags.oldGenFactor + 2) * need_live;
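/* A worked example with illustrative numbers: with the default old-gen
 * factor F = 2, need_live = 1000 blocks and need_prealloc = 200 blocks,
 * we retain
 *
 *   need = 200 + (2 + 2) * 1000 = 4200 blocks
 *
 * and return anything beyond that to the OS, subject to the
 * heapSizeSuggestion floor and maxHeapSize cap applied below.
 */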
870
871 /* Also, if the user has set a heap size, do not drop below it.
872 */
873 need = stg_max(RtsFlags.GcFlags.heapSizeSuggestion, need);
874
875 /* But with a large nursery, the above estimate might exceed
876 * maxHeapSize. A large resident set size might make the OS
877 * kill this process, or swap unnecessarily. Therefore we
878 * ensure that our estimate does not exceed maxHeapSize.
879 */
880 if (RtsFlags.GcFlags.maxHeapSize != 0) {
881 need = stg_min(RtsFlags.GcFlags.maxHeapSize, need);
882 }
883
884 need = BLOCKS_TO_MBLOCKS(need);
885
886 got = mblocks_allocated;
887
888 if (got > need) {
889 returnMemoryToOS(got - need);
890 }
891 }
892
893 // extra GC trace info
894 IF_DEBUG(gc, statDescribeGens());
895
896 #if defined(DEBUG)
897 // symbol-table based profiling
898 /* heapCensus(to_blocks); */ /* ToDo */
899 #endif
900
901 // restore enclosing cost centre
902 #if defined(PROFILING)
903 for (n = 0; n < n_capabilities; n++) {
904 capabilities[n]->r.rCCCS = save_CCS[n];
905 }
906 #endif
907
908 #if defined(DEBUG)
909 // check for memory leaks if DEBUG is on
910 memInventory(DEBUG_gc);
911 #endif
912
913 // ok, GC over: tell the stats department what happened.
914 stat_endGC(cap, gct, live_words, copied,
915 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
916 N, n_gc_threads, par_max_copied, par_balanced_copied,
917 gc_spin_spin, gc_spin_yield, mut_spin_spin, mut_spin_yield,
918 any_work, no_work, scav_find_work);
919
920 #if defined(RTS_USER_SIGNALS)
921 if (RtsFlags.MiscFlags.install_signal_handlers) {
922 // unblock signals again
923 unblockUserSignals();
924 }
925 #endif
926
927 RELEASE_SM_LOCK;
928
929 SET_GCT(saved_gct);
930 }
931
932 /* -----------------------------------------------------------------------------
933 Heap overflow is indicated by setting a flag that the caller of
934 GarbageCollect can check. (not ideal, TODO: better)
935 -------------------------------------------------------------------------- */
936
937 static void heapOverflow(void)
938 {
939 heap_overflow = true;
940 }
941
942 /* -----------------------------------------------------------------------------
943 Initialise the gc_thread structures.
944 -------------------------------------------------------------------------- */
945
946 static void
947 new_gc_thread (uint32_t n, gc_thread *t)
948 {
949 uint32_t g;
950 gen_workspace *ws;
951
952 t->cap = capabilities[n];
953
954 #if defined(THREADED_RTS)
955 t->id = 0;
956 initSpinLock(&t->gc_spin);
957 initSpinLock(&t->mut_spin);
958 ACQUIRE_SPIN_LOCK(&t->gc_spin);
959 ACQUIRE_SPIN_LOCK(&t->mut_spin);
960 t->wakeup = GC_THREAD_INACTIVE; // starts true, so we can wait for the
961 // thread to start up, see wakeup_gc_threads
962 #endif
963
964 t->thread_index = n;
965 t->free_blocks = NULL;
966 t->gc_count = 0;
967
968 init_gc_thread(t);
969
970 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
971 {
972 ws = &t->gens[g];
973 ws->gen = &generations[g];
974 ASSERT(g == ws->gen->no);
975 ws->my_gct = t;
976
977 // We want to call
978 // alloc_todo_block(ws,0);
979 // but can't, because it uses gct which isn't set up at this point.
980 // Hence, allocate a block for todo_bd manually:
981 {
982 bdescr *bd = allocBlockOnNode(capNoToNumaNode(n));
983 // no lock, locks aren't initialised yet
984 initBdescr(bd, ws->gen, ws->gen->to);
985 bd->flags = BF_EVACUATED;
986 bd->u.scan = bd->free = bd->start;
987
988 ws->todo_bd = bd;
989 ws->todo_free = bd->free;
990 ws->todo_lim = bd->start + BLOCK_SIZE_W;
991 }
992
993 ws->todo_q = newWSDeque(128);
994 ws->todo_overflow = NULL;
995 ws->n_todo_overflow = 0;
996 ws->todo_large_objects = NULL;
997 ws->todo_seg = END_NONMOVING_TODO_LIST;
998
999 ws->part_list = NULL;
1000 ws->n_part_blocks = 0;
1001 ws->n_part_words = 0;
1002
1003 ws->scavd_list = NULL;
1004 ws->n_scavd_blocks = 0;
1005 ws->n_scavd_words = 0;
1006 }
1007 }
1008
1009
1010 void
1011 initGcThreads (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
1012 {
1013 #if defined(THREADED_RTS)
1014 uint32_t i;
1015
1016 if (from > 0) {
1017 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
1018 "initGcThreads");
1019 } else {
1020 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
1021 "initGcThreads");
1022 }
1023
1024 for (i = from; i < to; i++) {
1025 gc_threads[i] =
1026 stgMallocBytes(sizeof(gc_thread) +
1027 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
1028 "alloc_gc_threads");
1029
1030 new_gc_thread(i, gc_threads[i]);
1031 }
1032 #else
1033 ASSERT(from == 0 && to == 1);
1034 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
1035 gc_threads[0] = gct;
1036 new_gc_thread(0,gc_threads[0]);
1037 #endif
1038 }
1039
1040 void
1041 freeGcThreads (void)
1042 {
1043 uint32_t g;
1044 if (gc_threads != NULL) {
1045 #if defined(THREADED_RTS)
1046 uint32_t i;
1047 for (i = 0; i < n_capabilities; i++) {
1048 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
1049 {
1050 freeWSDeque(gc_threads[i]->gens[g].todo_q);
1051 }
1052 stgFree (gc_threads[i]);
1053 }
1054 stgFree (gc_threads);
1055 #else
1056 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
1057 {
1058 freeWSDeque(gc_threads[0]->gens[g].todo_q);
1059 }
1060 stgFree (gc_threads);
1061 #endif
1062 gc_threads = NULL;
1063 }
1064 }
1065
1066 /* ----------------------------------------------------------------------------
1067 Start GC threads
1068 ------------------------------------------------------------------------- */
1069
1070 static volatile StgWord gc_running_threads;
1071
1072 static StgWord
1073 inc_running (void)
1074 {
1075 StgWord new;
1076 new = atomic_inc(&gc_running_threads, 1);
1077 ASSERT(new <= n_gc_threads);
1078 return new;
1079 }
1080
1081 static StgWord
1082 dec_running (void)
1083 {
1084 ASSERT(gc_running_threads != 0);
1085 return atomic_dec(&gc_running_threads);
1086 }
1087
1088 static bool
1089 any_work (void)
1090 {
1091 int g;
1092 gen_workspace *ws;
1093
1094 gct->any_work++;
1095
1096 write_barrier();
1097
1098 // scavenge objects in compacted generation
1099 if (mark_stack_bd != NULL && !mark_stack_empty()) {
1100 return true;
1101 }
1102
1103 // Check for global work in any gen. We don't need to check for
1104 // local work, because we have already exited scavenge_loop(),
1105 // which means there is no local work for this thread.
1106 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
1107 ws = &gct->gens[g];
1108 if (ws->todo_large_objects) return true;
1109 if (!looksEmptyWSDeque(ws->todo_q)) return true;
1110 if (ws->todo_overflow) return true;
1111 }
1112
1113 #if defined(THREADED_RTS)
1114 if (work_stealing) {
1115 uint32_t n;
1116 // look for work to steal
1117 for (n = 0; n < n_gc_threads; n++) {
1118 if (n == gct->thread_index) continue;
1119 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
1120 ws = &gc_threads[n]->gens[g];
1121 if (!looksEmptyWSDeque(ws->todo_q)) return true;
1122 }
1123 }
1124 }
1125 #endif
1126
1127 gct->no_work++;
1128 #if defined(THREADED_RTS)
1129 yieldThread();
1130 #endif
1131
1132 return false;
1133 }
1134
1135 static void
1136 scavenge_until_all_done (void)
1137 {
1138 DEBUG_ONLY( uint32_t r );
1139
1140
1141 loop:
1142 #if defined(THREADED_RTS)
1143 if (n_gc_threads > 1) {
1144 scavenge_loop();
1145 } else {
1146 scavenge_loop1();
1147 }
1148 #else
1149 scavenge_loop();
1150 #endif
1151
1152 collect_gct_blocks();
1153
1154 // scavenge_loop() only exits when there's no work to do
1155
1156 #if defined(DEBUG)
1157 r = dec_running();
1158 #else
1159 dec_running();
1160 #endif
1161
1162 traceEventGcIdle(gct->cap);
1163
1164 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1165
1166 while (gc_running_threads != 0) {
1167 // usleep(1);
1168 if (any_work()) {
1169 inc_running();
1170 traceEventGcWork(gct->cap);
1171 goto loop;
1172 }
1173 // any_work() does not remove the work from the queue, it
1174 // just checks for the presence of work. If we find any,
1175 // then we increment gc_running_threads and go back to
1176 // scavenge_loop() to perform any pending work.
1177 }
1178
1179 traceEventGcDone(gct->cap);
1180 }
1181
1182 #if defined(THREADED_RTS)
1183
1184 void
1185 gcWorkerThread (Capability *cap)
1186 {
1187 gc_thread *saved_gct;
1188
1189 // necessary if we stole a callee-saves register for gct:
1190 saved_gct = gct;
1191
1192 SET_GCT(gc_threads[cap->no]);
1193 gct->id = osThreadId();
1194
1195 // Wait until we're told to wake up
1196 RELEASE_SPIN_LOCK(&gct->mut_spin);
1197 // yieldThread();
1198 // Strangely, adding a yieldThread() here makes the CPU time
1199 // measurements more accurate on Linux, perhaps because it syncs
1200 // the CPU time across the multiple cores. Without this, CPU time
1201 // is heavily skewed towards GC rather than MUT.
1202 gct->wakeup = GC_THREAD_STANDING_BY;
1203 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1204 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1205
1206 init_gc_thread(gct);
1207
1208 traceEventGcWork(gct->cap);
1209
1210 // Every thread evacuates some roots.
1211 gct->evac_gen_no = 0;
1212 markCapability(mark_root, gct, cap, true/*prune sparks*/);
1213 scavenge_capability_mut_lists(cap);
1214
1215 scavenge_until_all_done();
1216
1217 #if defined(THREADED_RTS)
1218 // Now that the whole heap is marked, we discard any sparks that
1219 // were found to be unreachable. The main GC thread is currently
1220 // marking heap reachable via weak pointers, so it is
1221 // non-deterministic whether a spark will be retained if it is
1222 // only reachable via weak pointers. To fix this problem would
1223 // require another GC barrier, which is too high a price.
1224 pruneSparkQueue(cap);
1225 #endif
1226
1227 // Wait until we're told to continue
1228 RELEASE_SPIN_LOCK(&gct->gc_spin);
1229 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1230 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1231 gct->thread_index);
1232 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1233 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1234
1235 SET_GCT(saved_gct);
1236 }
1237
1238 #endif
1239
1240 #if defined(THREADED_RTS)
1241
1242 void
1243 waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1244 {
1245 const uint32_t n_threads = n_capabilities;
1246 const uint32_t me = cap->no;
1247 uint32_t i, j;
1248 bool retry = true;
1249 Time t0, t1, t2;
1250
1251 t0 = t1 = t2 = getProcessElapsedTime();
1252
1253 while(retry) {
1254 for (i=0; i < n_threads; i++) {
1255 if (i == me || idle_cap[i]) continue;
1256 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1257 prodCapability(capabilities[i], cap->running_task);
1258 }
1259 }
1260 for (j=0; j < 10; j++) {
1261 retry = false;
1262 for (i=0; i < n_threads; i++) {
1263 if (i == me || idle_cap[i]) continue;
1264 write_barrier();
1265 interruptCapability(capabilities[i]);
1266 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1267 retry = true;
1268 }
1269 }
1270 if (!retry) break;
1271 #if defined(PROF_SPIN)
1272 waitForGcThreads_yield++;
1273 #endif
1274 yieldThread();
1275 }
1276
1277 t2 = getProcessElapsedTime();
1278 if (RtsFlags.GcFlags.longGCSync != 0 &&
1279 t2 - t1 > RtsFlags.GcFlags.longGCSync) {
1280 /* call this every longGCSync of delay */
1281 rtsConfig.longGCSync(cap->no, t2 - t0);
1282 t1 = t2;
1283 }
1284 if (retry) {
1285 #if defined(PROF_SPIN)
1286 // This is a bit strange, we'll get more yields than spins.
1287 // I guess that means it's not a spin-lock at all, but these
1288 // numbers are still useful (I think).
1289 waitForGcThreads_spin++;
1290 #endif
1291 }
1292 }
1293
1294 if (RtsFlags.GcFlags.longGCSync != 0 &&
1295 t2 - t0 > RtsFlags.GcFlags.longGCSync) {
1296 rtsConfig.longGCSyncEnd(t2 - t0);
1297 }
1298 }
1299
1300 #endif // THREADED_RTS
1301
1302 static void
1303 start_gc_threads (void)
1304 {
1305 #if defined(THREADED_RTS)
1306 gc_running_threads = 0;
1307 #endif
1308 }
1309
1310 static void
1311 wakeup_gc_threads (uint32_t me USED_IF_THREADS,
1312 bool idle_cap[] USED_IF_THREADS)
1313 {
1314 #if defined(THREADED_RTS)
1315 uint32_t i;
1316
1317 if (n_gc_threads == 1) return;
1318
1319 for (i=0; i < n_gc_threads; i++) {
1320 if (i == me || idle_cap[i]) continue;
1321 inc_running();
1322 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1323 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY)
1324 barf("wakeup_gc_threads");
1325
1326 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1327 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1328 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1329 }
1330 #endif
1331 }
1332
1333 // After GC is complete, we must wait for all GC threads to enter the
1334 // standby state, otherwise they may still be executing inside
1335 // any_work(), and may even remain awake until the next GC starts.
1336 static void
1337 shutdown_gc_threads (uint32_t me USED_IF_THREADS,
1338 bool idle_cap[] USED_IF_THREADS)
1339 {
1340 #if defined(THREADED_RTS)
1341 uint32_t i;
1342
1343 if (n_gc_threads == 1) return;
1344
1345 for (i=0; i < n_gc_threads; i++) {
1346 if (i == me || idle_cap[i]) continue;
1347 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
1348 busy_wait_nop();
1349 write_barrier();
1350 }
1351 }
1352 #endif
1353 }
1354
1355 #if defined(THREADED_RTS)
1356 void
1357 releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1358 {
1359 const uint32_t n_threads = n_capabilities;
1360 const uint32_t me = cap->no;
1361 uint32_t i;
1362 for (i=0; i < n_threads; i++) {
1363 if (i == me || idle_cap[i]) continue;
1364 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1365 barf("releaseGCThreads");
1366
1367 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1368 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1369 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1370 }
1371 }
1372 #endif
1373
1374 /* ----------------------------------------------------------------------------
1375 Save the mutable lists in saved_mut_lists, where they will be scavenged
1376 during GC
1377 ------------------------------------------------------------------------- */
1378
1379 static void
1380 stash_mut_list (Capability *cap, uint32_t gen_no)
1381 {
1382 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1383 cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
1384 }
1385
1386 /* ----------------------------------------------------------------------------
1387 Initialise a generation that is to be collected
1388 ------------------------------------------------------------------------- */
1389
1390 static void
1391 prepare_collected_gen (generation *gen)
1392 {
1393 uint32_t i, g, n;
1394 gen_workspace *ws;
1395 bdescr *bd, *next;
1396
1397 g = gen->no;
1398
1399 if (RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no) {
1400 // Nonmoving heap's mutable list is always a root.
1401 for (i = 0; i < n_capabilities; i++) {
1402 stash_mut_list(capabilities[i], g);
1403 }
1404 } else if (g != 0) {
1405 // Otherwise throw away the current mutable list. Invariant: the
1406 // mutable list always has at least one block; this means we can avoid
1407 // a check for NULL in recordMutable().
1408 for (i = 0; i < n_capabilities; i++) {
1409 freeChain(capabilities[i]->mut_lists[g]);
1410 capabilities[i]->mut_lists[g] =
1411 allocBlockOnNode(capNoToNumaNode(i));
1412 }
1413 }
1414
1415 gen = &generations[g];
1416 ASSERT(gen->no == g);
1417
1418 // we'll construct a new list of threads in this step
1419 // during GC, throw away the current list.
1420 gen->old_threads = gen->threads;
1421 gen->threads = END_TSO_QUEUE;
1422
1423 // deprecate the existing blocks (except in the case of the nonmoving
1424 // collector since these will be preserved in nonmovingCollect for the
1425 // concurrent GC).
1426 if (!(RtsFlags.GcFlags.useNonmoving && g == oldest_gen->no)) {
1427 gen->old_blocks = gen->blocks;
1428 gen->n_old_blocks = gen->n_blocks;
1429 gen->blocks = NULL;
1430 gen->n_blocks = 0;
1431 gen->n_words = 0;
1432 gen->live_estimate = 0;
1433 }
1434
1435 // initialise the large object queues.
1436 ASSERT(gen->scavenged_large_objects == NULL);
1437 ASSERT(gen->n_scavenged_large_blocks == 0);
1438 ASSERT(gen->live_compact_objects == NULL);
1439 ASSERT(gen->n_live_compact_blocks == 0);
1440
1441 // grab all the partial blocks stashed in the gc_thread workspaces and
1442 // move them to the old_blocks list of this gen.
1443 for (n = 0; n < n_capabilities; n++) {
1444 ws = &gc_threads[n]->gens[gen->no];
1445
1446 for (bd = ws->part_list; bd != NULL; bd = next) {
1447 next = bd->link;
1448 bd->link = gen->old_blocks;
1449 gen->old_blocks = bd;
1450 gen->n_old_blocks += bd->blocks;
1451 }
1452 ws->part_list = NULL;
1453 ws->n_part_blocks = 0;
1454 ws->n_part_words = 0;
1455
1456 ASSERT(ws->scavd_list == NULL);
1457 ASSERT(ws->n_scavd_blocks == 0);
1458 ASSERT(ws->n_scavd_words == 0);
1459
1460 if (ws->todo_free != ws->todo_bd->start) {
1461 ws->todo_bd->free = ws->todo_free;
1462 ws->todo_bd->link = gen->old_blocks;
1463 gen->old_blocks = ws->todo_bd;
1464 gen->n_old_blocks += ws->todo_bd->blocks;
1465 alloc_todo_block(ws,0); // always has one block.
1466 }
1467 }
1468
1469 // mark the small objects as from-space
1470 for (bd = gen->old_blocks; bd; bd = bd->link) {
1471 bd->flags &= ~BF_EVACUATED;
1472 }
1473
1474 // mark the large objects as from-space
1475 for (bd = gen->large_objects; bd; bd = bd->link) {
1476 bd->flags &= ~BF_EVACUATED;
1477 }
1478
1479 // mark the compact objects as from-space
1480 for (bd = gen->compact_objects; bd; bd = bd->link) {
1481 bd->flags &= ~BF_EVACUATED;
1482 }
1483
1484 // for a compacted generation, we need to allocate the bitmap
1485 if (gen->mark) {
1486 StgWord bitmap_size; // in bytes
1487 bdescr *bitmap_bdescr;
1488 StgWord *bitmap;
1489
1490 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_);
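/* For example, with the usual 4096-byte blocks and 64-bit words
 * (BLOCK_SIZE_W = 512), one mark bit per word means each old block needs
 * 512 bits = 64 bytes of bitmap, i.e. BLOCK_SIZE / BITS_IN(W_) bytes per
 * block, which is what the expression above computes.
 */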
1491
1492 if (bitmap_size > 0) {
1493 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1494 / BLOCK_SIZE);
1495 gen->bitmap = bitmap_bdescr;
1496 bitmap = bitmap_bdescr->start;
1497
1498 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1499 bitmap_size, bitmap);
1500
1501 // don't forget to fill it with zeros!
1502 memset(bitmap, 0, bitmap_size);
1503
1504 // For each block in this step, point to its bitmap from the
1505 // block descriptor.
1506 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1507 bd->u.bitmap = bitmap;
1508 bitmap += BLOCK_SIZE_W / BITS_IN(W_);
1509
1510 // Also at this point we set the BF_MARKED flag
1511 // for this block. The invariant is that
1512 // BF_MARKED is always unset, except during GC
1513 // when it is set on those blocks which will be
1514 // compacted.
1515 if (!(bd->flags & BF_FRAGMENTED)) {
1516 bd->flags |= BF_MARKED;
1517 }
1518
1519 // BF_SWEPT should be marked only for blocks that are being
1520 // collected in sweep()
1521 bd->flags &= ~BF_SWEPT;
1522 }
1523 }
1524 }
1525 }
1526
1527 /* ----------------------------------------------------------------------------
1528 Initialise a generation that is *not* to be collected
1529 ------------------------------------------------------------------------- */
1530
1531 static void
1532 prepare_uncollected_gen (generation *gen)
1533 {
1534 uint32_t i;
1535
1536
1537 ASSERT(gen->no > 0);
1538
1539 // save the current mutable lists for this generation, and
1540 // allocate a fresh block for each one. We'll traverse these
1541 // mutable lists as roots early on in the GC.
1542 for (i = 0; i < n_capabilities; i++) {
1543 stash_mut_list(capabilities[i], gen->no);
1544 }
1545
1546 ASSERT(gen->scavenged_large_objects == NULL);
1547 ASSERT(gen->n_scavenged_large_blocks == 0);
1548 }
1549
1550 /* -----------------------------------------------------------------------------
1551 Collect the completed blocks from a GC thread and attach them to
1552 the generation.
1553 -------------------------------------------------------------------------- */
1554
1555 static void
1556 collect_gct_blocks (void)
1557 {
1558 uint32_t g;
1559 gen_workspace *ws;
1560 bdescr *bd, *prev;
1561
1562 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1563 ws = &gct->gens[g];
1564
1565 // there may still be a block attached to ws->todo_bd;
1566 // leave it there to use next time.
1567
1568 if (ws->scavd_list != NULL) {
1569 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1570
1571 ASSERT(gct->scan_bd == NULL);
1572 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1573
1574 prev = NULL;
1575 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1576 prev = bd;
1577 }
1578 if (prev != NULL) {
1579 prev->link = ws->gen->blocks;
1580 ws->gen->blocks = ws->scavd_list;
1581 }
1582 ws->gen->n_blocks += ws->n_scavd_blocks;
1583 ws->gen->n_words += ws->n_scavd_words;
1584
1585 ws->scavd_list = NULL;
1586 ws->n_scavd_blocks = 0;
1587 ws->n_scavd_words = 0;
1588
1589 RELEASE_SPIN_LOCK(&ws->gen->sync);
1590 }
1591 }
1592 }
1593
1594 /* -----------------------------------------------------------------------------
1595 During mutation, any blocks that are filled by allocatePinned() are stashed
1596 on the local pinned_object_blocks list, to avoid needing to take a global
1597 lock. Here we collect those blocks from the cap->pinned_object_blocks lists
1598 and put them on the g0->large_objects list or the oldest_gen->large_objects list.
1599
1600 How to decide which list to put them on?
1601
1602 - When the non-moving heap is enabled and this is a major GC, we put them on
1603 oldest_gen. This is because after preparation we really want no
1604 old-to-young references, and we want to be able to reset mut_lists. For
1605 this we need to promote every potentially live object to the oldest gen.
1606
1607 - Otherwise we put them on g0.
1608 -------------------------------------------------------------------------- */
1609
1610 static void
1611 collect_pinned_object_blocks (void)
1612 {
1613 generation *gen;
1614 const bool use_nonmoving = RtsFlags.GcFlags.useNonmoving;
1615 if (use_nonmoving && major_gc) {
1616 gen = oldest_gen;
1617 } else {
1618 gen = g0;
1619 }
1620
1621 for (uint32_t n = 0; n < n_capabilities; n++) {
1622 bdescr *last = NULL;
1623 if (use_nonmoving && gen == oldest_gen) {
1624 // Mark objects as belonging to the nonmoving heap
1625 for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1626 bd->flags |= BF_NONMOVING;
1627 bd->gen = oldest_gen;
1628 bd->gen_no = oldest_gen->no;
1629 oldest_gen->n_large_words += bd->free - bd->start;
1630 oldest_gen->n_large_blocks += bd->blocks;
1631 last = bd;
1632 }
1633 } else {
1634 for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1635 last = bd;
1636 }
1637 }
1638
1639 if (last != NULL) {
1640 last->link = gen->large_objects;
1641 if (gen->large_objects != NULL) {
1642 gen->large_objects->u.back = last;
1643 }
1644 gen->large_objects = capabilities[n]->pinned_object_blocks;
1645 capabilities[n]->pinned_object_blocks = NULL;
1646 }
1647 }
1648 }
1649
1650 /* -----------------------------------------------------------------------------
1651 Initialise a gc_thread before GC
1652 -------------------------------------------------------------------------- */
1653
1654 static void
1655 init_gc_thread (gc_thread *t)
1656 {
1657 t->static_objects = END_OF_STATIC_OBJECT_LIST;
1658 t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST;
1659 t->scan_bd = NULL;
1660 t->mut_lists = t->cap->mut_lists;
1661 t->evac_gen_no = 0;
1662 t->failed_to_evac = false;
1663 t->eager_promotion = true;
1664 t->thunk_selector_depth = 0;
1665 t->copied = 0;
1666 t->scanned = 0;
1667 t->any_work = 0;
1668 t->no_work = 0;
1669 t->scav_find_work = 0;
1670 }
1671
1672 /* -----------------------------------------------------------------------------
1673 Function we pass to evacuate roots.
1674 -------------------------------------------------------------------------- */
1675
1676 static void
1677 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1678 {
1679 // we stole a register for gct, but this function is called from
1680 // *outside* the GC where the register variable is not in effect,
1681 // so we need to save and restore it here. NB. only call
1682 // mark_root() from the main GC thread, otherwise gct will be
1683 // incorrect.
1684 #if defined(THREADED_RTS)
1685 gc_thread *saved_gct;
1686 saved_gct = gct;
1687 #endif
1688 SET_GCT(user);
1689
1690 evacuate(root);
1691
1692 SET_GCT(saved_gct);
1693 }
1694
1695 /* ----------------------------------------------------------------------------
1696 Reset the sizes of the older generations when we do a major
1697 collection.
1698
1699 CURRENT STRATEGY: make all generations except zero the same size.
1700 We have to stay within the maximum heap size, and leave a certain
1701 percentage of the maximum heap size available to allocate into.
1702 ------------------------------------------------------------------------- */
1703
1704 void
1705 resizeGenerations (void)
1706 {
1707 uint32_t g;
1708 W_ live, size, min_alloc, words;
1709 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1710 const W_ gens = RtsFlags.GcFlags.generations;
1711
1712 // live in the oldest generation
1713 if (oldest_gen->live_estimate != 0) {
1714 words = oldest_gen->live_estimate;
1715 } else {
1716 words = oldest_gen->n_words;
1717 }
1718 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1719 oldest_gen->n_large_blocks +
1720 oldest_gen->n_compact_blocks;
1721
1722 // default max size for all generations except zero
1723 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1724 RtsFlags.GcFlags.minOldGenSize);
1725
1726 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1727 if (max > 0) {
1728 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1729 } else {
1730 RtsFlags.GcFlags.heapSizeSuggestion = size;
1731 }
1732 }
1733
1734 // minimum size for generation zero
1735 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1736 RtsFlags.GcFlags.minAllocAreaSize
1737 * (W_)n_capabilities);
1738
1739 // Auto-enable compaction when the residency reaches a
1740 // certain percentage of the maximum heap size (default: 30%).
1741 // Except when non-moving GC is enabled.
1742 if (!RtsFlags.GcFlags.useNonmoving &&
1743 (RtsFlags.GcFlags.compact ||
1744 (max > 0 &&
1745 oldest_gen->n_blocks >
1746 (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1747 oldest_gen->mark = 1;
1748 oldest_gen->compact = 1;
1749 // debugBelch("compaction: on\n", live);
1750 } else {
1751 oldest_gen->mark = 0;
1752 oldest_gen->compact = 0;
1753 // debugBelch("compaction: off\n", live);
1754 }
1755
1756 if (RtsFlags.GcFlags.sweep) {
1757 oldest_gen->mark = 1;
1758 }
1759
1760 // if we're going to go over the maximum heap size, reduce the
1761 // size of the generations accordingly. The calculation is
1762 // different if compaction is turned on, because we don't need
1763 // to double the space required to collect the old generation.
1764 if (max != 0) {
1765
1766 // this test is necessary to ensure that the calculations
1767 // below don't have any negative results - we're working
1768 // with unsigned values here.
1769 if (max < min_alloc) {
1770 heapOverflow();
1771 }
1772
1773 if (oldest_gen->compact) {
1774 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1775 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1776 }
1777 } else {
1778 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1779 size = (max - min_alloc) / ((gens - 1) * 2);
1780 }
1781 }
1782
1783 if (size < live) {
1784 heapOverflow();
1785 }
1786 }
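/* A worked example of the clamping above, with illustrative numbers: with
 * 2 generations, max = 10000 blocks and min_alloc = 1000 blocks, a copying
 * collection of the old generation needs from-space plus to-space, so
 * size = (10000 - 1000) / 2 = 4500 blocks; with compaction the old
 * generation is collected in place, so size = (10000 - 1000) / 1 = 9000.
 */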
1787
1788 #if 0
1789 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1790 min_alloc, size, max);
1791 debugBelch("resize_gen: n_blocks: %lu, n_large_block: %lu, n_compact_blocks: %lu\n",
1792 oldest_gen->n_blocks, oldest_gen->n_large_blocks, oldest_gen->n_compact_blocks);
1793 debugBelch("resize_gen: max_blocks: %lu -> %lu\n", oldest_gen->max_blocks, oldest_gen->n_blocks);
1794 #endif
1795
1796 for (g = 0; g < gens; g++) {
1797 generations[g].max_blocks = size;
1798 }
1799 }
1800
1801 /* -----------------------------------------------------------------------------
1802 Calculate the new size of the nursery, and resize it.
1803 -------------------------------------------------------------------------- */
1804
1805 static void
1806 resize_nursery (void)
1807 {
1808 const StgWord min_nursery =
1809 RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;
1810
1811 if (RtsFlags.GcFlags.generations == 1)
1812 { // Two-space collector:
1813 W_ blocks;
1814
1815 /* set up a new nursery. Allocate a nursery size based on a
1816 * function of the amount of live data (by default a factor of 2)
1817 * Use the blocks from the old nursery if possible, freeing up any
1818 * left over blocks.
1819 *
1820 * If we get near the maximum heap size, then adjust our nursery
1821 * size accordingly. If the nursery is the same size as the live
1822 * data (L), then we need 3L bytes. We can reduce the size of the
1823 * nursery to bring the required memory down near 2L bytes.
1824 *
1825 * A normal 2-space collector would need 4L bytes to give the same
1826 * performance we get from 3L bytes, reducing to the same
1827 * performance at 2L bytes.
1828 */
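/* Illustrative arithmetic for the comment above: with L bytes of live
 * data, a nursery of L bytes peaks at roughly
 * nursery + from-space + to-space = 3L during a collection; shrinking the
 * nursery towards zero brings the peak down towards the 2L needed for the
 * two semispaces alone.
 */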
1829 blocks = generations[0].n_blocks;
1830
1831 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1832 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1833 RtsFlags.GcFlags.maxHeapSize )
1834 {
1835 long adjusted_blocks; // signed on purpose
1836 int pc_free;
1837
1838 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1839
1840 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1841 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1842
1843 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1844 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1845 {
1846 heapOverflow();
1847 }
1848 blocks = adjusted_blocks;
1849 }
1850 else
1851 {
1852 blocks *= RtsFlags.GcFlags.oldGenFactor;
1853 if (blocks < min_nursery)
1854 {
1855 blocks = min_nursery;
1856 }
1857 }
1858 resizeNurseries(blocks);
1859 }
1860 else // Generational collector
1861 {
1862 /*
1863 * If the user has given us a suggested heap size, adjust our
1864 * allocation area to make best use of the memory available.
1865 */
1866 if (RtsFlags.GcFlags.heapSizeSuggestion)
1867 {
1868 long blocks;
1869 StgWord needed;
1870
1871 calcNeeded(false, &needed); // approx blocks needed at next GC
1872
1873 /* Guess how much will be live in generation 0 step 0 next time.
1874 * A good approximation is obtained by finding the
1875 * percentage of g0 that was live at the last minor GC.
1876 *
1877 * We have an accurate figure for the amount of copied data in
1878 * 'copied', but we must convert this to a number of blocks, with
1879 * a small adjustment for estimated slop at the end of a block
1880 * (- 10 words).
1881 */
1882 if (N == 0)
1883 {
1884 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1885 / countNurseryBlocks();
1886 }
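/* For instance (made-up numbers): if the last minor GC copied 51200 words
 * out of a 1000-block nursery, then with 512-word (4 KB) blocks
 *
 *   g0_pcnt_kept = ((51200 / (512 - 10)) * 100) / 1000
 *                = (101 * 100) / 1000 = 10
 *
 * i.e. roughly 10% of g0 survived, which feeds into the formula below.
 */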
1887
1888 /* Estimate a size for the allocation area based on the
1889 * information available. We might end up going slightly under
1890 * or over the suggested heap size, but we should be pretty
1891 * close on average.
1892 *
1893 * Formula: suggested - needed
1894 * ----------------------------
1895 * 1 + g0_pcnt_kept/100
1896 *
1897 * where 'needed' is the amount of memory needed at the next
1898 * collection for collecting all gens except g0.
1899 */
1900 blocks =
1901 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1902 (100 + (long)g0_pcnt_kept);
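/* A worked example with illustrative numbers: with a suggested heap of
 * 100000 blocks, needed = 20000 blocks and g0_pcnt_kept = 30, we get
 *
 *   blocks = ((100000 - 20000) * 100) / 130 ~= 61538
 *
 * so the nursery plus the ~30% of it expected to survive the next minor
 * GC roughly fills the suggested heap size.
 */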
1903
1904 if (blocks < (long)min_nursery) {
1905 blocks = min_nursery;
1906 }
1907
1908 resizeNurseries((W_)blocks);
1909 }
1910 else
1911 {
1912 // we might have added extra blocks to the nursery, so
1913 // resize back to the original size again.
1914 resizeNurseriesFixed();
1915 }
1916 }
1917 }
1918
1919 /* -----------------------------------------------------------------------------
1920 Sanity code for CAF garbage collection.
1921
1922 With DEBUG turned on, we manage a CAF list in addition to the SRT
1923 mechanism. After GC, we run down the CAF list and make any
1924 CAFs which have been garbage collected GCD_CAF. This means we get an error
1925 whenever the program tries to enter a garbage collected CAF.
1926
1927 Any garbage collected CAFs are taken off the CAF list at the same
1928 time.
1929 -------------------------------------------------------------------------- */
1930
1931 #if defined(DEBUG)
1932
1933 void gcCAFs(void)
1934 {
1935 uint32_t i = 0;
1936 StgIndStatic *prev = NULL;
1937
1938 for (StgIndStatic *p = debug_caf_list;
1939 p != (StgIndStatic*) END_OF_CAF_LIST;
1940 p = (StgIndStatic*) p->saved_info)
1941 {
1942 const StgInfoTable *info = get_itbl((StgClosure*)p);
1943 ASSERT(info->type == IND_STATIC);
1944
1945 // See Note [STATIC_LINK fields] in Storage.h
1946 // This condition identifies CAFs that have just been GC'd and
1947 // don't have static_link==3 which means they should be ignored.
1948 if ((((StgWord)(p->static_link)&STATIC_BITS) | prev_static_flag) != 3) {
1949 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
1950 SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
1951 if (prev == NULL) {
1952 debug_caf_list = (StgIndStatic*)p->saved_info;
1953 } else {
1954 prev->saved_info = p->saved_info;
1955 }
1956 } else {
1957 prev = p;
1958 i++;
1959 }
1960 }
1961
1962 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1963 }
1964 #endif
1965
1966
1967 /* -----------------------------------------------------------------------------
1968 The GC can leave some work for the mutator to do before the next
1969 GC, provided the work can be safely overlapped with mutation. This
1970 can help reduce the GC pause time.
1971
1972 The mutator can call doIdleGCWork() any time it likes, but
1973 preferably when it is idle. It's safe for multiple capabilities to
1974 call doIdleGCWork().
1975
1976 When 'all' is
1977 * false: doIdleGCWork() should only take a short, bounded, amount
1978 of time.
1979 * true: doIdleGCWork() will complete all the outstanding GC work.
1980
1981 The return value is
1982 * true if there's more to do (only if 'all' is false).
1983 * false otherwise.
1984 -------------------------------------------------------------------------- */
1985
1986 bool doIdleGCWork(Capability *cap STG_UNUSED, bool all)
1987 {
1988 return runSomeFinalizers(all);
1989 }
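/* A hypothetical caller-side sketch (not part of this file, and not a
 * claim about how the scheduler actually drives this API): call
 * doIdleGCWork() with all=false in bounded slices while idle, and with
 * all=true when the outstanding work must be finished synchronously.
 *
 *   static void idleLoopSketch (Capability *cap)
 *   {
 *       // keep doing short, bounded slices until nothing is left
 *       while (doIdleGCWork(cap, false)) { }
 *   }
 *
 *   // e.g. before shutdown, finish everything in one go:
 *   //     doIdleGCWork(cap, true);
 */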