1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Arena.h"
32 #include "Storage.h"
33 #include "RtsUtils.h"
34 #include "Apply.h"
35 #include "Updates.h"
36 #include "Stats.h"
37 #include "Schedule.h"
38 #include "Sanity.h"
39 #include "BlockAlloc.h"
40 #include "ProfHeap.h"
41 #include "Weak.h"
42 #include "Prelude.h"
43 #include "RtsSignals.h"
44 #include "STM.h"
45 #include "Trace.h"
46 #include "RetainerProfile.h"
47 #include "LdvProfile.h"
48 #include "RaiseAsync.h"
49 #include "Stable.h"
50 #include "CheckUnload.h"
51 #include "CNF.h"
52 #include "RtsFlags.h"
53
54 #if defined(PROFILING)
55 #include "RetainerProfile.h"
56 #endif
57
58 #include <string.h> // for memset()
59 #include <unistd.h>
60
61 /* -----------------------------------------------------------------------------
62 Global variables
63 -------------------------------------------------------------------------- */
64
65 /* STATIC OBJECT LIST.
66 *
67 * During GC:
68 * We maintain a linked list of static objects that are still live.
69 * The requirements for this list are:
70 *
71 * - we need to scan the list while adding to it, in order to
72 * scavenge all the static objects (in the same way that
73 * breadth-first scavenging works for dynamic objects).
74 *
75 * - we need to be able to tell whether an object is already on
76 * the list, to break loops.
77 *
78 * Each static object has a "static link field", which we use for
79 * linking objects on to the list. We use a stack-type list, consing
80 * objects on the front as they are added (this means that the
81 * scavenge phase is depth-first, not breadth-first, but that
82 * shouldn't matter).
83 *
84 * A separate list is kept for objects that have been scavenged
85 * already - this is so that we can zero all the marks afterwards.
86 *
87 * An object is on the list if its static link field is non-zero; this
88 * means that we have to mark the end of the list with '1', not NULL.
89 *
90 * Extra notes for generational GC:
91 *
92 * Each generation has a static object list associated with it. When
93 * collecting generations up to N, we treat the static object lists
94 * from generations > N as roots.
95 *
96 * We build up a static object list while collecting generations 0..N,
97 * which is then appended to the static object list of generation N+1.
98 */
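/* A minimal sketch of the consing described above (approximate
 * pseudocode only; the real static-object evacuation code lives in
 * Evac.c and also packs the STATIC_FLAG_A/B bits into the link field):
 *
 *     if (static_link_of(p) == NULL) {          // not on any list yet
 *         static_link_of(p) = gct->static_objects;
 *         gct->static_objects = p;              // cons onto the front
 *     }
 *
 * static_link_of() is a hypothetical stand-in for the real link-field
 * accessor. Because a zero link field means "not on a list", the end
 * of the list itself is marked with a distinguished non-NULL sentinel
 * (END_OF_STATIC_OBJECT_LIST, see init_gc_thread below).
 */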
99
100 /* N is the oldest generation being collected, where the generations
101 * are numbered starting at 0. A major GC (indicated by the major_gc
102 * flag) is when we're collecting all generations. We only attempt to
103 * deal with static objects and GC CAFs when doing a major GC.
104 */
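/* For example, with the default two generations (+RTS -G2), N == 0 is
 * a minor collection and N == 1 a major one. */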
105 uint32_t N;
106 bool major_gc;
107
108 /* Data used for allocation area sizing.
109 */
110 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
111
112 /* Mut-list stats */
113 #if defined(DEBUG)
114 uint32_t mutlist_MUTVARS,
115 mutlist_MUTARRS,
116 mutlist_MVARS,
117 mutlist_TVAR,
118 mutlist_TVAR_WATCH_QUEUE,
119 mutlist_TREC_CHUNK,
120 mutlist_TREC_HEADER,
121 mutlist_ATOMIC_INVARIANT,
122 mutlist_INVARIANT_CHECK_QUEUE,
123 mutlist_OTHERS;
124 #endif
125
126 /* Thread-local data for each GC thread
127 */
128 gc_thread **gc_threads = NULL;
129
130 #if !defined(THREADED_RTS)
131 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
132 #endif
133
134 // Number of threads running in *this* GC. Affects how many
135 // step->todos[] lists we have to look in to find work.
136 uint32_t n_gc_threads;
137
138 // For stats:
139 static long copied; // *words* copied & scavenged during this GC
140
141 #if defined(PROF_SPIN) && defined(THREADED_RTS)
142 volatile StgWord64 whitehole_gc_spin = 0;
143 #endif
144
145 bool work_stealing;
146
147 uint32_t static_flag = STATIC_FLAG_B;
148 uint32_t prev_static_flag = STATIC_FLAG_A;
149
150 DECLARE_GCT
151
152 /* -----------------------------------------------------------------------------
153 Static function declarations
154 -------------------------------------------------------------------------- */
155
156 static void mark_root (void *user, StgClosure **root);
157 static void prepare_collected_gen (generation *gen);
158 static void prepare_uncollected_gen (generation *gen);
159 static void init_gc_thread (gc_thread *t);
160 static void resize_generations (void);
161 static void resize_nursery (void);
162 static void start_gc_threads (void);
163 static void scavenge_until_all_done (void);
164 static StgWord inc_running (void);
165 static StgWord dec_running (void);
166 static void wakeup_gc_threads (uint32_t me, bool idle_cap[]);
167 static void shutdown_gc_threads (uint32_t me, bool idle_cap[]);
168 static void collect_gct_blocks (void);
169 static void collect_pinned_object_blocks (void);
170 static void heapOverflow (void);
171
172 #if defined(DEBUG)
173 static void gcCAFs (void);
174 #endif
175
176 /* -----------------------------------------------------------------------------
177 The mark stack.
178 -------------------------------------------------------------------------- */
179
180 bdescr *mark_stack_top_bd; // topmost block in the mark stack
181 bdescr *mark_stack_bd; // current block in the mark stack
182 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
183
184 /* -----------------------------------------------------------------------------
185 GarbageCollect: the main entry point to the garbage collector.
186
187 The collect_gen parameter is obtained by calling calcNeeded().
188
189 Locks held: all capabilities are held throughout GarbageCollect().
190 -------------------------------------------------------------------------- */
191
192 void
193 GarbageCollect (uint32_t collect_gen,
194 bool do_heap_census,
195 uint32_t gc_type USED_IF_THREADS,
196 Capability *cap,
197 bool idle_cap[])
198 {
199 bdescr *bd;
200 generation *gen;
201 StgWord live_blocks, live_words, par_max_copied, par_balanced_copied;
202 #if defined(THREADED_RTS)
203 gc_thread *saved_gct;
204 #endif
205 uint32_t g, n;
206
207 // necessary if we stole a callee-saves register for gct:
208 #if defined(THREADED_RTS)
209 saved_gct = gct;
210 #endif
211
212 #if defined(PROFILING)
213 CostCentreStack *save_CCS[n_capabilities];
214 #endif
215
216 ACQUIRE_SM_LOCK;
217
218 #if defined(RTS_USER_SIGNALS)
219 if (RtsFlags.MiscFlags.install_signal_handlers) {
220 // block signals
221 blockUserSignals();
222 }
223 #endif
224
225 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
226 // otherwise adjust the padding in gen_workspace.
227
228 // this is the main thread
229 SET_GCT(gc_threads[cap->no]);
230
231 // tell the stats department that we've started a GC
232 stat_startGC(cap, gct);
233
234 // lock the StablePtr table
235 stableLock();
236
237 #if defined(DEBUG)
238 mutlist_MUTVARS = 0;
239 mutlist_MUTARRS = 0;
240 mutlist_MVARS = 0;
241 mutlist_TVAR = 0;
242 mutlist_TVAR_WATCH_QUEUE = 0;
243 mutlist_TREC_CHUNK = 0;
244 mutlist_TREC_HEADER = 0;
245 mutlist_ATOMIC_INVARIANT = 0;
246 mutlist_INVARIANT_CHECK_QUEUE = 0;
247 mutlist_OTHERS = 0;
248 #endif
249
250 // attribute any costs to CCS_GC
251 #if defined(PROFILING)
252 for (n = 0; n < n_capabilities; n++) {
253 save_CCS[n] = capabilities[n]->r.rCCCS;
254 capabilities[n]->r.rCCCS = CCS_GC;
255 }
256 #endif
257
258 /* Figure out which generation to collect
259 */
260 N = collect_gen;
261 major_gc = (N == RtsFlags.GcFlags.generations-1);
262
263 if (major_gc) {
264 prev_static_flag = static_flag;
265 static_flag =
266 static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
267 }
268
269 #if defined(THREADED_RTS)
270 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
271 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
272 // It's not always a good idea to do load balancing in parallel
273 // GC. In particular, for a parallel program we don't want to
274 // lose locality by moving cached data into another CPU's cache
275 // (this effect can be quite significant).
276 //
277 // We could have a more complex way to determine whether to do
278 // work stealing or not, e.g. it might be a good idea to do it
279 // if the heap is big. For now, we just turn it on or off with
280 // a flag.
281 #endif
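/* For example, if parGcLoadBalancingGen is 1, a collection of only
 * generation 0 runs with work_stealing == false (each GC thread sticks
 * to its own capability's todo blocks), while any collection that
 * reaches generation 1 lets idle threads steal from other threads'
 * todo_q deques (see any_work() below). */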
282
283 /* Start threads, so they can be spinning up while we finish initialisation.
284 */
285 start_gc_threads();
286
287 #if defined(THREADED_RTS)
288 /* How many threads will be participating in this GC?
289 * We don't try to parallelise minor GCs (unless the user asks for
290 * it with +RTS -gn0), or mark/compact/sweep GC.
291 */
292 if (gc_type == SYNC_GC_PAR) {
293 n_gc_threads = n_capabilities;
294 } else {
295 n_gc_threads = 1;
296 }
297 #else
298 n_gc_threads = 1;
299 #endif
300
301 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
302 N, n_gc_threads);
303
304 #if defined(DEBUG)
305 // check for memory leaks if DEBUG is on
306 memInventory(DEBUG_gc);
307 #endif
308
309 // do this *before* we start scavenging
310 collectFreshWeakPtrs();
311
312 // check sanity *before* GC
313 IF_DEBUG(sanity, checkSanity(false /* before GC */, major_gc));
314
315 // gather blocks allocated using allocatePinned() from each capability
316 // and put them on the g0->large_object list.
317 collect_pinned_object_blocks();
318
319 // Initialise all the generations that we're collecting.
320 for (g = 0; g <= N; g++) {
321 prepare_collected_gen(&generations[g]);
322 }
323 // Initialise all the generations that we're *not* collecting.
324 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
325 prepare_uncollected_gen(&generations[g]);
326 }
327
328 // Prepare this gc_thread
329 init_gc_thread(gct);
330
331 /* Allocate a mark stack if we're doing a major collection.
332 */
333 if (major_gc && oldest_gen->mark) {
334 mark_stack_bd = allocBlock();
335 mark_stack_top_bd = mark_stack_bd;
336 mark_stack_bd->link = NULL;
337 mark_stack_bd->u.back = NULL;
338 mark_sp = mark_stack_bd->start;
339 } else {
340 mark_stack_bd = NULL;
341 mark_stack_top_bd = NULL;
342 mark_sp = NULL;
343 }
344
345 /* -----------------------------------------------------------------------
346 * follow all the roots that we know about:
347 */
348
349 // the main thread is running: this prevents any other threads from
350 // exiting prematurely, so we can start them now.
351 // NB. do this after the mutable lists have been saved above, otherwise
352 // the other GC threads will be writing into the old mutable lists.
353 inc_running();
354 wakeup_gc_threads(gct->thread_index, idle_cap);
355
356 traceEventGcWork(gct->cap);
357
358 // scavenge the capability-private mutable lists. This isn't part
359 // of markSomeCapabilities() because markSomeCapabilities() can only
360 // call back into the GC via mark_root() (due to the gct register
361 // variable).
362 if (n_gc_threads == 1) {
363 for (n = 0; n < n_capabilities; n++) {
364 #if defined(THREADED_RTS)
365 scavenge_capability_mut_Lists1(capabilities[n]);
366 #else
367 scavenge_capability_mut_lists(capabilities[n]);
368 #endif
369 }
370 } else {
371 scavenge_capability_mut_lists(gct->cap);
372 for (n = 0; n < n_capabilities; n++) {
373 if (idle_cap[n]) {
374 markCapability(mark_root, gct, capabilities[n],
375 true/*don't mark sparks*/);
376 scavenge_capability_mut_lists(capabilities[n]);
377 }
378 }
379 }
380
381 // follow roots from the CAF list (used by GHCi)
382 gct->evac_gen_no = 0;
383 markCAFs(mark_root, gct);
384
385 // follow all the roots that the application knows about.
386 gct->evac_gen_no = 0;
387 if (n_gc_threads == 1) {
388 for (n = 0; n < n_capabilities; n++) {
389 markCapability(mark_root, gct, capabilities[n],
390 true/*don't mark sparks*/);
391 }
392 } else {
393 markCapability(mark_root, gct, cap, true/*don't mark sparks*/);
394 }
395
396 markScheduler(mark_root, gct);
397
398 #if defined(RTS_USER_SIGNALS)
399 // mark the signal handlers (signals should be already blocked)
400 markSignalHandlers(mark_root, gct);
401 #endif
402
403 // Mark the weak pointer list, and prepare to detect dead weak pointers.
404 markWeakPtrList();
405 initWeakForGC();
406
407 // Mark the stable pointer table.
408 markStableTables(mark_root, gct);
409
410 /* -------------------------------------------------------------------------
411 * Repeatedly scavenge all the areas we know about until there's no
412 * more scavenging to be done.
413 */
414 for (;;)
415 {
416 scavenge_until_all_done();
417 // The other threads are now stopped. We might recurse back to
418 // here, but from now on this is the only thread.
419
420 // must be last... invariant is that everything is fully
421 // scavenged at this point.
422 if (traverseWeakPtrList()) { // returns true if evaced something
423 inc_running();
424 continue;
425 }
426
427 // If we get to here, there's really nothing left to do.
428 break;
429 }
430
431 shutdown_gc_threads(gct->thread_index, idle_cap);
432
433 // Now see which stable names are still alive.
434 gcStableTables();
435
436 #if defined(THREADED_RTS)
437 if (n_gc_threads == 1) {
438 for (n = 0; n < n_capabilities; n++) {
439 pruneSparkQueue(capabilities[n]);
440 }
441 } else {
442 for (n = 0; n < n_capabilities; n++) {
443 if (n == cap->no || idle_cap[n]) {
444 pruneSparkQueue(capabilities[n]);
445 }
446 }
447 }
448 #endif
449
450 #if defined(PROFILING)
451 // We call processHeapClosureForDead() on every closure destroyed during
452 // the current garbage collection, so we invoke LdvCensusForDead().
453 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
454 || RtsFlags.ProfFlags.bioSelector != NULL) {
455 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
456 LdvCensusForDead(N);
457 ACQUIRE_SM_LOCK;
458 }
459 #endif
460
461 // NO MORE EVACUATION AFTER THIS POINT!
462
463 // Finally: compact or sweep the oldest generation.
464 if (major_gc && oldest_gen->mark) {
465 if (oldest_gen->compact)
466 compact(gct->scavenged_static_objects);
467 else
468 sweep(oldest_gen);
469 }
470
471 copied = 0;
472 par_max_copied = 0;
473 par_balanced_copied = 0;
474 {
475 uint32_t i;
476 uint64_t par_balanced_copied_acc = 0;
477
478 for (i=0; i < n_gc_threads; i++) {
479 copied += gc_threads[i]->copied;
480 }
481 for (i=0; i < n_gc_threads; i++) {
482 if (n_gc_threads > 1) {
483 debugTrace(DEBUG_gc,"thread %d:", i);
484 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
485 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
486 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
487 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
488 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
489 }
490 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
491 par_balanced_copied_acc +=
492 stg_min(n_gc_threads * gc_threads[i]->copied, copied);
493 }
494 if (n_gc_threads == 1) {
495 par_max_copied = 0;
496 par_balanced_copied = 0;
497 }
498 else
499 {
500 // See Note [Work Balance] for an explanation of this computation
501 par_balanced_copied =
502 (par_balanced_copied_acc - copied + (n_gc_threads - 1) / 2) /
503 (n_gc_threads - 1);
504 }
505 }
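/* A worked example of the computation above (illustrative numbers
 * only): with two GC threads that copied 60 and 40 words respectively,
 * copied = 100 and
 *
 *     par_balanced_copied_acc = min(2*60, 100) + min(2*40, 100) = 180
 *     par_balanced_copied     = (180 - 100 + 0) / 1 = 80
 *
 * A perfect 50/50 split gives 100 (== copied), while 100/0 gives 0, so
 * the statistic ranges from 0 (no balance) up to `copied` (perfect
 * balance). See Note [Work Balance] for the derivation. */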
506
507 // Run through all the generations and tidy up.
508 // We're going to:
509 // - count the amount of "live" data (live_words, live_blocks)
510 // - count the amount of "copied" data in this GC (copied)
511 // - free from-space
512 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
513 //
514 live_words = 0;
515 live_blocks = 0;
516
517 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
518
519 if (g == N) {
520 generations[g].collections++; // for stats
521 if (n_gc_threads > 1) generations[g].par_collections++;
522 }
523
524 // Count the mutable list as bytes "copied" for the purposes of
525 // stats. Every mutable list is copied during every GC.
526 if (g > 0) {
527 W_ mut_list_size = 0;
528 for (n = 0; n < n_capabilities; n++) {
529 mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
530 }
531 copied += mut_list_size;
532
533 debugTrace(DEBUG_gc,
534 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
535 (unsigned long)(mut_list_size * sizeof(W_)),
536 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
537 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
538 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
539 mutlist_ATOMIC_INVARIANT,
540 mutlist_INVARIANT_CHECK_QUEUE,
541 mutlist_OTHERS);
542 }
543
544 bdescr *next, *prev;
545 gen = &generations[g];
546
547 // for generations we collected...
548 if (g <= N) {
549
550 /* free old memory and shift to-space into from-space for all
551 * the collected generations (except the allocation area). These
552 * freed blocks will probably be quickly recycled.
553 */
554 if (gen->mark)
555 {
556 // tack the new blocks on the end of the existing blocks
557 if (gen->old_blocks != NULL) {
558
559 prev = NULL;
560 for (bd = gen->old_blocks; bd != NULL; bd = next) {
561
562 next = bd->link;
563
564 if (!(bd->flags & BF_MARKED))
565 {
566 if (prev == NULL) {
567 gen->old_blocks = next;
568 } else {
569 prev->link = next;
570 }
571 freeGroup(bd);
572 gen->n_old_blocks--;
573 }
574 else
575 {
576 gen->n_words += bd->free - bd->start;
577
578 // NB. this step might not be compacted next
579 // time, so reset the BF_MARKED flags.
580 // They are set before GC if we're going to
581 // compact. (search for BF_MARKED above).
582 bd->flags &= ~BF_MARKED;
583
584 // between GCs, all blocks in the heap except
585 // for the nursery have the BF_EVACUATED flag set.
586 bd->flags |= BF_EVACUATED;
587
588 prev = bd;
589 }
590 }
591
592 if (prev != NULL) {
593 prev->link = gen->blocks;
594 gen->blocks = gen->old_blocks;
595 }
596 }
597 // add the new blocks to the block tally
598 gen->n_blocks += gen->n_old_blocks;
599 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
600 ASSERT(countOccupied(gen->blocks) == gen->n_words);
601 }
602 else // not compacted
603 {
604 freeChain(gen->old_blocks);
605 }
606
607 gen->old_blocks = NULL;
608 gen->n_old_blocks = 0;
609
610 /* LARGE OBJECTS. The current live large objects are chained on
611 * scavenged_large, having been moved during garbage
612 * collection from large_objects. Any objects left on the
613 * large_objects list are therefore dead, so we free them here.
614 */
615 freeChain(gen->large_objects);
616 gen->large_objects = gen->scavenged_large_objects;
617 gen->n_large_blocks = gen->n_scavenged_large_blocks;
618 gen->n_large_words = countOccupied(gen->large_objects);
619 gen->n_new_large_words = 0;
620
621 /* COMPACT_NFDATA. The currently live compacts are chained
622 * to live_compact_objects, quite like large objects. And
623 * objects left on the compact_objects list are dead.
624 *
625 * We don't run a simple freeChain because we want to give the
626 * CNF module some chance to free memory that freeChain would
627 * not see (namely blocks appended to a CNF through a compactResize).
628 *
629 * See Note [Compact Normal Forms] for details.
630 */
631 for (bd = gen->compact_objects; bd; bd = next) {
632 next = bd->link;
633 compactFree(((StgCompactNFDataBlock*)bd->start)->owner);
634 }
635 gen->compact_objects = gen->live_compact_objects;
636 gen->n_compact_blocks = gen->n_live_compact_blocks;
637 }
638 else // for generations > N
639 {
640 /* For older generations, we need to append the
641 * scavenged_large_object list (i.e. large objects that have been
642 * promoted during this GC) to the large_object list for that step.
643 */
644 for (bd = gen->scavenged_large_objects; bd; bd = next) {
645 next = bd->link;
646 dbl_link_onto(bd, &gen->large_objects);
647 gen->n_large_words += bd->free - bd->start;
648 }
649
650 // And same for compacts
651 for (bd = gen->live_compact_objects; bd; bd = next) {
652 next = bd->link;
653 dbl_link_onto(bd, &gen->compact_objects);
654 }
655
656 // add the new blocks we promoted during this GC
657 gen->n_large_blocks += gen->n_scavenged_large_blocks;
658 gen->n_compact_blocks += gen->n_live_compact_blocks;
659 }
660
661 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
662 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
663 // We can't run the same assertions on compact objects, because there
664 // is memory "the GC doesn't see" (directly), but which is still
665 // accounted for in gen->n_compact_blocks
666
667 gen->scavenged_large_objects = NULL;
668 gen->n_scavenged_large_blocks = 0;
669 gen->live_compact_objects = NULL;
670 gen->n_live_compact_blocks = 0;
671
672 // Count "live" data
673 live_words += genLiveWords(gen);
674 live_blocks += genLiveBlocks(gen);
675
676 // add in the partial blocks in the gen_workspaces
677 {
678 uint32_t i;
679 for (i = 0; i < n_capabilities; i++) {
680 live_words += gcThreadLiveWords(i, gen->no);
681 live_blocks += gcThreadLiveBlocks(i, gen->no);
682 }
683 }
684 } // for all generations
685
686 // update the max size of older generations after a major GC
687 resize_generations();
688
689 // Free the mark stack.
690 if (mark_stack_top_bd != NULL) {
691 debugTrace(DEBUG_gc, "mark stack: %d blocks",
692 countBlocks(mark_stack_top_bd));
693 freeChain(mark_stack_top_bd);
694 }
695
696 // Free any bitmaps.
697 for (g = 0; g <= N; g++) {
698 gen = &generations[g];
699 if (gen->bitmap != NULL) {
700 freeGroup(gen->bitmap);
701 gen->bitmap = NULL;
702 }
703 }
704
705 resize_nursery();
706
707 resetNurseries();
708
709 // mark the garbage collected CAFs as dead
710 #if defined(DEBUG)
711 if (major_gc) { gcCAFs(); }
712 #endif
713
714 // Update the stable pointer hash table.
715 updateStableTables(major_gc);
716
717 // unlock the StablePtr table. Must be before scheduleFinalizers(),
718 // because a finalizer may call hs_free_fun_ptr() or
719 // hs_free_stable_ptr(), both of which access the StablePtr table.
720 stableUnlock();
721
722 // Must be after stableUnlock(), because it might free stable ptrs.
723 if (major_gc) {
724 checkUnload (gct->scavenged_static_objects);
725 }
726
727 #if defined(PROFILING)
728 // resetStaticObjectForRetainerProfiling() must be called before
729 // zeroing below.
730
731 // ToDo: fix the gct->scavenged_static_objects below
732 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
733 #endif
734
735 // Start any pending finalizers. Must be after
736 // updateStableTables() and stableUnlock() (see #4221).
737 RELEASE_SM_LOCK;
738 scheduleFinalizers(cap, dead_weak_ptr_list);
739 ACQUIRE_SM_LOCK;
740
741 // check sanity after GC
742 // before resurrectThreads(), because that might overwrite some
743 // closures, which will cause problems with THREADED where we don't
744 // fill slop.
745 IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc));
746
747 // If a heap census is due, we need to do it before
748 // resurrectThreads(), for the same reason as checkSanity above:
749 // resurrectThreads() will overwrite some closures and leave slop
750 // behind.
751 if (do_heap_census) {
752 debugTrace(DEBUG_sched, "performing heap census");
753 RELEASE_SM_LOCK;
754 heapCensus(gct->gc_start_cpu);
755 ACQUIRE_SM_LOCK;
756 }
757
758 // send exceptions to any threads which were about to die
759 RELEASE_SM_LOCK;
760 resurrectThreads(resurrected_threads);
761 ACQUIRE_SM_LOCK;
762
763 if (major_gc) {
764 W_ need_prealloc, need_live, need, got;
765 uint32_t i;
766
767 need_live = 0;
768 for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
769 need_live += genLiveBlocks(&generations[i]);
770 }
771 need_live = stg_max(RtsFlags.GcFlags.minOldGenSize, need_live);
772
773 need_prealloc = 0;
774 for (i = 0; i < n_nurseries; i++) {
775 need_prealloc += nurseries[i].n_blocks;
776 }
777 need_prealloc += RtsFlags.GcFlags.largeAllocLim;
778 need_prealloc += countAllocdBlocks(exec_block);
779 need_prealloc += arenaBlocks();
780 #if defined(PROFILING)
781 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
782 need_prealloc = retainerStackBlocks();
783 }
784 #endif
785
786 /* If the amount of data remains constant, next major GC we'll
787 * require (F+1)*live + prealloc. We leave (F+2)*live + prealloc
788 * in order to reduce repeated deallocation and reallocation. #14702
789 */
790 need = need_prealloc + (RtsFlags.GcFlags.oldGenFactor + 2) * need_live;
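/* Rough illustration, assuming the default -F2 (oldGenFactor == 2):
 * with need_live = 1000 blocks and need_prealloc = 200 blocks,
 * need = 200 + (2 + 2) * 1000 = 4200 blocks, before the
 * heapSizeSuggestion floor, the maxHeapSize cap and the conversion to
 * megablocks below. */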
791
792 /* Also, if user set heap size, do not drop below it.
793 */
794 need = stg_max(RtsFlags.GcFlags.heapSizeSuggestion, need);
795
796 /* But with a large nursery, the above estimate might exceed
797 * maxHeapSize. A large resident set size might make the OS
798 * kill this process, or swap unnecessarily. Therefore we
799 * ensure that our estimate does not exceed maxHeapSize.
800 */
801 if (RtsFlags.GcFlags.maxHeapSize != 0) {
802 need = stg_min(RtsFlags.GcFlags.maxHeapSize, need);
803 }
804
805 need = BLOCKS_TO_MBLOCKS(need);
806
807 got = mblocks_allocated;
808
809 if (got > need) {
810 returnMemoryToOS(got - need);
811 }
812 }
813
814 // extra GC trace info
815 IF_DEBUG(gc, statDescribeGens());
816
817 #if defined(DEBUG)
818 // symbol-table based profiling
819 /* heapCensus(to_blocks); */ /* ToDo */
820 #endif
821
822 // restore enclosing cost centre
823 #if defined(PROFILING)
824 for (n = 0; n < n_capabilities; n++) {
825 capabilities[n]->r.rCCCS = save_CCS[n];
826 }
827 #endif
828
829 #if defined(DEBUG)
830 // check for memory leaks if DEBUG is on
831 memInventory(DEBUG_gc);
832 #endif
833
834 // ok, GC over: tell the stats department what happened.
835 stat_endGC(cap, gct, live_words, copied,
836 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
837 N, n_gc_threads, par_max_copied, par_balanced_copied);
838
839 #if defined(RTS_USER_SIGNALS)
840 if (RtsFlags.MiscFlags.install_signal_handlers) {
841 // unblock signals again
842 unblockUserSignals();
843 }
844 #endif
845
846 RELEASE_SM_LOCK;
847
848 SET_GCT(saved_gct);
849 }
850
851 /* -----------------------------------------------------------------------------
852 Heap overflow is indicated by setting a flag that the caller of
853 GarbageCollect can check. (not ideal, TODO: better)
854 -------------------------------------------------------------------------- */
855
856 static void heapOverflow(void)
857 {
858 heap_overflow = true;
859 }
860
861 /* -----------------------------------------------------------------------------
862 Initialise the gc_thread structures.
863 -------------------------------------------------------------------------- */
864
865 static void
866 new_gc_thread (uint32_t n, gc_thread *t)
867 {
868 uint32_t g;
869 gen_workspace *ws;
870
871 t->cap = capabilities[n];
872
873 #if defined(THREADED_RTS)
874 t->id = 0;
875 initSpinLock(&t->gc_spin);
876 initSpinLock(&t->mut_spin);
877 ACQUIRE_SPIN_LOCK(&t->gc_spin);
878 ACQUIRE_SPIN_LOCK(&t->mut_spin);
879 t->wakeup = GC_THREAD_INACTIVE; // starts INACTIVE; the worker sets itself
880 // to STANDING_BY once it has started up, see gcWorkerThread
881 #endif
882
883 t->thread_index = n;
884 t->free_blocks = NULL;
885 t->gc_count = 0;
886
887 init_gc_thread(t);
888
889 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
890 {
891 ws = &t->gens[g];
892 ws->gen = &generations[g];
893 ASSERT(g == ws->gen->no);
894 ws->my_gct = t;
895
896 // We want to call
897 // alloc_todo_block(ws,0);
898 // but can't, because it uses gct which isn't set up at this point.
899 // Hence, allocate a block for todo_bd manually:
900 {
901 bdescr *bd = allocBlockOnNode(capNoToNumaNode(n));
902 // no lock, locks aren't initialised yet
903 initBdescr(bd, ws->gen, ws->gen->to);
904 bd->flags = BF_EVACUATED;
905 bd->u.scan = bd->free = bd->start;
906
907 ws->todo_bd = bd;
908 ws->todo_free = bd->free;
909 ws->todo_lim = bd->start + BLOCK_SIZE_W;
910 }
911
912 ws->todo_q = newWSDeque(128);
913 ws->todo_overflow = NULL;
914 ws->n_todo_overflow = 0;
915 ws->todo_large_objects = NULL;
916
917 ws->part_list = NULL;
918 ws->n_part_blocks = 0;
919 ws->n_part_words = 0;
920
921 ws->scavd_list = NULL;
922 ws->n_scavd_blocks = 0;
923 ws->n_scavd_words = 0;
924 }
925 }
926
927
928 void
929 initGcThreads (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
930 {
931 #if defined(THREADED_RTS)
932 uint32_t i;
933
934 if (from > 0) {
935 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
936 "initGcThreads");
937 } else {
938 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
939 "initGcThreads");
940 }
941
942 for (i = from; i < to; i++) {
943 gc_threads[i] =
944 stgMallocBytes(sizeof(gc_thread) +
945 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
946 "alloc_gc_threads");
947
948 new_gc_thread(i, gc_threads[i]);
949 }
950 #else
951 ASSERT(from == 0 && to == 1);
952 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
953 gc_threads[0] = gct;
954 new_gc_thread(0,gc_threads[0]);
955 #endif
956 }
957
958 void
959 freeGcThreads (void)
960 {
961 uint32_t g;
962 if (gc_threads != NULL) {
963 #if defined(THREADED_RTS)
964 uint32_t i;
965 for (i = 0; i < n_capabilities; i++) {
966 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
967 {
968 freeWSDeque(gc_threads[i]->gens[g].todo_q);
969 }
970 stgFree (gc_threads[i]);
971 }
972 stgFree (gc_threads);
973 #else
974 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
975 {
976 freeWSDeque(gc_threads[0]->gens[g].todo_q);
977 }
978 stgFree (gc_threads);
979 #endif
980 gc_threads = NULL;
981 }
982 }
983
984 /* ----------------------------------------------------------------------------
985 Start GC threads
986 ------------------------------------------------------------------------- */
987
988 static volatile StgWord gc_running_threads;
989
990 static StgWord
991 inc_running (void)
992 {
993 StgWord new;
994 new = atomic_inc(&gc_running_threads, 1);
995 ASSERT(new <= n_gc_threads);
996 return new;
997 }
998
999 static StgWord
1000 dec_running (void)
1001 {
1002 ASSERT(gc_running_threads != 0);
1003 return atomic_dec(&gc_running_threads);
1004 }
1005
1006 static bool
1007 any_work (void)
1008 {
1009 int g;
1010 gen_workspace *ws;
1011
1012 gct->any_work++;
1013
1014 write_barrier();
1015
1016 // scavenge objects in compacted generation
1017 if (mark_stack_bd != NULL && !mark_stack_empty()) {
1018 return true;
1019 }
1020
1021 // Check for global work in any gen. We don't need to check for
1022 // local work, because we have already exited scavenge_loop(),
1023 // which means there is no local work for this thread.
1024 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
1025 ws = &gct->gens[g];
1026 if (ws->todo_large_objects) return true;
1027 if (!looksEmptyWSDeque(ws->todo_q)) return true;
1028 if (ws->todo_overflow) return true;
1029 }
1030
1031 #if defined(THREADED_RTS)
1032 if (work_stealing) {
1033 uint32_t n;
1034 // look for work to steal
1035 for (n = 0; n < n_gc_threads; n++) {
1036 if (n == gct->thread_index) continue;
1037 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
1038 ws = &gc_threads[n]->gens[g];
1039 if (!looksEmptyWSDeque(ws->todo_q)) return true;
1040 }
1041 }
1042 }
1043 #endif
1044
1045 gct->no_work++;
1046 #if defined(THREADED_RTS)
1047 yieldThread();
1048 #endif
1049
1050 return false;
1051 }
1052
1053 static void
1054 scavenge_until_all_done (void)
1055 {
1056 DEBUG_ONLY( uint32_t r );
1057
1058
1059 loop:
1060 #if defined(THREADED_RTS)
1061 if (n_gc_threads > 1) {
1062 scavenge_loop();
1063 } else {
1064 scavenge_loop1();
1065 }
1066 #else
1067 scavenge_loop();
1068 #endif
1069
1070 collect_gct_blocks();
1071
1072 // scavenge_loop() only exits when there's no work to do
1073
1074 #if defined(DEBUG)
1075 r = dec_running();
1076 #else
1077 dec_running();
1078 #endif
1079
1080 traceEventGcIdle(gct->cap);
1081
1082 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1083
1084 while (gc_running_threads != 0) {
1085 // usleep(1);
1086 if (any_work()) {
1087 inc_running();
1088 traceEventGcWork(gct->cap);
1089 goto loop;
1090 }
1091 // any_work() does not remove the work from the queue, it
1092 // just checks for the presence of work. If we find any,
1093 // then we increment gc_running_threads and go back to
1094 // scavenge_loop() to perform any pending work.
1095 }
1096
1097 traceEventGcDone(gct->cap);
1098 }
1099
1100 #if defined(THREADED_RTS)
1101
1102 void
1103 gcWorkerThread (Capability *cap)
1104 {
1105 gc_thread *saved_gct;
1106
1107 // necessary if we stole a callee-saves register for gct:
1108 saved_gct = gct;
1109
1110 SET_GCT(gc_threads[cap->no]);
1111 gct->id = osThreadId();
1112
1113 // Wait until we're told to wake up
1114 RELEASE_SPIN_LOCK(&gct->mut_spin);
1115 // yieldThread();
1116 // Strangely, adding a yieldThread() here makes the CPU time
1117 // measurements more accurate on Linux, perhaps because it syncs
1118 // the CPU time across the multiple cores. Without this, CPU time
1119 // is heavily skewed towards GC rather than MUT.
1120 gct->wakeup = GC_THREAD_STANDING_BY;
1121 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1122 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1123
1124 init_gc_thread(gct);
1125
1126 traceEventGcWork(gct->cap);
1127
1128 // Every thread evacuates some roots.
1129 gct->evac_gen_no = 0;
1130 markCapability(mark_root, gct, cap, true/*prune sparks*/);
1131 scavenge_capability_mut_lists(cap);
1132
1133 scavenge_until_all_done();
1134
1135 #if defined(THREADED_RTS)
1136 // Now that the whole heap is marked, we discard any sparks that
1137 // were found to be unreachable. The main GC thread is currently
1138 // marking heap reachable via weak pointers, so it is
1139 // non-deterministic whether a spark will be retained if it is
1140 // only reachable via weak pointers. To fix this problem would
1141 // require another GC barrier, which is too high a price.
1142 pruneSparkQueue(cap);
1143 #endif
1144
1145 // Wait until we're told to continue
1146 RELEASE_SPIN_LOCK(&gct->gc_spin);
1147 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1148 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1149 gct->thread_index);
1150 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1151 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1152
1153 SET_GCT(saved_gct);
1154 }
1155
1156 #endif
1157
1158 #if defined(THREADED_RTS)
1159
1160 void
1161 waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1162 {
1163 const uint32_t n_threads = n_capabilities;
1164 const uint32_t me = cap->no;
1165 uint32_t i, j;
1166 bool retry = true;
1167 Time t0, t1, t2;
1168
1169 t0 = t1 = t2 = getProcessElapsedTime();
1170
1171 while(retry) {
1172 for (i=0; i < n_threads; i++) {
1173 if (i == me || idle_cap[i]) continue;
1174 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1175 prodCapability(capabilities[i], cap->running_task);
1176 }
1177 }
1178 for (j=0; j < 10; j++) {
1179 retry = false;
1180 for (i=0; i < n_threads; i++) {
1181 if (i == me || idle_cap[i]) continue;
1182 write_barrier();
1183 interruptCapability(capabilities[i]);
1184 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1185 retry = true;
1186 }
1187 }
1188 if (!retry) break;
1189 yieldThread();
1190 }
1191
1192 t2 = getProcessElapsedTime();
1193 if (RtsFlags.GcFlags.longGCSync != 0 &&
1194 t2 - t1 > RtsFlags.GcFlags.longGCSync) {
1195 /* call this every longGCSync of delay */
1196 rtsConfig.longGCSync(cap->no, t2 - t0);
1197 t1 = t2;
1198 }
1199 }
1200
1201 if (RtsFlags.GcFlags.longGCSync != 0 &&
1202 t2 - t0 > RtsFlags.GcFlags.longGCSync) {
1203 rtsConfig.longGCSyncEnd(t2 - t0);
1204 }
1205 }
1206
1207 #endif // THREADED_RTS
1208
1209 static void
1210 start_gc_threads (void)
1211 {
1212 #if defined(THREADED_RTS)
1213 gc_running_threads = 0;
1214 #endif
1215 }
1216
1217 static void
1218 wakeup_gc_threads (uint32_t me USED_IF_THREADS,
1219 bool idle_cap[] USED_IF_THREADS)
1220 {
1221 #if defined(THREADED_RTS)
1222 uint32_t i;
1223
1224 if (n_gc_threads == 1) return;
1225
1226 for (i=0; i < n_gc_threads; i++) {
1227 if (i == me || idle_cap[i]) continue;
1228 inc_running();
1229 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1230 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY)
1231 barf("wakeup_gc_threads");
1232
1233 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1234 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1235 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1236 }
1237 #endif
1238 }
1239
1240 // After GC is complete, we must wait for all GC threads to enter the
1241 // standby state, otherwise they may still be executing inside
1242 // any_work(), and may even remain awake until the next GC starts.
1243 static void
1244 shutdown_gc_threads (uint32_t me USED_IF_THREADS,
1245 bool idle_cap[] USED_IF_THREADS)
1246 {
1247 #if defined(THREADED_RTS)
1248 uint32_t i;
1249
1250 if (n_gc_threads == 1) return;
1251
1252 for (i=0; i < n_gc_threads; i++) {
1253 if (i == me || idle_cap[i]) continue;
1254 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
1255 busy_wait_nop();
1256 write_barrier();
1257 }
1258 }
1259 #endif
1260 }
1261
1262 #if defined(THREADED_RTS)
1263 void
1264 releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1265 {
1266 const uint32_t n_threads = n_capabilities;
1267 const uint32_t me = cap->no;
1268 uint32_t i;
1269 for (i=0; i < n_threads; i++) {
1270 if (i == me || idle_cap[i]) continue;
1271 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1272 barf("releaseGCThreads");
1273
1274 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1275 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1276 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1277 }
1278 }
1279 #endif
1280
1281 /* ----------------------------------------------------------------------------
1282 Initialise a generation that is to be collected
1283 ------------------------------------------------------------------------- */
1284
1285 static void
1286 prepare_collected_gen (generation *gen)
1287 {
1288 uint32_t i, g, n;
1289 gen_workspace *ws;
1290 bdescr *bd, *next;
1291
1292 // Throw away the current mutable list. Invariant: the mutable
1293 // list always has at least one block; this means we can avoid a
1294 // check for NULL in recordMutable().
1295 g = gen->no;
1296 if (g != 0) {
1297 for (i = 0; i < n_capabilities; i++) {
1298 freeChain(capabilities[i]->mut_lists[g]);
1299 capabilities[i]->mut_lists[g] =
1300 allocBlockOnNode(capNoToNumaNode(i));
1301 }
1302 }
1303
1304 gen = &generations[g];
1305 ASSERT(gen->no == g);
1306
1307 // we'll construct a new list of threads in this step
1308 // during GC, throw away the current list.
1309 gen->old_threads = gen->threads;
1310 gen->threads = END_TSO_QUEUE;
1311
1312 // deprecate the existing blocks
1313 gen->old_blocks = gen->blocks;
1314 gen->n_old_blocks = gen->n_blocks;
1315 gen->blocks = NULL;
1316 gen->n_blocks = 0;
1317 gen->n_words = 0;
1318 gen->live_estimate = 0;
1319
1320 // initialise the large object queues.
1321 ASSERT(gen->scavenged_large_objects == NULL);
1322 ASSERT(gen->n_scavenged_large_blocks == 0);
1323 ASSERT(gen->live_compact_objects == NULL);
1324 ASSERT(gen->n_live_compact_blocks == 0);
1325
1326 // grab all the partial blocks stashed in the gc_thread workspaces and
1327 // move them to the old_blocks list of this gen.
1328 for (n = 0; n < n_capabilities; n++) {
1329 ws = &gc_threads[n]->gens[gen->no];
1330
1331 for (bd = ws->part_list; bd != NULL; bd = next) {
1332 next = bd->link;
1333 bd->link = gen->old_blocks;
1334 gen->old_blocks = bd;
1335 gen->n_old_blocks += bd->blocks;
1336 }
1337 ws->part_list = NULL;
1338 ws->n_part_blocks = 0;
1339 ws->n_part_words = 0;
1340
1341 ASSERT(ws->scavd_list == NULL);
1342 ASSERT(ws->n_scavd_blocks == 0);
1343 ASSERT(ws->n_scavd_words == 0);
1344
1345 if (ws->todo_free != ws->todo_bd->start) {
1346 ws->todo_bd->free = ws->todo_free;
1347 ws->todo_bd->link = gen->old_blocks;
1348 gen->old_blocks = ws->todo_bd;
1349 gen->n_old_blocks += ws->todo_bd->blocks;
1350 alloc_todo_block(ws,0); // always has one block.
1351 }
1352 }
1353
1354 // mark the small objects as from-space
1355 for (bd = gen->old_blocks; bd; bd = bd->link) {
1356 bd->flags &= ~BF_EVACUATED;
1357 }
1358
1359 // mark the large objects as from-space
1360 for (bd = gen->large_objects; bd; bd = bd->link) {
1361 bd->flags &= ~BF_EVACUATED;
1362 }
1363
1364 // mark the compact objects as from-space
1365 for (bd = gen->compact_objects; bd; bd = bd->link) {
1366 bd->flags &= ~BF_EVACUATED;
1367 }
1368
1369 // for a compacted generation, we need to allocate the bitmap
1370 if (gen->mark) {
1371 StgWord bitmap_size; // in bytes
1372 bdescr *bitmap_bdescr;
1373 StgWord *bitmap;
1374
1375 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / BITS_IN(W_);
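// For example, with the usual 4Kbyte blocks and 64-bit words this is
// 4096 / 64 = 64 bytes of bitmap per old block, i.e. one mark bit per
// heap word (BLOCK_SIZE_W == 512 words per block).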
1376
1377 if (bitmap_size > 0) {
1378 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1379 / BLOCK_SIZE);
1380 gen->bitmap = bitmap_bdescr;
1381 bitmap = bitmap_bdescr->start;
1382
1383 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1384 bitmap_size, bitmap);
1385
1386 // don't forget to fill it with zeros!
1387 memset(bitmap, 0, bitmap_size);
1388
1389 // For each block in this step, point to its bitmap from the
1390 // block descriptor.
1391 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1392 bd->u.bitmap = bitmap;
1393 bitmap += BLOCK_SIZE_W / BITS_IN(W_);
1394
1395 // Also at this point we set the BF_MARKED flag
1396 // for this block. The invariant is that
1397 // BF_MARKED is always unset, except during GC
1398 // when it is set on those blocks which will be
1399 // compacted.
1400 if (!(bd->flags & BF_FRAGMENTED)) {
1401 bd->flags |= BF_MARKED;
1402 }
1403
1404 // BF_SWEPT should be marked only for blocks that are being
1405 // collected in sweep()
1406 bd->flags &= ~BF_SWEPT;
1407 }
1408 }
1409 }
1410 }
1411
1412
1413 /* ----------------------------------------------------------------------------
1414 Save the mutable lists in saved_mut_lists
1415 ------------------------------------------------------------------------- */
1416
1417 static void
1418 stash_mut_list (Capability *cap, uint32_t gen_no)
1419 {
1420 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1421 cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
1422 }
1423
1424 /* ----------------------------------------------------------------------------
1425 Initialise a generation that is *not* to be collected
1426 ------------------------------------------------------------------------- */
1427
1428 static void
1429 prepare_uncollected_gen (generation *gen)
1430 {
1431 uint32_t i;
1432
1433
1434 ASSERT(gen->no > 0);
1435
1436 // save the current mutable lists for this generation, and
1437 // allocate a fresh block for each one. We'll traverse these
1438 // mutable lists as roots early on in the GC.
1439 for (i = 0; i < n_capabilities; i++) {
1440 stash_mut_list(capabilities[i], gen->no);
1441 }
1442
1443 ASSERT(gen->scavenged_large_objects == NULL);
1444 ASSERT(gen->n_scavenged_large_blocks == 0);
1445 }
1446
1447 /* -----------------------------------------------------------------------------
1448 Collect the completed blocks from a GC thread and attach them to
1449 the generation.
1450 -------------------------------------------------------------------------- */
1451
1452 static void
1453 collect_gct_blocks (void)
1454 {
1455 uint32_t g;
1456 gen_workspace *ws;
1457 bdescr *bd, *prev;
1458
1459 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1460 ws = &gct->gens[g];
1461
1462 // there may still be a block attached to ws->todo_bd;
1463 // leave it there to use next time.
1464
1465 if (ws->scavd_list != NULL) {
1466 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1467
1468 ASSERT(gct->scan_bd == NULL);
1469 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1470
1471 prev = NULL;
1472 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1473 prev = bd;
1474 }
1475 if (prev != NULL) {
1476 prev->link = ws->gen->blocks;
1477 ws->gen->blocks = ws->scavd_list;
1478 }
1479 ws->gen->n_blocks += ws->n_scavd_blocks;
1480 ws->gen->n_words += ws->n_scavd_words;
1481
1482 ws->scavd_list = NULL;
1483 ws->n_scavd_blocks = 0;
1484 ws->n_scavd_words = 0;
1485
1486 RELEASE_SPIN_LOCK(&ws->gen->sync);
1487 }
1488 }
1489 }
1490
1491 /* -----------------------------------------------------------------------------
1492 During mutation, any blocks that are filled by allocatePinned() are
1493 stashed on the local pinned_object_blocks list, to avoid needing to
1494 take a global lock. Here we collect those blocks from the
1495 cap->pinned_object_blocks lists and put them on the
1496 main g0->large_object list.
1497
1498 This function returns nothing; the pinned blocks are simply moved
1499 onto g0's large_objects list.
1500 -------------------------------------------------------------------------- */
1501
1502 static void
1503 collect_pinned_object_blocks (void)
1504 {
1505 uint32_t n;
1506 bdescr *bd, *prev;
1507
1508 for (n = 0; n < n_capabilities; n++) {
1509 prev = NULL;
1510 for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1511 prev = bd;
1512 }
1513 if (prev != NULL) {
1514 prev->link = g0->large_objects;
1515 if (g0->large_objects != NULL) {
1516 g0->large_objects->u.back = prev;
1517 }
1518 g0->large_objects = capabilities[n]->pinned_object_blocks;
1519 capabilities[n]->pinned_object_blocks = 0;
1520 }
1521 }
1522 }
1523
1524 /* -----------------------------------------------------------------------------
1525 Initialise a gc_thread before GC
1526 -------------------------------------------------------------------------- */
1527
1528 static void
1529 init_gc_thread (gc_thread *t)
1530 {
1531 t->static_objects = END_OF_STATIC_OBJECT_LIST;
1532 t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST;
1533 t->scan_bd = NULL;
1534 t->mut_lists = t->cap->mut_lists;
1535 t->evac_gen_no = 0;
1536 t->failed_to_evac = false;
1537 t->eager_promotion = true;
1538 t->thunk_selector_depth = 0;
1539 t->copied = 0;
1540 t->scanned = 0;
1541 t->any_work = 0;
1542 t->no_work = 0;
1543 t->scav_find_work = 0;
1544 }
1545
1546 /* -----------------------------------------------------------------------------
1547 Function we pass to evacuate roots.
1548 -------------------------------------------------------------------------- */
1549
1550 static void
1551 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1552 {
1553 // we stole a register for gct, but this function is called from
1554 // *outside* the GC where the register variable is not in effect,
1555 // so we need to save and restore it here. NB. only call
1556 // mark_root() from the main GC thread, otherwise gct will be
1557 // incorrect.
1558 #if defined(THREADED_RTS)
1559 gc_thread *saved_gct;
1560 saved_gct = gct;
1561 #endif
1562 SET_GCT(user);
1563
1564 evacuate(root);
1565
1566 SET_GCT(saved_gct);
1567 }
1568
1569 /* ----------------------------------------------------------------------------
1570 Reset the sizes of the older generations when we do a major
1571 collection.
1572
1573 CURRENT STRATEGY: make all generations except zero the same size.
1574 We have to stay within the maximum heap size, and leave a certain
1575 percentage of the maximum heap size available to allocate into.
1576 ------------------------------------------------------------------------- */
1577
1578 static void
1579 resize_generations (void)
1580 {
1581 uint32_t g;
1582
1583 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1584 W_ live, size, min_alloc, words;
1585 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1586 const W_ gens = RtsFlags.GcFlags.generations;
1587
1588 // live in the oldest generations
1589 if (oldest_gen->live_estimate != 0) {
1590 words = oldest_gen->live_estimate;
1591 } else {
1592 words = oldest_gen->n_words;
1593 }
1594 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1595 oldest_gen->n_large_blocks +
1596 oldest_gen->n_compact_blocks;
1597
1598 // default max size for all generations except zero
1599 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1600 RtsFlags.GcFlags.minOldGenSize);
1601
1602 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1603 if (max > 0) {
1604 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1605 } else {
1606 RtsFlags.GcFlags.heapSizeSuggestion = size;
1607 }
1608 }
1609
1610 // minimum size for generation zero
1611 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1612 RtsFlags.GcFlags.minAllocAreaSize
1613 * (W_)n_capabilities);
1614
1615 // Auto-enable compaction when the residency reaches a
1616 // certain percentage of the maximum heap size (default: 30%).
1617 if (RtsFlags.GcFlags.compact ||
1618 (max > 0 &&
1619 oldest_gen->n_blocks >
1620 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1621 oldest_gen->mark = 1;
1622 oldest_gen->compact = 1;
1623 // debugBelch("compaction: on\n", live);
1624 } else {
1625 oldest_gen->mark = 0;
1626 oldest_gen->compact = 0;
1627 // debugBelch("compaction: off\n", live);
1628 }
1629
1630 if (RtsFlags.GcFlags.sweep) {
1631 oldest_gen->mark = 1;
1632 }
1633
1634 // if we're going to go over the maximum heap size, reduce the
1635 // size of the generations accordingly. The calculation is
1636 // different if compaction is turned on, because we don't need
1637 // to double the space required to collect the old generation.
1638 if (max != 0) {
1639
1640 // this test is necessary to ensure that the calculations
1641 // below don't have any negative results - we're working
1642 // with unsigned values here.
1643 if (max < min_alloc) {
1644 heapOverflow();
1645 }
1646
1647 if (oldest_gen->compact) {
1648 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1649 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1650 }
1651 } else {
1652 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1653 size = (max - min_alloc) / ((gens - 1) * 2);
1654 }
1655 }
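// Worked example for the default two generations (gens == 2): without
// compaction the constraint is 2*size + min_alloc <= max (we must
// reserve room to copy the whole old generation), so size becomes
// (max - min_alloc) / 2; with compaction it is roughly
// size + min_alloc <= max, so size can grow to max - min_alloc.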
1656
1657 if (size < live) {
1658 heapOverflow();
1659 }
1660 }
1661
1662 #if 0
1663 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1664 min_alloc, size, max);
1665 #endif
1666
1667 for (g = 0; g < gens; g++) {
1668 generations[g].max_blocks = size;
1669 }
1670 }
1671 }
1672
1673 /* -----------------------------------------------------------------------------
1674 Calculate the new size of the nursery, and resize it.
1675 -------------------------------------------------------------------------- */
1676
1677 static void
1678 resize_nursery (void)
1679 {
1680 const StgWord min_nursery =
1681 RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;
1682
1683 if (RtsFlags.GcFlags.generations == 1)
1684 { // Two-space collector:
1685 W_ blocks;
1686
1687 /* set up a new nursery. Allocate a nursery size based on a
1688 * function of the amount of live data (by default a factor of 2)
1689 * Use the blocks from the old nursery if possible, freeing up any
1690 * left over blocks.
1691 *
1692 * If we get near the maximum heap size, then adjust our nursery
1693 * size accordingly. If the nursery is the same size as the live
1694 * data (L), then we need 3L bytes. We can reduce the size of the
1695 * nursery to bring the required memory down near 2L bytes.
1696 *
1697 * A normal 2-space collector would need 4L bytes to give the same
1698 * performance we get from 3L bytes, reducing to the same
1699 * performance at 2L bytes.
1700 */
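/* Rough arithmetic behind the comment above: with L bytes live and a
 * nursery also of size L, a collection needs the nursery (L) plus the
 * from-space holding the live data (~L) plus the to-space it is copied
 * into (~L), i.e. about 3L; shrinking the nursery moves the total
 * towards 2L. */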
1701 blocks = generations[0].n_blocks;
1702
1703 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1704 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1705 RtsFlags.GcFlags.maxHeapSize )
1706 {
1707 long adjusted_blocks; // signed on purpose
1708 int pc_free;
1709
1710 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1711
1712 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1713 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1714
1715 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1716 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1717 {
1718 heapOverflow();
1719 }
1720 blocks = adjusted_blocks;
1721 }
1722 else
1723 {
1724 blocks *= RtsFlags.GcFlags.oldGenFactor;
1725 if (blocks < min_nursery)
1726 {
1727 blocks = min_nursery;
1728 }
1729 }
1730 resizeNurseries(blocks);
1731 }
1732 else // Generational collector
1733 {
1734 /*
1735 * If the user has given us a suggested heap size, adjust our
1736 * allocation area to make best use of the memory available.
1737 */
1738 if (RtsFlags.GcFlags.heapSizeSuggestion)
1739 {
1740 long blocks;
1741 StgWord needed;
1742
1743 calcNeeded(false, &needed); // approx blocks needed at next GC
1744
1745 /* Guess how much will be live in generation 0 step 0 next time.
1746 * A good approximation is obtained by finding the
1747 * percentage of g0 that was live at the last minor GC.
1748 *
1749 * We have an accurate figure for the amount of copied data in
1750 * 'copied', but we must convert this to a number of blocks, with
1751 * a small adjustment for estimated slop at the end of a block
1752 * (- 10 words).
1753 */
1754 if (N == 0)
1755 {
1756 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1757 / countNurseryBlocks();
1758 }
1759
1760 /* Estimate a size for the allocation area based on the
1761 * information available. We might end up going slightly under
1762 * or over the suggested heap size, but we should be pretty
1763 * close on average.
1764 *
1765 * Formula: suggested - needed
1766 * ----------------------------
1767 * 1 + g0_pcnt_kept/100
1768 *
1769 * where 'needed' is the amount of memory needed at the next
1770 * collection for collecting all gens except g0.
1771 */
1772 blocks =
1773 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1774 (100 + (long)g0_pcnt_kept);
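/* Illustrative numbers: with heapSizeSuggestion = 100000 blocks,
 * needed = 20000 blocks and g0_pcnt_kept = 30, this gives
 * blocks = (80000 * 100) / 130 ~= 61538; the nursery (61538) plus the
 * ~30% of it expected to survive (~18461) plus `needed` comes back to
 * roughly the suggested 100000 blocks. */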
1775
1776 if (blocks < (long)min_nursery) {
1777 blocks = min_nursery;
1778 }
1779
1780 resizeNurseries((W_)blocks);
1781 }
1782 else
1783 {
1784 // we might have added extra blocks to the nursery, so
1785 // resize back to the original size again.
1786 resizeNurseriesFixed();
1787 }
1788 }
1789 }
1790
1791 /* -----------------------------------------------------------------------------
1792 Sanity code for CAF garbage collection.
1793
1794 With DEBUG turned on, we manage a CAF list in addition to the SRT
1795 mechanism. After GC, we run down the CAF list and blackhole any
1796 CAFs which have been garbage collected. This means we get an error
1797 whenever the program tries to enter a garbage collected CAF.
1798
1799 Any garbage collected CAFs are taken off the CAF list at the same
1800 time.
1801 -------------------------------------------------------------------------- */
1802
1803 #if defined(DEBUG)
1804
1805 static void gcCAFs(void)
1806 {
1807 StgIndStatic *p, *prev;
1808
1809 const StgInfoTable *info;
1810 uint32_t i;
1811
1812 i = 0;
1813 p = debug_caf_list;
1814 prev = NULL;
1815
1816 for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST;
1817 p = (StgIndStatic*)p->saved_info) {
1818
1819 info = get_itbl((StgClosure*)p);
1820 ASSERT(info->type == IND_STATIC);
1821
1822 if (p->static_link == NULL) {
1823 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
1824 SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
1825 if (prev == NULL) {
1826 debug_caf_list = (StgIndStatic*)p->saved_info;
1827 } else {
1828 prev->saved_info = p->saved_info;
1829 }
1830 } else {
1831 prev = p;
1832 i++;
1833 }
1834 }
1835
1836 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1837 }
1838 #endif