1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Storage.h"
32 #include "RtsUtils.h"
33 #include "Apply.h"
34 #include "Updates.h"
35 #include "Stats.h"
36 #include "Schedule.h"
37 #include "Sanity.h"
38 #include "BlockAlloc.h"
39 #include "ProfHeap.h"
40 #include "Weak.h"
41 #include "Prelude.h"
42 #include "RtsSignals.h"
43 #include "STM.h"
44 #include "Trace.h"
45 #include "RetainerProfile.h"
46 #include "LdvProfile.h"
47 #include "RaiseAsync.h"
48 #include "Stable.h"
49 #include "CheckUnload.h"
50 #include "CNF.h"
51
52 #include <string.h> // for memset()
53 #include <unistd.h>
54
55 /* -----------------------------------------------------------------------------
56 Global variables
57 -------------------------------------------------------------------------- */
58
59 /* STATIC OBJECT LIST.
60 *
61 * During GC:
62 * We maintain a linked list of static objects that are still live.
63 * The requirements for this list are:
64 *
65 * - we need to scan the list while adding to it, in order to
66 * scavenge all the static objects (in the same way that
67 * breadth-first scavenging works for dynamic objects).
68 *
69 * - we need to be able to tell whether an object is already on
70 * the list, to break loops.
71 *
72 * Each static object has a "static link field", which we use for
73 * linking objects on to the list. We use a stack-type list, consing
74 * objects on the front as they are added (this means that the
75 * scavenge phase is depth-first, not breadth-first, but that
76 * shouldn't matter).
77 *
78 * A separate list is kept for objects that have been scavenged
79 * already - this is so that we can zero all the marks afterwards.
80 *
81 * An object is on the list if its static link field is non-zero; this
82 * means that we have to mark the end of the list with '1', not NULL.
83 *
84 * Extra notes for generational GC:
85 *
86 * Each generation has a static object list associated with it. When
87 * collecting generations up to N, we treat the static object lists
88 * from generations > N as roots.
89 *
90 * We build up a static object list while collecting generations 0..N,
91 * which is then appended to the static object list of generation N+1.
92 */
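
/* The following is an illustrative sketch (not part of the RTS, and not
 * the real STATIC_LINK()/static_flag machinery) of the list protocol
 * described above: objects are consed onto the front via their static
 * link field, membership is "link field is non-zero", and the end of the
 * list is marked with 1 rather than NULL.  All names are hypothetical.
 */
#if 0
typedef struct SketchStatic_ {
    struct SketchStatic_ *static_link;  /* NULL <=> not yet on the list */
} SketchStatic;

#define SKETCH_END_OF_LIST ((SketchStatic *)1)  /* non-NULL end marker */

static SketchStatic *sketch_static_list = SKETCH_END_OF_LIST;

static void sketch_push_static (SketchStatic *obj)
{
    if (obj->static_link != NULL) {
        return;  /* already on the list; this is what breaks cycles */
    }
    obj->static_link = sketch_static_list;  /* cons onto the front */
    sketch_static_list = obj;
}
#endif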
93
94 /* N is the oldest generation being collected, where the generations
95 * are numbered starting at 0. A major GC (indicated by the major_gc
96 * flag) is when we're collecting all generations. We only attempt to
97 * deal with static objects and GC CAFs when doing a major GC.
98 */
99 uint32_t N;
100 bool major_gc;
101
102 /* Data used for allocation area sizing.
103 */
104 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
105
106 /* Mut-list stats */
107 #if defined(DEBUG)
108 uint32_t mutlist_MUTVARS,
109 mutlist_MUTARRS,
110 mutlist_MVARS,
111 mutlist_TVAR,
112 mutlist_TVAR_WATCH_QUEUE,
113 mutlist_TREC_CHUNK,
114 mutlist_TREC_HEADER,
115 mutlist_ATOMIC_INVARIANT,
116 mutlist_INVARIANT_CHECK_QUEUE,
117 mutlist_OTHERS;
118 #endif
119
120 /* Thread-local data for each GC thread
121 */
122 gc_thread **gc_threads = NULL;
123
124 #if !defined(THREADED_RTS)
125 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
126 #endif
127
128 // Number of threads running in *this* GC. Affects how many
129 // step->todos[] lists we have to look in to find work.
130 uint32_t n_gc_threads;
131
132 // For stats:
133 static long copied; // *words* copied & scavenged during this GC
134
135 bool work_stealing;
136
137 uint32_t static_flag = STATIC_FLAG_B;
138 uint32_t prev_static_flag = STATIC_FLAG_A;
139
140 DECLARE_GCT
141
142 /* -----------------------------------------------------------------------------
143 Static function declarations
144 -------------------------------------------------------------------------- */
145
146 static void mark_root (void *user, StgClosure **root);
147 static void prepare_collected_gen (generation *gen);
148 static void prepare_uncollected_gen (generation *gen);
149 static void init_gc_thread (gc_thread *t);
150 static void resize_generations (void);
151 static void resize_nursery (void);
152 static void start_gc_threads (void);
153 static void scavenge_until_all_done (void);
154 static StgWord inc_running (void);
155 static StgWord dec_running (void);
156 static void wakeup_gc_threads (uint32_t me, bool idle_cap[]);
157 static void shutdown_gc_threads (uint32_t me, bool idle_cap[]);
158 static void collect_gct_blocks (void);
159 static void collect_pinned_object_blocks (void);
160 static void heapOverflow (void);
161
162 #if defined(DEBUG)
163 static void gcCAFs (void);
164 #endif
165
166 /* -----------------------------------------------------------------------------
167 The mark stack.
168 -------------------------------------------------------------------------- */
169
170 bdescr *mark_stack_top_bd; // topmost block in the mark stack
171 bdescr *mark_stack_bd; // current block in the mark stack
172 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
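
/* Illustrative sketch only: a simplified version of how these three
 * variables cooperate when the mark stack grows (the real push/pop code
 * lives in MarkStack.h and differs in detail).  Entries are pushed into
 * the block pointed to by mark_stack_bd; when it fills up, a fresh block
 * is chained on top, so mark_stack_top_bd always reaches every block via
 * the link field, which is why GarbageCollect() can free the whole stack
 * with freeChain(mark_stack_top_bd).
 */
#if 0
static void sketch_push_mark_stack (StgPtr p)
{
    *mark_sp++ = (StgWord)p;

    if (mark_sp == mark_stack_bd->start + BLOCK_SIZE_W) {
        /* the current block is full: chain a new block on top of it */
        bdescr *bd = allocBlock();
        bd->link = mark_stack_bd;       /* downward pointer, for popping  */
        bd->u.back = NULL;
        mark_stack_bd->u.back = bd;     /* upward pointer, for re-pushing */
        mark_stack_top_bd = bd;
        mark_stack_bd = bd;
        mark_sp = bd->start;
    }
}
#endif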
173
174 /* -----------------------------------------------------------------------------
175 GarbageCollect: the main entry point to the garbage collector.
176
177 The collect_gen parameter is obtained by calling calcNeeded().
178
179 Locks held: all capabilities are held throughout GarbageCollect().
180 -------------------------------------------------------------------------- */
181
182 void
183 GarbageCollect (uint32_t collect_gen,
184 bool do_heap_census,
185 uint32_t gc_type USED_IF_THREADS,
186 Capability *cap,
187 bool idle_cap[])
188 {
189 bdescr *bd;
190 generation *gen;
191 StgWord live_blocks, live_words, par_max_copied;
192 #if defined(THREADED_RTS)
193 gc_thread *saved_gct;
194 #endif
195 uint32_t g, n;
196
197 // necessary if we stole a callee-saves register for gct:
198 #if defined(THREADED_RTS)
199 saved_gct = gct;
200 #endif
201
202 #if defined(PROFILING)
203 CostCentreStack *save_CCS[n_capabilities];
204 #endif
205
206 ACQUIRE_SM_LOCK;
207
208 #if defined(RTS_USER_SIGNALS)
209 if (RtsFlags.MiscFlags.install_signal_handlers) {
210 // block signals
211 blockUserSignals();
212 }
213 #endif
214
215 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
216 // otherwise adjust the padding in gen_workspace.
217
218 // this is the main thread
219 SET_GCT(gc_threads[cap->no]);
220
221 // tell the stats department that we've started a GC
222 stat_startGC(cap, gct);
223
224 // lock the StablePtr table
225 stableLock();
226
227 #if defined(DEBUG)
228 mutlist_MUTVARS = 0;
229 mutlist_MUTARRS = 0;
230 mutlist_MVARS = 0;
231 mutlist_TVAR = 0;
232 mutlist_TVAR_WATCH_QUEUE = 0;
233 mutlist_TREC_CHUNK = 0;
234 mutlist_TREC_HEADER = 0;
235 mutlist_ATOMIC_INVARIANT = 0;
236 mutlist_INVARIANT_CHECK_QUEUE = 0;
237 mutlist_OTHERS = 0;
238 #endif
239
240 // attribute any costs to CCS_GC
241 #if defined(PROFILING)
242 for (n = 0; n < n_capabilities; n++) {
243 save_CCS[n] = capabilities[n]->r.rCCCS;
244 capabilities[n]->r.rCCCS = CCS_GC;
245 }
246 #endif
247
248 /* Figure out which generation to collect
249 */
250 N = collect_gen;
251 major_gc = (N == RtsFlags.GcFlags.generations-1);
252
253 if (major_gc) {
254 prev_static_flag = static_flag;
255 static_flag =
256 static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
257 }
258
259 #if defined(THREADED_RTS)
260 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
261 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
262 // It's not always a good idea to do load balancing in parallel
263 // GC. In particular, for a parallel program we don't want to
264 // lose locality by moving cached data into another CPU's cache
265 // (this effect can be quite significant).
266 //
267 // We could have a more complex way to determine whether to do
268 // work stealing or not, e.g. it might be a good idea to do it
269 // if the heap is big. For now, we just turn it on or off with
270 // a flag.
271 #endif
272
273 /* Start threads, so they can be spinning up while we finish initialisation.
274 */
275 start_gc_threads();
276
277 #if defined(THREADED_RTS)
278 /* How many threads will be participating in this GC?
279 * We don't try to parallelise minor GCs (unless the user asks for
280 * it with +RTS -gn0), or mark/compact/sweep GC.
281 */
282 if (gc_type == SYNC_GC_PAR) {
283 n_gc_threads = n_capabilities;
284 } else {
285 n_gc_threads = 1;
286 }
287 #else
288 n_gc_threads = 1;
289 #endif
290
291 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
292 N, n_gc_threads);
293
294 #if defined(DEBUG)
295 // check for memory leaks if DEBUG is on
296 memInventory(DEBUG_gc);
297 #endif
298
299 // do this *before* we start scavenging
300 collectFreshWeakPtrs();
301
302 // check sanity *before* GC
303 IF_DEBUG(sanity, checkSanity(false /* before GC */, major_gc));
304
305 // gather blocks allocated using allocatePinned() from each capability
306 // and put them on the g0->large_objects list.
307 collect_pinned_object_blocks();
308
309 // Initialise all the generations that we're collecting.
310 for (g = 0; g <= N; g++) {
311 prepare_collected_gen(&generations[g]);
312 }
313 // Initialise all the generations that we're *not* collecting.
314 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
315 prepare_uncollected_gen(&generations[g]);
316 }
317
318 // Prepare this gc_thread
319 init_gc_thread(gct);
320
321 /* Allocate a mark stack if we're doing a major collection.
322 */
323 if (major_gc && oldest_gen->mark) {
324 mark_stack_bd = allocBlock();
325 mark_stack_top_bd = mark_stack_bd;
326 mark_stack_bd->link = NULL;
327 mark_stack_bd->u.back = NULL;
328 mark_sp = mark_stack_bd->start;
329 } else {
330 mark_stack_bd = NULL;
331 mark_stack_top_bd = NULL;
332 mark_sp = NULL;
333 }
334
335 /* -----------------------------------------------------------------------
336 * follow all the roots that we know about:
337 */
338
339 // the main thread is running: this prevents any other threads from
340 // exiting prematurely, so we can start them now.
341 // NB. do this after the mutable lists have been saved above, otherwise
342 // the other GC threads will be writing into the old mutable lists.
343 inc_running();
344 wakeup_gc_threads(gct->thread_index, idle_cap);
345
346 traceEventGcWork(gct->cap);
347
348 // scavenge the capability-private mutable lists. This isn't part
349 // of markSomeCapabilities() because markSomeCapabilities() can only
350 // call back into the GC via mark_root() (due to the gct register
351 // variable).
352 if (n_gc_threads == 1) {
353 for (n = 0; n < n_capabilities; n++) {
354 #if defined(THREADED_RTS)
355 scavenge_capability_mut_Lists1(capabilities[n]);
356 #else
357 scavenge_capability_mut_lists(capabilities[n]);
358 #endif
359 }
360 } else {
361 scavenge_capability_mut_lists(gct->cap);
362 for (n = 0; n < n_capabilities; n++) {
363 if (idle_cap[n]) {
364 markCapability(mark_root, gct, capabilities[n],
365 true/*don't mark sparks*/);
366 scavenge_capability_mut_lists(capabilities[n]);
367 }
368 }
369 }
370
371 // follow roots from the CAF list (used by GHCi)
372 gct->evac_gen_no = 0;
373 markCAFs(mark_root, gct);
374
375 // follow all the roots that the application knows about.
376 gct->evac_gen_no = 0;
377 if (n_gc_threads == 1) {
378 for (n = 0; n < n_capabilities; n++) {
379 markCapability(mark_root, gct, capabilities[n],
380 true/*don't mark sparks*/);
381 }
382 } else {
383 markCapability(mark_root, gct, cap, true/*don't mark sparks*/);
384 }
385
386 markScheduler(mark_root, gct);
387
388 #if defined(RTS_USER_SIGNALS)
389 // mark the signal handlers (signals should be already blocked)
390 markSignalHandlers(mark_root, gct);
391 #endif
392
393 // Mark the weak pointer list, and prepare to detect dead weak pointers.
394 markWeakPtrList();
395 initWeakForGC();
396
397 // Mark the stable pointer table.
398 markStableTables(mark_root, gct);
399
400 /* -------------------------------------------------------------------------
401 * Repeatedly scavenge all the areas we know about until there's no
402 * more scavenging to be done.
403 */
404 for (;;)
405 {
406 scavenge_until_all_done();
407 // The other threads are now stopped. We might recurse back to
408 // here, but from now on this is the only thread.
409
410 // must be last... invariant is that everything is fully
411 // scavenged at this point.
412 if (traverseWeakPtrList()) { // returns true if evaced something
413 inc_running();
414 continue;
415 }
416
417 // If we get to here, there's really nothing left to do.
418 break;
419 }
420
421 shutdown_gc_threads(gct->thread_index, idle_cap);
422
423 // Now see which stable names are still alive.
424 gcStableTables();
425
426 #if defined(THREADED_RTS)
427 if (n_gc_threads == 1) {
428 for (n = 0; n < n_capabilities; n++) {
429 pruneSparkQueue(capabilities[n]);
430 }
431 } else {
432 for (n = 0; n < n_capabilities; n++) {
433 if (n == cap->no || idle_cap[n]) {
434 pruneSparkQueue(capabilities[n]);
435 }
436 }
437 }
438 #endif
439
440 #if defined(PROFILING)
441 // We call processHeapClosureForDead() on every closure destroyed during
442 // the current garbage collection, so we invoke LdvCensusForDead().
443 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
444 || RtsFlags.ProfFlags.bioSelector != NULL) {
445 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
446 LdvCensusForDead(N);
447 ACQUIRE_SM_LOCK;
448 }
449 #endif
450
451 // NO MORE EVACUATION AFTER THIS POINT!
452
453 // Finally: compact or sweep the oldest generation.
454 if (major_gc && oldest_gen->mark) {
455 if (oldest_gen->compact)
456 compact(gct->scavenged_static_objects);
457 else
458 sweep(oldest_gen);
459 }
460
461 copied = 0;
462 par_max_copied = 0;
463 {
464 uint32_t i;
465 for (i=0; i < n_gc_threads; i++) {
466 if (n_gc_threads > 1) {
467 debugTrace(DEBUG_gc,"thread %d:", i);
468 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
469 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
470 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
471 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
472 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
473 }
474 copied += gc_threads[i]->copied;
475 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
476 }
477 if (n_gc_threads == 1) {
478 par_max_copied = 0;
479 }
480 }
481
482 // Run through all the generations and tidy up.
483 // We're going to:
484 // - count the amount of "live" data (live_words, live_blocks)
485 // - count the amount of "copied" data in this GC (copied)
486 // - free from-space
487 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
488 //
489 live_words = 0;
490 live_blocks = 0;
491
492 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
493
494 if (g == N) {
495 generations[g].collections++; // for stats
496 if (n_gc_threads > 1) generations[g].par_collections++;
497 }
498
499 // Count the mutable list as bytes "copied" for the purposes of
500 // stats. Every mutable list is copied during every GC.
501 if (g > 0) {
502 W_ mut_list_size = 0;
503 for (n = 0; n < n_capabilities; n++) {
504 mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
505 }
506 copied += mut_list_size;
507
508 debugTrace(DEBUG_gc,
509 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
510 (unsigned long)(mut_list_size * sizeof(W_)),
511 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
512 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
513 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
514 mutlist_ATOMIC_INVARIANT,
515 mutlist_INVARIANT_CHECK_QUEUE,
516 mutlist_OTHERS);
517 }
518
519 bdescr *next, *prev;
520 gen = &generations[g];
521
522 // for generations we collected...
523 if (g <= N) {
524
525 /* free old memory and shift to-space into from-space for all
526 * the collected generations (except the allocation area). These
527 * freed blocks will probably be quickly recycled.
528 */
529 if (gen->mark)
530 {
531 // tack the new blocks on the end of the existing blocks
532 if (gen->old_blocks != NULL) {
533
534 prev = NULL;
535 for (bd = gen->old_blocks; bd != NULL; bd = next) {
536
537 next = bd->link;
538
539 if (!(bd->flags & BF_MARKED))
540 {
541 if (prev == NULL) {
542 gen->old_blocks = next;
543 } else {
544 prev->link = next;
545 }
546 freeGroup(bd);
547 gen->n_old_blocks--;
548 }
549 else
550 {
551 gen->n_words += bd->free - bd->start;
552
553 // NB. this step might not be compacted next
554 // time, so reset the BF_MARKED flags.
555 // They are set before GC if we're going to
556 // compact. (search for BF_MARKED above).
557 bd->flags &= ~BF_MARKED;
558
559 // between GCs, all blocks in the heap except
560 // for the nursery have the BF_EVACUATED flag set.
561 bd->flags |= BF_EVACUATED;
562
563 prev = bd;
564 }
565 }
566
567 if (prev != NULL) {
568 prev->link = gen->blocks;
569 gen->blocks = gen->old_blocks;
570 }
571 }
572 // add the new blocks to the block tally
573 gen->n_blocks += gen->n_old_blocks;
574 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
575 ASSERT(countOccupied(gen->blocks) == gen->n_words);
576 }
577 else // not compacted
578 {
579 freeChain(gen->old_blocks);
580 }
581
582 gen->old_blocks = NULL;
583 gen->n_old_blocks = 0;
584
585 /* LARGE OBJECTS. The currently live large objects are chained on
586 * scavenged_large_objects, having been moved there during garbage
587 * collection from large_objects. Any objects left on the
588 * large_objects list are therefore dead, so we free them here.
589 */
590 freeChain(gen->large_objects);
591 gen->large_objects = gen->scavenged_large_objects;
592 gen->n_large_blocks = gen->n_scavenged_large_blocks;
593 gen->n_large_words = countOccupied(gen->large_objects);
594 gen->n_new_large_words = 0;
595
596 /* COMPACT_NFDATA. The currently live compacts are chained
597 * to live_compact_objects, much like large objects. Any
598 * objects left on the compact_objects list are dead.
599 *
600 * We don't run a simple freeChain because we want to give the
601 * CNF module some chance to free memory that freeChain would
602 * not see (namely blocks appended to a CNF through a compactResize).
603 *
604 * See Note [Compact Normal Forms] for details.
605 */
606 for (bd = gen->compact_objects; bd; bd = next) {
607 next = bd->link;
608 compactFree(((StgCompactNFDataBlock*)bd->start)->owner);
609 }
610 gen->compact_objects = gen->live_compact_objects;
611 gen->n_compact_blocks = gen->n_live_compact_blocks;
612 }
613 else // for generations > N
614 {
615 /* For older generations, we need to append the
616 * scavenged_large_objects list (i.e. large objects that have been
617 * promoted during this GC) to the large_objects list for that generation.
618 */
619 for (bd = gen->scavenged_large_objects; bd; bd = next) {
620 next = bd->link;
621 dbl_link_onto(bd, &gen->large_objects);
622 gen->n_large_words += bd->free - bd->start;
623 }
624
625 // And same for compacts
626 for (bd = gen->live_compact_objects; bd; bd = next) {
627 next = bd->link;
628 dbl_link_onto(bd, &gen->compact_objects);
629 }
630
631 // add the new blocks we promoted during this GC
632 gen->n_large_blocks += gen->n_scavenged_large_blocks;
633 gen->n_compact_blocks += gen->n_live_compact_blocks;
634 }
635
636 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
637 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
638 // We can't run the same assertions on compact objects, because there
639 // is memory "the GC doesn't see" (directly) which is nevertheless
640 // accounted for in gen->n_compact_blocks.
641
642 gen->scavenged_large_objects = NULL;
643 gen->n_scavenged_large_blocks = 0;
644 gen->live_compact_objects = NULL;
645 gen->n_live_compact_blocks = 0;
646
647 // Count "live" data
648 live_words += genLiveWords(gen);
649 live_blocks += genLiveBlocks(gen);
650
651 // add in the partial blocks in the gen_workspaces
652 {
653 uint32_t i;
654 for (i = 0; i < n_capabilities; i++) {
655 live_words += gcThreadLiveWords(i, gen->no);
656 live_blocks += gcThreadLiveBlocks(i, gen->no);
657 }
658 }
659 } // for all generations
660
661 // update the max size of older generations after a major GC
662 resize_generations();
663
664 // Free the mark stack.
665 if (mark_stack_top_bd != NULL) {
666 debugTrace(DEBUG_gc, "mark stack: %d blocks",
667 countBlocks(mark_stack_top_bd));
668 freeChain(mark_stack_top_bd);
669 }
670
671 // Free any bitmaps.
672 for (g = 0; g <= N; g++) {
673 gen = &generations[g];
674 if (gen->bitmap != NULL) {
675 freeGroup(gen->bitmap);
676 gen->bitmap = NULL;
677 }
678 }
679
680 resize_nursery();
681
682 resetNurseries();
683
684 // mark the garbage collected CAFs as dead
685 #if defined(DEBUG)
686 if (major_gc) { gcCAFs(); }
687 #endif
688
689 // Update the stable pointer hash table.
690 updateStableTables(major_gc);
691
692 // unlock the StablePtr table. Must be before scheduleFinalizers(),
693 // because a finalizer may call hs_free_fun_ptr() or
694 // hs_free_stable_ptr(), both of which access the StablePtr table.
695 stableUnlock();
696
697 // Must be after stableUnlock(), because it might free stable ptrs.
698 if (major_gc) {
699 checkUnload (gct->scavenged_static_objects);
700 }
701
702 #if defined(PROFILING)
703 // resetStaticObjectForRetainerProfiling() must be called before
704 // zeroing below.
705
706 // ToDo: fix the gct->scavenged_static_objects below
707 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
708 #endif
709
710 // Start any pending finalizers. Must be after
711 // updateStableTables() and stableUnlock() (see #4221).
712 RELEASE_SM_LOCK;
713 scheduleFinalizers(cap, dead_weak_ptr_list);
714 ACQUIRE_SM_LOCK;
715
716 // check sanity after GC
717 // before resurrectThreads(), because that might overwrite some
718 // closures, which will cause problems with THREADED where we don't
719 // fill slop.
720 IF_DEBUG(sanity, checkSanity(true /* after GC */, major_gc));
721
722 // If a heap census is due, we need to do it before
723 // resurrectThreads(), for the same reason as checkSanity above:
724 // resurrectThreads() will overwrite some closures and leave slop
725 // behind.
726 if (do_heap_census) {
727 debugTrace(DEBUG_sched, "performing heap census");
728 RELEASE_SM_LOCK;
729 heapCensus(gct->gc_start_cpu);
730 ACQUIRE_SM_LOCK;
731 }
732
733 // send exceptions to any threads which were about to die
734 RELEASE_SM_LOCK;
735 resurrectThreads(resurrected_threads);
736 ACQUIRE_SM_LOCK;
737
738 if (major_gc) {
739 W_ need, got;
740 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
741 got = mblocks_allocated;
742 /* If the amount of live data remains constant, the next major GC will
743 require (F+1)*need. We leave (F+2)*need in order to reduce
744 repeated deallocation and reallocation. */
745 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
746 /* But with a large nursery, the above estimate might exceed
747 * maxHeapSize. A large resident set size might make the OS
748 * kill this process, or swap unnecessarily. Therefore we
749 * ensure that our estimate does not exceed maxHeapSize.
750 */
751 if (RtsFlags.GcFlags.maxHeapSize != 0) {
752 W_ max = BLOCKS_TO_MBLOCKS(RtsFlags.GcFlags.maxHeapSize);
753 if (need > max) {
754 need = max;
755 }
756 }
757 if (got > need) {
758 returnMemoryToOS(got - need);
759 }
760 }
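
    // A worked example of the calculation above (numbers are illustrative
    // only): with the default oldGenFactor F = 2, if the live data needs
    // 100 mblocks then we aim to keep (F+2)*100 = 400 mblocks; if 600
    // mblocks are currently allocated from the OS, the surplus 200 are
    // handed back via returnMemoryToOS(), subject to the maxHeapSize cap.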
761
762 // extra GC trace info
763 IF_DEBUG(gc, statDescribeGens());
764
765 #if defined(DEBUG)
766 // symbol-table based profiling
767 /* heapCensus(to_blocks); */ /* ToDo */
768 #endif
769
770 // restore enclosing cost centre
771 #if defined(PROFILING)
772 for (n = 0; n < n_capabilities; n++) {
773 capabilities[n]->r.rCCCS = save_CCS[n];
774 }
775 #endif
776
777 #if defined(DEBUG)
778 // check for memory leaks if DEBUG is on
779 memInventory(DEBUG_gc);
780 #endif
781
782 // ok, GC over: tell the stats department what happened.
783 stat_endGC(cap, gct, live_words, copied,
784 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
785 N, n_gc_threads, par_max_copied);
786
787 #if defined(RTS_USER_SIGNALS)
788 if (RtsFlags.MiscFlags.install_signal_handlers) {
789 // unblock signals again
790 unblockUserSignals();
791 }
792 #endif
793
794 RELEASE_SM_LOCK;
795
796 SET_GCT(saved_gct);
797 }
798
799 /* -----------------------------------------------------------------------------
800 Heap overflow is indicated by setting a flag that the caller of
801 GarbageCollect can check. (not ideal, TODO: better)
802 -------------------------------------------------------------------------- */
803
804 static void heapOverflow(void)
805 {
806 heap_overflow = true;
807 }
808
809 /* -----------------------------------------------------------------------------
810 Initialise the gc_thread structures.
811 -------------------------------------------------------------------------- */
812
813 #define GC_THREAD_INACTIVE 0
814 #define GC_THREAD_STANDING_BY 1
815 #define GC_THREAD_RUNNING 2
816 #define GC_THREAD_WAITING_TO_CONTINUE 3
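
/* Illustrative summary of the spin-lock handshake between the main GC
 * thread and a worker, using the states above.  This condenses the flow
 * of gcWorkerThread(), wakeup_gc_threads() and releaseGCThreads() below;
 * it is a sketch of the protocol, not compiled code.
 *
 *   worker:  RELEASE(mut_spin); wakeup = STANDING_BY; ACQUIRE(gc_spin);
 *            -- blocks here until woken --
 *   main:    wakeup = RUNNING; ACQUIRE(mut_spin); RELEASE(gc_spin);
 *   worker:  ... scavenge until all done ...
 *            RELEASE(gc_spin); wakeup = WAITING_TO_CONTINUE; ACQUIRE(mut_spin);
 *            -- blocks here until released --
 *   main:    wakeup = INACTIVE; ACQUIRE(gc_spin); RELEASE(mut_spin);
 */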
817
818 static void
819 new_gc_thread (uint32_t n, gc_thread *t)
820 {
821 uint32_t g;
822 gen_workspace *ws;
823
824 t->cap = capabilities[n];
825
826 #if defined(THREADED_RTS)
827 t->id = 0;
828 initSpinLock(&t->gc_spin);
829 initSpinLock(&t->mut_spin);
830 ACQUIRE_SPIN_LOCK(&t->gc_spin);
831 ACQUIRE_SPIN_LOCK(&t->mut_spin);
832 t->wakeup = GC_THREAD_INACTIVE; // starts true, so we can wait for the
833 // thread to start up, see wakeup_gc_threads
834 #endif
835
836 t->thread_index = n;
837 t->free_blocks = NULL;
838 t->gc_count = 0;
839
840 init_gc_thread(t);
841
842 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
843 {
844 ws = &t->gens[g];
845 ws->gen = &generations[g];
846 ASSERT(g == ws->gen->no);
847 ws->my_gct = t;
848
849 // We want to call
850 // alloc_todo_block(ws,0);
851 // but can't, because it uses gct which isn't set up at this point.
852 // Hence, allocate a block for todo_bd manually:
853 {
854 bdescr *bd = allocBlockOnNode(capNoToNumaNode(n));
855 // no lock, locks aren't initialised yet
856 initBdescr(bd, ws->gen, ws->gen->to);
857 bd->flags = BF_EVACUATED;
858 bd->u.scan = bd->free = bd->start;
859
860 ws->todo_bd = bd;
861 ws->todo_free = bd->free;
862 ws->todo_lim = bd->start + BLOCK_SIZE_W;
863 }
864
865 ws->todo_q = newWSDeque(128);
866 ws->todo_overflow = NULL;
867 ws->n_todo_overflow = 0;
868 ws->todo_large_objects = NULL;
869
870 ws->part_list = NULL;
871 ws->n_part_blocks = 0;
872 ws->n_part_words = 0;
873
874 ws->scavd_list = NULL;
875 ws->n_scavd_blocks = 0;
876 ws->n_scavd_words = 0;
877 }
878 }
879
880
881 void
882 initGcThreads (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
883 {
884 #if defined(THREADED_RTS)
885 uint32_t i;
886
887 if (from > 0) {
888 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
889 "initGcThreads");
890 } else {
891 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
892 "initGcThreads");
893 }
894
895 for (i = from; i < to; i++) {
896 gc_threads[i] =
897 stgMallocBytes(sizeof(gc_thread) +
898 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
899 "alloc_gc_threads");
900
901 new_gc_thread(i, gc_threads[i]);
902 }
903 #else
904 ASSERT(from == 0 && to == 1);
905 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
906 gc_threads[0] = gct;
907 new_gc_thread(0,gc_threads[0]);
908 #endif
909 }
910
911 void
912 freeGcThreads (void)
913 {
914 uint32_t g;
915 if (gc_threads != NULL) {
916 #if defined(THREADED_RTS)
917 uint32_t i;
918 for (i = 0; i < n_capabilities; i++) {
919 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
920 {
921 freeWSDeque(gc_threads[i]->gens[g].todo_q);
922 }
923 stgFree (gc_threads[i]);
924 }
925 stgFree (gc_threads);
926 #else
927 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
928 {
929 freeWSDeque(gc_threads[0]->gens[g].todo_q);
930 }
931 stgFree (gc_threads);
932 #endif
933 gc_threads = NULL;
934 }
935 }
936
937 /* ----------------------------------------------------------------------------
938 Start GC threads
939 ------------------------------------------------------------------------- */
940
941 static volatile StgWord gc_running_threads;
942
943 static StgWord
944 inc_running (void)
945 {
946 StgWord new;
947 new = atomic_inc(&gc_running_threads, 1);
948 ASSERT(new <= n_gc_threads);
949 return new;
950 }
951
952 static StgWord
953 dec_running (void)
954 {
955 ASSERT(gc_running_threads != 0);
956 return atomic_dec(&gc_running_threads);
957 }
958
959 static bool
960 any_work (void)
961 {
962 int g;
963 gen_workspace *ws;
964
965 gct->any_work++;
966
967 write_barrier();
968
969 // scavenge objects in compacted generation
970 if (mark_stack_bd != NULL && !mark_stack_empty()) {
971 return true;
972 }
973
974 // Check for global work in any gen. We don't need to check for
975 // local work, because we have already exited scavenge_loop(),
976 // which means there is no local work for this thread.
977 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
978 ws = &gct->gens[g];
979 if (ws->todo_large_objects) return true;
980 if (!looksEmptyWSDeque(ws->todo_q)) return true;
981 if (ws->todo_overflow) return true;
982 }
983
984 #if defined(THREADED_RTS)
985 if (work_stealing) {
986 uint32_t n;
987 // look for work to steal
988 for (n = 0; n < n_gc_threads; n++) {
989 if (n == gct->thread_index) continue;
990 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
991 ws = &gc_threads[n]->gens[g];
992 if (!looksEmptyWSDeque(ws->todo_q)) return true;
993 }
994 }
995 }
996 #endif
997
998 gct->no_work++;
999 #if defined(THREADED_RTS)
1000 yieldThread();
1001 #endif
1002
1003 return false;
1004 }
1005
1006 static void
1007 scavenge_until_all_done (void)
1008 {
1009 DEBUG_ONLY( uint32_t r );
1010
1011
1012 loop:
1013 #if defined(THREADED_RTS)
1014 if (n_gc_threads > 1) {
1015 scavenge_loop();
1016 } else {
1017 scavenge_loop1();
1018 }
1019 #else
1020 scavenge_loop();
1021 #endif
1022
1023 collect_gct_blocks();
1024
1025 // scavenge_loop() only exits when there's no work to do
1026
1027 #if defined(DEBUG)
1028 r = dec_running();
1029 #else
1030 dec_running();
1031 #endif
1032
1033 traceEventGcIdle(gct->cap);
1034
1035 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1036
1037 while (gc_running_threads != 0) {
1038 // usleep(1);
1039 if (any_work()) {
1040 inc_running();
1041 traceEventGcWork(gct->cap);
1042 goto loop;
1043 }
1044 // any_work() does not remove the work from the queue, it
1045 // just checks for the presence of work. If we find any,
1046 // then we increment gc_running_threads and go back to
1047 // scavenge_loop() to perform any pending work.
1048 }
1049
1050 traceEventGcDone(gct->cap);
1051 }
1052
1053 #if defined(THREADED_RTS)
1054
1055 void
1056 gcWorkerThread (Capability *cap)
1057 {
1058 gc_thread *saved_gct;
1059
1060 // necessary if we stole a callee-saves register for gct:
1061 saved_gct = gct;
1062
1063 SET_GCT(gc_threads[cap->no]);
1064 gct->id = osThreadId();
1065
1066 // Wait until we're told to wake up
1067 RELEASE_SPIN_LOCK(&gct->mut_spin);
1068 // yieldThread();
1069 // Strangely, adding a yieldThread() here makes the CPU time
1070 // measurements more accurate on Linux, perhaps because it syncs
1071 // the CPU time across the multiple cores. Without this, CPU time
1072 // is heavily skewed towards GC rather than MUT.
1073 gct->wakeup = GC_THREAD_STANDING_BY;
1074 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1075 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1076
1077 init_gc_thread(gct);
1078
1079 traceEventGcWork(gct->cap);
1080
1081 // Every thread evacuates some roots.
1082 gct->evac_gen_no = 0;
1083 markCapability(mark_root, gct, cap, true/*prune sparks*/);
1084 scavenge_capability_mut_lists(cap);
1085
1086 scavenge_until_all_done();
1087
1088 #if defined(THREADED_RTS)
1089 // Now that the whole heap is marked, we discard any sparks that
1090 // were found to be unreachable. The main GC thread is currently
1091 // marking heap reachable via weak pointers, so it is
1092 // non-deterministic whether a spark will be retained if it is
1093 // only reachable via weak pointers. To fix this problem would
1094 // require another GC barrier, which is too high a price.
1095 pruneSparkQueue(cap);
1096 #endif
1097
1098 // Wait until we're told to continue
1099 RELEASE_SPIN_LOCK(&gct->gc_spin);
1100 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1101 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1102 gct->thread_index);
1103 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1104 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1105
1106 SET_GCT(saved_gct);
1107 }
1108
1109 #endif
1110
1111 #if defined(THREADED_RTS)
1112
1113 void
1114 waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1115 {
1116 const uint32_t n_threads = n_capabilities;
1117 const uint32_t me = cap->no;
1118 uint32_t i, j;
1119 bool retry = true;
1120
1121 while(retry) {
1122 for (i=0; i < n_threads; i++) {
1123 if (i == me || idle_cap[i]) continue;
1124 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1125 prodCapability(capabilities[i], cap->running_task);
1126 }
1127 }
1128 for (j=0; j < 10; j++) {
1129 retry = false;
1130 for (i=0; i < n_threads; i++) {
1131 if (i == me || idle_cap[i]) continue;
1132 write_barrier();
1133 interruptCapability(capabilities[i]);
1134 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1135 retry = true;
1136 }
1137 }
1138 if (!retry) break;
1139 yieldThread();
1140 }
1141 }
1142 }
1143
1144 #endif // THREADED_RTS
1145
1146 static void
1147 start_gc_threads (void)
1148 {
1149 #if defined(THREADED_RTS)
1150 gc_running_threads = 0;
1151 #endif
1152 }
1153
1154 static void
1155 wakeup_gc_threads (uint32_t me USED_IF_THREADS,
1156 bool idle_cap[] USED_IF_THREADS)
1157 {
1158 #if defined(THREADED_RTS)
1159 uint32_t i;
1160
1161 if (n_gc_threads == 1) return;
1162
1163 for (i=0; i < n_gc_threads; i++) {
1164 if (i == me || idle_cap[i]) continue;
1165 inc_running();
1166 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1167 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY)
1168 barf("wakeup_gc_threads");
1169
1170 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1171 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1172 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1173 }
1174 #endif
1175 }
1176
1177 // After GC is complete, we must wait for all GC threads to enter the
1178 // standby state, otherwise they may still be executing inside
1179 // any_work(), and may even remain awake until the next GC starts.
1180 static void
1181 shutdown_gc_threads (uint32_t me USED_IF_THREADS,
1182 bool idle_cap[] USED_IF_THREADS)
1183 {
1184 #if defined(THREADED_RTS)
1185 uint32_t i;
1186
1187 if (n_gc_threads == 1) return;
1188
1189 for (i=0; i < n_gc_threads; i++) {
1190 if (i == me || idle_cap[i]) continue;
1191 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
1192 busy_wait_nop();
1193 write_barrier();
1194 }
1195 }
1196 #endif
1197 }
1198
1199 #if defined(THREADED_RTS)
1200 void
1201 releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
1202 {
1203 const uint32_t n_threads = n_capabilities;
1204 const uint32_t me = cap->no;
1205 uint32_t i;
1206 for (i=0; i < n_threads; i++) {
1207 if (i == me || idle_cap[i]) continue;
1208 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1209 barf("releaseGCThreads");
1210
1211 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1212 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1213 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1214 }
1215 }
1216 #endif
1217
1218 /* ----------------------------------------------------------------------------
1219 Initialise a generation that is to be collected
1220 ------------------------------------------------------------------------- */
1221
1222 static void
1223 prepare_collected_gen (generation *gen)
1224 {
1225 uint32_t i, g, n;
1226 gen_workspace *ws;
1227 bdescr *bd, *next;
1228
1229 // Throw away the current mutable list. Invariant: the mutable
1230 // list always has at least one block; this means we can avoid a
1231 // check for NULL in recordMutable().
1232 g = gen->no;
1233 if (g != 0) {
1234 for (i = 0; i < n_capabilities; i++) {
1235 freeChain(capabilities[i]->mut_lists[g]);
1236 capabilities[i]->mut_lists[g] =
1237 allocBlockOnNode(capNoToNumaNode(i));
1238 }
1239 }
1240
1241 gen = &generations[g];
1242 ASSERT(gen->no == g);
1243
1244 // we'll construct a new list of threads in this step
1245 // during GC, throw away the current list.
1246 gen->old_threads = gen->threads;
1247 gen->threads = END_TSO_QUEUE;
1248
1249 // deprecate the existing blocks
1250 gen->old_blocks = gen->blocks;
1251 gen->n_old_blocks = gen->n_blocks;
1252 gen->blocks = NULL;
1253 gen->n_blocks = 0;
1254 gen->n_words = 0;
1255 gen->live_estimate = 0;
1256
1257 // initialise the large object queues.
1258 ASSERT(gen->scavenged_large_objects == NULL);
1259 ASSERT(gen->n_scavenged_large_blocks == 0);
1260 ASSERT(gen->live_compact_objects == NULL);
1261 ASSERT(gen->n_live_compact_blocks == 0);
1262
1263 // grab all the partial blocks stashed in the gc_thread workspaces and
1264 // move them to the old_blocks list of this gen.
1265 for (n = 0; n < n_capabilities; n++) {
1266 ws = &gc_threads[n]->gens[gen->no];
1267
1268 for (bd = ws->part_list; bd != NULL; bd = next) {
1269 next = bd->link;
1270 bd->link = gen->old_blocks;
1271 gen->old_blocks = bd;
1272 gen->n_old_blocks += bd->blocks;
1273 }
1274 ws->part_list = NULL;
1275 ws->n_part_blocks = 0;
1276 ws->n_part_words = 0;
1277
1278 ASSERT(ws->scavd_list == NULL);
1279 ASSERT(ws->n_scavd_blocks == 0);
1280 ASSERT(ws->n_scavd_words == 0);
1281
1282 if (ws->todo_free != ws->todo_bd->start) {
1283 ws->todo_bd->free = ws->todo_free;
1284 ws->todo_bd->link = gen->old_blocks;
1285 gen->old_blocks = ws->todo_bd;
1286 gen->n_old_blocks += ws->todo_bd->blocks;
1287 alloc_todo_block(ws,0); // always has one block.
1288 }
1289 }
1290
1291 // mark the small objects as from-space
1292 for (bd = gen->old_blocks; bd; bd = bd->link) {
1293 bd->flags &= ~BF_EVACUATED;
1294 }
1295
1296 // mark the large objects as from-space
1297 for (bd = gen->large_objects; bd; bd = bd->link) {
1298 bd->flags &= ~BF_EVACUATED;
1299 }
1300
1301 // mark the compact objects as from-space
1302 for (bd = gen->compact_objects; bd; bd = bd->link) {
1303 bd->flags &= ~BF_EVACUATED;
1304 }
1305
1306 // for a compacted generation, we need to allocate the bitmap
1307 if (gen->mark) {
1308 StgWord bitmap_size; // in bytes
1309 bdescr *bitmap_bdescr;
1310 StgWord *bitmap;
1311
1312 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
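        // For example (assuming the usual 4096-byte blocks and 8-byte
        // words): one bit per word is 4096/8 = 512 bits, i.e. 64 bytes of
        // bitmap per old block, which is what the formula above computes.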
1313
1314 if (bitmap_size > 0) {
1315 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1316 / BLOCK_SIZE);
1317 gen->bitmap = bitmap_bdescr;
1318 bitmap = bitmap_bdescr->start;
1319
1320 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1321 bitmap_size, bitmap);
1322
1323 // don't forget to fill it with zeros!
1324 memset(bitmap, 0, bitmap_size);
1325
1326 // For each block in this step, point to its bitmap from the
1327 // block descriptor.
1328 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1329 bd->u.bitmap = bitmap;
1330 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1331
1332 // Also at this point we set the BF_MARKED flag
1333 // for this block. The invariant is that
1334 // BF_MARKED is always unset, except during GC
1335 // when it is set on those blocks which will be
1336 // compacted.
1337 if (!(bd->flags & BF_FRAGMENTED)) {
1338 bd->flags |= BF_MARKED;
1339 }
1340
1341 // BF_SWEPT should be marked only for blocks that are being
1342 // collected in sweep()
1343 bd->flags &= ~BF_SWEPT;
1344 }
1345 }
1346 }
1347 }
1348
1349
1350 /* ----------------------------------------------------------------------------
1351 Save the mutable lists in saved_mut_lists
1352 ------------------------------------------------------------------------- */
1353
1354 static void
1355 stash_mut_list (Capability *cap, uint32_t gen_no)
1356 {
1357 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1358 cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
1359 }
1360
1361 /* ----------------------------------------------------------------------------
1362 Initialise a generation that is *not* to be collected
1363 ------------------------------------------------------------------------- */
1364
1365 static void
1366 prepare_uncollected_gen (generation *gen)
1367 {
1368 uint32_t i;
1369
1370
1371 ASSERT(gen->no > 0);
1372
1373 // save the current mutable lists for this generation, and
1374 // allocate a fresh block for each one. We'll traverse these
1375 // mutable lists as roots early on in the GC.
1376 for (i = 0; i < n_capabilities; i++) {
1377 stash_mut_list(capabilities[i], gen->no);
1378 }
1379
1380 ASSERT(gen->scavenged_large_objects == NULL);
1381 ASSERT(gen->n_scavenged_large_blocks == 0);
1382 }
1383
1384 /* -----------------------------------------------------------------------------
1385 Collect the completed blocks from a GC thread and attach them to
1386 the generation.
1387 -------------------------------------------------------------------------- */
1388
1389 static void
1390 collect_gct_blocks (void)
1391 {
1392 uint32_t g;
1393 gen_workspace *ws;
1394 bdescr *bd, *prev;
1395
1396 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1397 ws = &gct->gens[g];
1398
1399 // there may still be a block attached to ws->todo_bd;
1400 // leave it there to use next time.
1401
1402 if (ws->scavd_list != NULL) {
1403 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1404
1405 ASSERT(gct->scan_bd == NULL);
1406 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1407
1408 prev = NULL;
1409 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1410 prev = bd;
1411 }
1412 if (prev != NULL) {
1413 prev->link = ws->gen->blocks;
1414 ws->gen->blocks = ws->scavd_list;
1415 }
1416 ws->gen->n_blocks += ws->n_scavd_blocks;
1417 ws->gen->n_words += ws->n_scavd_words;
1418
1419 ws->scavd_list = NULL;
1420 ws->n_scavd_blocks = 0;
1421 ws->n_scavd_words = 0;
1422
1423 RELEASE_SPIN_LOCK(&ws->gen->sync);
1424 }
1425 }
1426 }
1427
1428 /* -----------------------------------------------------------------------------
1429 During mutation, any blocks that are filled by allocatePinned() are
1430 stashed on the local pinned_object_blocks list, to avoid needing to
1431 take a global lock. Here we collect those blocks from the
1432 cap->pinned_object_blocks lists and put them on the
1433 main g0->large_object list.
1434
1435 Returns: the number of words allocated this way, for stats
1436 purposes.
1437 -------------------------------------------------------------------------- */
1438
1439 static void
1440 collect_pinned_object_blocks (void)
1441 {
1442 uint32_t n;
1443 bdescr *bd, *prev;
1444
1445 for (n = 0; n < n_capabilities; n++) {
1446 prev = NULL;
1447 for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1448 prev = bd;
1449 }
1450 if (prev != NULL) {
1451 prev->link = g0->large_objects;
1452 if (g0->large_objects != NULL) {
1453 g0->large_objects->u.back = prev;
1454 }
1455 g0->large_objects = capabilities[n]->pinned_object_blocks;
1456 capabilities[n]->pinned_object_blocks = 0;
1457 }
1458 }
1459 }
1460
1461 /* -----------------------------------------------------------------------------
1462 Initialise a gc_thread before GC
1463 -------------------------------------------------------------------------- */
1464
1465 static void
1466 init_gc_thread (gc_thread *t)
1467 {
1468 t->static_objects = END_OF_STATIC_OBJECT_LIST;
1469 t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST;
1470 t->scan_bd = NULL;
1471 t->mut_lists = t->cap->mut_lists;
1472 t->evac_gen_no = 0;
1473 t->failed_to_evac = false;
1474 t->eager_promotion = true;
1475 t->thunk_selector_depth = 0;
1476 t->copied = 0;
1477 t->scanned = 0;
1478 t->any_work = 0;
1479 t->no_work = 0;
1480 t->scav_find_work = 0;
1481 }
1482
1483 /* -----------------------------------------------------------------------------
1484 Function we pass to evacuate roots.
1485 -------------------------------------------------------------------------- */
1486
1487 static void
1488 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1489 {
1490 // we stole a register for gct, but this function is called from
1491 // *outside* the GC where the register variable is not in effect,
1492 // so we need to save and restore it here. NB. only call
1493 // mark_root() from the main GC thread, otherwise gct will be
1494 // incorrect.
1495 #if defined(THREADED_RTS)
1496 gc_thread *saved_gct;
1497 saved_gct = gct;
1498 #endif
1499 SET_GCT(user);
1500
1501 evacuate(root);
1502
1503 SET_GCT(saved_gct);
1504 }
1505
1506 /* ----------------------------------------------------------------------------
1507 Reset the sizes of the older generations when we do a major
1508 collection.
1509
1510 CURRENT STRATEGY: make all generations except zero the same size.
1511 We have to stay within the maximum heap size, and leave a certain
1512 percentage of the maximum heap size available to allocate into.
1513 ------------------------------------------------------------------------- */
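
/* A worked example of the sizing below, with illustrative numbers only:
 * suppose two generations (-G2), oldGenFactor F = 2, no compaction, a
 * maximum heap of 6000 blocks, min_alloc = 1000 blocks, 1500 blocks live
 * in the old generation, and minOldGenSize smaller than that.  Then
 * size = 2*1500 = 3000, but (size * (gens-1) * 2) + min_alloc = 7000,
 * which exceeds the 6000-block maximum, so size is reduced to
 * (6000 - 1000) / 2 = 2500 blocks; the loop at the end records 2500 as
 * max_blocks for each generation.
 */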
1514
1515 static void
1516 resize_generations (void)
1517 {
1518 uint32_t g;
1519
1520 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1521 W_ live, size, min_alloc, words;
1522 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1523 const W_ gens = RtsFlags.GcFlags.generations;
1524
1525 // live in the oldest generations
1526 if (oldest_gen->live_estimate != 0) {
1527 words = oldest_gen->live_estimate;
1528 } else {
1529 words = oldest_gen->n_words;
1530 }
1531 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1532 oldest_gen->n_large_blocks +
1533 oldest_gen->n_compact_blocks;
1534
1535 // default max size for all generations except zero
1536 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1537 RtsFlags.GcFlags.minOldGenSize);
1538
1539 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1540 if (max > 0) {
1541 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1542 } else {
1543 RtsFlags.GcFlags.heapSizeSuggestion = size;
1544 }
1545 }
1546
1547 // minimum size for generation zero
1548 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1549 RtsFlags.GcFlags.minAllocAreaSize
1550 * (W_)n_capabilities);
1551
1552 // Auto-enable compaction when the residency reaches a
1553 // certain percentage of the maximum heap size (default: 30%).
1554 if (RtsFlags.GcFlags.compact ||
1555 (max > 0 &&
1556 oldest_gen->n_blocks >
1557 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1558 oldest_gen->mark = 1;
1559 oldest_gen->compact = 1;
1560 // debugBelch("compaction: on\n", live);
1561 } else {
1562 oldest_gen->mark = 0;
1563 oldest_gen->compact = 0;
1564 // debugBelch("compaction: off\n", live);
1565 }
1566
1567 if (RtsFlags.GcFlags.sweep) {
1568 oldest_gen->mark = 1;
1569 }
1570
1571 // if we're going to go over the maximum heap size, reduce the
1572 // size of the generations accordingly. The calculation is
1573 // different if compaction is turned on, because we don't need
1574 // to double the space required to collect the old generation.
1575 if (max != 0) {
1576
1577 // this test is necessary to ensure that the calculations
1578 // below don't have any negative results - we're working
1579 // with unsigned values here.
1580 if (max < min_alloc) {
1581 heapOverflow();
1582 }
1583
1584 if (oldest_gen->compact) {
1585 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1586 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1587 }
1588 } else {
1589 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1590 size = (max - min_alloc) / ((gens - 1) * 2);
1591 }
1592 }
1593
1594 if (size < live) {
1595 heapOverflow();
1596 }
1597 }
1598
1599 #if 0
1600 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1601 min_alloc, size, max);
1602 #endif
1603
1604 for (g = 0; g < gens; g++) {
1605 generations[g].max_blocks = size;
1606 }
1607 }
1608 }
1609
1610 /* -----------------------------------------------------------------------------
1611 Calculate the new size of the nursery, and resize it.
1612 -------------------------------------------------------------------------- */
1613
1614 static void
1615 resize_nursery (void)
1616 {
1617 const StgWord min_nursery =
1618 RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;
1619
1620 if (RtsFlags.GcFlags.generations == 1)
1621 { // Two-space collector:
1622 W_ blocks;
1623
1624 /* set up a new nursery. Allocate a nursery size based on a
1625 * function of the amount of live data (by default a factor of 2)
1626 * Use the blocks from the old nursery if possible, freeing up any
1627 * left over blocks.
1628 *
1629 * If we get near the maximum heap size, then adjust our nursery
1630 * size accordingly. If the nursery is the same size as the live
1631 * data (L), then we need 3L bytes. We can reduce the size of the
1632 * nursery to bring the required memory down near 2L bytes.
1633 *
1634 * A normal 2-space collector would need 4L bytes to give the same
1635 * performance we get from 3L bytes, reducing to the same
1636 * performance at 2L bytes.
1637 */
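        /* Worked example with illustrative numbers, assuming the default
         * oldGenFactor of 2: if maxHeapSize is 10000 blocks and the
         * nursery currently holds 3000 blocks of live data, then
         * 3000 * 2 * 2 = 12000 > 10000, so we take the "near the maximum"
         * branch below: adjusted_blocks = 10000 - 2*3000 = 4000,
         * pc_free = 40%, and the nursery is resized to 4000 blocks
         * (heapOverflow() would only be called if pc_free fell below
         * pcFreeHeap).
         */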
1638 blocks = generations[0].n_blocks;
1639
1640 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1641 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1642 RtsFlags.GcFlags.maxHeapSize )
1643 {
1644 long adjusted_blocks; // signed on purpose
1645 int pc_free;
1646
1647 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1648
1649 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1650 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1651
1652 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1653 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1654 {
1655 heapOverflow();
1656 }
1657 blocks = adjusted_blocks;
1658 }
1659 else
1660 {
1661 blocks *= RtsFlags.GcFlags.oldGenFactor;
1662 if (blocks < min_nursery)
1663 {
1664 blocks = min_nursery;
1665 }
1666 }
1667 resizeNurseries(blocks);
1668 }
1669 else // Generational collector
1670 {
1671 /*
1672 * If the user has given us a suggested heap size, adjust our
1673 * allocation area to make best use of the memory available.
1674 */
1675 if (RtsFlags.GcFlags.heapSizeSuggestion)
1676 {
1677 long blocks;
1678 StgWord needed;
1679
1680 calcNeeded(false, &needed); // approx blocks needed at next GC
1681
1682 /* Guess how much will be live in generation 0 step 0 next time.
1683 * A good approximation is obtained by finding the
1684 * percentage of g0 that was live at the last minor GC.
1685 *
1686 * We have an accurate figure for the amount of copied data in
1687 * 'copied', but we must convert this to a number of blocks, with
1688 * a small adjustment for estimated slop at the end of a block
1689 * (- 10 words).
1690 */
1691 if (N == 0)
1692 {
1693 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1694 / countNurseryBlocks();
1695 }
1696
1697 /* Estimate a size for the allocation area based on the
1698 * information available. We might end up going slightly under
1699 * or over the suggested heap size, but we should be pretty
1700 * close on average.
1701 *
1702 * Formula: suggested - needed
1703 * ----------------------------
1704 * 1 + g0_pcnt_kept/100
1705 *
1706 * where 'needed' is the amount of memory needed at the next
1707 * collection for collecting all gens except g0.
1708 */
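            /* For example (illustrative numbers only): with a suggested
             * heap size of 100000 blocks, needed = 20000 blocks and
             * g0_pcnt_kept = 25, the formula gives
             * (100000 - 20000) * 100 / 125 = 64000 blocks of nursery.
             */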
1709 blocks =
1710 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1711 (100 + (long)g0_pcnt_kept);
1712
1713 if (blocks < (long)min_nursery) {
1714 blocks = min_nursery;
1715 }
1716
1717 resizeNurseries((W_)blocks);
1718 }
1719 else
1720 {
1721 // we might have added extra blocks to the nursery, so
1722 // resize back to the original size again.
1723 resizeNurseriesFixed();
1724 }
1725 }
1726 }
1727
1728 /* -----------------------------------------------------------------------------
1729 Sanity code for CAF garbage collection.
1730
1731 With DEBUG turned on, we manage a CAF list in addition to the SRT
1732 mechanism. After GC, we run down the CAF list and blackhole any
1733 CAFs which have been garbage collected. This means we get an error
1734 whenever the program tries to enter a garbage collected CAF.
1735
1736 Any garbage collected CAFs are taken off the CAF list at the same
1737 time.
1738 -------------------------------------------------------------------------- */
1739
1740 #if defined(DEBUG)
1741
1742 static void gcCAFs(void)
1743 {
1744 StgIndStatic *p, *prev;
1745
1746 const StgInfoTable *info;
1747 uint32_t i;
1748
1749 i = 0;
1750 p = debug_caf_list;
1751 prev = NULL;
1752
1753 for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST;
1754 p = (StgIndStatic*)p->saved_info) {
1755
1756 info = get_itbl((StgClosure*)p);
1757 ASSERT(info->type == IND_STATIC);
1758
1759 if (p->static_link == NULL) {
1760 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
1761 SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
1762 if (prev == NULL) {
1763 debug_caf_list = (StgIndStatic*)p->saved_info;
1764 } else {
1765 prev->saved_info = p->saved_info;
1766 }
1767 } else {
1768 prev = p;
1769 i++;
1770 }
1771 }
1772
1773 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1774 }
1775 #endif