1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Storage.h"
32 #include "RtsUtils.h"
33 #include "Apply.h"
34 #include "Updates.h"
35 #include "Stats.h"
36 #include "Schedule.h"
37 #include "Sanity.h"
38 #include "BlockAlloc.h"
39 #include "ProfHeap.h"
40 #include "Weak.h"
41 #include "Prelude.h"
42 #include "RtsSignals.h"
43 #include "STM.h"
44 #include "Trace.h"
45 #include "RetainerProfile.h"
46 #include "LdvProfile.h"
47 #include "RaiseAsync.h"
48 #include "Stable.h"
49 #include "CheckUnload.h"
50
51 #include <string.h> // for memset()
52 #include <unistd.h>
53
54 /* -----------------------------------------------------------------------------
55 Global variables
56 -------------------------------------------------------------------------- */
57
58 /* STATIC OBJECT LIST.
59 *
60 * During GC:
61 * We maintain a linked list of static objects that are still live.
62 * The requirements for this list are:
63 *
64 * - we need to scan the list while adding to it, in order to
65 * scavenge all the static objects (in the same way that
66 * breadth-first scavenging works for dynamic objects).
67 *
68 * - we need to be able to tell whether an object is already on
69 * the list, to break loops.
70 *
71 * Each static object has a "static link field", which we use for
72 * linking objects on to the list. We use a stack-type list, consing
73 * objects on the front as they are added (this means that the
74 * scavenge phase is depth-first, not breadth-first, but that
75 * shouldn't matter).
76 *
77 * A separate list is kept for objects that have been scavenged
78 * already - this is so that we can zero all the marks afterwards.
79 *
80 * An object is on the list if its static link field is non-zero; this
81 * means that we have to mark the end of the list with '1', not NULL.
82 *
83 * Extra notes for generational GC:
84 *
85 * Each generation has a static object list associated with it. When
86 * collecting generations up to N, we treat the static object lists
87 * from generations > N as roots.
88 *
89 * We build up a static object list while collecting generations 0..N,
90 * which is then appended to the static object list of generation N+1.
91 */
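/* A minimal sketch (not part of this file) of the list discipline described
 * above, ignoring the STATIC_FLAG_A/B details used by the real code in
 * Evac.c/Scav.c.  The names below (illustrative_static_list, push_static,
 * the explicit link_field argument) are invented for illustration only.
 */
#if 0
#define END_OF_LIST ((StgClosure *)1)   // non-NULL end marker, as noted above

static StgClosure *illustrative_static_list = END_OF_LIST;

static void push_static (StgClosure *p, StgClosure **link_field)
{
    // An object is on the list iff its static link field is non-zero,
    // so this test also breaks cycles among static objects.
    if (*link_field != NULL) return;

    // cons onto the front: the list is scavenged LIFO (depth-first).
    *link_field = illustrative_static_list;
    illustrative_static_list = p;
}
#endif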
92
93 /* N is the oldest generation being collected, where the generations
94 * are numbered starting at 0. A major GC (indicated by the major_gc
95 * flag) is when we're collecting all generations. We only attempt to
96 * deal with static objects and GC CAFs when doing a major GC.
97 */
98 nat N;
99 rtsBool major_gc;
100
101 /* Data used for allocation area sizing.
102 */
103 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
104
105 /* Mut-list stats */
106 #ifdef DEBUG
107 nat mutlist_MUTVARS,
108 mutlist_MUTARRS,
109 mutlist_MVARS,
110 mutlist_TVAR,
111 mutlist_TVAR_WATCH_QUEUE,
112 mutlist_TREC_CHUNK,
113 mutlist_TREC_HEADER,
114 mutlist_ATOMIC_INVARIANT,
115 mutlist_INVARIANT_CHECK_QUEUE,
116 mutlist_OTHERS;
117 #endif
118
119 /* Thread-local data for each GC thread
120 */
121 gc_thread **gc_threads = NULL;
122
123 #if !defined(THREADED_RTS)
124 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
125 #endif
126
127 // Number of threads running in *this* GC.  Affects how many
128 // gen_workspace todo lists we have to look in to find work.
129 nat n_gc_threads;
130
131 // For stats:
132 static long copied; // *words* copied & scavenged during this GC
133
134 rtsBool work_stealing;
135
136 nat static_flag = STATIC_FLAG_B;
137 nat prev_static_flag = STATIC_FLAG_A;
138
139 DECLARE_GCT
140
141 /* -----------------------------------------------------------------------------
142 Static function declarations
143 -------------------------------------------------------------------------- */
144
145 static void mark_root (void *user, StgClosure **root);
146 static void prepare_collected_gen (generation *gen);
147 static void prepare_uncollected_gen (generation *gen);
148 static void init_gc_thread (gc_thread *t);
149 static void resize_generations (void);
150 static void resize_nursery (void);
151 static void start_gc_threads (void);
152 static void scavenge_until_all_done (void);
153 static StgWord inc_running (void);
154 static StgWord dec_running (void);
155 static void wakeup_gc_threads (nat me);
156 static void shutdown_gc_threads (nat me);
157 static void collect_gct_blocks (void);
158 static void collect_pinned_object_blocks (void);
159
160 #if defined(DEBUG)
161 static void gcCAFs (void);
162 #endif
163
164 /* -----------------------------------------------------------------------------
165 The mark stack.
166 -------------------------------------------------------------------------- */
167
168 bdescr *mark_stack_top_bd; // topmost block in the mark stack
169 bdescr *mark_stack_bd; // current block in the mark stack
170 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
171
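/* Illustrative only: a sketch of how a push onto this block-structured mark
 * stack could work, given the three variables above.  The real push/pop
 * operations live in MarkStack.h and may differ in detail (in particular in
 * how overflow blocks are obtained and linked).
 */
#if 0
static void sketch_push_mark_stack (StgPtr p)
{
    if (mark_sp == mark_stack_bd->start + BLOCK_SIZE_W) {
        // current block is full: advance to (allocating if necessary)
        // the next block in the chain and continue there
        if (mark_stack_bd->link == NULL) {
            bdescr *bd = allocBlock();
            bd->link = NULL;
            bd->u.back = mark_stack_bd;
            mark_stack_bd->link = bd;
        }
        mark_stack_bd = mark_stack_bd->link;
        mark_sp = mark_stack_bd->start;
    }
    *mark_sp++ = (StgWord)p;
}
#endif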
172 /* -----------------------------------------------------------------------------
173 GarbageCollect: the main entry point to the garbage collector.
174
175    The collect_gen parameter is obtained by calling calcNeeded().
176
177 Locks held: all capabilities are held throughout GarbageCollect().
178 -------------------------------------------------------------------------- */
179
180 void
181 GarbageCollect (nat collect_gen,
182 rtsBool do_heap_census,
183 nat gc_type USED_IF_THREADS,
184 Capability *cap)
185 {
186 bdescr *bd;
187 generation *gen;
188 StgWord live_blocks, live_words, par_max_copied, par_tot_copied;
189 #if defined(THREADED_RTS)
190 gc_thread *saved_gct;
191 #endif
192 nat g, n;
193
194 // necessary if we stole a callee-saves register for gct:
195 #if defined(THREADED_RTS)
196 saved_gct = gct;
197 #endif
198
199 #ifdef PROFILING
200 CostCentreStack *save_CCS[n_capabilities];
201 #endif
202
203 ACQUIRE_SM_LOCK;
204
205 #if defined(RTS_USER_SIGNALS)
206 if (RtsFlags.MiscFlags.install_signal_handlers) {
207 // block signals
208 blockUserSignals();
209 }
210 #endif
211
212 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
213 // otherwise adjust the padding in gen_workspace.
214
215 // this is the main thread
216 SET_GCT(gc_threads[cap->no]);
217
218 // tell the stats department that we've started a GC
219 stat_startGC(cap, gct);
220
221 // lock the StablePtr table
222 stableLock();
223
224 #ifdef DEBUG
225 mutlist_MUTVARS = 0;
226 mutlist_MUTARRS = 0;
227 mutlist_MVARS = 0;
228 mutlist_TVAR = 0;
229 mutlist_TVAR_WATCH_QUEUE = 0;
230 mutlist_TREC_CHUNK = 0;
231 mutlist_TREC_HEADER = 0;
232 mutlist_ATOMIC_INVARIANT = 0;
233 mutlist_INVARIANT_CHECK_QUEUE = 0;
234 mutlist_OTHERS = 0;
235 #endif
236
237 // attribute any costs to CCS_GC
238 #ifdef PROFILING
239 for (n = 0; n < n_capabilities; n++) {
240 save_CCS[n] = capabilities[n]->r.rCCCS;
241 capabilities[n]->r.rCCCS = CCS_GC;
242 }
243 #endif
244
245 /* Figure out which generation to collect
246 */
247 N = collect_gen;
248 major_gc = (N == RtsFlags.GcFlags.generations-1);
249
250 if (major_gc) {
251 prev_static_flag = static_flag;
252 static_flag =
253 static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
254 }
255
256 #if defined(THREADED_RTS)
257 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
258 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
259 // It's not always a good idea to do load balancing in parallel
260 // GC. In particular, for a parallel program we don't want to
261 // lose locality by moving cached data into another CPU's cache
262 // (this effect can be quite significant).
263 //
264    // We could have a more complex way to determine whether to do
265 // work stealing or not, e.g. it might be a good idea to do it
266 // if the heap is big. For now, we just turn it on or off with
267 // a flag.
268 #endif
269
270 /* Start threads, so they can be spinning up while we finish initialisation.
271 */
272 start_gc_threads();
273
274 #if defined(THREADED_RTS)
275 /* How many threads will be participating in this GC?
276 * We don't try to parallelise minor GCs (unless the user asks for
277 * it with +RTS -gn0), or mark/compact/sweep GC.
278 */
279 if (gc_type == SYNC_GC_PAR) {
280 n_gc_threads = n_capabilities;
281 } else {
282 n_gc_threads = 1;
283 }
284 #else
285 n_gc_threads = 1;
286 #endif
287
288 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
289 N, n_gc_threads);
290
291 #ifdef DEBUG
292 // check for memory leaks if DEBUG is on
293 memInventory(DEBUG_gc);
294 #endif
295
296 // do this *before* we start scavenging
297 collectFreshWeakPtrs();
298
299 // check sanity *before* GC
300 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
301
302 // gather blocks allocated using allocatePinned() from each capability
303 // and put them on the g0->large_object list.
304 collect_pinned_object_blocks();
305
306 // Initialise all the generations/steps that we're collecting.
307 for (g = 0; g <= N; g++) {
308 prepare_collected_gen(&generations[g]);
309 }
310 // Initialise all the generations/steps that we're *not* collecting.
311 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
312 prepare_uncollected_gen(&generations[g]);
313 }
314
315 // Prepare this gc_thread
316 init_gc_thread(gct);
317
318 /* Allocate a mark stack if we're doing a major collection.
319 */
320 if (major_gc && oldest_gen->mark) {
321 mark_stack_bd = allocBlock();
322 mark_stack_top_bd = mark_stack_bd;
323 mark_stack_bd->link = NULL;
324 mark_stack_bd->u.back = NULL;
325 mark_sp = mark_stack_bd->start;
326 } else {
327 mark_stack_bd = NULL;
328 mark_stack_top_bd = NULL;
329 mark_sp = NULL;
330 }
331
332 /* -----------------------------------------------------------------------
333 * follow all the roots that we know about:
334 */
335
336 // the main thread is running: this prevents any other threads from
337 // exiting prematurely, so we can start them now.
338 // NB. do this after the mutable lists have been saved above, otherwise
339 // the other GC threads will be writing into the old mutable lists.
340 inc_running();
341 wakeup_gc_threads(gct->thread_index);
342
343 traceEventGcWork(gct->cap);
344
345 // scavenge the capability-private mutable lists. This isn't part
346 // of markSomeCapabilities() because markSomeCapabilities() can only
347 // call back into the GC via mark_root() (due to the gct register
348 // variable).
349 if (n_gc_threads == 1) {
350 for (n = 0; n < n_capabilities; n++) {
351 #if defined(THREADED_RTS)
352 scavenge_capability_mut_Lists1(capabilities[n]);
353 #else
354 scavenge_capability_mut_lists(capabilities[n]);
355 #endif
356 }
357 } else {
358 scavenge_capability_mut_lists(gct->cap);
359 for (n = 0; n < n_capabilities; n++) {
360 if (gc_threads[n]->idle) {
361 markCapability(mark_root, gct, capabilities[n],
362 rtsTrue/*don't mark sparks*/);
363 scavenge_capability_mut_lists(capabilities[n]);
364 }
365 }
366 }
367
368 // follow roots from the CAF list (used by GHCi)
369 gct->evac_gen_no = 0;
370 markCAFs(mark_root, gct);
371
372 // follow all the roots that the application knows about.
373 gct->evac_gen_no = 0;
374 if (n_gc_threads == 1) {
375 for (n = 0; n < n_capabilities; n++) {
376 markCapability(mark_root, gct, capabilities[n],
377 rtsTrue/*don't mark sparks*/);
378 }
379 } else {
380 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
381 }
382
383 markScheduler(mark_root, gct);
384
385 #if defined(RTS_USER_SIGNALS)
386 // mark the signal handlers (signals should be already blocked)
387 markSignalHandlers(mark_root, gct);
388 #endif
389
390 // Mark the weak pointer list, and prepare to detect dead weak pointers.
391 markWeakPtrList();
392 initWeakForGC();
393
394 // Mark the stable pointer table.
395 markStableTables(mark_root, gct);
396
397 /* -------------------------------------------------------------------------
398 * Repeatedly scavenge all the areas we know about until there's no
399 * more scavenging to be done.
400 */
401 for (;;)
402 {
403 scavenge_until_all_done();
404 // The other threads are now stopped. We might recurse back to
405 // here, but from now on this is the only thread.
406
407 // must be last... invariant is that everything is fully
408 // scavenged at this point.
409 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
410 inc_running();
411 continue;
412 }
413
414 // If we get to here, there's really nothing left to do.
415 break;
416 }
417
418 shutdown_gc_threads(gct->thread_index);
419
420 // Now see which stable names are still alive.
421 gcStableTables();
422
423 #ifdef THREADED_RTS
424 if (n_gc_threads == 1) {
425 for (n = 0; n < n_capabilities; n++) {
426 pruneSparkQueue(capabilities[n]);
427 }
428 } else {
429 for (n = 0; n < n_capabilities; n++) {
430 if (n == cap->no || gc_threads[n]->idle) {
431 pruneSparkQueue(capabilities[n]);
432 }
433 }
434 }
435 #endif
436
437 #ifdef PROFILING
438 // We call processHeapClosureForDead() on every closure destroyed during
439 // the current garbage collection, so we invoke LdvCensusForDead().
440 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
441 || RtsFlags.ProfFlags.bioSelector != NULL) {
442 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
443 LdvCensusForDead(N);
444 ACQUIRE_SM_LOCK;
445 }
446 #endif
447
448 // NO MORE EVACUATION AFTER THIS POINT!
449
450 // Finally: compact or sweep the oldest generation.
451 if (major_gc && oldest_gen->mark) {
452 if (oldest_gen->compact)
453 compact(gct->scavenged_static_objects);
454 else
455 sweep(oldest_gen);
456 }
457
458 copied = 0;
459 par_max_copied = 0;
460 par_tot_copied = 0;
461 {
462 nat i;
463 for (i=0; i < n_gc_threads; i++) {
464 if (n_gc_threads > 1) {
465 debugTrace(DEBUG_gc,"thread %d:", i);
466 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
467 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
468 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
469 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
470 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
471 }
472 copied += gc_threads[i]->copied;
473 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
474 }
475 par_tot_copied = copied;
476 if (n_gc_threads == 1) {
477 par_max_copied = 0;
478 par_tot_copied = 0;
479 }
480 }
481
482 // Run through all the generations/steps and tidy up.
483 // We're going to:
484 // - count the amount of "live" data (live_words, live_blocks)
485 // - count the amount of "copied" data in this GC (copied)
486 // - free from-space
487 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
488 //
489 live_words = 0;
490 live_blocks = 0;
491
492 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
493
494 if (g == N) {
495 generations[g].collections++; // for stats
496 if (n_gc_threads > 1) generations[g].par_collections++;
497 }
498
499        // Count the mutable list as words "copied" for the purposes of
500 // stats. Every mutable list is copied during every GC.
501 if (g > 0) {
502 W_ mut_list_size = 0;
503 for (n = 0; n < n_capabilities; n++) {
504 mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
505 }
506 copied += mut_list_size;
507
508 debugTrace(DEBUG_gc,
509 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
510 (unsigned long)(mut_list_size * sizeof(W_)),
511 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
512 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
513 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
514 mutlist_ATOMIC_INVARIANT,
515 mutlist_INVARIANT_CHECK_QUEUE,
516 mutlist_OTHERS);
517 }
518
519 bdescr *next, *prev;
520 gen = &generations[g];
521
522 // for generations we collected...
523 if (g <= N) {
524
525 /* free old memory and shift to-space into from-space for all
526 * the collected steps (except the allocation area). These
527         * freed blocks will probably be quickly recycled.
528 */
529 if (gen->mark)
530 {
531 // tack the new blocks on the end of the existing blocks
532 if (gen->old_blocks != NULL) {
533
534 prev = NULL;
535 for (bd = gen->old_blocks; bd != NULL; bd = next) {
536
537 next = bd->link;
538
539 if (!(bd->flags & BF_MARKED))
540 {
541 if (prev == NULL) {
542 gen->old_blocks = next;
543 } else {
544 prev->link = next;
545 }
546 freeGroup(bd);
547 gen->n_old_blocks--;
548 }
549 else
550 {
551 gen->n_words += bd->free - bd->start;
552
553 // NB. this step might not be compacted next
554 // time, so reset the BF_MARKED flags.
555                        // They are set before GC if we're going to
556                        // compact (see prepare_collected_gen() below).
557 bd->flags &= ~BF_MARKED;
558
559 // between GCs, all blocks in the heap except
560 // for the nursery have the BF_EVACUATED flag set.
561 bd->flags |= BF_EVACUATED;
562
563 prev = bd;
564 }
565 }
566
567 if (prev != NULL) {
568 prev->link = gen->blocks;
569 gen->blocks = gen->old_blocks;
570 }
571 }
572 // add the new blocks to the block tally
573 gen->n_blocks += gen->n_old_blocks;
574 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
575 ASSERT(countOccupied(gen->blocks) == gen->n_words);
576 }
577        else // not compacted
578 {
579 freeChain(gen->old_blocks);
580 }
581
582 gen->old_blocks = NULL;
583 gen->n_old_blocks = 0;
584
585 /* LARGE OBJECTS. The current live large objects are chained on
586 * scavenged_large, having been moved during garbage
587 * collection from large_objects. Any objects left on the
588 * large_objects list are therefore dead, so we free them here.
589 */
590 freeChain(gen->large_objects);
591 gen->large_objects = gen->scavenged_large_objects;
592 gen->n_large_blocks = gen->n_scavenged_large_blocks;
593 gen->n_large_words = countOccupied(gen->large_objects);
594 gen->n_new_large_words = 0;
595 }
596 else // for generations > N
597 {
598 /* For older generations, we need to append the
599 * scavenged_large_object list (i.e. large objects that have been
600 * promoted during this GC) to the large_object list for that step.
601 */
602 for (bd = gen->scavenged_large_objects; bd; bd = next) {
603 next = bd->link;
604 dbl_link_onto(bd, &gen->large_objects);
605 gen->n_large_words += bd->free - bd->start;
606 }
607
608 // add the new blocks we promoted during this GC
609 gen->n_large_blocks += gen->n_scavenged_large_blocks;
610 }
611
612 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
613 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
614
615 gen->scavenged_large_objects = NULL;
616 gen->n_scavenged_large_blocks = 0;
617
618 // Count "live" data
619 live_words += genLiveWords(gen);
620 live_blocks += genLiveBlocks(gen);
621
622 // add in the partial blocks in the gen_workspaces, but ignore gen 0
623 // if this is a local GC (we can't count another capability's part_list)
624 {
625 nat i;
626 for (i = 0; i < n_capabilities; i++) {
627 live_words += gcThreadLiveWords(i, gen->no);
628 live_blocks += gcThreadLiveBlocks(i, gen->no);
629 }
630 }
631 } // for all generations
632
633 // update the max size of older generations after a major GC
634 resize_generations();
635
636 // Free the mark stack.
637 if (mark_stack_top_bd != NULL) {
638 debugTrace(DEBUG_gc, "mark stack: %d blocks",
639 countBlocks(mark_stack_top_bd));
640 freeChain(mark_stack_top_bd);
641 }
642
643 // Free any bitmaps.
644 for (g = 0; g <= N; g++) {
645 gen = &generations[g];
646 if (gen->bitmap != NULL) {
647 freeGroup(gen->bitmap);
648 gen->bitmap = NULL;
649 }
650 }
651
652 resize_nursery();
653
654 resetNurseries();
655
656 // mark the garbage collected CAFs as dead
657 #if defined(DEBUG)
658 if (major_gc) { gcCAFs(); }
659 #endif
660
661 // Update the stable pointer hash table.
662 updateStableTables(major_gc);
663
664 // unlock the StablePtr table. Must be before scheduleFinalizers(),
665 // because a finalizer may call hs_free_fun_ptr() or
666 // hs_free_stable_ptr(), both of which access the StablePtr table.
667 stableUnlock();
668
669 // Must be after stableUnlock(), because it might free stable ptrs.
670 if (major_gc) {
671 checkUnload (gct->scavenged_static_objects);
672 }
673
674 #ifdef PROFILING
675 // resetStaticObjectForRetainerProfiling() must be called before
676 // zeroing below.
677
678 // ToDo: fix the gct->scavenged_static_objects below
679 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
680 #endif
681
682 // Start any pending finalizers. Must be after
683 // updateStableTables() and stableUnlock() (see #4221).
684 RELEASE_SM_LOCK;
685 scheduleFinalizers(cap, dead_weak_ptr_list);
686 ACQUIRE_SM_LOCK;
687
688 // check sanity after GC
689 // before resurrectThreads(), because that might overwrite some
690 // closures, which will cause problems with THREADED where we don't
691 // fill slop.
692 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
693
694 // If a heap census is due, we need to do it before
695 // resurrectThreads(), for the same reason as checkSanity above:
696 // resurrectThreads() will overwrite some closures and leave slop
697 // behind.
698 if (do_heap_census) {
699 debugTrace(DEBUG_sched, "performing heap census");
700 RELEASE_SM_LOCK;
701 heapCensus(gct->gc_start_cpu);
702 ACQUIRE_SM_LOCK;
703 }
704
705 // send exceptions to any threads which were about to die
706 RELEASE_SM_LOCK;
707 resurrectThreads(resurrected_threads);
708 ACQUIRE_SM_LOCK;
709
710 if (major_gc) {
711 W_ need, got;
712 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
713 got = mblocks_allocated;
714 /* If the amount of data remains constant, next major GC we'll
715 require (F+1)*need. We leave (F+2)*need in order to reduce
716 repeated deallocation and reallocation. */
717 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
718 if (got > need) {
719 returnMemoryToOS(got - need);
720 }
721 }
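    // Worked example for the calculation above (figures illustrative only):
    // with the default oldGenFactor F = 2, if the live heap needs 100
    // mblocks we keep (F+2)*100 = 400 mblocks; if 600 mblocks are currently
    // allocated, the remaining 200 are returned to the OS.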
722
723 // extra GC trace info
724 IF_DEBUG(gc, statDescribeGens());
725
726 #ifdef DEBUG
727 // symbol-table based profiling
728 /* heapCensus(to_blocks); */ /* ToDo */
729 #endif
730
731 // restore enclosing cost centre
732 #ifdef PROFILING
733 for (n = 0; n < n_capabilities; n++) {
734 capabilities[n]->r.rCCCS = save_CCS[n];
735 }
736 #endif
737
738 #ifdef DEBUG
739 // check for memory leaks if DEBUG is on
740 memInventory(DEBUG_gc);
741 #endif
742
743 // ok, GC over: tell the stats department what happened.
744 stat_endGC(cap, gct, live_words, copied,
745 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
746 N, n_gc_threads, par_max_copied, par_tot_copied);
747
748 #if defined(RTS_USER_SIGNALS)
749 if (RtsFlags.MiscFlags.install_signal_handlers) {
750 // unblock signals again
751 unblockUserSignals();
752 }
753 #endif
754
755 RELEASE_SM_LOCK;
756
757 SET_GCT(saved_gct);
758 }
759
760 /* -----------------------------------------------------------------------------
761 Initialise the gc_thread structures.
762 -------------------------------------------------------------------------- */
763
764 #define GC_THREAD_INACTIVE 0
765 #define GC_THREAD_STANDING_BY 1
766 #define GC_THREAD_RUNNING 2
767 #define GC_THREAD_WAITING_TO_CONTINUE 3
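// How a worker moves between these states (see gcWorkerThread(),
// wakeup_gc_threads() and releaseGCThreads() below):
//
//   GC_THREAD_INACTIVE            -- between GCs; the thread is not inside
//                                    gcWorkerThread()
//   GC_THREAD_STANDING_BY         -- the worker has entered gcWorkerThread()
//                                    and is spinning on its gc_spin lock
//   GC_THREAD_RUNNING             -- wakeup_gc_threads() released gc_spin;
//                                    the worker is scavenging
//   GC_THREAD_WAITING_TO_CONTINUE -- the worker has finished and spins on
//                                    mut_spin until releaseGCThreads()
//
// The gc_spin/mut_spin pair acts as a handshake: the waiting side holds one
// lock and blocks acquiring the other until its peer releases it.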
768
769 static void
770 new_gc_thread (nat n, gc_thread *t)
771 {
772 nat g;
773 gen_workspace *ws;
774
775 t->cap = capabilities[n];
776
777 #ifdef THREADED_RTS
778 t->id = 0;
779 initSpinLock(&t->gc_spin);
780 initSpinLock(&t->mut_spin);
781 ACQUIRE_SPIN_LOCK(&t->gc_spin);
782 ACQUIRE_SPIN_LOCK(&t->mut_spin);
783     t->wakeup = GC_THREAD_INACTIVE;  // starts INACTIVE, so we can wait for the
784                                      // thread to reach STANDING_BY, see wakeup_gc_threads
785 #endif
786
787 t->thread_index = n;
788 t->idle = rtsFalse;
789 t->free_blocks = NULL;
790 t->gc_count = 0;
791
792 init_gc_thread(t);
793
794 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
795 {
796 ws = &t->gens[g];
797 ws->gen = &generations[g];
798 ASSERT(g == ws->gen->no);
799 ws->my_gct = t;
800
801 // We want to call
802 // alloc_todo_block(ws,0);
803 // but can't, because it uses gct which isn't set up at this point.
804 // Hence, allocate a block for todo_bd manually:
805 {
806 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
807 initBdescr(bd, ws->gen, ws->gen->to);
808 bd->flags = BF_EVACUATED;
809 bd->u.scan = bd->free = bd->start;
810
811 ws->todo_bd = bd;
812 ws->todo_free = bd->free;
813 ws->todo_lim = bd->start + BLOCK_SIZE_W;
814 }
815
816 ws->todo_q = newWSDeque(128);
817 ws->todo_overflow = NULL;
818 ws->n_todo_overflow = 0;
819 ws->todo_large_objects = NULL;
820
821 ws->part_list = NULL;
822 ws->n_part_blocks = 0;
823 ws->n_part_words = 0;
824
825 ws->scavd_list = NULL;
826 ws->n_scavd_blocks = 0;
827 ws->n_scavd_words = 0;
828 }
829 }
830
831
832 void
833 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
834 {
835 #if defined(THREADED_RTS)
836 nat i;
837
838 if (from > 0) {
839 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
840 "initGcThreads");
841 } else {
842 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
843 "initGcThreads");
844 }
845
846 for (i = from; i < to; i++) {
847 gc_threads[i] =
848 stgMallocBytes(sizeof(gc_thread) +
849 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
850 "alloc_gc_threads");
851
852 new_gc_thread(i, gc_threads[i]);
853 }
854 #else
855 ASSERT(from == 0 && to == 1);
856 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
857 gc_threads[0] = gct;
858 new_gc_thread(0,gc_threads[0]);
859 #endif
860 }
861
862 void
863 freeGcThreads (void)
864 {
865 nat g;
866 if (gc_threads != NULL) {
867 #if defined(THREADED_RTS)
868 nat i;
869 for (i = 0; i < n_capabilities; i++) {
870 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
871 {
872 freeWSDeque(gc_threads[i]->gens[g].todo_q);
873 }
874 stgFree (gc_threads[i]);
875 }
876 stgFree (gc_threads);
877 #else
878 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
879 {
880 freeWSDeque(gc_threads[0]->gens[g].todo_q);
881 }
882 stgFree (gc_threads);
883 #endif
884 gc_threads = NULL;
885 }
886 }
887
888 /* ----------------------------------------------------------------------------
889 Start GC threads
890 ------------------------------------------------------------------------- */
891
892 static volatile StgWord gc_running_threads;
893
894 static StgWord
895 inc_running (void)
896 {
897 StgWord new;
898 new = atomic_inc(&gc_running_threads, 1);
899 ASSERT(new <= n_gc_threads);
900 return new;
901 }
902
903 static StgWord
904 dec_running (void)
905 {
906 ASSERT(gc_running_threads != 0);
907 return atomic_dec(&gc_running_threads);
908 }
909
910 static rtsBool
911 any_work (void)
912 {
913 int g;
914 gen_workspace *ws;
915
916 gct->any_work++;
917
918 write_barrier();
919
920 // scavenge objects in compacted generation
921 if (mark_stack_bd != NULL && !mark_stack_empty()) {
922 return rtsTrue;
923 }
924
925 // Check for global work in any step. We don't need to check for
926 // local work, because we have already exited scavenge_loop(),
927 // which means there is no local work for this thread.
928 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
929 ws = &gct->gens[g];
930 if (ws->todo_large_objects) return rtsTrue;
931 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
932 if (ws->todo_overflow) return rtsTrue;
933 }
934
935 #if defined(THREADED_RTS)
936 if (work_stealing) {
937 nat n;
938 // look for work to steal
939 for (n = 0; n < n_gc_threads; n++) {
940 if (n == gct->thread_index) continue;
941 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
942 ws = &gc_threads[n]->gens[g];
943 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
944 }
945 }
946 }
947 #endif
948
949 gct->no_work++;
950 #if defined(THREADED_RTS)
951 yieldThread();
952 #endif
953
954 return rtsFalse;
955 }
956
957 static void
958 scavenge_until_all_done (void)
959 {
960 DEBUG_ONLY( nat r );
961
962
963 loop:
964 #if defined(THREADED_RTS)
965 if (n_gc_threads > 1) {
966 scavenge_loop();
967 } else {
968 scavenge_loop1();
969 }
970 #else
971 scavenge_loop();
972 #endif
973
974 collect_gct_blocks();
975
976 // scavenge_loop() only exits when there's no work to do
977
978 #ifdef DEBUG
979 r = dec_running();
980 #else
981 dec_running();
982 #endif
983
984 traceEventGcIdle(gct->cap);
985
986 debugTrace(DEBUG_gc, "%d GC threads still running", r);
987
988 while (gc_running_threads != 0) {
989 // usleep(1);
990 if (any_work()) {
991 inc_running();
992 traceEventGcWork(gct->cap);
993 goto loop;
994 }
995 // any_work() does not remove the work from the queue, it
996 // just checks for the presence of work. If we find any,
997 // then we increment gc_running_threads and go back to
998 // scavenge_loop() to perform any pending work.
999 }
1000
1001 traceEventGcDone(gct->cap);
1002 }
1003
1004 #if defined(THREADED_RTS)
1005
1006 void
1007 gcWorkerThread (Capability *cap)
1008 {
1009 gc_thread *saved_gct;
1010
1011 // necessary if we stole a callee-saves register for gct:
1012 saved_gct = gct;
1013
1014 SET_GCT(gc_threads[cap->no]);
1015 gct->id = osThreadId();
1016
1017 // Wait until we're told to wake up
1018 RELEASE_SPIN_LOCK(&gct->mut_spin);
1019 // yieldThread();
1020 // Strangely, adding a yieldThread() here makes the CPU time
1021 // measurements more accurate on Linux, perhaps because it syncs
1022 // the CPU time across the multiple cores. Without this, CPU time
1023 // is heavily skewed towards GC rather than MUT.
1024 gct->wakeup = GC_THREAD_STANDING_BY;
1025 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1026 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1027
1028 init_gc_thread(gct);
1029
1030 traceEventGcWork(gct->cap);
1031
1032 // Every thread evacuates some roots.
1033 gct->evac_gen_no = 0;
1034 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1035 scavenge_capability_mut_lists(cap);
1036
1037 scavenge_until_all_done();
1038
1039 #ifdef THREADED_RTS
1040 // Now that the whole heap is marked, we discard any sparks that
1041 // were found to be unreachable. The main GC thread is currently
1042 // marking heap reachable via weak pointers, so it is
1043 // non-deterministic whether a spark will be retained if it is
1044 // only reachable via weak pointers. To fix this problem would
1045 // require another GC barrier, which is too high a price.
1046 pruneSparkQueue(cap);
1047 #endif
1048
1049 // Wait until we're told to continue
1050 RELEASE_SPIN_LOCK(&gct->gc_spin);
1051 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1052 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1053 gct->thread_index);
1054 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1055 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1056
1057 SET_GCT(saved_gct);
1058 }
1059
1060 #endif
1061
1062 #if defined(THREADED_RTS)
1063
1064 void
1065 waitForGcThreads (Capability *cap USED_IF_THREADS)
1066 {
1067 const nat n_threads = n_capabilities;
1068 const nat me = cap->no;
1069 nat i, j;
1070 rtsBool retry = rtsTrue;
1071
1072 stat_startGCSync(gc_threads[cap->no]);
1073
1074
1075 while(retry) {
1076 for (i=0; i < n_threads; i++) {
1077 if (i == me || gc_threads[i]->idle) continue;
1078 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1079 prodCapability(capabilities[i], cap->running_task);
1080 }
1081 }
1082 for (j=0; j < 10; j++) {
1083 retry = rtsFalse;
1084 for (i=0; i < n_threads; i++) {
1085 if (i == me || gc_threads[i]->idle) continue;
1086 write_barrier();
1087 interruptCapability(capabilities[i]);
1088 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1089 retry = rtsTrue;
1090 }
1091 }
1092 if (!retry) break;
1093 yieldThread();
1094 }
1095 }
1096 }
1097
1098 #endif // THREADED_RTS
1099
1100 static void
1101 start_gc_threads (void)
1102 {
1103 #if defined(THREADED_RTS)
1104 gc_running_threads = 0;
1105 #endif
1106 }
1107
1108 static void
1109 wakeup_gc_threads (nat me USED_IF_THREADS)
1110 {
1111 #if defined(THREADED_RTS)
1112 nat i;
1113
1114 if (n_gc_threads == 1) return;
1115
1116 for (i=0; i < n_gc_threads; i++) {
1117 if (i == me || gc_threads[i]->idle) continue;
1118 inc_running();
1119 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1120 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1121
1122 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1123 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1124 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1125 }
1126 #endif
1127 }
1128
1129 // After GC is complete, we must wait for all GC threads to enter the
1130 // standby state, otherwise they may still be executing inside
1131 // any_work(), and may even remain awake until the next GC starts.
1132 static void
1133 shutdown_gc_threads (nat me USED_IF_THREADS)
1134 {
1135 #if defined(THREADED_RTS)
1136 nat i;
1137
1138 if (n_gc_threads == 1) return;
1139
1140 for (i=0; i < n_gc_threads; i++) {
1141 if (i == me || gc_threads[i]->idle) continue;
1142 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
1143 busy_wait_nop();
1144 write_barrier();
1145 }
1146 }
1147 #endif
1148 }
1149
1150 #if defined(THREADED_RTS)
1151 void
1152 releaseGCThreads (Capability *cap USED_IF_THREADS)
1153 {
1154 const nat n_threads = n_capabilities;
1155 const nat me = cap->no;
1156 nat i;
1157 for (i=0; i < n_threads; i++) {
1158 if (i == me || gc_threads[i]->idle) continue;
1159 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1160 barf("releaseGCThreads");
1161
1162 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1163 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1164 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1165 }
1166 }
1167 #endif
1168
1169 /* ----------------------------------------------------------------------------
1170 Initialise a generation that is to be collected
1171 ------------------------------------------------------------------------- */
1172
1173 static void
1174 prepare_collected_gen (generation *gen)
1175 {
1176 nat i, g, n;
1177 gen_workspace *ws;
1178 bdescr *bd, *next;
1179
1180 // Throw away the current mutable list. Invariant: the mutable
1181 // list always has at least one block; this means we can avoid a
1182 // check for NULL in recordMutable().
1183 g = gen->no;
1184 if (g != 0) {
1185 for (i = 0; i < n_capabilities; i++) {
1186 freeChain(capabilities[i]->mut_lists[g]);
1187 capabilities[i]->mut_lists[g] = allocBlock();
1188 }
1189 }
1190
1191 gen = &generations[g];
1192 ASSERT(gen->no == g);
1193
1194    // we'll construct a new list of threads in this generation during
1195    // GC; stash the current list in old_threads for now.
1196 gen->old_threads = gen->threads;
1197 gen->threads = END_TSO_QUEUE;
1198
1199 // deprecate the existing blocks
1200 gen->old_blocks = gen->blocks;
1201 gen->n_old_blocks = gen->n_blocks;
1202 gen->blocks = NULL;
1203 gen->n_blocks = 0;
1204 gen->n_words = 0;
1205 gen->live_estimate = 0;
1206
1207 // initialise the large object queues.
1208 ASSERT(gen->scavenged_large_objects == NULL);
1209 ASSERT(gen->n_scavenged_large_blocks == 0);
1210
1211 // grab all the partial blocks stashed in the gc_thread workspaces and
1212 // move them to the old_blocks list of this gen.
1213 for (n = 0; n < n_capabilities; n++) {
1214 ws = &gc_threads[n]->gens[gen->no];
1215
1216 for (bd = ws->part_list; bd != NULL; bd = next) {
1217 next = bd->link;
1218 bd->link = gen->old_blocks;
1219 gen->old_blocks = bd;
1220 gen->n_old_blocks += bd->blocks;
1221 }
1222 ws->part_list = NULL;
1223 ws->n_part_blocks = 0;
1224 ws->n_part_words = 0;
1225
1226 ASSERT(ws->scavd_list == NULL);
1227 ASSERT(ws->n_scavd_blocks == 0);
1228 ASSERT(ws->n_scavd_words == 0);
1229
1230 if (ws->todo_free != ws->todo_bd->start) {
1231 ws->todo_bd->free = ws->todo_free;
1232 ws->todo_bd->link = gen->old_blocks;
1233 gen->old_blocks = ws->todo_bd;
1234 gen->n_old_blocks += ws->todo_bd->blocks;
1235 alloc_todo_block(ws,0); // always has one block.
1236 }
1237 }
1238
1239 // mark the small objects as from-space
1240 for (bd = gen->old_blocks; bd; bd = bd->link) {
1241 bd->flags &= ~BF_EVACUATED;
1242 }
1243
1244 // mark the large objects as from-space
1245 for (bd = gen->large_objects; bd; bd = bd->link) {
1246 bd->flags &= ~BF_EVACUATED;
1247 }
1248
1249 // for a compacted generation, we need to allocate the bitmap
1250 if (gen->mark) {
1251 StgWord bitmap_size; // in bytes
1252 bdescr *bitmap_bdescr;
1253 StgWord *bitmap;
1254
1255 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
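        // Illustrative figures (block size is configurable): with 4Kbyte
        // blocks and 8-byte words this is 4096 / (8*8) = 64 bytes of bitmap
        // per old block, i.e. one bit per word of old heap.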
1256
1257 if (bitmap_size > 0) {
1258 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1259 / BLOCK_SIZE);
1260 gen->bitmap = bitmap_bdescr;
1261 bitmap = bitmap_bdescr->start;
1262
1263 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1264 bitmap_size, bitmap);
1265
1266 // don't forget to fill it with zeros!
1267 memset(bitmap, 0, bitmap_size);
1268
1269 // For each block in this step, point to its bitmap from the
1270 // block descriptor.
1271 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1272 bd->u.bitmap = bitmap;
1273 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1274
1275 // Also at this point we set the BF_MARKED flag
1276 // for this block. The invariant is that
1277 // BF_MARKED is always unset, except during GC
1278 // when it is set on those blocks which will be
1279 // compacted.
1280 if (!(bd->flags & BF_FRAGMENTED)) {
1281 bd->flags |= BF_MARKED;
1282 }
1283
1284 // BF_SWEPT should be marked only for blocks that are being
1285 // collected in sweep()
1286 bd->flags &= ~BF_SWEPT;
1287 }
1288 }
1289 }
1290 }
1291
1292
1293 /* ----------------------------------------------------------------------------
1294 Save the mutable lists in saved_mut_lists
1295 ------------------------------------------------------------------------- */
1296
1297 static void
1298 stash_mut_list (Capability *cap, nat gen_no)
1299 {
1300 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1301 cap->mut_lists[gen_no] = allocBlock_sync();
1302 }
1303
1304 /* ----------------------------------------------------------------------------
1305 Initialise a generation that is *not* to be collected
1306 ------------------------------------------------------------------------- */
1307
1308 static void
1309 prepare_uncollected_gen (generation *gen)
1310 {
1311 nat i;
1312
1313
1314 ASSERT(gen->no > 0);
1315
1316 // save the current mutable lists for this generation, and
1317 // allocate a fresh block for each one. We'll traverse these
1318 // mutable lists as roots early on in the GC.
1319 for (i = 0; i < n_capabilities; i++) {
1320 stash_mut_list(capabilities[i], gen->no);
1321 }
1322
1323 ASSERT(gen->scavenged_large_objects == NULL);
1324 ASSERT(gen->n_scavenged_large_blocks == 0);
1325 }
1326
1327 /* -----------------------------------------------------------------------------
1328 Collect the completed blocks from a GC thread and attach them to
1329 the generation.
1330 -------------------------------------------------------------------------- */
1331
1332 static void
1333 collect_gct_blocks (void)
1334 {
1335 nat g;
1336 gen_workspace *ws;
1337 bdescr *bd, *prev;
1338
1339 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1340 ws = &gct->gens[g];
1341
1342 // there may still be a block attached to ws->todo_bd;
1343 // leave it there to use next time.
1344
1345 if (ws->scavd_list != NULL) {
1346 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1347
1348 ASSERT(gct->scan_bd == NULL);
1349 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1350
1351 prev = NULL;
1352 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1353 prev = bd;
1354 }
1355 if (prev != NULL) {
1356 prev->link = ws->gen->blocks;
1357 ws->gen->blocks = ws->scavd_list;
1358 }
1359 ws->gen->n_blocks += ws->n_scavd_blocks;
1360 ws->gen->n_words += ws->n_scavd_words;
1361
1362 ws->scavd_list = NULL;
1363 ws->n_scavd_blocks = 0;
1364 ws->n_scavd_words = 0;
1365
1366 RELEASE_SPIN_LOCK(&ws->gen->sync);
1367 }
1368 }
1369 }
1370
1371 /* -----------------------------------------------------------------------------
1372 During mutation, any blocks that are filled by allocatePinned() are
1373 stashed on the local pinned_object_blocks list, to avoid needing to
1374 take a global lock. Here we collect those blocks from the
1375 cap->pinned_object_blocks lists and put them on the
1376 main g0->large_object list.
1377
1380 -------------------------------------------------------------------------- */
1381
1382 static void
1383 collect_pinned_object_blocks (void)
1384 {
1385 nat n;
1386 bdescr *bd, *prev;
1387
1388 for (n = 0; n < n_capabilities; n++) {
1389 prev = NULL;
1390 for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1391 prev = bd;
1392 }
1393 if (prev != NULL) {
1394 prev->link = g0->large_objects;
1395 if (g0->large_objects != NULL) {
1396 g0->large_objects->u.back = prev;
1397 }
1398 g0->large_objects = capabilities[n]->pinned_object_blocks;
1399 capabilities[n]->pinned_object_blocks = 0;
1400 }
1401 }
1402 }
1403
1404 /* -----------------------------------------------------------------------------
1405 Initialise a gc_thread before GC
1406 -------------------------------------------------------------------------- */
1407
1408 static void
1409 init_gc_thread (gc_thread *t)
1410 {
1411 t->static_objects = END_OF_STATIC_OBJECT_LIST;
1412 t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST;
1413 t->scan_bd = NULL;
1414 t->mut_lists = t->cap->mut_lists;
1415 t->evac_gen_no = 0;
1416 t->failed_to_evac = rtsFalse;
1417 t->eager_promotion = rtsTrue;
1418 t->thunk_selector_depth = 0;
1419 t->copied = 0;
1420 t->scanned = 0;
1421 t->any_work = 0;
1422 t->no_work = 0;
1423 t->scav_find_work = 0;
1424 }
1425
1426 /* -----------------------------------------------------------------------------
1427 Function we pass to evacuate roots.
1428 -------------------------------------------------------------------------- */
1429
1430 static void
1431 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1432 {
1433 // we stole a register for gct, but this function is called from
1434 // *outside* the GC where the register variable is not in effect,
1435 // so we need to save and restore it here. NB. only call
1436 // mark_root() from the main GC thread, otherwise gct will be
1437 // incorrect.
1438 #if defined(THREADED_RTS)
1439 gc_thread *saved_gct;
1440 saved_gct = gct;
1441 #endif
1442 SET_GCT(user);
1443
1444 evacuate(root);
1445
1446 SET_GCT(saved_gct);
1447 }
1448
1449 /* ----------------------------------------------------------------------------
1450 Reset the sizes of the older generations when we do a major
1451 collection.
1452
1453 CURRENT STRATEGY: make all generations except zero the same size.
1454 We have to stay within the maximum heap size, and leave a certain
1455 percentage of the maximum heap size available to allocate into.
1456 ------------------------------------------------------------------------- */
1457
1458 static void
1459 resize_generations (void)
1460 {
1461 nat g;
1462
1463 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1464 W_ live, size, min_alloc, words;
1465 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1466 const W_ gens = RtsFlags.GcFlags.generations;
1467
1468        // live in the oldest generation
1469 if (oldest_gen->live_estimate != 0) {
1470 words = oldest_gen->live_estimate;
1471 } else {
1472 words = oldest_gen->n_words;
1473 }
1474 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1475 oldest_gen->n_large_blocks;
1476
1477 // default max size for all generations except zero
1478 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1479 RtsFlags.GcFlags.minOldGenSize);
1480
1481 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1482 if (max > 0) {
1483 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1484 } else {
1485 RtsFlags.GcFlags.heapSizeSuggestion = size;
1486 }
1487 }
1488
1489 // minimum size for generation zero
1490 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1491 RtsFlags.GcFlags.minAllocAreaSize);
1492
1493 // Auto-enable compaction when the residency reaches a
1494 // certain percentage of the maximum heap size (default: 30%).
1495 if (RtsFlags.GcFlags.compact ||
1496 (max > 0 &&
1497 oldest_gen->n_blocks >
1498 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1499 oldest_gen->mark = 1;
1500 oldest_gen->compact = 1;
1501 // debugBelch("compaction: on\n", live);
1502 } else {
1503 oldest_gen->mark = 0;
1504 oldest_gen->compact = 0;
1505 // debugBelch("compaction: off\n", live);
1506 }
1507
1508 if (RtsFlags.GcFlags.sweep) {
1509 oldest_gen->mark = 1;
1510 }
1511
1512 // if we're going to go over the maximum heap size, reduce the
1513 // size of the generations accordingly. The calculation is
1514 // different if compaction is turned on, because we don't need
1515 // to double the space required to collect the old generation.
1516 if (max != 0) {
1517
1518 // this test is necessary to ensure that the calculations
1519 // below don't have any negative results - we're working
1520 // with unsigned values here.
1521 if (max < min_alloc) {
1522 heapOverflow();
1523 }
1524
1525 if (oldest_gen->compact) {
1526 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1527 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1528 }
1529 } else {
1530 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1531 size = (max - min_alloc) / ((gens - 1) * 2);
1532 }
1533 }
1534
1535 if (size < live) {
1536 heapOverflow();
1537 }
1538 }
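        // Worked example for the copying case (figures illustrative only):
        // with gens = 3, size = 100 blocks, min_alloc = 50 and max = 300,
        // the worst case size*(gens-1)*2 + min_alloc = 450 exceeds max, so
        // size is reduced to (300 - 50) / ((3-1)*2) = 62 blocks.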
1539
1540 #if 0
1541 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1542 min_alloc, size, max);
1543 #endif
1544
1545 for (g = 0; g < gens; g++) {
1546 generations[g].max_blocks = size;
1547 }
1548 }
1549 }
1550
1551 /* -----------------------------------------------------------------------------
1552 Calculate the new size of the nursery, and resize it.
1553 -------------------------------------------------------------------------- */
1554
1555 static void
1556 resize_nursery (void)
1557 {
1558 const StgWord min_nursery =
1559 RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;
1560
1561 if (RtsFlags.GcFlags.generations == 1)
1562 { // Two-space collector:
1563 W_ blocks;
1564
1565 /* set up a new nursery. Allocate a nursery size based on a
1566 * function of the amount of live data (by default a factor of 2)
1567 * Use the blocks from the old nursery if possible, freeing up any
1568 * left over blocks.
1569 *
1570 * If we get near the maximum heap size, then adjust our nursery
1571 * size accordingly. If the nursery is the same size as the live
1572 * data (L), then we need 3L bytes. We can reduce the size of the
1573 * nursery to bring the required memory down near 2L bytes.
1574 *
1575 * A normal 2-space collector would need 4L bytes to give the same
1576 * performance we get from 3L bytes, reducing to the same
1577 * performance at 2L bytes.
1578 */
1579 blocks = generations[0].n_blocks;
1580
1581 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1582 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1583 RtsFlags.GcFlags.maxHeapSize )
1584 {
1585 long adjusted_blocks; // signed on purpose
1586 int pc_free;
1587
1588 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1589
1590 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1591 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1592
1593 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1594            if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1595 {
1596 heapOverflow();
1597 }
1598 blocks = adjusted_blocks;
1599 }
1600 else
1601 {
1602 blocks *= RtsFlags.GcFlags.oldGenFactor;
1603 if (blocks < min_nursery)
1604 {
1605 blocks = min_nursery;
1606 }
1607 }
1608 resizeNurseries(blocks);
1609 }
1610 else // Generational collector
1611 {
1612 /*
1613 * If the user has given us a suggested heap size, adjust our
1614 * allocation area to make best use of the memory available.
1615 */
1616 if (RtsFlags.GcFlags.heapSizeSuggestion)
1617 {
1618 long blocks;
1619 StgWord needed;
1620
1621 calcNeeded(rtsFalse, &needed); // approx blocks needed at next GC
1622
1623 /* Guess how much will be live in generation 0 step 0 next time.
1624 * A good approximation is obtained by finding the
1625 * percentage of g0 that was live at the last minor GC.
1626 *
1627 * We have an accurate figure for the amount of copied data in
1628 * 'copied', but we must convert this to a number of blocks, with
1629 * a small adjustment for estimated slop at the end of a block
1630 * (- 10 words).
1631 */
1632 if (N == 0)
1633 {
1634 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1635 / countNurseryBlocks();
1636 }
1637
1638 /* Estimate a size for the allocation area based on the
1639 * information available. We might end up going slightly under
1640 * or over the suggested heap size, but we should be pretty
1641 * close on average.
1642 *
1643 * Formula: suggested - needed
1644 * ----------------------------
1645 * 1 + g0_pcnt_kept/100
1646 *
1647 * where 'needed' is the amount of memory needed at the next
1648 * collection for collecting all gens except g0.
1649 */
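            /* Worked example (numbers illustrative only): a suggested heap of
             * 100000 blocks, needed = 20000 and g0_pcnt_kept = 25 gives
             * (100000 - 20000) * 100 / 125 = 64000 blocks of allocation area;
             * the ~16000 blocks it keeps live plus 'needed' then fill out the
             * suggestion.
             */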
1650 blocks =
1651 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1652 (100 + (long)g0_pcnt_kept);
1653
1654 if (blocks < (long)min_nursery) {
1655 blocks = min_nursery;
1656 }
1657
1658 resizeNurseries((W_)blocks);
1659 }
1660 else
1661 {
1662 // we might have added extra blocks to the nursery, so
1663 // resize back to the original size again.
1664 resizeNurseriesFixed();
1665 }
1666 }
1667 }
1668
1669 /* -----------------------------------------------------------------------------
1670 Sanity code for CAF garbage collection.
1671
1672 With DEBUG turned on, we manage a CAF list in addition to the SRT
1673 mechanism. After GC, we run down the CAF list and blackhole any
1674 CAFs which have been garbage collected. This means we get an error
1675 whenever the program tries to enter a garbage collected CAF.
1676
1677 Any garbage collected CAFs are taken off the CAF list at the same
1678 time.
1679 -------------------------------------------------------------------------- */
1680
1681 #if defined(DEBUG)
1682
1683 static void gcCAFs(void)
1684 {
1685 StgIndStatic *p, *prev;
1686
1687 const StgInfoTable *info;
1688 nat i;
1689
1690 i = 0;
1691 p = debug_caf_list;
1692 prev = NULL;
1693
1694 for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST;
1695 p = (StgIndStatic*)p->saved_info) {
1696
1697 info = get_itbl((StgClosure*)p);
1698 ASSERT(info->type == IND_STATIC);
1699
1700 if (p->static_link == NULL) {
1701 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
1702 SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
1703 if (prev == NULL) {
1704 debug_caf_list = (StgIndStatic*)p->saved_info;
1705 } else {
1706 prev->saved_info = p->saved_info;
1707 }
1708 } else {
1709 prev = p;
1710 i++;
1711 }
1712 }
1713
1714 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1715 }
1716 #endif