1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Storage.h"
32 #include "RtsUtils.h"
33 #include "Apply.h"
34 #include "Updates.h"
35 #include "Stats.h"
36 #include "Schedule.h"
37 #include "Sanity.h"
38 #include "BlockAlloc.h"
39 #include "ProfHeap.h"
40 #include "Weak.h"
41 #include "Prelude.h"
42 #include "RtsSignals.h"
43 #include "STM.h"
44 #include "Trace.h"
45 #include "RetainerProfile.h"
46 #include "LdvProfile.h"
47 #include "RaiseAsync.h"
48 #include "Stable.h"
49 #include "CheckUnload.h"
50
51 #include <string.h> // for memset()
52 #include <unistd.h>
53
54 /* -----------------------------------------------------------------------------
55 Global variables
56 -------------------------------------------------------------------------- */
57
58 /* STATIC OBJECT LIST.
59 *
60 * During GC:
61 * We maintain a linked list of static objects that are still live.
62 * The requirements for this list are:
63 *
64 * - we need to scan the list while adding to it, in order to
65 * scavenge all the static objects (in the same way that
66 * breadth-first scavenging works for dynamic objects).
67 *
68 * - we need to be able to tell whether an object is already on
69 * the list, to break loops.
70 *
71 * Each static object has a "static link field", which we use for
72 * linking objects on to the list. We use a stack-type list, consing
73 * objects on the front as they are added (this means that the
74 * scavenge phase is depth-first, not breadth-first, but that
75 * shouldn't matter).
76 *
77 * A separate list is kept for objects that have been scavenged
78 * already - this is so that we can zero all the marks afterwards.
79 *
80 * An object is on the list if its static link field is non-zero; this
81 * means that we have to mark the end of the list with '1', not NULL.
82 *
83 * Extra notes for generational GC:
84 *
85 * Each generation has a static object list associated with it. When
86 * collecting generations up to N, we treat the static object lists
87 * from generations > N as roots.
88 *
89 * We build up a static object list while collecting generations 0..N,
90 * which is then appended to the static object list of generation N+1.
91 */
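
/* A toy sketch (toy types, not the RTS representation) of the consing
 * discipline described above: the static link field doubles as the
 * "already on the list" test, so the end of the list is marked with a
 * non-NULL sentinel rather than NULL.
 */
#if 0
typedef struct SObj_ { struct SObj_ *static_link; /* ...payload... */ } SObj;
#define END_OF_LIST ((SObj *)1)          /* non-NULL end-of-list marker */

static SObj *static_list = END_OF_LIST;  /* stand-in for gct->static_objects */

static void push_static (SObj *p)
{
    if (p->static_link != NULL) return;  /* already on a list: break loops */
    p->static_link = static_list;        /* cons onto the front (LIFO) */
    static_list = p;
}
#endif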
92
93 /* N is the oldest generation being collected, where the generations
94 * are numbered starting at 0. A major GC (indicated by the major_gc
95 * flag) is when we're collecting all generations. We only attempt to
96 * deal with static objects and GC CAFs when doing a major GC.
97 */
98 uint32_t N;
99 rtsBool major_gc;
100
101 /* Data used for allocation area sizing.
102 */
103 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
104
105 /* Mut-list stats */
106 #ifdef DEBUG
107 uint32_t mutlist_MUTVARS,
108 mutlist_MUTARRS,
109 mutlist_MVARS,
110 mutlist_TVAR,
111 mutlist_TVAR_WATCH_QUEUE,
112 mutlist_TREC_CHUNK,
113 mutlist_TREC_HEADER,
114 mutlist_ATOMIC_INVARIANT,
115 mutlist_INVARIANT_CHECK_QUEUE,
116 mutlist_OTHERS;
117 #endif
118
119 /* Thread-local data for each GC thread
120 */
121 gc_thread **gc_threads = NULL;
122
123 #if !defined(THREADED_RTS)
124 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
125 #endif
126
127 // Number of threads running in *this* GC. Affects how many
128 // step->todos[] lists we have to look in to find work.
129 uint32_t n_gc_threads;
130
131 // For stats:
132 static long copied; // *words* copied & scavenged during this GC
133
134 rtsBool work_stealing;
135
136 uint32_t static_flag = STATIC_FLAG_B;
137 uint32_t prev_static_flag = STATIC_FLAG_A;
138
139 DECLARE_GCT
140
141 /* -----------------------------------------------------------------------------
142 Static function declarations
143 -------------------------------------------------------------------------- */
144
145 static void mark_root (void *user, StgClosure **root);
146 static void prepare_collected_gen (generation *gen);
147 static void prepare_uncollected_gen (generation *gen);
148 static void init_gc_thread (gc_thread *t);
149 static void resize_generations (void);
150 static void resize_nursery (void);
151 static void start_gc_threads (void);
152 static void scavenge_until_all_done (void);
153 static StgWord inc_running (void);
154 static StgWord dec_running (void);
155 static void wakeup_gc_threads (uint32_t me);
156 static void shutdown_gc_threads (uint32_t me);
157 static void collect_gct_blocks (void);
158 static void collect_pinned_object_blocks (void);
159
160 #if defined(DEBUG)
161 static void gcCAFs (void);
162 #endif
163
164 /* -----------------------------------------------------------------------------
165 The mark stack.
166 -------------------------------------------------------------------------- */
167
168 bdescr *mark_stack_top_bd; // topmost block in the mark stack
169 bdescr *mark_stack_bd; // current block in the mark stack
170 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
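
// A toy sketch (not the MarkStack.h implementation) of how these three
// variables cooperate: entries are written at mark_sp within the block
// described by mark_stack_bd, and a fresh block is chained in once the
// current one fills up.
#if 0
static void mark_stack_push_sketch (StgPtr p)
{
    if (mark_sp == mark_stack_bd->start + BLOCK_SIZE_W) {
        // current block is full: link in a fresh block (details elided)
        // and point mark_stack_bd/mark_sp at it before pushing.
    }
    *mark_sp++ = (StgWord)p;
}
#endif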
171
172 /* -----------------------------------------------------------------------------
173 GarbageCollect: the main entry point to the garbage collector.
174
175 The collect_gen parameter is obtained by calling calcNeeded().
176
177 Locks held: all capabilities are held throughout GarbageCollect().
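
   Rough phase structure (a summary for orientation; the code below is
   authoritative):
     1. take the SM lock, choose N, and prepare the collected and
        uncollected generations
     2. wake the GC worker threads and evacuate the roots: mutable lists,
        capabilities, the scheduler, signal handlers, the weak pointer
        list and the stable tables
     3. scavenge until no work remains, looping via traverseWeakPtrList()
     4. on a major GC, compact or sweep the oldest generation
     5. tidy the generations, resize them and the nursery, schedule
        finalizers, resurrect threads and report the statistics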
178 -------------------------------------------------------------------------- */
179
180 void
181 GarbageCollect (uint32_t collect_gen,
182 rtsBool do_heap_census,
183 uint32_t gc_type USED_IF_THREADS,
184 Capability *cap)
185 {
186 bdescr *bd;
187 generation *gen;
188 StgWord live_blocks, live_words, par_max_copied, par_tot_copied;
189 #if defined(THREADED_RTS)
190 gc_thread *saved_gct;
191 #endif
192 uint32_t g, n;
193
194 // necessary if we stole a callee-saves register for gct:
195 #if defined(THREADED_RTS)
196 saved_gct = gct;
197 #endif
198
199 #ifdef PROFILING
200 CostCentreStack *save_CCS[n_capabilities];
201 #endif
202
203 ACQUIRE_SM_LOCK;
204
205 #if defined(RTS_USER_SIGNALS)
206 if (RtsFlags.MiscFlags.install_signal_handlers) {
207 // block signals
208 blockUserSignals();
209 }
210 #endif
211
212 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
213 // otherwise adjust the padding in gen_workspace.
214
215 // this is the main thread
216 SET_GCT(gc_threads[cap->no]);
217
218 // tell the stats department that we've started a GC
219 stat_startGC(cap, gct);
220
221 // lock the StablePtr table
222 stableLock();
223
224 #ifdef DEBUG
225 mutlist_MUTVARS = 0;
226 mutlist_MUTARRS = 0;
227 mutlist_MVARS = 0;
228 mutlist_TVAR = 0;
229 mutlist_TVAR_WATCH_QUEUE = 0;
230 mutlist_TREC_CHUNK = 0;
231 mutlist_TREC_HEADER = 0;
232 mutlist_ATOMIC_INVARIANT = 0;
233 mutlist_INVARIANT_CHECK_QUEUE = 0;
234 mutlist_OTHERS = 0;
235 #endif
236
237 // attribute any costs to CCS_GC
238 #ifdef PROFILING
239 for (n = 0; n < n_capabilities; n++) {
240 save_CCS[n] = capabilities[n]->r.rCCCS;
241 capabilities[n]->r.rCCCS = CCS_GC;
242 }
243 #endif
244
245 /* Figure out which generation to collect
246 */
247 N = collect_gen;
248 major_gc = (N == RtsFlags.GcFlags.generations-1);
249
250 if (major_gc) {
251 prev_static_flag = static_flag;
252 static_flag =
253 static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
254 }
255
256 #if defined(THREADED_RTS)
257 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
258 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
259 // It's not always a good idea to do load balancing in parallel
260 // GC. In particular, for a parallel program we don't want to
261 // lose locality by moving cached data into another CPU's cache
262 // (this effect can be quite significant).
263 //
264 // We could have a more complex way to determine whether to do
265 // work stealing or not, e.g. it might be a good idea to do it
266 // if the heap is big. For now, we just turn it on or off with
267 // a flag.
268 #endif
269
270 /* Start threads, so they can be spinning up while we finish initialisation.
271 */
272 start_gc_threads();
273
274 #if defined(THREADED_RTS)
275 /* How many threads will be participating in this GC?
276 * We don't try to parallelise minor GCs (unless the user asks for
277 * it with +RTS -gn0), or mark/compact/sweep GC.
278 */
279 if (gc_type == SYNC_GC_PAR) {
280 n_gc_threads = n_capabilities;
281 } else {
282 n_gc_threads = 1;
283 }
284 #else
285 n_gc_threads = 1;
286 #endif
287
288 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
289 N, n_gc_threads);
290
291 #ifdef DEBUG
292 // check for memory leaks if DEBUG is on
293 memInventory(DEBUG_gc);
294 #endif
295
296 // do this *before* we start scavenging
297 collectFreshWeakPtrs();
298
299 // check sanity *before* GC
300 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
301
302 // gather blocks allocated using allocatePinned() from each capability
303 // and put them on the g0->large_object list.
304 collect_pinned_object_blocks();
305
306 // Initialise all the generations/steps that we're collecting.
307 for (g = 0; g <= N; g++) {
308 prepare_collected_gen(&generations[g]);
309 }
310 // Initialise all the generations/steps that we're *not* collecting.
311 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
312 prepare_uncollected_gen(&generations[g]);
313 }
314
315 // Prepare this gc_thread
316 init_gc_thread(gct);
317
318 /* Allocate a mark stack if we're doing a major collection.
319 */
320 if (major_gc && oldest_gen->mark) {
321 mark_stack_bd = allocBlock();
322 mark_stack_top_bd = mark_stack_bd;
323 mark_stack_bd->link = NULL;
324 mark_stack_bd->u.back = NULL;
325 mark_sp = mark_stack_bd->start;
326 } else {
327 mark_stack_bd = NULL;
328 mark_stack_top_bd = NULL;
329 mark_sp = NULL;
330 }
331
332 /* -----------------------------------------------------------------------
333 * follow all the roots that we know about:
334 */
335
336 // the main thread is running: this prevents any other threads from
337 // exiting prematurely, so we can start them now.
338 // NB. do this after the mutable lists have been saved above, otherwise
339 // the other GC threads will be writing into the old mutable lists.
340 inc_running();
341 wakeup_gc_threads(gct->thread_index);
342
343 traceEventGcWork(gct->cap);
344
345 // scavenge the capability-private mutable lists. This isn't part
346 // of markSomeCapabilities() because markSomeCapabilities() can only
347 // call back into the GC via mark_root() (due to the gct register
348 // variable).
349 if (n_gc_threads == 1) {
350 for (n = 0; n < n_capabilities; n++) {
351 #if defined(THREADED_RTS)
352 scavenge_capability_mut_Lists1(capabilities[n]);
353 #else
354 scavenge_capability_mut_lists(capabilities[n]);
355 #endif
356 }
357 } else {
358 scavenge_capability_mut_lists(gct->cap);
359 for (n = 0; n < n_capabilities; n++) {
360 if (gc_threads[n]->idle) {
361 markCapability(mark_root, gct, capabilities[n],
362 rtsTrue/*don't mark sparks*/);
363 scavenge_capability_mut_lists(capabilities[n]);
364 }
365 }
366 }
367
368 // follow roots from the CAF list (used by GHCi)
369 gct->evac_gen_no = 0;
370 markCAFs(mark_root, gct);
371
372 // follow all the roots that the application knows about.
373 gct->evac_gen_no = 0;
374 if (n_gc_threads == 1) {
375 for (n = 0; n < n_capabilities; n++) {
376 markCapability(mark_root, gct, capabilities[n],
377 rtsTrue/*don't mark sparks*/);
378 }
379 } else {
380 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
381 }
382
383 markScheduler(mark_root, gct);
384
385 #if defined(RTS_USER_SIGNALS)
386 // mark the signal handlers (signals should be already blocked)
387 markSignalHandlers(mark_root, gct);
388 #endif
389
390 // Mark the weak pointer list, and prepare to detect dead weak pointers.
391 markWeakPtrList();
392 initWeakForGC();
393
394 // Mark the stable pointer table.
395 markStableTables(mark_root, gct);
396
397 /* -------------------------------------------------------------------------
398 * Repeatedly scavenge all the areas we know about until there's no
399 * more scavenging to be done.
400 */
401 for (;;)
402 {
403 scavenge_until_all_done();
404 // The other threads are now stopped. We might recurse back to
405 // here, but from now on this is the only thread.
406
407 // must be last... invariant is that everything is fully
408 // scavenged at this point.
409 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
410 inc_running();
411 continue;
412 }
413
414 // If we get to here, there's really nothing left to do.
415 break;
416 }
417
418 shutdown_gc_threads(gct->thread_index);
419
420 // Now see which stable names are still alive.
421 gcStableTables();
422
423 #ifdef THREADED_RTS
424 if (n_gc_threads == 1) {
425 for (n = 0; n < n_capabilities; n++) {
426 pruneSparkQueue(capabilities[n]);
427 }
428 } else {
429 for (n = 0; n < n_capabilities; n++) {
430 if (n == cap->no || gc_threads[n]->idle) {
431 pruneSparkQueue(capabilities[n]);
432 }
433 }
434 }
435 #endif
436
437 #ifdef PROFILING
438 // We call processHeapClosureForDead() on every closure destroyed during
439 // the current garbage collection, so we invoke LdvCensusForDead().
440 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
441 || RtsFlags.ProfFlags.bioSelector != NULL) {
442 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
443 LdvCensusForDead(N);
444 ACQUIRE_SM_LOCK;
445 }
446 #endif
447
448 // NO MORE EVACUATION AFTER THIS POINT!
449
450 // Finally: compact or sweep the oldest generation.
451 if (major_gc && oldest_gen->mark) {
452 if (oldest_gen->compact)
453 compact(gct->scavenged_static_objects);
454 else
455 sweep(oldest_gen);
456 }
457
458 copied = 0;
459 par_max_copied = 0;
460 par_tot_copied = 0;
461 {
462 uint32_t i;
463 for (i=0; i < n_gc_threads; i++) {
464 if (n_gc_threads > 1) {
465 debugTrace(DEBUG_gc,"thread %d:", i);
466 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
467 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
468 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
469 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
470 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
471 }
472 copied += gc_threads[i]->copied;
473 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
474 }
475 par_tot_copied = copied;
476 if (n_gc_threads == 1) {
477 par_max_copied = 0;
478 par_tot_copied = 0;
479 }
480 }
481
482 // Run through all the generations/steps and tidy up.
483 // We're going to:
484 // - count the amount of "live" data (live_words, live_blocks)
485 // - count the amount of "copied" data in this GC (copied)
486 // - free from-space
487 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
488 //
489 live_words = 0;
490 live_blocks = 0;
491
492 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
493
494 if (g == N) {
495 generations[g].collections++; // for stats
496 if (n_gc_threads > 1) generations[g].par_collections++;
497 }
498
499 // Count the mutable list as bytes "copied" for the purposes of
500 // stats. Every mutable list is copied during every GC.
501 if (g > 0) {
502 W_ mut_list_size = 0;
503 for (n = 0; n < n_capabilities; n++) {
504 mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
505 }
506 copied += mut_list_size;
507
508 debugTrace(DEBUG_gc,
509 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
510 (unsigned long)(mut_list_size * sizeof(W_)),
511 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
512 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
513 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
514 mutlist_ATOMIC_INVARIANT,
515 mutlist_INVARIANT_CHECK_QUEUE,
516 mutlist_OTHERS);
517 }
518
519 bdescr *next, *prev;
520 gen = &generations[g];
521
522 // for generations we collected...
523 if (g <= N) {
524
525 /* free old memory and shift to-space into from-space for all
526 * the collected steps (except the allocation area). These
527 * freed blocks will probably be quickly recycled.
528 */
529 if (gen->mark)
530 {
531 // tack the new blocks on the end of the existing blocks
532 if (gen->old_blocks != NULL) {
533
534 prev = NULL;
535 for (bd = gen->old_blocks; bd != NULL; bd = next) {
536
537 next = bd->link;
538
539 if (!(bd->flags & BF_MARKED))
540 {
541 if (prev == NULL) {
542 gen->old_blocks = next;
543 } else {
544 prev->link = next;
545 }
546 freeGroup(bd);
547 gen->n_old_blocks--;
548 }
549 else
550 {
551 gen->n_words += bd->free - bd->start;
552
553 // NB. this step might not be compacted next
554 // time, so reset the BF_MARKED flags.
555 // They are set before GC if we're going to
556 // compact. (search for BF_MARKED above).
557 bd->flags &= ~BF_MARKED;
558
559 // between GCs, all blocks in the heap except
560 // for the nursery have the BF_EVACUATED flag set.
561 bd->flags |= BF_EVACUATED;
562
563 prev = bd;
564 }
565 }
566
567 if (prev != NULL) {
568 prev->link = gen->blocks;
569 gen->blocks = gen->old_blocks;
570 }
571 }
572 // add the new blocks to the block tally
573 gen->n_blocks += gen->n_old_blocks;
574 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
575 ASSERT(countOccupied(gen->blocks) == gen->n_words);
576 }
577 else // not compacted
578 {
579 freeChain(gen->old_blocks);
580 }
581
582 gen->old_blocks = NULL;
583 gen->n_old_blocks = 0;
584
585 /* LARGE OBJECTS. The current live large objects are chained on
586 * scavenged_large_objects, having been moved during garbage
587 * collection from large_objects. Any objects left on the
588 * large_objects list are therefore dead, so we free them here.
589 */
590 freeChain(gen->large_objects);
591 gen->large_objects = gen->scavenged_large_objects;
592 gen->n_large_blocks = gen->n_scavenged_large_blocks;
593 gen->n_large_words = countOccupied(gen->large_objects);
594 gen->n_new_large_words = 0;
595 }
596 else // for generations > N
597 {
598 /* For older generations, we need to append the
599 * scavenged_large_object list (i.e. large objects that have been
600 * promoted during this GC) to the large_object list for that step.
601 */
602 for (bd = gen->scavenged_large_objects; bd; bd = next) {
603 next = bd->link;
604 dbl_link_onto(bd, &gen->large_objects);
605 gen->n_large_words += bd->free - bd->start;
606 }
607
608 // add the new blocks we promoted during this GC
609 gen->n_large_blocks += gen->n_scavenged_large_blocks;
610 }
611
612 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
613 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
614
615 gen->scavenged_large_objects = NULL;
616 gen->n_scavenged_large_blocks = 0;
617
618 // Count "live" data
619 live_words += genLiveWords(gen);
620 live_blocks += genLiveBlocks(gen);
621
622 // add in the partial blocks in the gen_workspaces
623 {
624 uint32_t i;
625 for (i = 0; i < n_capabilities; i++) {
626 live_words += gcThreadLiveWords(i, gen->no);
627 live_blocks += gcThreadLiveBlocks(i, gen->no);
628 }
629 }
630 } // for all generations
631
632 // update the max size of older generations after a major GC
633 resize_generations();
634
635 // Free the mark stack.
636 if (mark_stack_top_bd != NULL) {
637 debugTrace(DEBUG_gc, "mark stack: %d blocks",
638 countBlocks(mark_stack_top_bd));
639 freeChain(mark_stack_top_bd);
640 }
641
642 // Free any bitmaps.
643 for (g = 0; g <= N; g++) {
644 gen = &generations[g];
645 if (gen->bitmap != NULL) {
646 freeGroup(gen->bitmap);
647 gen->bitmap = NULL;
648 }
649 }
650
651 resize_nursery();
652
653 resetNurseries();
654
655 // mark the garbage collected CAFs as dead
656 #if defined(DEBUG)
657 if (major_gc) { gcCAFs(); }
658 #endif
659
660 // Update the stable pointer hash table.
661 updateStableTables(major_gc);
662
663 // unlock the StablePtr table. Must be before scheduleFinalizers(),
664 // because a finalizer may call hs_free_fun_ptr() or
665 // hs_free_stable_ptr(), both of which access the StablePtr table.
666 stableUnlock();
667
668 // Must be after stableUnlock(), because it might free stable ptrs.
669 if (major_gc) {
670 checkUnload (gct->scavenged_static_objects);
671 }
672
673 #ifdef PROFILING
674 // resetStaticObjectForRetainerProfiling() must be called before
675 // zeroing below.
676
677 // ToDo: fix the gct->scavenged_static_objects below
678 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
679 #endif
680
681 // Start any pending finalizers. Must be after
682 // updateStableTables() and stableUnlock() (see #4221).
683 RELEASE_SM_LOCK;
684 scheduleFinalizers(cap, dead_weak_ptr_list);
685 ACQUIRE_SM_LOCK;
686
687 // check sanity after GC
688 // before resurrectThreads(), because that might overwrite some
689 // closures, which will cause problems with THREADED where we don't
690 // fill slop.
691 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
692
693 // If a heap census is due, we need to do it before
694 // resurrectThreads(), for the same reason as checkSanity above:
695 // resurrectThreads() will overwrite some closures and leave slop
696 // behind.
697 if (do_heap_census) {
698 debugTrace(DEBUG_sched, "performing heap census");
699 RELEASE_SM_LOCK;
700 heapCensus(gct->gc_start_cpu);
701 ACQUIRE_SM_LOCK;
702 }
703
704 // send exceptions to any threads which were about to die
705 RELEASE_SM_LOCK;
706 resurrectThreads(resurrected_threads);
707 ACQUIRE_SM_LOCK;
708
709 if (major_gc) {
710 W_ need, got;
711 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
712 got = mblocks_allocated;
713 /* If the amount of data remains constant, next major GC we'll
714 require (F+1)*need. We leave (F+2)*need in order to reduce
715 repeated deallocation and reallocation. */
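        /* For example (illustrative numbers): with the default
         * oldGenFactor F = 2 and 100 megablocks currently allocated, we
         * keep up to (2+2)*100 = 400 megablocks mapped and return
         * anything above that to the OS below. */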
716 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
717 if (got > need) {
718 returnMemoryToOS(got - need);
719 }
720 }
721
722 // extra GC trace info
723 IF_DEBUG(gc, statDescribeGens());
724
725 #ifdef DEBUG
726 // symbol-table based profiling
727 /* heapCensus(to_blocks); */ /* ToDo */
728 #endif
729
730 // restore enclosing cost centre
731 #ifdef PROFILING
732 for (n = 0; n < n_capabilities; n++) {
733 capabilities[n]->r.rCCCS = save_CCS[n];
734 }
735 #endif
736
737 #ifdef DEBUG
738 // check for memory leaks if DEBUG is on
739 memInventory(DEBUG_gc);
740 #endif
741
742 // ok, GC over: tell the stats department what happened.
743 stat_endGC(cap, gct, live_words, copied,
744 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
745 N, n_gc_threads, par_max_copied, par_tot_copied);
746
747 #if defined(RTS_USER_SIGNALS)
748 if (RtsFlags.MiscFlags.install_signal_handlers) {
749 // unblock signals again
750 unblockUserSignals();
751 }
752 #endif
753
754 RELEASE_SM_LOCK;
755
756 SET_GCT(saved_gct);
757 }
758
759 /* -----------------------------------------------------------------------------
760 Initialise the gc_thread structures.
761 -------------------------------------------------------------------------- */
762
763 #define GC_THREAD_INACTIVE 0
764 #define GC_THREAD_STANDING_BY 1
765 #define GC_THREAD_RUNNING 2
766 #define GC_THREAD_WAITING_TO_CONTINUE 3
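
// Life cycle of gc_thread->wakeup, as driven by the code in this file:
//   GC_THREAD_INACTIVE            -- set in new_gc_thread() and
//                                    releaseGCThreads()
//   GC_THREAD_STANDING_BY         -- set by the worker in gcWorkerThread()
//                                    once it has started; waitForGcThreads()
//                                    and wakeup_gc_threads() expect this
//   GC_THREAD_RUNNING             -- set by wakeup_gc_threads()
//   GC_THREAD_WAITING_TO_CONTINUE -- set by the worker when its GC work is
//                                    done; shutdown_gc_threads() waits for it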
767
768 static void
769 new_gc_thread (uint32_t n, gc_thread *t)
770 {
771 uint32_t g;
772 gen_workspace *ws;
773
774 t->cap = capabilities[n];
775
776 #ifdef THREADED_RTS
777 t->id = 0;
778 initSpinLock(&t->gc_spin);
779 initSpinLock(&t->mut_spin);
780 ACQUIRE_SPIN_LOCK(&t->gc_spin);
781 ACQUIRE_SPIN_LOCK(&t->mut_spin);
782 t->wakeup = GC_THREAD_INACTIVE; // starts INACTIVE; the worker sets it to
783 // STANDING_BY once it has started up, see wakeup_gc_threads
784 #endif
785
786 t->thread_index = n;
787 t->idle = rtsFalse;
788 t->free_blocks = NULL;
789 t->gc_count = 0;
790
791 init_gc_thread(t);
792
793 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
794 {
795 ws = &t->gens[g];
796 ws->gen = &generations[g];
797 ASSERT(g == ws->gen->no);
798 ws->my_gct = t;
799
800 // We want to call
801 // alloc_todo_block(ws,0);
802 // but can't, because it uses gct which isn't set up at this point.
803 // Hence, allocate a block for todo_bd manually:
804 {
805 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
806 initBdescr(bd, ws->gen, ws->gen->to);
807 bd->flags = BF_EVACUATED;
808 bd->u.scan = bd->free = bd->start;
809
810 ws->todo_bd = bd;
811 ws->todo_free = bd->free;
812 ws->todo_lim = bd->start + BLOCK_SIZE_W;
813 }
814
815 ws->todo_q = newWSDeque(128);
816 ws->todo_overflow = NULL;
817 ws->n_todo_overflow = 0;
818 ws->todo_large_objects = NULL;
819
820 ws->part_list = NULL;
821 ws->n_part_blocks = 0;
822 ws->n_part_words = 0;
823
824 ws->scavd_list = NULL;
825 ws->n_scavd_blocks = 0;
826 ws->n_scavd_words = 0;
827 }
828 }
829
830
831 void
832 initGcThreads (uint32_t from USED_IF_THREADS, uint32_t to USED_IF_THREADS)
833 {
834 #if defined(THREADED_RTS)
835 uint32_t i;
836
837 if (from > 0) {
838 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
839 "initGcThreads");
840 } else {
841 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
842 "initGcThreads");
843 }
844
845 for (i = from; i < to; i++) {
846 gc_threads[i] =
847 stgMallocBytes(sizeof(gc_thread) +
848 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
849 "alloc_gc_threads");
850
851 new_gc_thread(i, gc_threads[i]);
852 }
853 #else
854 ASSERT(from == 0 && to == 1);
855 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
856 gc_threads[0] = gct;
857 new_gc_thread(0,gc_threads[0]);
858 #endif
859 }
860
861 void
862 freeGcThreads (void)
863 {
864 uint32_t g;
865 if (gc_threads != NULL) {
866 #if defined(THREADED_RTS)
867 uint32_t i;
868 for (i = 0; i < n_capabilities; i++) {
869 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
870 {
871 freeWSDeque(gc_threads[i]->gens[g].todo_q);
872 }
873 stgFree (gc_threads[i]);
874 }
875 stgFree (gc_threads);
876 #else
877 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
878 {
879 freeWSDeque(gc_threads[0]->gens[g].todo_q);
880 }
881 stgFree (gc_threads);
882 #endif
883 gc_threads = NULL;
884 }
885 }
886
887 /* ----------------------------------------------------------------------------
888 Start GC threads
889 ------------------------------------------------------------------------- */
890
891 static volatile StgWord gc_running_threads;
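
// gc_running_threads counts the GC threads that currently believe they
// have work to do.  Each thread calls inc_running() when it (re)starts
// scavenging and dec_running() when it runs out of local work; the
// parallel phase is over once the counter reaches zero and any_work()
// finds nothing left to steal (see scavenge_until_all_done()).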
892
893 static StgWord
894 inc_running (void)
895 {
896 StgWord new;
897 new = atomic_inc(&gc_running_threads, 1);
898 ASSERT(new <= n_gc_threads);
899 return new;
900 }
901
902 static StgWord
903 dec_running (void)
904 {
905 ASSERT(gc_running_threads != 0);
906 return atomic_dec(&gc_running_threads);
907 }
908
909 static rtsBool
910 any_work (void)
911 {
912 int g;
913 gen_workspace *ws;
914
915 gct->any_work++;
916
917 write_barrier();
918
919 // scavenge objects in compacted generation
920 if (mark_stack_bd != NULL && !mark_stack_empty()) {
921 return rtsTrue;
922 }
923
924 // Check for global work in any gen. We don't need to check for
925 // local work, because we have already exited scavenge_loop(),
926 // which means there is no local work for this thread.
927 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
928 ws = &gct->gens[g];
929 if (ws->todo_large_objects) return rtsTrue;
930 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
931 if (ws->todo_overflow) return rtsTrue;
932 }
933
934 #if defined(THREADED_RTS)
935 if (work_stealing) {
936 uint32_t n;
937 // look for work to steal
938 for (n = 0; n < n_gc_threads; n++) {
939 if (n == gct->thread_index) continue;
940 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
941 ws = &gc_threads[n]->gens[g];
942 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
943 }
944 }
945 }
946 #endif
947
948 gct->no_work++;
949 #if defined(THREADED_RTS)
950 yieldThread();
951 #endif
952
953 return rtsFalse;
954 }
955
956 static void
957 scavenge_until_all_done (void)
958 {
959 DEBUG_ONLY( uint32_t r );
960
961
962 loop:
963 #if defined(THREADED_RTS)
964 if (n_gc_threads > 1) {
965 scavenge_loop();
966 } else {
967 scavenge_loop1();
968 }
969 #else
970 scavenge_loop();
971 #endif
972
973 collect_gct_blocks();
974
975 // scavenge_loop() only exits when there's no work to do
976
977 #ifdef DEBUG
978 r = dec_running();
979 #else
980 dec_running();
981 #endif
982
983 traceEventGcIdle(gct->cap);
984
985 debugTrace(DEBUG_gc, "%d GC threads still running", r);
986
987 while (gc_running_threads != 0) {
988 // usleep(1);
989 if (any_work()) {
990 inc_running();
991 traceEventGcWork(gct->cap);
992 goto loop;
993 }
994 // any_work() does not remove the work from the queue, it
995 // just checks for the presence of work. If we find any,
996 // then we increment gc_running_threads and go back to
997 // scavenge_loop() to perform any pending work.
998 }
999
1000 traceEventGcDone(gct->cap);
1001 }
1002
1003 #if defined(THREADED_RTS)
1004
1005 void
1006 gcWorkerThread (Capability *cap)
1007 {
1008 gc_thread *saved_gct;
1009
1010 // necessary if we stole a callee-saves register for gct:
1011 saved_gct = gct;
1012
1013 SET_GCT(gc_threads[cap->no]);
1014 gct->id = osThreadId();
1015
1016 // Wait until we're told to wake up
1017 RELEASE_SPIN_LOCK(&gct->mut_spin);
1018 // yieldThread();
1019 // Strangely, adding a yieldThread() here makes the CPU time
1020 // measurements more accurate on Linux, perhaps because it syncs
1021 // the CPU time across the multiple cores. Without this, CPU time
1022 // is heavily skewed towards GC rather than MUT.
1023 gct->wakeup = GC_THREAD_STANDING_BY;
1024 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1025 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1026
1027 init_gc_thread(gct);
1028
1029 traceEventGcWork(gct->cap);
1030
1031 // Every thread evacuates some roots.
1032 gct->evac_gen_no = 0;
1033 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1034 scavenge_capability_mut_lists(cap);
1035
1036 scavenge_until_all_done();
1037
1038 #ifdef THREADED_RTS
1039 // Now that the whole heap is marked, we discard any sparks that
1040 // were found to be unreachable. The main GC thread is currently
1041 // marking heap reachable via weak pointers, so it is
1042 // non-deterministic whether a spark will be retained if it is
1043 // only reachable via weak pointers. To fix this problem would
1044 // require another GC barrier, which is too high a price.
1045 pruneSparkQueue(cap);
1046 #endif
1047
1048 // Wait until we're told to continue
1049 RELEASE_SPIN_LOCK(&gct->gc_spin);
1050 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1051 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1052 gct->thread_index);
1053 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1054 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1055
1056 SET_GCT(saved_gct);
1057 }
1058
1059 #endif
1060
1061 #if defined(THREADED_RTS)
1062
1063 void
1064 waitForGcThreads (Capability *cap USED_IF_THREADS)
1065 {
1066 const uint32_t n_threads = n_capabilities;
1067 const uint32_t me = cap->no;
1068 uint32_t i, j;
1069 rtsBool retry = rtsTrue;
1070
1071 stat_startGCSync(gc_threads[cap->no]);
1072
1073 while(retry) {
1074 for (i=0; i < n_threads; i++) {
1075 if (i == me || gc_threads[i]->idle) continue;
1076 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1077 prodCapability(capabilities[i], cap->running_task);
1078 }
1079 }
1080 for (j=0; j < 10; j++) {
1081 retry = rtsFalse;
1082 for (i=0; i < n_threads; i++) {
1083 if (i == me || gc_threads[i]->idle) continue;
1084 write_barrier();
1085 interruptCapability(capabilities[i]);
1086 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1087 retry = rtsTrue;
1088 }
1089 }
1090 if (!retry) break;
1091 yieldThread();
1092 }
1093 }
1094 }
1095
1096 #endif // THREADED_RTS
1097
1098 static void
1099 start_gc_threads (void)
1100 {
1101 #if defined(THREADED_RTS)
1102 gc_running_threads = 0;
1103 #endif
1104 }
1105
1106 static void
1107 wakeup_gc_threads (uint32_t me USED_IF_THREADS)
1108 {
1109 #if defined(THREADED_RTS)
1110 uint32_t i;
1111
1112 if (n_gc_threads == 1) return;
1113
1114 for (i=0; i < n_gc_threads; i++) {
1115 if (i == me || gc_threads[i]->idle) continue;
1116 inc_running();
1117 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1118 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1119
1120 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1121 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1122 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1123 }
1124 #endif
1125 }
1126
1127 // After GC is complete, we must wait for all GC threads to reach
1128 // GC_THREAD_WAITING_TO_CONTINUE, otherwise they may still be executing inside
1129 // any_work(), and may even remain awake until the next GC starts.
1130 static void
1131 shutdown_gc_threads (uint32_t me USED_IF_THREADS)
1132 {
1133 #if defined(THREADED_RTS)
1134 uint32_t i;
1135
1136 if (n_gc_threads == 1) return;
1137
1138 for (i=0; i < n_gc_threads; i++) {
1139 if (i == me || gc_threads[i]->idle) continue;
1140 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
1141 busy_wait_nop();
1142 write_barrier();
1143 }
1144 }
1145 #endif
1146 }
1147
1148 #if defined(THREADED_RTS)
1149 void
1150 releaseGCThreads (Capability *cap USED_IF_THREADS)
1151 {
1152 const uint32_t n_threads = n_capabilities;
1153 const uint32_t me = cap->no;
1154 uint32_t i;
1155 for (i=0; i < n_threads; i++) {
1156 if (i == me || gc_threads[i]->idle) continue;
1157 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1158 barf("releaseGCThreads");
1159
1160 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1161 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1162 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1163 }
1164 }
1165 #endif
1166
1167 /* ----------------------------------------------------------------------------
1168 Initialise a generation that is to be collected
1169 ------------------------------------------------------------------------- */
1170
1171 static void
1172 prepare_collected_gen (generation *gen)
1173 {
1174 uint32_t i, g, n;
1175 gen_workspace *ws;
1176 bdescr *bd, *next;
1177
1178 // Throw away the current mutable list. Invariant: the mutable
1179 // list always has at least one block; this means we can avoid a
1180 // check for NULL in recordMutable().
1181 g = gen->no;
1182 if (g != 0) {
1183 for (i = 0; i < n_capabilities; i++) {
1184 freeChain(capabilities[i]->mut_lists[g]);
1185 capabilities[i]->mut_lists[g] = allocBlock();
1186 }
1187 }
1188
1189 gen = &generations[g];
1190 ASSERT(gen->no == g);
1191
1192 // we'll construct a new list of threads in this generation
1193 // during GC; save the current list as old_threads.
1194 gen->old_threads = gen->threads;
1195 gen->threads = END_TSO_QUEUE;
1196
1197 // deprecate the existing blocks
1198 gen->old_blocks = gen->blocks;
1199 gen->n_old_blocks = gen->n_blocks;
1200 gen->blocks = NULL;
1201 gen->n_blocks = 0;
1202 gen->n_words = 0;
1203 gen->live_estimate = 0;
1204
1205 // initialise the large object queues.
1206 ASSERT(gen->scavenged_large_objects == NULL);
1207 ASSERT(gen->n_scavenged_large_blocks == 0);
1208
1209 // grab all the partial blocks stashed in the gc_thread workspaces and
1210 // move them to the old_blocks list of this gen.
1211 for (n = 0; n < n_capabilities; n++) {
1212 ws = &gc_threads[n]->gens[gen->no];
1213
1214 for (bd = ws->part_list; bd != NULL; bd = next) {
1215 next = bd->link;
1216 bd->link = gen->old_blocks;
1217 gen->old_blocks = bd;
1218 gen->n_old_blocks += bd->blocks;
1219 }
1220 ws->part_list = NULL;
1221 ws->n_part_blocks = 0;
1222 ws->n_part_words = 0;
1223
1224 ASSERT(ws->scavd_list == NULL);
1225 ASSERT(ws->n_scavd_blocks == 0);
1226 ASSERT(ws->n_scavd_words == 0);
1227
1228 if (ws->todo_free != ws->todo_bd->start) {
1229 ws->todo_bd->free = ws->todo_free;
1230 ws->todo_bd->link = gen->old_blocks;
1231 gen->old_blocks = ws->todo_bd;
1232 gen->n_old_blocks += ws->todo_bd->blocks;
1233 alloc_todo_block(ws,0); // always has one block.
1234 }
1235 }
1236
1237 // mark the small objects as from-space
1238 for (bd = gen->old_blocks; bd; bd = bd->link) {
1239 bd->flags &= ~BF_EVACUATED;
1240 }
1241
1242 // mark the large objects as from-space
1243 for (bd = gen->large_objects; bd; bd = bd->link) {
1244 bd->flags &= ~BF_EVACUATED;
1245 }
1246
1247 // for a compacted generation, we need to allocate the bitmap
1248 if (gen->mark) {
1249 StgWord bitmap_size; // in bytes
1250 bdescr *bitmap_bdescr;
1251 StgWord *bitmap;
1252
1253 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1254
1255 if (bitmap_size > 0) {
1256 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1257 / BLOCK_SIZE);
1258 gen->bitmap = bitmap_bdescr;
1259 bitmap = bitmap_bdescr->start;
1260
1261 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1262 bitmap_size, bitmap);
1263
1264 // don't forget to fill it with zeros!
1265 memset(bitmap, 0, bitmap_size);
1266
1267 // For each block in this step, point to its bitmap from the
1268 // block descriptor.
1269 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1270 bd->u.bitmap = bitmap;
1271 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1272
1273 // Also at this point we set the BF_MARKED flag
1274 // for this block. The invariant is that
1275 // BF_MARKED is always unset, except during GC
1276 // when it is set on those blocks which will be
1277 // compacted.
1278 if (!(bd->flags & BF_FRAGMENTED)) {
1279 bd->flags |= BF_MARKED;
1280 }
1281
1282 // BF_SWEPT should be marked only for blocks that are being
1283 // collected in sweep()
1284 bd->flags &= ~BF_SWEPT;
1285 }
1286 }
1287 }
1288 }
1289
1290
1291 /* ----------------------------------------------------------------------------
1292 Save the mutable lists in saved_mut_lists
1293 ------------------------------------------------------------------------- */
1294
1295 static void
1296 stash_mut_list (Capability *cap, uint32_t gen_no)
1297 {
1298 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1299 cap->mut_lists[gen_no] = allocBlock_sync();
1300 }
1301
1302 /* ----------------------------------------------------------------------------
1303 Initialise a generation that is *not* to be collected
1304 ------------------------------------------------------------------------- */
1305
1306 static void
1307 prepare_uncollected_gen (generation *gen)
1308 {
1309 uint32_t i;
1310
1311
1312 ASSERT(gen->no > 0);
1313
1314 // save the current mutable lists for this generation, and
1315 // allocate a fresh block for each one. We'll traverse these
1316 // mutable lists as roots early on in the GC.
1317 for (i = 0; i < n_capabilities; i++) {
1318 stash_mut_list(capabilities[i], gen->no);
1319 }
1320
1321 ASSERT(gen->scavenged_large_objects == NULL);
1322 ASSERT(gen->n_scavenged_large_blocks == 0);
1323 }
1324
1325 /* -----------------------------------------------------------------------------
1326 Collect the completed blocks from a GC thread and attach them to
1327 the generation.
1328 -------------------------------------------------------------------------- */
1329
1330 static void
1331 collect_gct_blocks (void)
1332 {
1333 uint32_t g;
1334 gen_workspace *ws;
1335 bdescr *bd, *prev;
1336
1337 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1338 ws = &gct->gens[g];
1339
1340 // there may still be a block attached to ws->todo_bd;
1341 // leave it there to use next time.
1342
1343 if (ws->scavd_list != NULL) {
1344 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1345
1346 ASSERT(gct->scan_bd == NULL);
1347 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1348
1349 prev = NULL;
1350 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1351 prev = bd;
1352 }
1353 if (prev != NULL) {
1354 prev->link = ws->gen->blocks;
1355 ws->gen->blocks = ws->scavd_list;
1356 }
1357 ws->gen->n_blocks += ws->n_scavd_blocks;
1358 ws->gen->n_words += ws->n_scavd_words;
1359
1360 ws->scavd_list = NULL;
1361 ws->n_scavd_blocks = 0;
1362 ws->n_scavd_words = 0;
1363
1364 RELEASE_SPIN_LOCK(&ws->gen->sync);
1365 }
1366 }
1367 }
1368
1369 /* -----------------------------------------------------------------------------
1370 During mutation, any blocks that are filled by allocatePinned() are
1371 stashed on the local pinned_object_blocks list, to avoid needing to
1372 take a global lock. Here we collect those blocks from the
1373 cap->pinned_object_blocks lists and put them on the
1374 main g0->large_objects list.
1375
1376 No value is returned; the blocks are simply linked onto the
1377 g0->large_objects list.
1378 -------------------------------------------------------------------------- */
1379
1380 static void
1381 collect_pinned_object_blocks (void)
1382 {
1383 uint32_t n;
1384 bdescr *bd, *prev;
1385
1386 for (n = 0; n < n_capabilities; n++) {
1387 prev = NULL;
1388 for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1389 prev = bd;
1390 }
1391 if (prev != NULL) {
1392 prev->link = g0->large_objects;
1393 if (g0->large_objects != NULL) {
1394 g0->large_objects->u.back = prev;
1395 }
1396 g0->large_objects = capabilities[n]->pinned_object_blocks;
1397 capabilities[n]->pinned_object_blocks = 0;
1398 }
1399 }
1400 }
1401
1402 /* -----------------------------------------------------------------------------
1403 Initialise a gc_thread before GC
1404 -------------------------------------------------------------------------- */
1405
1406 static void
1407 init_gc_thread (gc_thread *t)
1408 {
1409 t->static_objects = END_OF_STATIC_OBJECT_LIST;
1410 t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST;
1411 t->scan_bd = NULL;
1412 t->mut_lists = t->cap->mut_lists;
1413 t->evac_gen_no = 0;
1414 t->failed_to_evac = rtsFalse;
1415 t->eager_promotion = rtsTrue;
1416 t->thunk_selector_depth = 0;
1417 t->copied = 0;
1418 t->scanned = 0;
1419 t->any_work = 0;
1420 t->no_work = 0;
1421 t->scav_find_work = 0;
1422 }
1423
1424 /* -----------------------------------------------------------------------------
1425 Function we pass to evacuate roots.
1426 -------------------------------------------------------------------------- */
1427
1428 static void
1429 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1430 {
1431 // we stole a register for gct, but this function is called from
1432 // *outside* the GC where the register variable is not in effect,
1433 // so we need to save and restore it here. NB. only call
1434 // mark_root() from the main GC thread, otherwise gct will be
1435 // incorrect.
1436 #if defined(THREADED_RTS)
1437 gc_thread *saved_gct;
1438 saved_gct = gct;
1439 #endif
1440 SET_GCT(user);
1441
1442 evacuate(root);
1443
1444 SET_GCT(saved_gct);
1445 }
1446
1447 /* ----------------------------------------------------------------------------
1448 Reset the sizes of the older generations when we do a major
1449 collection.
1450
1451 CURRENT STRATEGY: make all generations except zero the same size.
1452 We have to stay within the maximum heap size, and leave a certain
1453 percentage of the maximum heap size available to allocate into.
1454 ------------------------------------------------------------------------- */
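
/* A worked example (illustrative numbers only): with oldGenFactor F = 2,
 * 3 generations and about 1000 blocks live in the oldest generation,
 * each old generation gets max_blocks = max(2 * 1000, minOldGenSize).
 * If a maximum heap size M is set and the copying-collection bound
 * size * (gens-1) * 2 + min_alloc exceeds M, size is scaled back to
 * (M - min_alloc) / ((gens-1) * 2), as computed in the code below.
 */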
1455
1456 static void
1457 resize_generations (void)
1458 {
1459 uint32_t g;
1460
1461 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1462 W_ live, size, min_alloc, words;
1463 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1464 const W_ gens = RtsFlags.GcFlags.generations;
1465
1466 // live in the oldest generations
1467 if (oldest_gen->live_estimate != 0) {
1468 words = oldest_gen->live_estimate;
1469 } else {
1470 words = oldest_gen->n_words;
1471 }
1472 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1473 oldest_gen->n_large_blocks;
1474
1475 // default max size for all generations except zero
1476 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1477 RtsFlags.GcFlags.minOldGenSize);
1478
1479 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1480 if (max > 0) {
1481 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1482 } else {
1483 RtsFlags.GcFlags.heapSizeSuggestion = size;
1484 }
1485 }
1486
1487 // minimum size for generation zero
1488 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1489 RtsFlags.GcFlags.minAllocAreaSize);
1490
1491 // Auto-enable compaction when the residency reaches a
1492 // certain percentage of the maximum heap size (default: 30%).
1493 if (RtsFlags.GcFlags.compact ||
1494 (max > 0 &&
1495 oldest_gen->n_blocks >
1496 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1497 oldest_gen->mark = 1;
1498 oldest_gen->compact = 1;
1499 // debugBelch("compaction: on\n", live);
1500 } else {
1501 oldest_gen->mark = 0;
1502 oldest_gen->compact = 0;
1503 // debugBelch("compaction: off\n", live);
1504 }
1505
1506 if (RtsFlags.GcFlags.sweep) {
1507 oldest_gen->mark = 1;
1508 }
1509
1510 // if we're going to go over the maximum heap size, reduce the
1511 // size of the generations accordingly. The calculation is
1512 // different if compaction is turned on, because we don't need
1513 // to double the space required to collect the old generation.
1514 if (max != 0) {
1515
1516 // this test is necessary to ensure that the calculations
1517 // below don't have any negative results - we're working
1518 // with unsigned values here.
1519 if (max < min_alloc) {
1520 heapOverflow();
1521 }
1522
1523 if (oldest_gen->compact) {
1524 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1525 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1526 }
1527 } else {
1528 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1529 size = (max - min_alloc) / ((gens - 1) * 2);
1530 }
1531 }
1532
1533 if (size < live) {
1534 heapOverflow();
1535 }
1536 }
1537
1538 #if 0
1539 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1540 min_alloc, size, max);
1541 #endif
1542
1543 for (g = 0; g < gens; g++) {
1544 generations[g].max_blocks = size;
1545 }
1546 }
1547 }
1548
1549 /* -----------------------------------------------------------------------------
1550 Calculate the new size of the nursery, and resize it.
1551 -------------------------------------------------------------------------- */
1552
1553 static void
1554 resize_nursery (void)
1555 {
1556 const StgWord min_nursery =
1557 RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;
1558
1559 if (RtsFlags.GcFlags.generations == 1)
1560 { // Two-space collector:
1561 W_ blocks;
1562
1563 /* set up a new nursery. Allocate a nursery size based on a
1564 * function of the amount of live data (by default a factor of 2)
1565 * Use the blocks from the old nursery if possible, freeing up any
1566 * left over blocks.
1567 *
1568 * If we get near the maximum heap size, then adjust our nursery
1569 * size accordingly. If the nursery is the same size as the live
1570 * data (L), then we need 3L bytes. We can reduce the size of the
1571 * nursery to bring the required memory down near 2L bytes.
1572 *
1573 * A normal 2-space collector would need 4L bytes to give the same
1574 * performance we get from 3L bytes, reducing to the same
1575 * performance at 2L bytes.
1576 */
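        /* For example (illustrative numbers): with maxHeapSize = 10000
         * blocks and 3000 blocks of live data, 2 * 3000 blocks are kept
         * for the two semi-spaces and the nursery gets the remaining
         * 4000 blocks; the pc_free check below rejects the case where
         * that remainder is too small a fraction of the heap. */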
1577 blocks = generations[0].n_blocks;
1578
1579 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1580 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1581 RtsFlags.GcFlags.maxHeapSize )
1582 {
1583 long adjusted_blocks; // signed on purpose
1584 int pc_free;
1585
1586 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1587
1588 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1589 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1590
1591 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1592 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1593 {
1594 heapOverflow();
1595 }
1596 blocks = adjusted_blocks;
1597 }
1598 else
1599 {
1600 blocks *= RtsFlags.GcFlags.oldGenFactor;
1601 if (blocks < min_nursery)
1602 {
1603 blocks = min_nursery;
1604 }
1605 }
1606 resizeNurseries(blocks);
1607 }
1608 else // Generational collector
1609 {
1610 /*
1611 * If the user has given us a suggested heap size, adjust our
1612 * allocation area to make best use of the memory available.
1613 */
1614 if (RtsFlags.GcFlags.heapSizeSuggestion)
1615 {
1616 long blocks;
1617 StgWord needed;
1618
1619 calcNeeded(rtsFalse, &needed); // approx blocks needed at next GC
1620
1621 /* Guess how much will be live in generation 0 step 0 next time.
1622 * A good approximation is obtained by finding the
1623 * percentage of g0 that was live at the last minor GC.
1624 *
1625 * We have an accurate figure for the amount of copied data in
1626 * 'copied', but we must convert this to a number of blocks, with
1627 * a small adjustment for estimated slop at the end of a block
1628 * (- 10 words).
1629 */
1630 if (N == 0)
1631 {
1632 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1633 / countNurseryBlocks();
1634 }
1635
1636 /* Estimate a size for the allocation area based on the
1637 * information available. We might end up going slightly under
1638 * or over the suggested heap size, but we should be pretty
1639 * close on average.
1640 *
1641 * Formula: suggested - needed
1642 * ----------------------------
1643 * 1 + g0_pcnt_kept/100
1644 *
1645 * where 'needed' is the amount of memory needed at the next
1646 * collection for collecting all gens except g0.
1647 */
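            /* For example (illustrative numbers): with a suggested heap of
             * 100000 blocks, needed = 20000 and g0_pcnt_kept = 25, the
             * allocation area comes out at (80000 * 100) / 125 = 64000
             * blocks. */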
1648 blocks =
1649 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1650 (100 + (long)g0_pcnt_kept);
1651
1652 if (blocks < (long)min_nursery) {
1653 blocks = min_nursery;
1654 }
1655
1656 resizeNurseries((W_)blocks);
1657 }
1658 else
1659 {
1660 // we might have added extra blocks to the nursery, so
1661 // resize back to the original size again.
1662 resizeNurseriesFixed();
1663 }
1664 }
1665 }
1666
1667 /* -----------------------------------------------------------------------------
1668 Sanity code for CAF garbage collection.
1669
1670 With DEBUG turned on, we manage a CAF list in addition to the SRT
1671 mechanism. After GC, we run down the CAF list and blackhole any
1672 CAFs which have been garbage collected. This means we get an error
1673 whenever the program tries to enter a garbage collected CAF.
1674
1675 Any garbage collected CAFs are taken off the CAF list at the same
1676 time.
1677 -------------------------------------------------------------------------- */
1678
1679 #if defined(DEBUG)
1680
1681 static void gcCAFs(void)
1682 {
1683 StgIndStatic *p, *prev;
1684
1685 const StgInfoTable *info;
1686 uint32_t i;
1687
1688 i = 0;
1689 p = debug_caf_list;
1690 prev = NULL;
1691
1692 for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST;
1693 p = (StgIndStatic*)p->saved_info) {
1694
1695 info = get_itbl((StgClosure*)p);
1696 ASSERT(info->type == IND_STATIC);
1697
1698 if (p->static_link == NULL) {
1699 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
1700 SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
1701 if (prev == NULL) {
1702 debug_caf_list = (StgIndStatic*)p->saved_info;
1703 } else {
1704 prev->saved_info = p->saved_info;
1705 }
1706 } else {
1707 prev = p;
1708 i++;
1709 }
1710 }
1711
1712 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1713 }
1714 #endif