rts/sm/GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Storage.h"
32 #include "RtsUtils.h"
33 #include "Apply.h"
34 #include "Updates.h"
35 #include "Stats.h"
36 #include "Schedule.h"
37 #include "Sanity.h"
38 #include "BlockAlloc.h"
39 #include "ProfHeap.h"
40 #include "Weak.h"
41 #include "Prelude.h"
42 #include "RtsSignals.h"
43 #include "STM.h"
44 #include "Trace.h"
45 #include "RetainerProfile.h"
46 #include "LdvProfile.h"
47 #include "RaiseAsync.h"
48 #include "Papi.h"
49 #include "Stable.h"
50 #include "CheckUnload.h"
51
52 #include <string.h> // for memset()
53 #include <unistd.h>
54
55 /* -----------------------------------------------------------------------------
56 Global variables
57 -------------------------------------------------------------------------- */
58
59 /* STATIC OBJECT LIST.
60 *
61 * During GC:
62 * We maintain a linked list of static objects that are still live.
63 * The requirements for this list are:
64 *
65 * - we need to scan the list while adding to it, in order to
66 * scavenge all the static objects (in the same way that
67 * breadth-first scavenging works for dynamic objects).
68 *
69 * - we need to be able to tell whether an object is already on
70 * the list, to break loops.
71 *
72 * Each static object has a "static link field", which we use for
73 * linking objects on to the list. We use a stack-type list, consing
74 * objects on the front as they are added (this means that the
75 * scavenge phase is depth-first, not breadth-first, but that
76 * shouldn't matter).
77 *
78 * A separate list is kept for objects that have been scavenged
79 * already - this is so that we can zero all the marks afterwards.
80 *
81 * An object is on the list if its static link field is non-zero; this
82 * means that we have to mark the end of the list with '1', not NULL.
83 *
84 * Extra notes for generational GC:
85 *
86 * Each generation has a static object list associated with it. When
87 * collecting generations up to N, we treat the static object lists
88 * from generations > N as roots.
89 *
90 * We build up a static object list while collecting generations 0..N,
91 * which is then appended to the static object list of generation N+1.
92 */
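/* Illustration (a sketch only, not used by the collector itself):
 * traversing such a list just follows the static link fields until the
 * end marker is reached, in the same way as zero_static_object_list()
 * below:
 *
 *   StgClosure *p, *link;
 *   for (p = list_head; p != END_OF_STATIC_LIST; p = link) {
 *       link = *STATIC_LINK(get_itbl(p), p);
 *       // ... visit p ...
 *   }
 *
 * where list_head is a placeholder name standing for, e.g.,
 * gct->static_objects or gct->scavenged_static_objects.
 */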
93
94 /* N is the oldest generation being collected, where the generations
95 * are numbered starting at 0. A major GC (indicated by the major_gc
96 * flag) is when we're collecting all generations. We only attempt to
97 * deal with static objects and GC CAFs when doing a major GC.
98 */
99 nat N;
100 rtsBool major_gc;
101
102 /* Data used for allocation area sizing.
103 */
104 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
105
106 /* Mut-list stats */
107 #ifdef DEBUG
108 nat mutlist_MUTVARS,
109 mutlist_MUTARRS,
110 mutlist_MVARS,
111 mutlist_TVAR,
112 mutlist_TVAR_WATCH_QUEUE,
113 mutlist_TREC_CHUNK,
114 mutlist_TREC_HEADER,
115 mutlist_ATOMIC_INVARIANT,
116 mutlist_INVARIANT_CHECK_QUEUE,
117 mutlist_OTHERS;
118 #endif
119
120 /* Thread-local data for each GC thread
121 */
122 gc_thread **gc_threads = NULL;
123
124 #if !defined(THREADED_RTS)
125 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
126 #endif
127
128 // Number of threads running in *this* GC. Affects how many
129 // step->todos[] lists we have to look in to find work.
130 nat n_gc_threads;
131
132 // For stats:
133 long copied; // *words* copied & scavenged during this GC
134
135 rtsBool work_stealing;
136
137 DECLARE_GCT
138
139 /* -----------------------------------------------------------------------------
140 Static function declarations
141 -------------------------------------------------------------------------- */
142
143 static void mark_root (void *user, StgClosure **root);
144 static void zero_static_object_list (StgClosure* first_static);
145 static void prepare_collected_gen (generation *gen);
146 static void prepare_uncollected_gen (generation *gen);
147 static void init_gc_thread (gc_thread *t);
148 static void resize_generations (void);
149 static void resize_nursery (void);
150 static void start_gc_threads (void);
151 static void scavenge_until_all_done (void);
152 static StgWord inc_running (void);
153 static StgWord dec_running (void);
154 static void wakeup_gc_threads (nat me);
155 static void shutdown_gc_threads (nat me);
156 static void collect_gct_blocks (void);
157 static void collect_pinned_object_blocks (void);
158
159 #if defined(DEBUG)
160 static void gcCAFs (void);
161 #endif
162
163 /* -----------------------------------------------------------------------------
164 The mark stack.
165 -------------------------------------------------------------------------- */
166
167 bdescr *mark_stack_top_bd; // topmost block in the mark stack
168 bdescr *mark_stack_bd; // current block in the mark stack
169 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
170
171 /* -----------------------------------------------------------------------------
172 GarbageCollect: the main entry point to the garbage collector.
173
174 The collect_gen parameter is obtained by calling calcNeeded().
175
176 Locks held: all capabilities are held throughout GarbageCollect().
177 -------------------------------------------------------------------------- */
178
179 void
180 GarbageCollect (nat collect_gen,
181 rtsBool do_heap_census,
182 nat gc_type USED_IF_THREADS,
183 Capability *cap)
184 {
185 bdescr *bd;
186 generation *gen;
187 StgWord live_blocks, live_words, par_max_copied, par_tot_copied;
188 #if defined(THREADED_RTS)
189 gc_thread *saved_gct;
190 #endif
191 nat g, n;
192
193 // necessary if we stole a callee-saves register for gct:
194 #if defined(THREADED_RTS)
195 saved_gct = gct;
196 #endif
197
198 #ifdef PROFILING
199 CostCentreStack *save_CCS[n_capabilities];
200 #endif
201
202 ACQUIRE_SM_LOCK;
203
204 #if defined(RTS_USER_SIGNALS)
205 if (RtsFlags.MiscFlags.install_signal_handlers) {
206 // block signals
207 blockUserSignals();
208 }
209 #endif
210
211 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
212 // otherwise adjust the padding in gen_workspace.
213
214 // this is the main thread
215 SET_GCT(gc_threads[cap->no]);
216
217 // tell the stats department that we've started a GC
218 stat_startGC(cap, gct);
219
220 // lock the StablePtr table
221 stableLock();
222
223 #ifdef DEBUG
224 mutlist_MUTVARS = 0;
225 mutlist_MUTARRS = 0;
226 mutlist_MVARS = 0;
227 mutlist_TVAR = 0;
228 mutlist_TVAR_WATCH_QUEUE = 0;
229 mutlist_TREC_CHUNK = 0;
230 mutlist_TREC_HEADER = 0;
231 mutlist_ATOMIC_INVARIANT = 0;
232 mutlist_INVARIANT_CHECK_QUEUE = 0;
233 mutlist_OTHERS = 0;
234 #endif
235
236 // attribute any costs to CCS_GC
237 #ifdef PROFILING
238 for (n = 0; n < n_capabilities; n++) {
239 save_CCS[n] = capabilities[n]->r.rCCCS;
240 capabilities[n]->r.rCCCS = CCS_GC;
241 }
242 #endif
243
244 /* Figure out which generation to collect
245 */
246 N = collect_gen;
247 major_gc = (N == RtsFlags.GcFlags.generations-1);
248
249 #if defined(THREADED_RTS)
250 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
251 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
252 // It's not always a good idea to do load balancing in parallel
253 // GC. In particular, for a parallel program we don't want to
254 // lose locality by moving cached data into another CPU's cache
255 // (this effect can be quite significant).
256 //
257 // We could have a more complex way to determine whether to do
258 // work stealing or not, e.g. it might be a good idea to do it
259 // if the heap is big. For now, we just turn it on or off with
260 // a flag.
261 #endif
262
263 /* Start threads, so they can be spinning up while we finish initialisation.
264 */
265 start_gc_threads();
266
267 #if defined(THREADED_RTS)
268 /* How many threads will be participating in this GC?
269 * We don't try to parallelise minor GCs (unless the user asks for
270 * it with +RTS -gn0), or mark/compact/sweep GC.
271 */
272 if (gc_type == SYNC_GC_PAR) {
273 n_gc_threads = n_capabilities;
274 } else {
275 n_gc_threads = 1;
276 }
277 #else
278 n_gc_threads = 1;
279 #endif
280
281 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
282 N, n_gc_threads);
283
284 #ifdef DEBUG
285 // check for memory leaks if DEBUG is on
286 memInventory(DEBUG_gc);
287 #endif
288
289 // do this *before* we start scavenging
290 collectFreshWeakPtrs();
291
292 // check sanity *before* GC
293 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
294
295 // gather blocks allocated using allocatePinned() from each capability
296 // and put them on the g0->large_object list.
297 collect_pinned_object_blocks();
298
299 // Initialise all the generations/steps that we're collecting.
300 for (g = 0; g <= N; g++) {
301 prepare_collected_gen(&generations[g]);
302 }
303 // Initialise all the generations/steps that we're *not* collecting.
304 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
305 prepare_uncollected_gen(&generations[g]);
306 }
307
308 // Prepare this gc_thread
309 init_gc_thread(gct);
310
311 /* Allocate a mark stack if we're doing a major collection.
312 */
313 if (major_gc && oldest_gen->mark) {
314 mark_stack_bd = allocBlock();
315 mark_stack_top_bd = mark_stack_bd;
316 mark_stack_bd->link = NULL;
317 mark_stack_bd->u.back = NULL;
318 mark_sp = mark_stack_bd->start;
319 } else {
320 mark_stack_bd = NULL;
321 mark_stack_top_bd = NULL;
322 mark_sp = NULL;
323 }
324
325 /* -----------------------------------------------------------------------
326 * follow all the roots that we know about:
327 */
328
329 // the main thread is running: this prevents any other threads from
330 // exiting prematurely, so we can start them now.
331 // NB. do this after the mutable lists have been saved above, otherwise
332 // the other GC threads will be writing into the old mutable lists.
333 inc_running();
334 wakeup_gc_threads(gct->thread_index);
335
336 traceEventGcWork(gct->cap);
337
338 // scavenge the capability-private mutable lists. This isn't part
339 // of markSomeCapabilities() because markSomeCapabilities() can only
340 // call back into the GC via mark_root() (due to the gct register
341 // variable).
342 if (n_gc_threads == 1) {
343 for (n = 0; n < n_capabilities; n++) {
344 #if defined(THREADED_RTS)
345 scavenge_capability_mut_Lists1(capabilities[n]);
346 #else
347 scavenge_capability_mut_lists(capabilities[n]);
348 #endif
349 }
350 } else {
351 scavenge_capability_mut_lists(gct->cap);
352 for (n = 0; n < n_capabilities; n++) {
353 if (gc_threads[n]->idle) {
354 markCapability(mark_root, gct, capabilities[n],
355 rtsTrue/*don't mark sparks*/);
356 scavenge_capability_mut_lists(capabilities[n]);
357 }
358 }
359 }
360
361 // follow roots from the CAF list (used by GHCi)
362 gct->evac_gen_no = 0;
363 markCAFs(mark_root, gct);
364
365 // follow all the roots that the application knows about.
366 gct->evac_gen_no = 0;
367 if (n_gc_threads == 1) {
368 for (n = 0; n < n_capabilities; n++) {
369 markCapability(mark_root, gct, capabilities[n],
370 rtsTrue/*don't mark sparks*/);
371 }
372 } else {
373 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
374 }
375
376 markScheduler(mark_root, gct);
377
378 #if defined(RTS_USER_SIGNALS)
379 // mark the signal handlers (signals should be already blocked)
380 markSignalHandlers(mark_root, gct);
381 #endif
382
383 // Mark the weak pointer list, and prepare to detect dead weak pointers.
384 markWeakPtrList();
385 initWeakForGC();
386
387 // Mark the stable pointer table.
388 markStableTables(mark_root, gct);
389
390 /* -------------------------------------------------------------------------
391 * Repeatedly scavenge all the areas we know about until there's no
392 * more scavenging to be done.
393 */
394 for (;;)
395 {
396 scavenge_until_all_done();
397 // The other threads are now stopped. We might recurse back to
398 // here, but from now on this is the only thread.
399
400 // must be last... invariant is that everything is fully
401 // scavenged at this point.
402 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
403 inc_running();
404 continue;
405 }
406
407 // If we get to here, there's really nothing left to do.
408 break;
409 }
410
411 shutdown_gc_threads(gct->thread_index);
412
413 // Now see which stable names are still alive.
414 gcStableTables();
415
416 #ifdef THREADED_RTS
417 if (n_gc_threads == 1) {
418 for (n = 0; n < n_capabilities; n++) {
419 pruneSparkQueue(capabilities[n]);
420 }
421 } else {
422 for (n = 0; n < n_capabilities; n++) {
423 if (n == cap->no || gc_threads[n]->idle) {
424 pruneSparkQueue(capabilities[n]);
425 }
426 }
427 }
428 #endif
429
430 #ifdef PROFILING
431 // We call processHeapClosureForDead() on every closure destroyed during
432 // the current garbage collection, so we invoke LdvCensusForDead().
433 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
434 || RtsFlags.ProfFlags.bioSelector != NULL) {
435 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
436 LdvCensusForDead(N);
437 ACQUIRE_SM_LOCK;
438 }
439 #endif
440
441 // NO MORE EVACUATION AFTER THIS POINT!
442
443 // Finally: compact or sweep the oldest generation.
444 if (major_gc && oldest_gen->mark) {
445 if (oldest_gen->compact)
446 compact(gct->scavenged_static_objects);
447 else
448 sweep(oldest_gen);
449 }
450
451 copied = 0;
452 par_max_copied = 0;
453 par_tot_copied = 0;
454 {
455 nat i;
456 for (i=0; i < n_gc_threads; i++) {
457 if (n_gc_threads > 1) {
458 debugTrace(DEBUG_gc,"thread %d:", i);
459 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
460 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
461 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
462 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
463 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
464 }
465 copied += gc_threads[i]->copied;
466 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
467 }
468 par_tot_copied = copied;
469 if (n_gc_threads == 1) {
470 par_max_copied = 0;
471 par_tot_copied = 0;
472 }
473 }
474
475 // Run through all the generations/steps and tidy up.
476 // We're going to:
477 // - count the amount of "live" data (live_words, live_blocks)
478 // - count the amount of "copied" data in this GC (copied)
479 // - free from-space
480 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
481 //
482 live_words = 0;
483 live_blocks = 0;
484
485 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
486
487 if (g == N) {
488 generations[g].collections++; // for stats
489 if (n_gc_threads > 1) generations[g].par_collections++;
490 }
491
492 // Count the mutable list as bytes "copied" for the purposes of
493 // stats. Every mutable list is copied during every GC.
494 if (g > 0) {
495 W_ mut_list_size = 0;
496 for (n = 0; n < n_capabilities; n++) {
497 mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
498 }
499 copied += mut_list_size;
500
501 debugTrace(DEBUG_gc,
502 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
503 (unsigned long)(mut_list_size * sizeof(W_)),
504 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
505 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
506 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
507 mutlist_ATOMIC_INVARIANT,
508 mutlist_INVARIANT_CHECK_QUEUE,
509 mutlist_OTHERS);
510 }
511
512 bdescr *next, *prev;
513 gen = &generations[g];
514
515 // for generations we collected...
516 if (g <= N) {
517
518 /* free old memory and shift to-space into from-space for all
519 * the collected steps (except the allocation area). These
520 * freed blocks will probably be quickly recycled.
521 */
522 if (gen->mark)
523 {
524 // tack the new blocks on the end of the existing blocks
525 if (gen->old_blocks != NULL) {
526
527 prev = NULL;
528 for (bd = gen->old_blocks; bd != NULL; bd = next) {
529
530 next = bd->link;
531
532 if (!(bd->flags & BF_MARKED))
533 {
534 if (prev == NULL) {
535 gen->old_blocks = next;
536 } else {
537 prev->link = next;
538 }
539 freeGroup(bd);
540 gen->n_old_blocks--;
541 }
542 else
543 {
544 gen->n_words += bd->free - bd->start;
545
546 // NB. this step might not be compacted next
547 // time, so reset the BF_MARKED flags.
548 // They are set before GC if we're going to
549 // compact. (search for BF_MARKED above).
550 bd->flags &= ~BF_MARKED;
551
552 // between GCs, all blocks in the heap except
553 // for the nursery have the BF_EVACUATED flag set.
554 bd->flags |= BF_EVACUATED;
555
556 prev = bd;
557 }
558 }
559
560 if (prev != NULL) {
561 prev->link = gen->blocks;
562 gen->blocks = gen->old_blocks;
563 }
564 }
565 // add the new blocks to the block tally
566 gen->n_blocks += gen->n_old_blocks;
567 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
568 ASSERT(countOccupied(gen->blocks) == gen->n_words);
569 }
570 else // not compacted
571 {
572 freeChain(gen->old_blocks);
573 }
574
575 gen->old_blocks = NULL;
576 gen->n_old_blocks = 0;
577
578 /* LARGE OBJECTS. The current live large objects are chained on
579 * scavenged_large, having been moved during garbage
580 * collection from large_objects. Any objects left on the
581 * large_objects list are therefore dead, so we free them here.
582 */
583 freeChain(gen->large_objects);
584 gen->large_objects = gen->scavenged_large_objects;
585 gen->n_large_blocks = gen->n_scavenged_large_blocks;
586 gen->n_large_words = countOccupied(gen->large_objects);
587 gen->n_new_large_words = 0;
588 }
589 else // for generations > N
590 {
591 /* For older generations, we need to append the
592 * scavenged_large_object list (i.e. large objects that have been
593 * promoted during this GC) to the large_object list for that step.
594 */
595 for (bd = gen->scavenged_large_objects; bd; bd = next) {
596 next = bd->link;
597 dbl_link_onto(bd, &gen->large_objects);
598 gen->n_large_words += bd->free - bd->start;
599 }
600
601 // add the new blocks we promoted during this GC
602 gen->n_large_blocks += gen->n_scavenged_large_blocks;
603 }
604
605 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
606 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
607
608 gen->scavenged_large_objects = NULL;
609 gen->n_scavenged_large_blocks = 0;
610
611 // Count "live" data
612 live_words += genLiveWords(gen);
613 live_blocks += genLiveBlocks(gen);
614
615 // add in the partial blocks in the gen_workspaces, but ignore gen 0
616 // if this is a local GC (we can't count another capability's part_list)
617 {
618 nat i;
619 for (i = 0; i < n_capabilities; i++) {
620 live_words += gcThreadLiveWords(i, gen->no);
621 live_blocks += gcThreadLiveBlocks(i, gen->no);
622 }
623 }
624 } // for all generations
625
626 // update the max size of older generations after a major GC
627 resize_generations();
628
629 // Free the mark stack.
630 if (mark_stack_top_bd != NULL) {
631 debugTrace(DEBUG_gc, "mark stack: %d blocks",
632 countBlocks(mark_stack_top_bd));
633 freeChain(mark_stack_top_bd);
634 }
635
636 // Free any bitmaps.
637 for (g = 0; g <= N; g++) {
638 gen = &generations[g];
639 if (gen->bitmap != NULL) {
640 freeGroup(gen->bitmap);
641 gen->bitmap = NULL;
642 }
643 }
644
645 resize_nursery();
646
647 resetNurseries();
648
649 // mark the garbage collected CAFs as dead
650 #if defined(DEBUG)
651 if (major_gc) { gcCAFs(); }
652 #endif
653
654 // Update the stable pointer hash table.
655 updateStableTables(major_gc);
656
657 // unlock the StablePtr table. Must be before scheduleFinalizers(),
658 // because a finalizer may call hs_free_fun_ptr() or
659 // hs_free_stable_ptr(), both of which access the StablePtr table.
660 stableUnlock();
661
662 // Must be after stableUnlock(), because it might free stable ptrs.
663 if (major_gc) {
664 checkUnload (gct->scavenged_static_objects);
665 }
666
667 #ifdef PROFILING
668 // resetStaticObjectForRetainerProfiling() must be called before
669 // zeroing below.
670
671 // ToDo: fix the gct->scavenged_static_objects below
672 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
673 #endif
674
675 // zero the scavenged static object list
676 if (major_gc) {
677 nat i;
678 if (n_gc_threads == 1) {
679 zero_static_object_list(gct->scavenged_static_objects);
680 } else {
681 for (i = 0; i < n_gc_threads; i++) {
682 if (!gc_threads[i]->idle) {
683 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
684 }
685 }
686 }
687 }
688
689 // Start any pending finalizers. Must be after
690 // updateStableTables() and stableUnlock() (see #4221).
691 RELEASE_SM_LOCK;
692 scheduleFinalizers(cap, dead_weak_ptr_list);
693 ACQUIRE_SM_LOCK;
694
695 // check sanity after GC
696 // before resurrectThreads(), because that might overwrite some
697 // closures, which will cause problems with THREADED where we don't
698 // fill slop.
699 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
700
701 // If a heap census is due, we need to do it before
702 // resurrectThreads(), for the same reason as checkSanity above:
703 // resurrectThreads() will overwrite some closures and leave slop
704 // behind.
705 if (do_heap_census) {
706 debugTrace(DEBUG_sched, "performing heap census");
707 RELEASE_SM_LOCK;
708 heapCensus(gct->gc_start_cpu);
709 ACQUIRE_SM_LOCK;
710 }
711
712 // send exceptions to any threads which were about to die
713 RELEASE_SM_LOCK;
714 resurrectThreads(resurrected_threads);
715 ACQUIRE_SM_LOCK;
716
717 if (major_gc) {
718 W_ need, got;
719 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
720 got = mblocks_allocated;
721 /* If the amount of data remains constant, next major GC we'll
722 require (F+1)*need. We leave (F+2)*need in order to reduce
723 repeated deallocation and reallocation. */
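/* Worked example (a sketch, assuming the default oldGenFactor F = 2):
   if n_alloc_blocks currently corresponds to 100 megablocks, we keep
   (2+2)*100 = 400 megablocks mapped and return anything above that to
   the OS. */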
724 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
725 if (got > need) {
726 returnMemoryToOS(got - need);
727 }
728 }
729
730 // extra GC trace info
731 IF_DEBUG(gc, statDescribeGens());
732
733 #ifdef DEBUG
734 // symbol-table based profiling
735 /* heapCensus(to_blocks); */ /* ToDo */
736 #endif
737
738 // restore enclosing cost centre
739 #ifdef PROFILING
740 for (n = 0; n < n_capabilities; n++) {
741 capabilities[n]->r.rCCCS = save_CCS[n];
742 }
743 #endif
744
745 #ifdef DEBUG
746 // check for memory leaks if DEBUG is on
747 memInventory(DEBUG_gc);
748 #endif
749
750 // ok, GC over: tell the stats department what happened.
751 stat_endGC(cap, gct, live_words, copied,
752 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
753 N, n_gc_threads, par_max_copied, par_tot_copied);
754
755 #if defined(RTS_USER_SIGNALS)
756 if (RtsFlags.MiscFlags.install_signal_handlers) {
757 // unblock signals again
758 unblockUserSignals();
759 }
760 #endif
761
762 RELEASE_SM_LOCK;
763
764 SET_GCT(saved_gct);
765 }
766
767 /* -----------------------------------------------------------------------------
768 Initialise the gc_thread structures.
769 -------------------------------------------------------------------------- */
770
771 #define GC_THREAD_INACTIVE 0
772 #define GC_THREAD_STANDING_BY 1
773 #define GC_THREAD_RUNNING 2
774 #define GC_THREAD_WAITING_TO_CONTINUE 3
775
776 static void
777 new_gc_thread (nat n, gc_thread *t)
778 {
779 nat g;
780 gen_workspace *ws;
781
782 t->cap = capabilities[n];
783
784 #ifdef THREADED_RTS
785 t->id = 0;
786 initSpinLock(&t->gc_spin);
787 initSpinLock(&t->mut_spin);
788 ACQUIRE_SPIN_LOCK(&t->gc_spin);
789 ACQUIRE_SPIN_LOCK(&t->mut_spin);
790 t->wakeup = GC_THREAD_INACTIVE; // starts true, so we can wait for the
791 // thread to start up, see wakeup_gc_threads
792 #endif
793
794 t->thread_index = n;
795 t->idle = rtsFalse;
796 t->free_blocks = NULL;
797 t->gc_count = 0;
798
799 init_gc_thread(t);
800
801 #ifdef USE_PAPI
802 t->papi_events = -1;
803 #endif
804
805 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
806 {
807 ws = &t->gens[g];
808 ws->gen = &generations[g];
809 ASSERT(g == ws->gen->no);
810 ws->my_gct = t;
811
812 // We want to call
813 // alloc_todo_block(ws,0);
814 // but can't, because it uses gct which isn't set up at this point.
815 // Hence, allocate a block for todo_bd manually:
816 {
817 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
818 initBdescr(bd, ws->gen, ws->gen->to);
819 bd->flags = BF_EVACUATED;
820 bd->u.scan = bd->free = bd->start;
821
822 ws->todo_bd = bd;
823 ws->todo_free = bd->free;
824 ws->todo_lim = bd->start + BLOCK_SIZE_W;
825 }
826
827 ws->todo_q = newWSDeque(128);
828 ws->todo_overflow = NULL;
829 ws->n_todo_overflow = 0;
830 ws->todo_large_objects = NULL;
831
832 ws->part_list = NULL;
833 ws->n_part_blocks = 0;
834
835 ws->scavd_list = NULL;
836 ws->n_scavd_blocks = 0;
837 }
838 }
839
840
841 void
842 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
843 {
844 #if defined(THREADED_RTS)
845 nat i;
846
847 if (from > 0) {
848 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
849 "initGcThreads");
850 } else {
851 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
852 "initGcThreads");
853 }
854
855 for (i = from; i < to; i++) {
856 gc_threads[i] =
857 stgMallocBytes(sizeof(gc_thread) +
858 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
859 "alloc_gc_threads");
860
861 new_gc_thread(i, gc_threads[i]);
862 }
863 #else
864 ASSERT(from == 0 && to == 1);
865 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
866 gc_threads[0] = gct;
867 new_gc_thread(0,gc_threads[0]);
868 #endif
869 }
870
871 void
872 freeGcThreads (void)
873 {
874 nat g;
875 if (gc_threads != NULL) {
876 #if defined(THREADED_RTS)
877 nat i;
878 for (i = 0; i < n_capabilities; i++) {
879 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
880 {
881 freeWSDeque(gc_threads[i]->gens[g].todo_q);
882 }
883 stgFree (gc_threads[i]);
884 }
885 stgFree (gc_threads);
886 #else
887 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
888 {
889 freeWSDeque(gc_threads[0]->gens[g].todo_q);
890 }
891 stgFree (gc_threads);
892 #endif
893 gc_threads = NULL;
894 }
895 }
896
897 /* ----------------------------------------------------------------------------
898 Start GC threads
899 ------------------------------------------------------------------------- */
900
901 static volatile StgWord gc_running_threads;
902
903 static StgWord
904 inc_running (void)
905 {
906 StgWord new;
907 new = atomic_inc(&gc_running_threads, 1);
908 ASSERT(new <= n_gc_threads);
909 return new;
910 }
911
912 static StgWord
913 dec_running (void)
914 {
915 ASSERT(gc_running_threads != 0);
916 return atomic_dec(&gc_running_threads);
917 }
918
919 static rtsBool
920 any_work (void)
921 {
922 int g;
923 gen_workspace *ws;
924
925 gct->any_work++;
926
927 write_barrier();
928
929 // scavenge objects in compacted generation
930 if (mark_stack_bd != NULL && !mark_stack_empty()) {
931 return rtsTrue;
932 }
933
934 // Check for global work in any step. We don't need to check for
935 // local work, because we have already exited scavenge_loop(),
936 // which means there is no local work for this thread.
937 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
938 ws = &gct->gens[g];
939 if (ws->todo_large_objects) return rtsTrue;
940 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
941 if (ws->todo_overflow) return rtsTrue;
942 }
943
944 #if defined(THREADED_RTS)
945 if (work_stealing) {
946 nat n;
947 // look for work to steal
948 for (n = 0; n < n_gc_threads; n++) {
949 if (n == gct->thread_index) continue;
950 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
951 ws = &gc_threads[n]->gens[g];
952 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
953 }
954 }
955 }
956 #endif
957
958 gct->no_work++;
959 #if defined(THREADED_RTS)
960 yieldThread();
961 #endif
962
963 return rtsFalse;
964 }
965
966 static void
967 scavenge_until_all_done (void)
968 {
969 DEBUG_ONLY( nat r );
970
971
972 loop:
973 #if defined(THREADED_RTS)
974 if (n_gc_threads > 1) {
975 scavenge_loop();
976 } else {
977 scavenge_loop1();
978 }
979 #else
980 scavenge_loop();
981 #endif
982
983 collect_gct_blocks();
984
985 // scavenge_loop() only exits when there's no work to do
986
987 #ifdef DEBUG
988 r = dec_running();
989 #else
990 dec_running();
991 #endif
992
993 traceEventGcIdle(gct->cap);
994
995 debugTrace(DEBUG_gc, "%d GC threads still running", r);
996
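// Rendezvous: each thread that runs out of work decrements
// gc_running_threads (via dec_running() above); we keep polling for
// stray work until every thread has done so and the count drops to zero.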
997 while (gc_running_threads != 0) {
998 // usleep(1);
999 if (any_work()) {
1000 inc_running();
1001 traceEventGcWork(gct->cap);
1002 goto loop;
1003 }
1004 // any_work() does not remove the work from the queue, it
1005 // just checks for the presence of work. If we find any,
1006 // then we increment gc_running_threads and go back to
1007 // scavenge_loop() to perform any pending work.
1008 }
1009
1010 traceEventGcDone(gct->cap);
1011 }
1012
1013 #if defined(THREADED_RTS)
1014
1015 void
1016 gcWorkerThread (Capability *cap)
1017 {
1018 gc_thread *saved_gct;
1019
1020 // necessary if we stole a callee-saves register for gct:
1021 saved_gct = gct;
1022
1023 SET_GCT(gc_threads[cap->no]);
1024 gct->id = osThreadId();
1025
1026 // Wait until we're told to wake up
1027 RELEASE_SPIN_LOCK(&gct->mut_spin);
1028 // yieldThread();
1029 // Strangely, adding a yieldThread() here makes the CPU time
1030 // measurements more accurate on Linux, perhaps because it syncs
1031 // the CPU time across the multiple cores. Without this, CPU time
1032 // is heavily skewed towards GC rather than MUT.
1033 gct->wakeup = GC_THREAD_STANDING_BY;
1034 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1035 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1036
1037 #ifdef USE_PAPI
1038 // start performance counters in this thread...
1039 if (gct->papi_events == -1) {
1040 papi_init_eventset(&gct->papi_events);
1041 }
1042 papi_thread_start_gc1_count(gct->papi_events);
1043 #endif
1044
1045 init_gc_thread(gct);
1046
1047 traceEventGcWork(gct->cap);
1048
1049 // Every thread evacuates some roots.
1050 gct->evac_gen_no = 0;
1051 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1052 scavenge_capability_mut_lists(cap);
1053
1054 scavenge_until_all_done();
1055
1056 #ifdef THREADED_RTS
1057 // Now that the whole heap is marked, we discard any sparks that
1058 // were found to be unreachable. The main GC thread is currently
1059 // marking heap reachable via weak pointers, so it is
1060 // non-deterministic whether a spark will be retained if it is
1061 // only reachable via weak pointers. To fix this problem would
1062 // require another GC barrier, which is too high a price.
1063 pruneSparkQueue(cap);
1064 #endif
1065
1066 #ifdef USE_PAPI
1067 // count events in this thread towards the GC totals
1068 papi_thread_stop_gc1_count(gct->papi_events);
1069 #endif
1070
1071 // Wait until we're told to continue
1072 RELEASE_SPIN_LOCK(&gct->gc_spin);
1073 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1074 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1075 gct->thread_index);
1076 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1077 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1078
1079 SET_GCT(saved_gct);
1080 }
1081
1082 #endif
1083
1084 #if defined(THREADED_RTS)
1085
1086 void
1087 waitForGcThreads (Capability *cap USED_IF_THREADS)
1088 {
1089 const nat n_threads = n_capabilities;
1090 const nat me = cap->no;
1091 nat i, j;
1092 rtsBool retry = rtsTrue;
1093
1094 stat_startGCSync(gc_threads[cap->no]);
1095
1096
1097 while(retry) {
1098 for (i=0; i < n_threads; i++) {
1099 if (i == me || gc_threads[i]->idle) continue;
1100 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1101 prodCapability(capabilities[i], cap->running_task);
1102 }
1103 }
1104 for (j=0; j < 10; j++) {
1105 retry = rtsFalse;
1106 for (i=0; i < n_threads; i++) {
1107 if (i == me || gc_threads[i]->idle) continue;
1108 write_barrier();
1109 interruptCapability(capabilities[i]);
1110 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1111 retry = rtsTrue;
1112 }
1113 }
1114 if (!retry) break;
1115 yieldThread();
1116 }
1117 }
1118 }
1119
1120 #endif // THREADED_RTS
1121
1122 static void
1123 start_gc_threads (void)
1124 {
1125 #if defined(THREADED_RTS)
1126 gc_running_threads = 0;
1127 #endif
1128 }
1129
1130 static void
1131 wakeup_gc_threads (nat me USED_IF_THREADS)
1132 {
1133 #if defined(THREADED_RTS)
1134 nat i;
1135
1136 if (n_gc_threads == 1) return;
1137
1138 for (i=0; i < n_gc_threads; i++) {
1139 if (i == me || gc_threads[i]->idle) continue;
1140 inc_running();
1141 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1142 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1143
1144 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1145 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1146 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1147 }
1148 #endif
1149 }
1150
1151 // After GC is complete, we must wait for all GC threads to enter the
1152 // standby state, otherwise they may still be executing inside
1153 // any_work(), and may even remain awake until the next GC starts.
1154 static void
1155 shutdown_gc_threads (nat me USED_IF_THREADS)
1156 {
1157 #if defined(THREADED_RTS)
1158 nat i;
1159
1160 if (n_gc_threads == 1) return;
1161
1162 for (i=0; i < n_gc_threads; i++) {
1163 if (i == me || gc_threads[i]->idle) continue;
1164 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
1165 busy_wait_nop();
1166 write_barrier();
1167 }
1168 }
1169 #endif
1170 }
1171
1172 #if defined(THREADED_RTS)
1173 void
1174 releaseGCThreads (Capability *cap USED_IF_THREADS)
1175 {
1176 const nat n_threads = n_capabilities;
1177 const nat me = cap->no;
1178 nat i;
1179 for (i=0; i < n_threads; i++) {
1180 if (i == me || gc_threads[i]->idle) continue;
1181 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1182 barf("releaseGCThreads");
1183
1184 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1185 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1186 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1187 }
1188 }
1189 #endif
1190
1191 /* ----------------------------------------------------------------------------
1192 Initialise a generation that is to be collected
1193 ------------------------------------------------------------------------- */
1194
1195 static void
1196 prepare_collected_gen (generation *gen)
1197 {
1198 nat i, g, n;
1199 gen_workspace *ws;
1200 bdescr *bd, *next;
1201
1202 // Throw away the current mutable list. Invariant: the mutable
1203 // list always has at least one block; this means we can avoid a
1204 // check for NULL in recordMutable().
1205 g = gen->no;
1206 if (g != 0) {
1207 for (i = 0; i < n_capabilities; i++) {
1208 freeChain(capabilities[i]->mut_lists[g]);
1209 capabilities[i]->mut_lists[g] = allocBlock();
1210 }
1211 }
1212
1213 gen = &generations[g];
1214 ASSERT(gen->no == g);
1215
1216 // we'll construct a new list of threads in this step
1217 // during GC, throw away the current list.
1218 gen->old_threads = gen->threads;
1219 gen->threads = END_TSO_QUEUE;
1220
1221 // deprecate the existing blocks
1222 gen->old_blocks = gen->blocks;
1223 gen->n_old_blocks = gen->n_blocks;
1224 gen->blocks = NULL;
1225 gen->n_blocks = 0;
1226 gen->n_words = 0;
1227 gen->live_estimate = 0;
1228
1229 // initialise the large object queues.
1230 ASSERT(gen->scavenged_large_objects == NULL);
1231 ASSERT(gen->n_scavenged_large_blocks == 0);
1232
1233 // grab all the partial blocks stashed in the gc_thread workspaces and
1234 // move them to the old_blocks list of this gen.
1235 for (n = 0; n < n_capabilities; n++) {
1236 ws = &gc_threads[n]->gens[gen->no];
1237
1238 for (bd = ws->part_list; bd != NULL; bd = next) {
1239 next = bd->link;
1240 bd->link = gen->old_blocks;
1241 gen->old_blocks = bd;
1242 gen->n_old_blocks += bd->blocks;
1243 }
1244 ws->part_list = NULL;
1245 ws->n_part_blocks = 0;
1246
1247 ASSERT(ws->scavd_list == NULL);
1248 ASSERT(ws->n_scavd_blocks == 0);
1249
1250 if (ws->todo_free != ws->todo_bd->start) {
1251 ws->todo_bd->free = ws->todo_free;
1252 ws->todo_bd->link = gen->old_blocks;
1253 gen->old_blocks = ws->todo_bd;
1254 gen->n_old_blocks += ws->todo_bd->blocks;
1255 alloc_todo_block(ws,0); // always has one block.
1256 }
1257 }
1258
1259 // mark the small objects as from-space
1260 for (bd = gen->old_blocks; bd; bd = bd->link) {
1261 bd->flags &= ~BF_EVACUATED;
1262 }
1263
1264 // mark the large objects as from-space
1265 for (bd = gen->large_objects; bd; bd = bd->link) {
1266 bd->flags &= ~BF_EVACUATED;
1267 }
1268
1269 // for a compacted generation, we need to allocate the bitmap
1270 if (gen->mark) {
1271 StgWord bitmap_size; // in bytes
1272 bdescr *bitmap_bdescr;
1273 StgWord *bitmap;
1274
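// One mark bit is needed per word of old heap. As a sketch of the
// arithmetic (assuming the usual 4096-byte blocks and 8-byte words):
// each block holds 512 words, so its slice of the bitmap is 512 bits
// = 64 bytes, i.e. BLOCK_SIZE / (sizeof(W_) * BITS_PER_BYTE) bytes
// per block.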
1275 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1276
1277 if (bitmap_size > 0) {
1278 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1279 / BLOCK_SIZE);
1280 gen->bitmap = bitmap_bdescr;
1281 bitmap = bitmap_bdescr->start;
1282
1283 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1284 bitmap_size, bitmap);
1285
1286 // don't forget to fill it with zeros!
1287 memset(bitmap, 0, bitmap_size);
1288
1289 // For each block in this step, point to its bitmap from the
1290 // block descriptor.
1291 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1292 bd->u.bitmap = bitmap;
1293 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1294
1295 // Also at this point we set the BF_MARKED flag
1296 // for this block. The invariant is that
1297 // BF_MARKED is always unset, except during GC
1298 // when it is set on those blocks which will be
1299 // compacted.
1300 if (!(bd->flags & BF_FRAGMENTED)) {
1301 bd->flags |= BF_MARKED;
1302 }
1303
1304 // BF_SWEPT should be marked only for blocks that are being
1305 // collected in sweep()
1306 bd->flags &= ~BF_SWEPT;
1307 }
1308 }
1309 }
1310 }
1311
1312
1313 /* ----------------------------------------------------------------------------
1314 Save the mutable lists in saved_mut_lists
1315 ------------------------------------------------------------------------- */
1316
1317 static void
1318 stash_mut_list (Capability *cap, nat gen_no)
1319 {
1320 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1321 cap->mut_lists[gen_no] = allocBlock_sync();
1322 }
1323
1324 /* ----------------------------------------------------------------------------
1325 Initialise a generation that is *not* to be collected
1326 ------------------------------------------------------------------------- */
1327
1328 static void
1329 prepare_uncollected_gen (generation *gen)
1330 {
1331 nat i;
1332
1333
1334 ASSERT(gen->no > 0);
1335
1336 // save the current mutable lists for this generation, and
1337 // allocate a fresh block for each one. We'll traverse these
1338 // mutable lists as roots early on in the GC.
1339 for (i = 0; i < n_capabilities; i++) {
1340 stash_mut_list(capabilities[i], gen->no);
1341 }
1342
1343 ASSERT(gen->scavenged_large_objects == NULL);
1344 ASSERT(gen->n_scavenged_large_blocks == 0);
1345 }
1346
1347 /* -----------------------------------------------------------------------------
1348 Collect the completed blocks from a GC thread and attach them to
1349 the generation.
1350 -------------------------------------------------------------------------- */
1351
1352 static void
1353 collect_gct_blocks (void)
1354 {
1355 nat g;
1356 gen_workspace *ws;
1357 bdescr *bd, *prev;
1358
1359 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1360 ws = &gct->gens[g];
1361
1362 // there may still be a block attached to ws->todo_bd;
1363 // leave it there to use next time.
1364
1365 if (ws->scavd_list != NULL) {
1366 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1367
1368 ASSERT(gct->scan_bd == NULL);
1369 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1370
1371 prev = NULL;
1372 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1373 ws->gen->n_words += bd->free - bd->start;
1374 prev = bd;
1375 }
1376 if (prev != NULL) {
1377 prev->link = ws->gen->blocks;
1378 ws->gen->blocks = ws->scavd_list;
1379 }
1380 ws->gen->n_blocks += ws->n_scavd_blocks;
1381
1382 ws->scavd_list = NULL;
1383 ws->n_scavd_blocks = 0;
1384
1385 RELEASE_SPIN_LOCK(&ws->gen->sync);
1386 }
1387 }
1388 }
1389
1390 /* -----------------------------------------------------------------------------
1391 During mutation, any blocks that are filled by allocatePinned() are
1392 stashed on the local pinned_object_blocks list, to avoid needing to
1393 take a global lock. Here we collect those blocks from the
1394 cap->pinned_object_blocks lists and put them on the
1395 main g0->large_object list.
1396
1399 -------------------------------------------------------------------------- */
1400
1401 static void
1402 collect_pinned_object_blocks (void)
1403 {
1404 nat n;
1405 bdescr *bd, *prev;
1406
1407 for (n = 0; n < n_capabilities; n++) {
1408 prev = NULL;
1409 for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1410 prev = bd;
1411 }
1412 if (prev != NULL) {
1413 prev->link = g0->large_objects;
1414 if (g0->large_objects != NULL) {
1415 g0->large_objects->u.back = prev;
1416 }
1417 g0->large_objects = capabilities[n]->pinned_object_blocks;
1418 capabilities[n]->pinned_object_blocks = 0;
1419 }
1420 }
1421 }
1422
1423 /* -----------------------------------------------------------------------------
1424 Initialise a gc_thread before GC
1425 -------------------------------------------------------------------------- */
1426
1427 static void
1428 init_gc_thread (gc_thread *t)
1429 {
1430 t->static_objects = END_OF_STATIC_LIST;
1431 t->scavenged_static_objects = END_OF_STATIC_LIST;
1432 t->scan_bd = NULL;
1433 t->mut_lists = t->cap->mut_lists;
1434 t->evac_gen_no = 0;
1435 t->failed_to_evac = rtsFalse;
1436 t->eager_promotion = rtsTrue;
1437 t->thunk_selector_depth = 0;
1438 t->copied = 0;
1439 t->scanned = 0;
1440 t->any_work = 0;
1441 t->no_work = 0;
1442 t->scav_find_work = 0;
1443 }
1444
1445 /* -----------------------------------------------------------------------------
1446 Function we pass to evacuate roots.
1447 -------------------------------------------------------------------------- */
1448
1449 static void
1450 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1451 {
1452 // we stole a register for gct, but this function is called from
1453 // *outside* the GC where the register variable is not in effect,
1454 // so we need to save and restore it here. NB. only call
1455 // mark_root() from the main GC thread, otherwise gct will be
1456 // incorrect.
1457 #if defined(THREADED_RTS)
1458 gc_thread *saved_gct;
1459 saved_gct = gct;
1460 #endif
1461 SET_GCT(user);
1462
1463 evacuate(root);
1464
1465 SET_GCT(saved_gct);
1466 }
1467
1468 /* -----------------------------------------------------------------------------
1469 Initialising the static object & mutable lists
1470 -------------------------------------------------------------------------- */
1471
1472 static void
1473 zero_static_object_list(StgClosure* first_static)
1474 {
1475 StgClosure* p;
1476 StgClosure* link;
1477 const StgInfoTable *info;
1478
1479 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1480 info = get_itbl(p);
1481 link = *STATIC_LINK(info, p);
1482 *STATIC_LINK(info,p) = NULL;
1483 }
1484 }
1485
1486 /* ----------------------------------------------------------------------------
1487 Reset the sizes of the older generations when we do a major
1488 collection.
1489
1490 CURRENT STRATEGY: make all generations except zero the same size.
1491 We have to stay within the maximum heap size, and leave a certain
1492 percentage of the maximum heap size available to allocate into.
1493 ------------------------------------------------------------------------- */
1494
1495 static void
1496 resize_generations (void)
1497 {
1498 nat g;
1499
1500 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1501 W_ live, size, min_alloc, words;
1502 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1503 const W_ gens = RtsFlags.GcFlags.generations;
1504
1505 // live in the oldest generations
1506 if (oldest_gen->live_estimate != 0) {
1507 words = oldest_gen->live_estimate;
1508 } else {
1509 words = oldest_gen->n_words;
1510 }
1511 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1512 oldest_gen->n_large_blocks;
1513
1514 // default max size for all generations except zero
1515 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1516 RtsFlags.GcFlags.minOldGenSize);
1517
1518 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1519 if (max > 0) {
1520 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1521 } else {
1522 RtsFlags.GcFlags.heapSizeSuggestion = size;
1523 }
1524 }
1525
1526 // minimum size for generation zero
1527 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1528 RtsFlags.GcFlags.minAllocAreaSize);
1529
1530 // Auto-enable compaction when the residency reaches a
1531 // certain percentage of the maximum heap size (default: 30%).
1532 if (RtsFlags.GcFlags.compact ||
1533 (max > 0 &&
1534 oldest_gen->n_blocks >
1535 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1536 oldest_gen->mark = 1;
1537 oldest_gen->compact = 1;
1538 // debugBelch("compaction: on\n", live);
1539 } else {
1540 oldest_gen->mark = 0;
1541 oldest_gen->compact = 0;
1542 // debugBelch("compaction: off\n", live);
1543 }
1544
1545 if (RtsFlags.GcFlags.sweep) {
1546 oldest_gen->mark = 1;
1547 }
1548
1549 // if we're going to go over the maximum heap size, reduce the
1550 // size of the generations accordingly. The calculation is
1551 // different if compaction is turned on, because we don't need
1552 // to double the space required to collect the old generation.
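// For instance (a sketch of the arithmetic): with plain copying
// collection and gens = 2, the bound checked below is
// size*(gens-1)*2 + min_alloc = 2*size + min_alloc, because collecting
// the single old generation by copying needs room for both from-space
// and to-space on top of the allocation area.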
1553 if (max != 0) {
1554
1555 // this test is necessary to ensure that the calculations
1556 // below don't have any negative results - we're working
1557 // with unsigned values here.
1558 if (max < min_alloc) {
1559 heapOverflow();
1560 }
1561
1562 if (oldest_gen->compact) {
1563 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1564 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1565 }
1566 } else {
1567 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1568 size = (max - min_alloc) / ((gens - 1) * 2);
1569 }
1570 }
1571
1572 if (size < live) {
1573 heapOverflow();
1574 }
1575 }
1576
1577 #if 0
1578 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1579 min_alloc, size, max);
1580 #endif
1581
1582 for (g = 0; g < gens; g++) {
1583 generations[g].max_blocks = size;
1584 }
1585 }
1586 }
1587
1588 /* -----------------------------------------------------------------------------
1589 Calculate the new size of the nursery, and resize it.
1590 -------------------------------------------------------------------------- */
1591
1592 static void
1593 resize_nursery (void)
1594 {
1595 const StgWord min_nursery =
1596 RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;
1597
1598 if (RtsFlags.GcFlags.generations == 1)
1599 { // Two-space collector:
1600 W_ blocks;
1601
1602 /* set up a new nursery. Allocate a nursery size based on a
1603 * function of the amount of live data (by default a factor of 2)
1604 * Use the blocks from the old nursery if possible, freeing up any
1605 * left over blocks.
1606 *
1607 * If we get near the maximum heap size, then adjust our nursery
1608 * size accordingly. If the nursery is the same size as the live
1609 * data (L), then we need 3L bytes. We can reduce the size of the
1610 * nursery to bring the required memory down near 2L bytes.
1611 *
1612 * A normal 2-space collector would need 4L bytes to give the same
1613 * performance we get from 3L bytes, reducing to the same
1614 * performance at 2L bytes.
1615 */
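/* For example (a sketch, assuming the default oldGenFactor = 2): with
 * 1000 blocks of live data and a 5000-block maximum heap,
 * 1000*2*2 = 4000 <= 5000, so we simply take a 2000-block nursery in
 * the else-branch below; but with a 3000-block maximum we are near the
 * limit, and the nursery is cut to 3000 - 2*1000 = 1000 blocks, leaving
 * room for the two copies of the live data (~2L).
 */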
1616 blocks = generations[0].n_blocks;
1617
1618 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1619 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1620 RtsFlags.GcFlags.maxHeapSize )
1621 {
1622 long adjusted_blocks; // signed on purpose
1623 int pc_free;
1624
1625 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1626
1627 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1628 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1629
1630 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1631 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1632 {
1633 heapOverflow();
1634 }
1635 blocks = adjusted_blocks;
1636 }
1637 else
1638 {
1639 blocks *= RtsFlags.GcFlags.oldGenFactor;
1640 if (blocks < min_nursery)
1641 {
1642 blocks = min_nursery;
1643 }
1644 }
1645 resizeNurseries(blocks);
1646 }
1647 else // Generational collector
1648 {
1649 /*
1650 * If the user has given us a suggested heap size, adjust our
1651 * allocation area to make best use of the memory available.
1652 */
1653 if (RtsFlags.GcFlags.heapSizeSuggestion)
1654 {
1655 long blocks;
1656 StgWord needed;
1657
1658 calcNeeded(rtsFalse, &needed); // approx blocks needed at next GC
1659
1660 /* Guess how much will be live in generation 0 step 0 next time.
1661 * A good approximation is obtained by finding the
1662 * percentage of g0 that was live at the last minor GC.
1663 *
1664 * We have an accurate figure for the amount of copied data in
1665 * 'copied', but we must convert this to a number of blocks, with
1666 * a small adjustment for estimated slop at the end of a block
1667 * (- 10 words).
1668 */
1669 if (N == 0)
1670 {
1671 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1672 / countNurseryBlocks();
1673 }
1674
1675 /* Estimate a size for the allocation area based on the
1676 * information available. We might end up going slightly under
1677 * or over the suggested heap size, but we should be pretty
1678 * close on average.
1679 *
1680 * Formula: suggested - needed
1681 * ----------------------------
1682 * 1 + g0_pcnt_kept/100
1683 *
1684 * where 'needed' is the amount of memory needed at the next
1685 * collection for collecting all gens except g0.
1686 */
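/* Worked example (a sketch with made-up numbers): with a suggestion of
 * 100000 blocks, needed = 20000 blocks and g0_pcnt_kept = 60, we pick
 * blocks = (100000 - 20000) * 100 / (100 + 60) = 50000; the allocation
 * area, the ~60% of it we expect to copy out of g0 (30000 blocks) and
 * 'needed' then add up to the 100000-block suggestion.
 */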
1687 blocks =
1688 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1689 (100 + (long)g0_pcnt_kept);
1690
1691 if (blocks < (long)min_nursery) {
1692 blocks = min_nursery;
1693 }
1694
1695 resizeNurseries((W_)blocks);
1696 }
1697 else
1698 {
1699 // we might have added extra blocks to the nursery, so
1700 // resize back to the original size again.
1701 resizeNurseriesFixed();
1702 }
1703 }
1704 }
1705
1706 /* -----------------------------------------------------------------------------
1707 Sanity code for CAF garbage collection.
1708
1709 With DEBUG turned on, we manage a CAF list in addition to the SRT
1710 mechanism. After GC, we run down the CAF list and blackhole any
1711 CAFs which have been garbage collected. This means we get an error
1712 whenever the program tries to enter a garbage collected CAF.
1713
1714 Any garbage collected CAFs are taken off the CAF list at the same
1715 time.
1716 -------------------------------------------------------------------------- */
1717
1718 #if defined(DEBUG)
1719
1720 static void gcCAFs(void)
1721 {
1722 StgIndStatic *p, *prev;
1723
1724 const StgInfoTable *info;
1725 nat i;
1726
1727 i = 0;
1728 p = debug_caf_list;
1729 prev = NULL;
1730
1731 for (p = debug_caf_list; p != (StgIndStatic*)END_OF_STATIC_LIST;
1732 p = (StgIndStatic*)p->saved_info) {
1733
1734 info = get_itbl((StgClosure*)p);
1735 ASSERT(info->type == IND_STATIC);
1736
1737 if (p->static_link == NULL) {
1738 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
1739 SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
1740 if (prev == NULL) {
1741 debug_caf_list = (StgIndStatic*)p->saved_info;
1742 } else {
1743 prev->saved_info = p->saved_info;
1744 }
1745 } else {
1746 prev = p;
1747 i++;
1748 }
1749 }
1750
1751 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1752 }
1753 #endif