Add +RTS -n<size>: divide the nursery into chunks
[ghc.git] / rts / sm / GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Storage.h"
32 #include "RtsUtils.h"
33 #include "Apply.h"
34 #include "Updates.h"
35 #include "Stats.h"
36 #include "Schedule.h"
37 #include "Sanity.h"
38 #include "BlockAlloc.h"
39 #include "ProfHeap.h"
40 #include "Weak.h"
41 #include "Prelude.h"
42 #include "RtsSignals.h"
43 #include "STM.h"
44 #include "Trace.h"
45 #include "RetainerProfile.h"
46 #include "LdvProfile.h"
47 #include "RaiseAsync.h"
48 #include "Papi.h"
49 #include "Stable.h"
50 #include "CheckUnload.h"
51
52 #include <string.h> // for memset()
53 #include <unistd.h>
54
55 /* -----------------------------------------------------------------------------
56 Global variables
57 -------------------------------------------------------------------------- */
58
59 /* STATIC OBJECT LIST.
60 *
61 * During GC:
62 * We maintain a linked list of static objects that are still live.
63 * The requirements for this list are:
64 *
65 * - we need to scan the list while adding to it, in order to
66 * scavenge all the static objects (in the same way that
67 * breadth-first scavenging works for dynamic objects).
68 *
69 * - we need to be able to tell whether an object is already on
70 * the list, to break loops.
71 *
72 * Each static object has a "static link field", which we use for
73 * linking objects on to the list. We use a stack-type list, consing
74 * objects on the front as they are added (this means that the
75 * scavenge phase is depth-first, not breadth-first, but that
76 * shouldn't matter).
77 *
78 * A separate list is kept for objects that have been scavenged
79 * already - this is so that we can zero all the marks afterwards.
80 *
81 * An object is on the list if its static link field is non-zero; this
82 * means that we have to mark the end of the list with '1', not NULL.
83 *
84 * Extra notes for generational GC:
85 *
86 * Each generation has a static object list associated with it. When
87 * collecting generations up to N, we treat the static object lists
88 * from generations > N as roots.
89 *
90 * We build up a static object list while collecting generations 0..N,
91 * which is then appended to the static object list of generation N+1.
92 */
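/* A minimal sketch of the consing described above (illustrative only; the
 * real code lives in Evac.c and Scav.c and works on the gct-local lists set
 * up in init_gc_thread()):
 *
 *     if (*STATIC_LINK(info, p) == NULL) {        // not on any list yet
 *         *STATIC_LINK(info, p) = gct->static_objects;
 *         gct->static_objects = p;                // cons onto the front
 *     }
 *
 * The list is terminated by END_OF_STATIC_LIST (i.e. 1) rather than NULL,
 * so that a NULL static link still means "not on any list".
 */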
93
94 /* N is the oldest generation being collected, where the generations
95 * are numbered starting at 0. A major GC (indicated by the major_gc
96 * flag) is when we're collecting all generations. We only attempt to
97 * deal with static objects and GC CAFs when doing a major GC.
98 */
99 nat N;
100 rtsBool major_gc;
101
102 /* Data used for allocation area sizing.
103 */
104 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
105
106 /* Mut-list stats */
107 #ifdef DEBUG
108 nat mutlist_MUTVARS,
109 mutlist_MUTARRS,
110 mutlist_MVARS,
111 mutlist_TVAR,
112 mutlist_TVAR_WATCH_QUEUE,
113 mutlist_TREC_CHUNK,
114 mutlist_TREC_HEADER,
115 mutlist_ATOMIC_INVARIANT,
116 mutlist_INVARIANT_CHECK_QUEUE,
117 mutlist_OTHERS;
118 #endif
119
120 /* Thread-local data for each GC thread
121 */
122 gc_thread **gc_threads = NULL;
123
124 #if !defined(THREADED_RTS)
125 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
126 #endif
127
128 // Number of threads running in *this* GC. Affects how many
129 // step->todos[] lists we have to look in to find work.
130 nat n_gc_threads;
131
132 // For stats:
133 long copied; // *words* copied & scavenged during this GC
134
135 rtsBool work_stealing;
136
137 DECLARE_GCT
138
139 /* -----------------------------------------------------------------------------
140 Static function declarations
141 -------------------------------------------------------------------------- */
142
143 static void mark_root (void *user, StgClosure **root);
144 static void zero_static_object_list (StgClosure* first_static);
145 static void prepare_collected_gen (generation *gen);
146 static void prepare_uncollected_gen (generation *gen);
147 static void init_gc_thread (gc_thread *t);
148 static void resize_generations (void);
149 static void resize_nursery (void);
150 static void start_gc_threads (void);
151 static void scavenge_until_all_done (void);
152 static StgWord inc_running (void);
153 static StgWord dec_running (void);
154 static void wakeup_gc_threads (nat me);
155 static void shutdown_gc_threads (nat me);
156 static void collect_gct_blocks (void);
157 static void collect_pinned_object_blocks (void);
158
159 #if defined(DEBUG)
160 static void gcCAFs (void);
161 #endif
162
163 /* -----------------------------------------------------------------------------
164 The mark stack.
165 -------------------------------------------------------------------------- */
166
167 bdescr *mark_stack_top_bd; // topmost block in the mark stack
168 bdescr *mark_stack_bd; // current block in the mark stack
169 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
170
171 /* -----------------------------------------------------------------------------
172 GarbageCollect: the main entry point to the garbage collector.
173
174    The collect_gen parameter is obtained by calling calcNeeded().
175
176 Locks held: all capabilities are held throughout GarbageCollect().
177 -------------------------------------------------------------------------- */
178
179 void
180 GarbageCollect (nat collect_gen,
181 rtsBool do_heap_census,
182 nat gc_type USED_IF_THREADS,
183 Capability *cap)
184 {
185 bdescr *bd;
186 generation *gen;
187 StgWord live_blocks, live_words, par_max_copied, par_tot_copied;
188 #if defined(THREADED_RTS)
189 gc_thread *saved_gct;
190 #endif
191 nat g, n;
192
193 // necessary if we stole a callee-saves register for gct:
194 #if defined(THREADED_RTS)
195 saved_gct = gct;
196 #endif
197
198 #ifdef PROFILING
199 CostCentreStack *save_CCS[n_capabilities];
200 #endif
201
202 ACQUIRE_SM_LOCK;
203
204 #if defined(RTS_USER_SIGNALS)
205 if (RtsFlags.MiscFlags.install_signal_handlers) {
206 // block signals
207 blockUserSignals();
208 }
209 #endif
210
211 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
212 // otherwise adjust the padding in gen_workspace.
213
214 // this is the main thread
215 SET_GCT(gc_threads[cap->no]);
216
217 // tell the stats department that we've started a GC
218 stat_startGC(cap, gct);
219
220 // lock the StablePtr table
221 stableLock();
222
223 #ifdef DEBUG
224 mutlist_MUTVARS = 0;
225 mutlist_MUTARRS = 0;
226 mutlist_MVARS = 0;
227 mutlist_TVAR = 0;
228 mutlist_TVAR_WATCH_QUEUE = 0;
229 mutlist_TREC_CHUNK = 0;
230 mutlist_TREC_HEADER = 0;
231 mutlist_ATOMIC_INVARIANT = 0;
232 mutlist_INVARIANT_CHECK_QUEUE = 0;
233 mutlist_OTHERS = 0;
234 #endif
235
236 // attribute any costs to CCS_GC
237 #ifdef PROFILING
238 for (n = 0; n < n_capabilities; n++) {
239 save_CCS[n] = capabilities[n]->r.rCCCS;
240 capabilities[n]->r.rCCCS = CCS_GC;
241 }
242 #endif
243
244 /* Figure out which generation to collect
245 */
246 N = collect_gen;
247 major_gc = (N == RtsFlags.GcFlags.generations-1);
248
249 #if defined(THREADED_RTS)
250 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
251 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
252 // It's not always a good idea to do load balancing in parallel
253 // GC. In particular, for a parallel program we don't want to
254 // lose locality by moving cached data into another CPU's cache
255 // (this effect can be quite significant).
256 //
257     // We could have a more complex way to determine whether to do
258 // work stealing or not, e.g. it might be a good idea to do it
259 // if the heap is big. For now, we just turn it on or off with
260 // a flag.
261 #endif
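    // Illustrative example (the RTS flag name is an assumption, the fields
    // above are not): with +RTS -qb1, parGcLoadBalancingGen == 1, so a minor
    // (gen 0) collection runs without work stealing and keeps each
    // capability's data in its own cache, while collections of gen 1 and
    // above balance work across the GC threads.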
262
263 /* Start threads, so they can be spinning up while we finish initialisation.
264 */
265 start_gc_threads();
266
267 #if defined(THREADED_RTS)
268 /* How many threads will be participating in this GC?
269 * We don't try to parallelise minor GCs (unless the user asks for
270 * it with +RTS -gn0), or mark/compact/sweep GC.
271 */
272 if (gc_type == SYNC_GC_PAR) {
273 n_gc_threads = n_capabilities;
274 } else {
275 n_gc_threads = 1;
276 }
277 #else
278 n_gc_threads = 1;
279 #endif
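    // Illustrative note (the decision is made by the caller, not here):
    // gc_type comes from scheduleDoGC() in Schedule.c, which requests
    // SYNC_GC_PAR roughly when parallel GC is enabled for the generation
    // being collected (cf. the +RTS -qg<gen> flag).  In that case one GC
    // thread per capability takes part; otherwise only this thread collects.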
280
281 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
282 N, n_gc_threads);
283
284 #ifdef DEBUG
285 // check for memory leaks if DEBUG is on
286 memInventory(DEBUG_gc);
287 #endif
288
289 // do this *before* we start scavenging
290 collectFreshWeakPtrs();
291
292 // check sanity *before* GC
293 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
294
295 // gather blocks allocated using allocatePinned() from each capability
296 // and put them on the g0->large_object list.
297 collect_pinned_object_blocks();
298
299 // Initialise all the generations/steps that we're collecting.
300 for (g = 0; g <= N; g++) {
301 prepare_collected_gen(&generations[g]);
302 }
303 // Initialise all the generations/steps that we're *not* collecting.
304 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
305 prepare_uncollected_gen(&generations[g]);
306 }
307
308 // Prepare this gc_thread
309 init_gc_thread(gct);
310
311 /* Allocate a mark stack if we're doing a major collection.
312 */
313 if (major_gc && oldest_gen->mark) {
314 mark_stack_bd = allocBlock();
315 mark_stack_top_bd = mark_stack_bd;
316 mark_stack_bd->link = NULL;
317 mark_stack_bd->u.back = NULL;
318 mark_sp = mark_stack_bd->start;
319 } else {
320 mark_stack_bd = NULL;
321 mark_stack_top_bd = NULL;
322 mark_sp = NULL;
323 }
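    // Rough sketch of how the mark stack is used during a mark/compact (or
    // mark/sweep) major collection (see MarkStack.h for the real
    // push_mark_stack()/pop_mark_stack()):
    //
    //     push_mark_stack((StgPtr)p);          // roughly *mark_sp++ = (StgPtr)p,
    //                                          // chaining a fresh block onto
    //                                          // mark_stack_bd when this one fills
    //     q = (StgClosure *)pop_mark_stack();  // roughly *--mark_sp
    //
    // mark_stack_top_bd is remembered so that the whole chain of blocks can
    // be freed once the collection is finished (see below).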
324
325 /* -----------------------------------------------------------------------
326 * follow all the roots that we know about:
327 */
328
329 // the main thread is running: this prevents any other threads from
330 // exiting prematurely, so we can start them now.
331 // NB. do this after the mutable lists have been saved above, otherwise
332 // the other GC threads will be writing into the old mutable lists.
333 inc_running();
334 wakeup_gc_threads(gct->thread_index);
335
336 traceEventGcWork(gct->cap);
337
338 // scavenge the capability-private mutable lists. This isn't part
339 // of markSomeCapabilities() because markSomeCapabilities() can only
340 // call back into the GC via mark_root() (due to the gct register
341 // variable).
342 if (n_gc_threads == 1) {
343 for (n = 0; n < n_capabilities; n++) {
344 #if defined(THREADED_RTS)
345 scavenge_capability_mut_Lists1(capabilities[n]);
346 #else
347 scavenge_capability_mut_lists(capabilities[n]);
348 #endif
349 }
350 } else {
351 scavenge_capability_mut_lists(gct->cap);
352 for (n = 0; n < n_capabilities; n++) {
353 if (gc_threads[n]->idle) {
354 markCapability(mark_root, gct, capabilities[n],
355 rtsTrue/*don't mark sparks*/);
356 scavenge_capability_mut_lists(capabilities[n]);
357 }
358 }
359 }
360
361 // follow roots from the CAF list (used by GHCi)
362 gct->evac_gen_no = 0;
363 markCAFs(mark_root, gct);
364
365 // follow all the roots that the application knows about.
366 gct->evac_gen_no = 0;
367 if (n_gc_threads == 1) {
368 for (n = 0; n < n_capabilities; n++) {
369 markCapability(mark_root, gct, capabilities[n],
370 rtsTrue/*don't mark sparks*/);
371 }
372 } else {
373 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
374 }
375
376 markScheduler(mark_root, gct);
377
378 #if defined(RTS_USER_SIGNALS)
379 // mark the signal handlers (signals should be already blocked)
380 markSignalHandlers(mark_root, gct);
381 #endif
382
383 // Mark the weak pointer list, and prepare to detect dead weak pointers.
384 markWeakPtrList();
385 initWeakForGC();
386
387 // Mark the stable pointer table.
388 markStableTables(mark_root, gct);
389
390 /* -------------------------------------------------------------------------
391 * Repeatedly scavenge all the areas we know about until there's no
392 * more scavenging to be done.
393 */
394 for (;;)
395 {
396 scavenge_until_all_done();
397 // The other threads are now stopped. We might recurse back to
398 // here, but from now on this is the only thread.
399
400 // must be last... invariant is that everything is fully
401 // scavenged at this point.
402 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
403 inc_running();
404 continue;
405 }
406
407 // If we get to here, there's really nothing left to do.
408 break;
409 }
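    // Example of why this is a loop: traverseWeakPtrList() may discover that
    // a weak pointer's key is reachable after all, in which case it
    // evacuates the value and finalizer.  That evacuation creates new to-do
    // work, so we go round again and scavenge to a fixed point before giving
    // up on the remaining weak pointers.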
410
411 shutdown_gc_threads(gct->thread_index);
412
413 // Now see which stable names are still alive.
414 gcStableTables();
415
416 #ifdef THREADED_RTS
417 if (n_gc_threads == 1) {
418 for (n = 0; n < n_capabilities; n++) {
419 pruneSparkQueue(capabilities[n]);
420 }
421 } else {
422 for (n = 0; n < n_capabilities; n++) {
423 if (n == cap->no || gc_threads[n]->idle) {
424 pruneSparkQueue(capabilities[n]);
425 }
426 }
427 }
428 #endif
429
430 #ifdef PROFILING
431 // We call processHeapClosureForDead() on every closure destroyed during
432 // the current garbage collection, so we invoke LdvCensusForDead().
433 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
434 || RtsFlags.ProfFlags.bioSelector != NULL) {
435 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
436 LdvCensusForDead(N);
437 ACQUIRE_SM_LOCK;
438 }
439 #endif
440
441 // NO MORE EVACUATION AFTER THIS POINT!
442
443 // Finally: compact or sweep the oldest generation.
444 if (major_gc && oldest_gen->mark) {
445 if (oldest_gen->compact)
446 compact(gct->scavenged_static_objects);
447 else
448 sweep(oldest_gen);
449 }
450
451 copied = 0;
452 par_max_copied = 0;
453 par_tot_copied = 0;
454 {
455 nat i;
456 for (i=0; i < n_gc_threads; i++) {
457 if (n_gc_threads > 1) {
458 debugTrace(DEBUG_gc,"thread %d:", i);
459 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
460 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
461 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
462 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
463 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
464 }
465 copied += gc_threads[i]->copied;
466 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
467 }
468 par_tot_copied = copied;
469 if (n_gc_threads == 1) {
470 par_max_copied = 0;
471 par_tot_copied = 0;
472 }
473 }
474
475 // Run through all the generations/steps and tidy up.
476 // We're going to:
477 // - count the amount of "live" data (live_words, live_blocks)
478 // - count the amount of "copied" data in this GC (copied)
479 // - free from-space
480 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
481 //
482 live_words = 0;
483 live_blocks = 0;
484
485 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
486
487 if (g == N) {
488 generations[g].collections++; // for stats
489 if (n_gc_threads > 1) generations[g].par_collections++;
490 }
491
492 // Count the mutable list as bytes "copied" for the purposes of
493 // stats. Every mutable list is copied during every GC.
494 if (g > 0) {
495 W_ mut_list_size = 0;
496 for (n = 0; n < n_capabilities; n++) {
497 mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
498 }
499 copied += mut_list_size;
500
501 debugTrace(DEBUG_gc,
502 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
503 (unsigned long)(mut_list_size * sizeof(W_)),
504 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
505 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
506 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
507 mutlist_ATOMIC_INVARIANT,
508 mutlist_INVARIANT_CHECK_QUEUE,
509 mutlist_OTHERS);
510 }
511
512 bdescr *next, *prev;
513 gen = &generations[g];
514
515 // for generations we collected...
516 if (g <= N) {
517
518 /* free old memory and shift to-space into from-space for all
519 * the collected steps (except the allocation area). These
520          * freed blocks will probably be quickly recycled.
521 */
522 if (gen->mark)
523 {
524 // tack the new blocks on the end of the existing blocks
525 if (gen->old_blocks != NULL) {
526
527 prev = NULL;
528 for (bd = gen->old_blocks; bd != NULL; bd = next) {
529
530 next = bd->link;
531
532 if (!(bd->flags & BF_MARKED))
533 {
534 if (prev == NULL) {
535 gen->old_blocks = next;
536 } else {
537 prev->link = next;
538 }
539 freeGroup(bd);
540 gen->n_old_blocks--;
541 }
542 else
543 {
544 gen->n_words += bd->free - bd->start;
545
546 // NB. this step might not be compacted next
547 // time, so reset the BF_MARKED flags.
548 // They are set before GC if we're going to
549 // compact. (search for BF_MARKED above).
550 bd->flags &= ~BF_MARKED;
551
552 // between GCs, all blocks in the heap except
553 // for the nursery have the BF_EVACUATED flag set.
554 bd->flags |= BF_EVACUATED;
555
556 prev = bd;
557 }
558 }
559
560 if (prev != NULL) {
561 prev->link = gen->blocks;
562 gen->blocks = gen->old_blocks;
563 }
564 }
565 // add the new blocks to the block tally
566 gen->n_blocks += gen->n_old_blocks;
567 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
568 ASSERT(countOccupied(gen->blocks) == gen->n_words);
569 }
570        else // not compacted
571 {
572 freeChain(gen->old_blocks);
573 }
574
575 gen->old_blocks = NULL;
576 gen->n_old_blocks = 0;
577
578 /* LARGE OBJECTS. The current live large objects are chained on
579 * scavenged_large, having been moved during garbage
580 * collection from large_objects. Any objects left on the
581 * large_objects list are therefore dead, so we free them here.
582 */
583 freeChain(gen->large_objects);
584 gen->large_objects = gen->scavenged_large_objects;
585 gen->n_large_blocks = gen->n_scavenged_large_blocks;
586 gen->n_large_words = countOccupied(gen->large_objects);
587 gen->n_new_large_words = 0;
588 }
589 else // for generations > N
590 {
591 /* For older generations, we need to append the
592 * scavenged_large_object list (i.e. large objects that have been
593 * promoted during this GC) to the large_object list for that step.
594 */
595 for (bd = gen->scavenged_large_objects; bd; bd = next) {
596 next = bd->link;
597 dbl_link_onto(bd, &gen->large_objects);
598 gen->n_large_words += bd->free - bd->start;
599 }
600
601 // add the new blocks we promoted during this GC
602 gen->n_large_blocks += gen->n_scavenged_large_blocks;
603 }
604
605 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
606 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
607
608 gen->scavenged_large_objects = NULL;
609 gen->n_scavenged_large_blocks = 0;
610
611 // Count "live" data
612 live_words += genLiveWords(gen);
613 live_blocks += genLiveBlocks(gen);
614
615 // add in the partial blocks in the gen_workspaces, but ignore gen 0
616 // if this is a local GC (we can't count another capability's part_list)
617 {
618 nat i;
619 for (i = 0; i < n_capabilities; i++) {
620 live_words += gcThreadLiveWords(i, gen->no);
621 live_blocks += gcThreadLiveBlocks(i, gen->no);
622 }
623 }
624 } // for all generations
625
626 // update the max size of older generations after a major GC
627 resize_generations();
628
629 // Free the mark stack.
630 if (mark_stack_top_bd != NULL) {
631 debugTrace(DEBUG_gc, "mark stack: %d blocks",
632 countBlocks(mark_stack_top_bd));
633 freeChain(mark_stack_top_bd);
634 }
635
636 // Free any bitmaps.
637 for (g = 0; g <= N; g++) {
638 gen = &generations[g];
639 if (gen->bitmap != NULL) {
640 freeGroup(gen->bitmap);
641 gen->bitmap = NULL;
642 }
643 }
644
645 resize_nursery();
646
647 resetNurseries();
648
649 // mark the garbage collected CAFs as dead
650 #if defined(DEBUG)
651 if (major_gc) { gcCAFs(); }
652 #endif
653
654 // Update the stable pointer hash table.
655 updateStableTables(major_gc);
656
657 // unlock the StablePtr table. Must be before scheduleFinalizers(),
658 // because a finalizer may call hs_free_fun_ptr() or
659 // hs_free_stable_ptr(), both of which access the StablePtr table.
660 stableUnlock();
661
662 // Must be after stableUnlock(), because it might free stable ptrs.
663 if (major_gc) {
664 checkUnload (gct->scavenged_static_objects);
665 }
666
667 #ifdef PROFILING
668 // resetStaticObjectForRetainerProfiling() must be called before
669 // zeroing below.
670
671 // ToDo: fix the gct->scavenged_static_objects below
672 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
673 #endif
674
675 // zero the scavenged static object list
676 if (major_gc) {
677 nat i;
678 if (n_gc_threads == 1) {
679 zero_static_object_list(gct->scavenged_static_objects);
680 } else {
681 for (i = 0; i < n_gc_threads; i++) {
682 if (!gc_threads[i]->idle) {
683 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
684 }
685 }
686 }
687 }
688
689 // Start any pending finalizers. Must be after
690 // updateStableTables() and stableUnlock() (see #4221).
691 RELEASE_SM_LOCK;
692 scheduleFinalizers(cap, dead_weak_ptr_list);
693 ACQUIRE_SM_LOCK;
694
695 // check sanity after GC
696 // before resurrectThreads(), because that might overwrite some
697 // closures, which will cause problems with THREADED where we don't
698 // fill slop.
699 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
700
701 // If a heap census is due, we need to do it before
702 // resurrectThreads(), for the same reason as checkSanity above:
703 // resurrectThreads() will overwrite some closures and leave slop
704 // behind.
705 if (do_heap_census) {
706 debugTrace(DEBUG_sched, "performing heap census");
707 RELEASE_SM_LOCK;
708 heapCensus(gct->gc_start_cpu);
709 ACQUIRE_SM_LOCK;
710 }
711
712 // send exceptions to any threads which were about to die
713 RELEASE_SM_LOCK;
714 resurrectThreads(resurrected_threads);
715 ACQUIRE_SM_LOCK;
716
717 if (major_gc) {
718 W_ need, got;
719 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
720 got = mblocks_allocated;
721 /* If the amount of data remains constant, next major GC we'll
722 require (F+1)*need. We leave (F+2)*need in order to reduce
723 repeated deallocation and reallocation. */
724 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
725 if (got > need) {
726 returnMemoryToOS(got - need);
727 }
728 }
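    // Worked example (with the default -F 2, i.e. oldGenFactor == 2): if
    // BLOCKS_TO_MBLOCKS(n_alloc_blocks) is 100, then need becomes
    // (2 + 2) * 100 = 400 megablocks.  If 450 megablocks are currently
    // mapped, the extra 50 are handed back to the OS; the (F+1) * 100 = 300
    // anticipated by the comment above still fit comfortably.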
729
730 // extra GC trace info
731 IF_DEBUG(gc, statDescribeGens());
732
733 #ifdef DEBUG
734 // symbol-table based profiling
735 /* heapCensus(to_blocks); */ /* ToDo */
736 #endif
737
738 // restore enclosing cost centre
739 #ifdef PROFILING
740 for (n = 0; n < n_capabilities; n++) {
741 capabilities[n]->r.rCCCS = save_CCS[n];
742 }
743 #endif
744
745 #ifdef DEBUG
746 // check for memory leaks if DEBUG is on
747 memInventory(DEBUG_gc);
748 #endif
749
750 // ok, GC over: tell the stats department what happened.
751 stat_endGC(cap, gct, live_words, copied,
752 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
753 N, n_gc_threads, par_max_copied, par_tot_copied);
754
755 #if defined(RTS_USER_SIGNALS)
756 if (RtsFlags.MiscFlags.install_signal_handlers) {
757 // unblock signals again
758 unblockUserSignals();
759 }
760 #endif
761
762 RELEASE_SM_LOCK;
763
764 SET_GCT(saved_gct);
765 }
766
767 /* -----------------------------------------------------------------------------
768 Initialise the gc_thread structures.
769 -------------------------------------------------------------------------- */
770
771 #define GC_THREAD_INACTIVE 0
772 #define GC_THREAD_STANDING_BY 1
773 #define GC_THREAD_RUNNING 2
774 #define GC_THREAD_WAITING_TO_CONTINUE 3
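/* Life cycle of a GC worker thread, as implemented below (purely
 * descriptive; derived from gcWorkerThread(), wakeup_gc_threads(),
 * shutdown_gc_threads() and releaseGCThreads() in this file):
 *
 *     INACTIVE    --(worker starts, releases mut_spin)-------> STANDING_BY
 *     STANDING_BY --(wakeup_gc_threads() releases gc_spin)---> RUNNING
 *     RUNNING     --(worker finishes, releases gc_spin)------> WAITING_TO_CONTINUE
 *     WAITING_TO_CONTINUE --(releaseGCThreads() releases mut_spin)--> INACTIVE
 *
 * The two spin locks act as a pair of hand-over-hand barriers: the main GC
 * thread holds gc_spin while workers stand by, and mut_spin while they wait
 * to continue.
 */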
775
776 static void
777 new_gc_thread (nat n, gc_thread *t)
778 {
779 nat g;
780 gen_workspace *ws;
781
782 t->cap = capabilities[n];
783
784 #ifdef THREADED_RTS
785 t->id = 0;
786 initSpinLock(&t->gc_spin);
787 initSpinLock(&t->mut_spin);
788 ACQUIRE_SPIN_LOCK(&t->gc_spin);
789 ACQUIRE_SPIN_LOCK(&t->mut_spin);
790     t->wakeup = GC_THREAD_INACTIVE;  // starts INACTIVE; gcWorkerThread() sets it to
791                                      // STANDING_BY once the thread is up, see wakeup_gc_threads
792 #endif
793
794 t->thread_index = n;
795 t->idle = rtsFalse;
796 t->free_blocks = NULL;
797 t->gc_count = 0;
798
799 init_gc_thread(t);
800
801 #ifdef USE_PAPI
802 t->papi_events = -1;
803 #endif
804
805 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
806 {
807 ws = &t->gens[g];
808 ws->gen = &generations[g];
809 ASSERT(g == ws->gen->no);
810 ws->my_gct = t;
811
812 // We want to call
813 // alloc_todo_block(ws,0);
814 // but can't, because it uses gct which isn't set up at this point.
815 // Hence, allocate a block for todo_bd manually:
816 {
817 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
818 initBdescr(bd, ws->gen, ws->gen->to);
819 bd->flags = BF_EVACUATED;
820 bd->u.scan = bd->free = bd->start;
821
822 ws->todo_bd = bd;
823 ws->todo_free = bd->free;
824 ws->todo_lim = bd->start + BLOCK_SIZE_W;
825 }
826
827 ws->todo_q = newWSDeque(128);
828 ws->todo_overflow = NULL;
829 ws->n_todo_overflow = 0;
830 ws->todo_large_objects = NULL;
831
832 ws->part_list = NULL;
833 ws->n_part_blocks = 0;
834
835 ws->scavd_list = NULL;
836 ws->n_scavd_blocks = 0;
837 }
838 }
839
840
841 void
842 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
843 {
844 #if defined(THREADED_RTS)
845 nat i;
846
847 if (from > 0) {
848 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
849 "initGcThreads");
850 } else {
851 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
852 "initGcThreads");
853 }
854
855 for (i = from; i < to; i++) {
856 gc_threads[i] =
857 stgMallocBytes(sizeof(gc_thread) +
858 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
859 "alloc_gc_threads");
860
861 new_gc_thread(i, gc_threads[i]);
862 }
863 #else
864 ASSERT(from == 0 && to == 1);
865 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
866 gc_threads[0] = gct;
867 new_gc_thread(0,gc_threads[0]);
868 #endif
869 }
870
871 void
872 freeGcThreads (void)
873 {
874 nat g;
875 if (gc_threads != NULL) {
876 #if defined(THREADED_RTS)
877 nat i;
878 for (i = 0; i < n_capabilities; i++) {
879 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
880 {
881 freeWSDeque(gc_threads[i]->gens[g].todo_q);
882 }
883 stgFree (gc_threads[i]);
884 }
885 stgFree (gc_threads);
886 #else
887 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
888 {
889 freeWSDeque(gc_threads[0]->gens[g].todo_q);
890 }
891 stgFree (gc_threads);
892 #endif
893 gc_threads = NULL;
894 }
895 }
896
897 /* ----------------------------------------------------------------------------
898 Start GC threads
899 ------------------------------------------------------------------------- */
900
901 static volatile StgWord gc_running_threads;
902
903 static StgWord
904 inc_running (void)
905 {
906 StgWord new;
907 new = atomic_inc(&gc_running_threads, 1);
908 ASSERT(new <= n_gc_threads);
909 return new;
910 }
911
912 static StgWord
913 dec_running (void)
914 {
915 ASSERT(gc_running_threads != 0);
916 return atomic_dec(&gc_running_threads);
917 }
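// Example of the counting protocol: with 4 GC threads, gc_running_threads
// reaches 4 once everyone is scavenging.  A thread that runs out of local
// work calls dec_running(); if it later spots more work in any_work() it
// calls inc_running() and rejoins.  The GC as a whole is finished only when
// the counter reaches zero while no thread can find further work.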
918
919 static rtsBool
920 any_work (void)
921 {
922 int g;
923 gen_workspace *ws;
924
925 gct->any_work++;
926
927 write_barrier();
928
929 // scavenge objects in compacted generation
930 if (mark_stack_bd != NULL && !mark_stack_empty()) {
931 return rtsTrue;
932 }
933
934 // Check for global work in any step. We don't need to check for
935 // local work, because we have already exited scavenge_loop(),
936 // which means there is no local work for this thread.
937 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
938 ws = &gct->gens[g];
939 if (ws->todo_large_objects) return rtsTrue;
940 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
941 if (ws->todo_overflow) return rtsTrue;
942 }
943
944 #if defined(THREADED_RTS)
945 if (work_stealing) {
946 nat n;
947 // look for work to steal
948 for (n = 0; n < n_gc_threads; n++) {
949 if (n == gct->thread_index) continue;
950 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
951 ws = &gc_threads[n]->gens[g];
952 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
953 }
954 }
955 }
956 #endif
957
958 gct->no_work++;
959 #if defined(THREADED_RTS)
960 yieldThread();
961 #endif
962
963 return rtsFalse;
964 }
965
966 static void
967 scavenge_until_all_done (void)
968 {
969 DEBUG_ONLY( nat r );
970
971
972 loop:
973 #if defined(THREADED_RTS)
974 if (n_gc_threads > 1) {
975 scavenge_loop();
976 } else {
977 scavenge_loop1();
978 }
979 #else
980 scavenge_loop();
981 #endif
982
983 collect_gct_blocks();
984
985 // scavenge_loop() only exits when there's no work to do
986
987 #ifdef DEBUG
988 r = dec_running();
989 #else
990 dec_running();
991 #endif
992
993 traceEventGcIdle(gct->cap);
994
995 debugTrace(DEBUG_gc, "%d GC threads still running", r);
996
997 while (gc_running_threads != 0) {
998 // usleep(1);
999 if (any_work()) {
1000 inc_running();
1001 traceEventGcWork(gct->cap);
1002 goto loop;
1003 }
1004 // any_work() does not remove the work from the queue, it
1005 // just checks for the presence of work. If we find any,
1006 // then we increment gc_running_threads and go back to
1007 // scavenge_loop() to perform any pending work.
1008 }
1009
1010 traceEventGcDone(gct->cap);
1011 }
1012
1013 #if defined(THREADED_RTS)
1014
1015 void
1016 gcWorkerThread (Capability *cap)
1017 {
1018 gc_thread *saved_gct;
1019
1020 // necessary if we stole a callee-saves register for gct:
1021 saved_gct = gct;
1022
1023 SET_GCT(gc_threads[cap->no]);
1024 gct->id = osThreadId();
1025
1026 // Wait until we're told to wake up
1027 RELEASE_SPIN_LOCK(&gct->mut_spin);
1028 // yieldThread();
1029 // Strangely, adding a yieldThread() here makes the CPU time
1030 // measurements more accurate on Linux, perhaps because it syncs
1031 // the CPU time across the multiple cores. Without this, CPU time
1032 // is heavily skewed towards GC rather than MUT.
1033 gct->wakeup = GC_THREAD_STANDING_BY;
1034 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1035 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1036
1037 #ifdef USE_PAPI
1038 // start performance counters in this thread...
1039 if (gct->papi_events == -1) {
1040 papi_init_eventset(&gct->papi_events);
1041 }
1042 papi_thread_start_gc1_count(gct->papi_events);
1043 #endif
1044
1045 init_gc_thread(gct);
1046
1047 traceEventGcWork(gct->cap);
1048
1049 // Every thread evacuates some roots.
1050 gct->evac_gen_no = 0;
1051 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1052 scavenge_capability_mut_lists(cap);
1053
1054 scavenge_until_all_done();
1055
1056 #ifdef THREADED_RTS
1057 // Now that the whole heap is marked, we discard any sparks that
1058 // were found to be unreachable. The main GC thread is currently
1059 // marking heap reachable via weak pointers, so it is
1060 // non-deterministic whether a spark will be retained if it is
1061 // only reachable via weak pointers. To fix this problem would
1062 // require another GC barrier, which is too high a price.
1063 pruneSparkQueue(cap);
1064 #endif
1065
1066 #ifdef USE_PAPI
1067 // count events in this thread towards the GC totals
1068 papi_thread_stop_gc1_count(gct->papi_events);
1069 #endif
1070
1071 // Wait until we're told to continue
1072 RELEASE_SPIN_LOCK(&gct->gc_spin);
1073 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1074 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1075 gct->thread_index);
1076 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1077 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1078
1079 SET_GCT(saved_gct);
1080 }
1081
1082 #endif
1083
1084 #if defined(THREADED_RTS)
1085
1086 void
1087 waitForGcThreads (Capability *cap USED_IF_THREADS)
1088 {
1089 const nat n_threads = n_capabilities;
1090 const nat me = cap->no;
1091 nat i, j;
1092 rtsBool retry = rtsTrue;
1093
1094 while(retry) {
1095 for (i=0; i < n_threads; i++) {
1096 if (i == me || gc_threads[i]->idle) continue;
1097 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1098 prodCapability(capabilities[i], cap->running_task);
1099 }
1100 }
1101 for (j=0; j < 10; j++) {
1102 retry = rtsFalse;
1103 for (i=0; i < n_threads; i++) {
1104 if (i == me || gc_threads[i]->idle) continue;
1105 write_barrier();
1106 interruptCapability(capabilities[i]);
1107 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1108 retry = rtsTrue;
1109 }
1110 }
1111 if (!retry) break;
1112 yieldThread();
1113 }
1114 }
1115 }
1116
1117 #endif // THREADED_RTS
1118
1119 static void
1120 start_gc_threads (void)
1121 {
1122 #if defined(THREADED_RTS)
1123 gc_running_threads = 0;
1124 #endif
1125 }
1126
1127 static void
1128 wakeup_gc_threads (nat me USED_IF_THREADS)
1129 {
1130 #if defined(THREADED_RTS)
1131 nat i;
1132
1133 if (n_gc_threads == 1) return;
1134
1135 for (i=0; i < n_gc_threads; i++) {
1136 if (i == me || gc_threads[i]->idle) continue;
1137 inc_running();
1138 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1139 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1140
1141 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1142 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1143 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1144 }
1145 #endif
1146 }
1147
1148 // After GC is complete, we must wait for all GC threads to enter the
1149 // WAITING_TO_CONTINUE state, otherwise they may still be executing inside
1150 // any_work(), and may even remain awake until the next GC starts.
1151 static void
1152 shutdown_gc_threads (nat me USED_IF_THREADS)
1153 {
1154 #if defined(THREADED_RTS)
1155 nat i;
1156
1157 if (n_gc_threads == 1) return;
1158
1159 for (i=0; i < n_gc_threads; i++) {
1160 if (i == me || gc_threads[i]->idle) continue;
1161 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
1162 busy_wait_nop();
1163 write_barrier();
1164 }
1165 }
1166 #endif
1167 }
1168
1169 #if defined(THREADED_RTS)
1170 void
1171 releaseGCThreads (Capability *cap USED_IF_THREADS)
1172 {
1173 const nat n_threads = n_capabilities;
1174 const nat me = cap->no;
1175 nat i;
1176 for (i=0; i < n_threads; i++) {
1177 if (i == me || gc_threads[i]->idle) continue;
1178 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1179 barf("releaseGCThreads");
1180
1181 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1182 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1183 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1184 }
1185 }
1186 #endif
1187
1188 /* ----------------------------------------------------------------------------
1189 Initialise a generation that is to be collected
1190 ------------------------------------------------------------------------- */
1191
1192 static void
1193 prepare_collected_gen (generation *gen)
1194 {
1195 nat i, g, n;
1196 gen_workspace *ws;
1197 bdescr *bd, *next;
1198
1199 // Throw away the current mutable list. Invariant: the mutable
1200 // list always has at least one block; this means we can avoid a
1201 // check for NULL in recordMutable().
1202 g = gen->no;
1203 if (g != 0) {
1204 for (i = 0; i < n_capabilities; i++) {
1205 freeChain(capabilities[i]->mut_lists[g]);
1206 capabilities[i]->mut_lists[g] = allocBlock();
1207 }
1208 }
1209
1210 gen = &generations[g];
1211 ASSERT(gen->no == g);
1212
1213     // we'll construct a new list of threads in this step during GC,
1214     // so throw away the current list.
1215 gen->old_threads = gen->threads;
1216 gen->threads = END_TSO_QUEUE;
1217
1218 // deprecate the existing blocks
1219 gen->old_blocks = gen->blocks;
1220 gen->n_old_blocks = gen->n_blocks;
1221 gen->blocks = NULL;
1222 gen->n_blocks = 0;
1223 gen->n_words = 0;
1224 gen->live_estimate = 0;
1225
1226 // initialise the large object queues.
1227 ASSERT(gen->scavenged_large_objects == NULL);
1228 ASSERT(gen->n_scavenged_large_blocks == 0);
1229
1230 // grab all the partial blocks stashed in the gc_thread workspaces and
1231 // move them to the old_blocks list of this gen.
1232 for (n = 0; n < n_capabilities; n++) {
1233 ws = &gc_threads[n]->gens[gen->no];
1234
1235 for (bd = ws->part_list; bd != NULL; bd = next) {
1236 next = bd->link;
1237 bd->link = gen->old_blocks;
1238 gen->old_blocks = bd;
1239 gen->n_old_blocks += bd->blocks;
1240 }
1241 ws->part_list = NULL;
1242 ws->n_part_blocks = 0;
1243
1244 ASSERT(ws->scavd_list == NULL);
1245 ASSERT(ws->n_scavd_blocks == 0);
1246
1247 if (ws->todo_free != ws->todo_bd->start) {
1248 ws->todo_bd->free = ws->todo_free;
1249 ws->todo_bd->link = gen->old_blocks;
1250 gen->old_blocks = ws->todo_bd;
1251 gen->n_old_blocks += ws->todo_bd->blocks;
1252 alloc_todo_block(ws,0); // always has one block.
1253 }
1254 }
1255
1256 // mark the small objects as from-space
1257 for (bd = gen->old_blocks; bd; bd = bd->link) {
1258 bd->flags &= ~BF_EVACUATED;
1259 }
1260
1261 // mark the large objects as from-space
1262 for (bd = gen->large_objects; bd; bd = bd->link) {
1263 bd->flags &= ~BF_EVACUATED;
1264 }
1265
1266 // for a compacted generation, we need to allocate the bitmap
1267 if (gen->mark) {
1268 StgWord bitmap_size; // in bytes
1269 bdescr *bitmap_bdescr;
1270 StgWord *bitmap;
1271
1272 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
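        // Worked example: one bit per word of old heap.  On a 64-bit
        // machine with the usual 4 KB blocks, each block holds 4096/8 = 512
        // words and therefore needs 512 bits = 64 bytes of bitmap, i.e.
        // bitmap_size is 64 * gen->n_old_blocks bytes here.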
1273
1274 if (bitmap_size > 0) {
1275 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1276 / BLOCK_SIZE);
1277 gen->bitmap = bitmap_bdescr;
1278 bitmap = bitmap_bdescr->start;
1279
1280 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1281 bitmap_size, bitmap);
1282
1283 // don't forget to fill it with zeros!
1284 memset(bitmap, 0, bitmap_size);
1285
1286 // For each block in this step, point to its bitmap from the
1287 // block descriptor.
1288 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1289 bd->u.bitmap = bitmap;
1290 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1291
1292 // Also at this point we set the BF_MARKED flag
1293 // for this block. The invariant is that
1294 // BF_MARKED is always unset, except during GC
1295 // when it is set on those blocks which will be
1296 // compacted.
1297 if (!(bd->flags & BF_FRAGMENTED)) {
1298 bd->flags |= BF_MARKED;
1299 }
1300
1301 // BF_SWEPT should be marked only for blocks that are being
1302 // collected in sweep()
1303 bd->flags &= ~BF_SWEPT;
1304 }
1305 }
1306 }
1307 }
1308
1309
1310 /* ----------------------------------------------------------------------------
1311 Save the mutable lists in saved_mut_lists
1312 ------------------------------------------------------------------------- */
1313
1314 static void
1315 stash_mut_list (Capability *cap, nat gen_no)
1316 {
1317 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1318 cap->mut_lists[gen_no] = allocBlock_sync();
1319 }
1320
1321 /* ----------------------------------------------------------------------------
1322 Initialise a generation that is *not* to be collected
1323 ------------------------------------------------------------------------- */
1324
1325 static void
1326 prepare_uncollected_gen (generation *gen)
1327 {
1328 nat i;
1329
1330
1331 ASSERT(gen->no > 0);
1332
1333 // save the current mutable lists for this generation, and
1334 // allocate a fresh block for each one. We'll traverse these
1335 // mutable lists as roots early on in the GC.
1336 for (i = 0; i < n_capabilities; i++) {
1337 stash_mut_list(capabilities[i], gen->no);
1338 }
1339
1340 ASSERT(gen->scavenged_large_objects == NULL);
1341 ASSERT(gen->n_scavenged_large_blocks == 0);
1342 }
1343
1344 /* -----------------------------------------------------------------------------
1345 Collect the completed blocks from a GC thread and attach them to
1346 the generation.
1347 -------------------------------------------------------------------------- */
1348
1349 static void
1350 collect_gct_blocks (void)
1351 {
1352 nat g;
1353 gen_workspace *ws;
1354 bdescr *bd, *prev;
1355
1356 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1357 ws = &gct->gens[g];
1358
1359 // there may still be a block attached to ws->todo_bd;
1360 // leave it there to use next time.
1361
1362 if (ws->scavd_list != NULL) {
1363 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1364
1365 ASSERT(gct->scan_bd == NULL);
1366 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1367
1368 prev = NULL;
1369 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1370 ws->gen->n_words += bd->free - bd->start;
1371 prev = bd;
1372 }
1373 if (prev != NULL) {
1374 prev->link = ws->gen->blocks;
1375 ws->gen->blocks = ws->scavd_list;
1376 }
1377 ws->gen->n_blocks += ws->n_scavd_blocks;
1378
1379 ws->scavd_list = NULL;
1380 ws->n_scavd_blocks = 0;
1381
1382 RELEASE_SPIN_LOCK(&ws->gen->sync);
1383 }
1384 }
1385 }
1386
1387 /* -----------------------------------------------------------------------------
1388 During mutation, any blocks that are filled by allocatePinned() are
1389 stashed on the local pinned_object_blocks list, to avoid needing to
1390 take a global lock. Here we collect those blocks from the
1391 cap->pinned_object_blocks lists and put them on the
1392 main g0->large_object list.
1393
1394    Note that this function itself returns nothing: the blocks simply
1395    become large objects of g0 and are accounted for as such by the GC.
1396 -------------------------------------------------------------------------- */
1397
1398 static void
1399 collect_pinned_object_blocks (void)
1400 {
1401 nat n;
1402 bdescr *bd, *prev;
1403
1404 for (n = 0; n < n_capabilities; n++) {
1405 prev = NULL;
1406 for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1407 prev = bd;
1408 }
1409 if (prev != NULL) {
1410 prev->link = g0->large_objects;
1411 if (g0->large_objects != NULL) {
1412 g0->large_objects->u.back = prev;
1413 }
1414 g0->large_objects = capabilities[n]->pinned_object_blocks;
1415 capabilities[n]->pinned_object_blocks = 0;
1416 }
1417 }
1418 }
1419
1420 /* -----------------------------------------------------------------------------
1421 Initialise a gc_thread before GC
1422 -------------------------------------------------------------------------- */
1423
1424 static void
1425 init_gc_thread (gc_thread *t)
1426 {
1427 t->static_objects = END_OF_STATIC_LIST;
1428 t->scavenged_static_objects = END_OF_STATIC_LIST;
1429 t->scan_bd = NULL;
1430 t->mut_lists = t->cap->mut_lists;
1431 t->evac_gen_no = 0;
1432 t->failed_to_evac = rtsFalse;
1433 t->eager_promotion = rtsTrue;
1434 t->thunk_selector_depth = 0;
1435 t->copied = 0;
1436 t->scanned = 0;
1437 t->any_work = 0;
1438 t->no_work = 0;
1439 t->scav_find_work = 0;
1440 }
1441
1442 /* -----------------------------------------------------------------------------
1443 Function we pass to evacuate roots.
1444 -------------------------------------------------------------------------- */
1445
1446 static void
1447 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1448 {
1449 // we stole a register for gct, but this function is called from
1450 // *outside* the GC where the register variable is not in effect,
1451 // so we need to save and restore it here. NB. only call
1452 // mark_root() from the main GC thread, otherwise gct will be
1453 // incorrect.
1454 #if defined(THREADED_RTS)
1455 gc_thread *saved_gct;
1456 saved_gct = gct;
1457 #endif
1458 SET_GCT(user);
1459
1460 evacuate(root);
1461
1462 SET_GCT(saved_gct);
1463 }
1464
1465 /* -----------------------------------------------------------------------------
1466 Initialising the static object & mutable lists
1467 -------------------------------------------------------------------------- */
1468
1469 static void
1470 zero_static_object_list(StgClosure* first_static)
1471 {
1472 StgClosure* p;
1473 StgClosure* link;
1474 const StgInfoTable *info;
1475
1476 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1477 info = get_itbl(p);
1478 link = *STATIC_LINK(info, p);
1479 *STATIC_LINK(info,p) = NULL;
1480 }
1481 }
1482
1483 /* ----------------------------------------------------------------------------
1484 Reset the sizes of the older generations when we do a major
1485 collection.
1486
1487 CURRENT STRATEGY: make all generations except zero the same size.
1488 We have to stay within the maximum heap size, and leave a certain
1489 percentage of the maximum heap size available to allocate into.
1490 ------------------------------------------------------------------------- */
1491
1492 static void
1493 resize_generations (void)
1494 {
1495 nat g;
1496
1497 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1498 W_ live, size, min_alloc, words;
1499 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1500 const W_ gens = RtsFlags.GcFlags.generations;
1501
1502 // live in the oldest generations
1503 if (oldest_gen->live_estimate != 0) {
1504 words = oldest_gen->live_estimate;
1505 } else {
1506 words = oldest_gen->n_words;
1507 }
1508 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1509 oldest_gen->n_large_blocks;
1510
1511 // default max size for all generations except zero
1512 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1513 RtsFlags.GcFlags.minOldGenSize);
1514
1515 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1516 if (max > 0) {
1517 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1518 } else {
1519 RtsFlags.GcFlags.heapSizeSuggestion = size;
1520 }
1521 }
1522
1523 // minimum size for generation zero
1524 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1525 RtsFlags.GcFlags.minAllocAreaSize);
1526
1527 // Auto-enable compaction when the residency reaches a
1528 // certain percentage of the maximum heap size (default: 30%).
1529 if (RtsFlags.GcFlags.compact ||
1530 (max > 0 &&
1531 oldest_gen->n_blocks >
1532 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1533 oldest_gen->mark = 1;
1534 oldest_gen->compact = 1;
1535 // debugBelch("compaction: on\n", live);
1536 } else {
1537 oldest_gen->mark = 0;
1538 oldest_gen->compact = 0;
1539 // debugBelch("compaction: off\n", live);
1540 }
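        // Worked example: with +RTS -M1G and the default compactThreshold
        // of 30 (per cent), compaction is switched on once the oldest
        // generation occupies more than ~300 MB worth of blocks; a
        // compacting collection then avoids the 2x to-space overhead that
        // the copying calculation below has to budget for.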
1541
1542 if (RtsFlags.GcFlags.sweep) {
1543 oldest_gen->mark = 1;
1544 }
1545
1546 // if we're going to go over the maximum heap size, reduce the
1547 // size of the generations accordingly. The calculation is
1548 // different if compaction is turned on, because we don't need
1549 // to double the space required to collect the old generation.
1550 if (max != 0) {
1551
1552 // this test is necessary to ensure that the calculations
1553 // below don't have any negative results - we're working
1554 // with unsigned values here.
1555 if (max < min_alloc) {
1556 heapOverflow();
1557 }
1558
1559 if (oldest_gen->compact) {
1560 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1561 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1562 }
1563 } else {
1564 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1565 size = (max - min_alloc) / ((gens - 1) * 2);
1566 }
1567 }
1568
1569 if (size < live) {
1570 heapOverflow();
1571 }
1572 }
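        // Worked example of the non-compacting branch above: with the
        // default two generations (gens == 2), the constraint is
        // size * 2 + min_alloc <= max, so size becomes
        // (max - min_alloc) / 2, i.e. half the remaining heap, because a
        // copying collection of the old generation needs to-space as large
        // as from-space.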
1573
1574 #if 0
1575 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1576 min_alloc, size, max);
1577 #endif
1578
1579 for (g = 0; g < gens; g++) {
1580 generations[g].max_blocks = size;
1581 }
1582 }
1583 }
1584
1585 /* -----------------------------------------------------------------------------
1586 Calculate the new size of the nursery, and resize it.
1587 -------------------------------------------------------------------------- */
1588
1589 static void
1590 resize_nursery (void)
1591 {
1592 const StgWord min_nursery =
1593 RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;
1594
1595 if (RtsFlags.GcFlags.generations == 1)
1596 { // Two-space collector:
1597 W_ blocks;
1598
1599 /* set up a new nursery. Allocate a nursery size based on a
1600 * function of the amount of live data (by default a factor of 2)
1601 * Use the blocks from the old nursery if possible, freeing up any
1602 * left over blocks.
1603 *
1604 * If we get near the maximum heap size, then adjust our nursery
1605 * size accordingly. If the nursery is the same size as the live
1606 * data (L), then we need 3L bytes. We can reduce the size of the
1607 * nursery to bring the required memory down near 2L bytes.
1608 *
1609 * A normal 2-space collector would need 4L bytes to give the same
1610 * performance we get from 3L bytes, reducing to the same
1611 * performance at 2L bytes.
1612 */
1613 blocks = generations[0].n_blocks;
1614
1615 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1616 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1617 RtsFlags.GcFlags.maxHeapSize )
1618 {
1619 long adjusted_blocks; // signed on purpose
1620 int pc_free;
1621
1622 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1623
1624 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1625 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1626
1627 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1628             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1629 {
1630 heapOverflow();
1631 }
1632 blocks = adjusted_blocks;
1633 }
1634 else
1635 {
1636 blocks *= RtsFlags.GcFlags.oldGenFactor;
1637 if (blocks < min_nursery)
1638 {
1639 blocks = min_nursery;
1640 }
1641 }
1642 resizeNurseries(blocks);
1643 }
1644 else // Generational collector
1645 {
1646 /*
1647 * If the user has given us a suggested heap size, adjust our
1648 * allocation area to make best use of the memory available.
1649 */
1650 if (RtsFlags.GcFlags.heapSizeSuggestion)
1651 {
1652 long blocks;
1653 StgWord needed;
1654
1655 calcNeeded(rtsFalse, &needed); // approx blocks needed at next GC
1656
1657 /* Guess how much will be live in generation 0 step 0 next time.
1658 * A good approximation is obtained by finding the
1659 * percentage of g0 that was live at the last minor GC.
1660 *
1661 * We have an accurate figure for the amount of copied data in
1662 * 'copied', but we must convert this to a number of blocks, with
1663 * a small adjustment for estimated slop at the end of a block
1664 * (- 10 words).
1665 */
1666 if (N == 0)
1667 {
1668 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1669 / countNurseryBlocks();
1670 }
1671
1672 /* Estimate a size for the allocation area based on the
1673 * information available. We might end up going slightly under
1674 * or over the suggested heap size, but we should be pretty
1675 * close on average.
1676 *
1677 * Formula: suggested - needed
1678 * ----------------------------
1679 * 1 + g0_pcnt_kept/100
1680 *
1681 * where 'needed' is the amount of memory needed at the next
1682 * collection for collecting all gens except g0.
1683 */
1684 blocks =
1685 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1686 (100 + (long)g0_pcnt_kept);
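            // Worked example: with a suggested heap of 100000 blocks
            // (+RTS -H), needed == 20000 blocks and g0_pcnt_kept == 30,
            // this gives (100000 - 20000) * 100 / 130 = 61538 blocks of
            // nursery, so that the ~30% of it expected to survive plus
            // 'needed' stays close to the suggestion.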
1687
1688 if (blocks < (long)min_nursery) {
1689 blocks = min_nursery;
1690 }
1691
1692 resizeNurseries((W_)blocks);
1693 }
1694 else
1695 {
1696 // we might have added extra blocks to the nursery, so
1697 // resize back to the original size again.
1698 resizeNurseriesFixed();
1699 }
1700 }
1701 }
1702
1703 /* -----------------------------------------------------------------------------
1704 Sanity code for CAF garbage collection.
1705
1706 With DEBUG turned on, we manage a CAF list in addition to the SRT
1707 mechanism. After GC, we run down the CAF list and blackhole any
1708 CAFs which have been garbage collected. This means we get an error
1709 whenever the program tries to enter a garbage collected CAF.
1710
1711 Any garbage collected CAFs are taken off the CAF list at the same
1712 time.
1713 -------------------------------------------------------------------------- */
1714
1715 #if defined(DEBUG)
1716
1717 static void gcCAFs(void)
1718 {
1719 StgIndStatic *p, *prev;
1720
1721 const StgInfoTable *info;
1722 nat i;
1723
1724 i = 0;
1725 p = debug_caf_list;
1726 prev = NULL;
1727
1728 for (p = debug_caf_list; p != (StgIndStatic*)END_OF_STATIC_LIST;
1729 p = (StgIndStatic*)p->saved_info) {
1730
1731 info = get_itbl((StgClosure*)p);
1732 ASSERT(info->type == IND_STATIC);
1733
1734 if (p->static_link == NULL) {
1735 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
1736 SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
1737 if (prev == NULL) {
1738 debug_caf_list = (StgIndStatic*)p->saved_info;
1739 } else {
1740 prev->saved_info = p->saved_info;
1741 }
1742 } else {
1743 prev = p;
1744 i++;
1745 }
1746 }
1747
1748 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1749 }
1750 #endif