Eliminate zero_static_objects_list()
rts/sm/GC.c (ghc.git)
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Storage.h"
32 #include "RtsUtils.h"
33 #include "Apply.h"
34 #include "Updates.h"
35 #include "Stats.h"
36 #include "Schedule.h"
37 #include "Sanity.h"
38 #include "BlockAlloc.h"
39 #include "ProfHeap.h"
40 #include "Weak.h"
41 #include "Prelude.h"
42 #include "RtsSignals.h"
43 #include "STM.h"
44 #include "Trace.h"
45 #include "RetainerProfile.h"
46 #include "LdvProfile.h"
47 #include "RaiseAsync.h"
48 #include "Papi.h"
49 #include "Stable.h"
50 #include "CheckUnload.h"
51
52 #include <string.h> // for memset()
53 #include <unistd.h>
54
55 /* -----------------------------------------------------------------------------
56 Global variables
57 -------------------------------------------------------------------------- */
58
59 /* STATIC OBJECT LIST.
60 *
61 * During GC:
62 * We maintain a linked list of static objects that are still live.
63 * The requirements for this list are:
64 *
65 * - we need to scan the list while adding to it, in order to
66 * scavenge all the static objects (in the same way that
67 * breadth-first scavenging works for dynamic objects).
68 *
69 * - we need to be able to tell whether an object is already on
70 * the list, to break loops.
71 *
72 * Each static object has a "static link field", which we use for
73 * linking objects on to the list. We use a stack-type list, consing
74 * objects on the front as they are added (this means that the
75 * scavenge phase is depth-first, not breadth-first, but that
76 * shouldn't matter).
77 *
78 * A separate list is kept for objects that have been scavenged
79 * already; after GC it is used by compact() and checkUnload().
80 *
81 * An object is on the list if its static link field is non-zero; this
82 * means that we have to mark the end of the list with '1', not NULL.
83 *
84 * Extra notes for generational GC:
85 *
86 * Each generation has a static object list associated with it. When
87 * collecting generations up to N, we treat the static object lists
88 * from generations > N as roots.
89 *
90 * We build up a static object list while collecting generations 0..N,
91 * which is then appended to the static object list of generation N+1.
92 */
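/* An illustrative sketch only (not part of the collector): consing an
 * object onto a static object list via its static link field, as
 * described above.  The helper is hypothetical and the "already on a
 * list" test is simplified; the real code in Evac.c also consults the
 * static_flag / prev_static_flag bits defined further down, which is
 * what allows the lists to be reused without zeroing them after a
 * major GC.
 */
#if 0
static void sketch_link_static_object (StgClosure **link_field,
                                       StgClosure *q,
                                       StgClosure **list)
{
    if (*link_field != NULL) return;   // already on a list: break loops
    *link_field = *list;               // cons onto the front (so scavenging
    *list = q;                         // is LIFO); the end of the list is a
                                       // non-NULL sentinel
                                       // (END_OF_STATIC_OBJECT_LIST).
}
#endif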
93
94 /* N is the oldest generation being collected, where the generations
95 * are numbered starting at 0. A major GC (indicated by the major_gc
96 * flag) is when we're collecting all generations. We only attempt to
97 * deal with static objects and GC CAFs when doing a major GC.
98 */
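// (Example: with the default two generations a major GC is N == 1; with
//  +RTS -G3 it is N == 2, i.e. N == RtsFlags.GcFlags.generations-1,
//  exactly the test applied in GarbageCollect() below.)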
99 nat N;
100 rtsBool major_gc;
101
102 /* Data used for allocation area sizing.
103 */
104 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
105
106 /* Mut-list stats */
107 #ifdef DEBUG
108 nat mutlist_MUTVARS,
109 mutlist_MUTARRS,
110 mutlist_MVARS,
111 mutlist_TVAR,
112 mutlist_TVAR_WATCH_QUEUE,
113 mutlist_TREC_CHUNK,
114 mutlist_TREC_HEADER,
115 mutlist_ATOMIC_INVARIANT,
116 mutlist_INVARIANT_CHECK_QUEUE,
117 mutlist_OTHERS;
118 #endif
119
120 /* Thread-local data for each GC thread
121 */
122 gc_thread **gc_threads = NULL;
123
124 #if !defined(THREADED_RTS)
125 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
126 #endif
127
128 // Number of threads running in *this* GC. Affects how many
129 // step->todos[] lists we have to look in to find work.
130 nat n_gc_threads;
131
132 // For stats:
133 long copied; // *words* copied & scavenged during this GC
134
135 rtsBool work_stealing;
136
137 nat static_flag = STATIC_FLAG_B;
138 nat prev_static_flag = STATIC_FLAG_A;
139
140 DECLARE_GCT
141
142 /* -----------------------------------------------------------------------------
143 Static function declarations
144 -------------------------------------------------------------------------- */
145
146 static void mark_root (void *user, StgClosure **root);
147 static void prepare_collected_gen (generation *gen);
148 static void prepare_uncollected_gen (generation *gen);
149 static void init_gc_thread (gc_thread *t);
150 static void resize_generations (void);
151 static void resize_nursery (void);
152 static void start_gc_threads (void);
153 static void scavenge_until_all_done (void);
154 static StgWord inc_running (void);
155 static StgWord dec_running (void);
156 static void wakeup_gc_threads (nat me);
157 static void shutdown_gc_threads (nat me);
158 static void collect_gct_blocks (void);
159 static void collect_pinned_object_blocks (void);
160
161 #if defined(DEBUG)
162 static void gcCAFs (void);
163 #endif
164
165 /* -----------------------------------------------------------------------------
166 The mark stack.
167 -------------------------------------------------------------------------- */
168
169 bdescr *mark_stack_top_bd; // topmost block in the mark stack
170 bdescr *mark_stack_bd; // current block in the mark stack
171 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
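// An illustrative sketch only of how a pointer would be pushed onto the
// block-structured mark stack described above; the real push/pop code
// lives in MarkStack.h, and this hypothetical helper ignores the case
// where the current block fills up and a fresh block must be chained on
// (cf. the link/u.back fields initialised in GarbageCollect() below).
#if 0
static void sketch_push_mark_stack (StgPtr p)
{
    if (mark_sp == mark_stack_bd->start + BLOCK_SIZE_W) {
        return; // block full: the real code links a new block here
    }
    *mark_sp++ = (StgWord)p;
}
#endif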
172
173 /* -----------------------------------------------------------------------------
174 GarbageCollect: the main entry point to the garbage collector.
175
176 The collect_gen parameter is obtained by calling calcNeeded().
177
178 Locks held: all capabilities are held throughout GarbageCollect().
179 -------------------------------------------------------------------------- */
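/* An illustrative sketch (simplified, and not the actual scheduler code)
 * of how the collector is entered: calcNeeded() chooses the oldest
 * generation worth collecting, and that number is passed in here as
 * collect_gen.  See scheduleDoGC() in Schedule.c for the real call site.
 */
#if 0
static void sketch_invoke_gc (Capability *cap, rtsBool force_major,
                              rtsBool do_heap_census, nat gc_type)
{
    StgWord blocks_needed; // also reported by calcNeeded(); unused here
    nat collect_gen = calcNeeded(force_major, &blocks_needed);
    GarbageCollect(collect_gen, do_heap_census, gc_type, cap);
}
#endif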
180
181 void
182 GarbageCollect (nat collect_gen,
183 rtsBool do_heap_census,
184 nat gc_type USED_IF_THREADS,
185 Capability *cap)
186 {
187 bdescr *bd;
188 generation *gen;
189 StgWord live_blocks, live_words, par_max_copied, par_tot_copied;
190 #if defined(THREADED_RTS)
191 gc_thread *saved_gct;
192 #endif
193 nat g, n;
194
195 // necessary if we stole a callee-saves register for gct:
196 #if defined(THREADED_RTS)
197 saved_gct = gct;
198 #endif
199
200 #ifdef PROFILING
201 CostCentreStack *save_CCS[n_capabilities];
202 #endif
203
204 ACQUIRE_SM_LOCK;
205
206 #if defined(RTS_USER_SIGNALS)
207 if (RtsFlags.MiscFlags.install_signal_handlers) {
208 // block signals
209 blockUserSignals();
210 }
211 #endif
212
213 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
214 // otherwise adjust the padding in gen_workspace.
215
216 // this is the main thread
217 SET_GCT(gc_threads[cap->no]);
218
219 // tell the stats department that we've started a GC
220 stat_startGC(cap, gct);
221
222 // lock the StablePtr table
223 stableLock();
224
225 #ifdef DEBUG
226 mutlist_MUTVARS = 0;
227 mutlist_MUTARRS = 0;
228 mutlist_MVARS = 0;
229 mutlist_TVAR = 0;
230 mutlist_TVAR_WATCH_QUEUE = 0;
231 mutlist_TREC_CHUNK = 0;
232 mutlist_TREC_HEADER = 0;
233 mutlist_ATOMIC_INVARIANT = 0;
234 mutlist_INVARIANT_CHECK_QUEUE = 0;
235 mutlist_OTHERS = 0;
236 #endif
237
238 // attribute any costs to CCS_GC
239 #ifdef PROFILING
240 for (n = 0; n < n_capabilities; n++) {
241 save_CCS[n] = capabilities[n]->r.rCCCS;
242 capabilities[n]->r.rCCCS = CCS_GC;
243 }
244 #endif
245
246 /* Figure out which generation to collect
247 */
248 N = collect_gen;
249 major_gc = (N == RtsFlags.GcFlags.generations-1);
250
251 if (major_gc) {
252 prev_static_flag = static_flag;
253 static_flag =
254 static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
255 }
256
257 #if defined(THREADED_RTS)
258 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
259 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
260 // It's not always a good idea to do load balancing in parallel
261 // GC. In particular, for a parallel program we don't want to
262 // lose locality by moving cached data into another CPU's cache
263 // (this effect can be quite significant).
264 //
265 // We could have a more complex way to determine whether to do
266 // work stealing or not, e.g. it might be a good idea to do it
267 // if the heap is big. For now, we just turn it on or off with
268 // a flag.
269 #endif
270
271 /* Start threads, so they can be spinning up while we finish initialisation.
272 */
273 start_gc_threads();
274
275 #if defined(THREADED_RTS)
276 /* How many threads will be participating in this GC?
277 * We don't try to parallelise minor GCs (unless the user asks for
278 * it with +RTS -qg0), or mark/compact/sweep GC.
279 */
280 if (gc_type == SYNC_GC_PAR) {
281 n_gc_threads = n_capabilities;
282 } else {
283 n_gc_threads = 1;
284 }
285 #else
286 n_gc_threads = 1;
287 #endif
288
289 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
290 N, n_gc_threads);
291
292 #ifdef DEBUG
293 // check for memory leaks if DEBUG is on
294 memInventory(DEBUG_gc);
295 #endif
296
297 // do this *before* we start scavenging
298 collectFreshWeakPtrs();
299
300 // check sanity *before* GC
301 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
302
303 // gather blocks allocated using allocatePinned() from each capability
304 // and put them on the g0->large_object list.
305 collect_pinned_object_blocks();
306
307 // Initialise all the generations/steps that we're collecting.
308 for (g = 0; g <= N; g++) {
309 prepare_collected_gen(&generations[g]);
310 }
311 // Initialise all the generations/steps that we're *not* collecting.
312 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
313 prepare_uncollected_gen(&generations[g]);
314 }
315
316 // Prepare this gc_thread
317 init_gc_thread(gct);
318
319 /* Allocate a mark stack if we're doing a major collection.
320 */
321 if (major_gc && oldest_gen->mark) {
322 mark_stack_bd = allocBlock();
323 mark_stack_top_bd = mark_stack_bd;
324 mark_stack_bd->link = NULL;
325 mark_stack_bd->u.back = NULL;
326 mark_sp = mark_stack_bd->start;
327 } else {
328 mark_stack_bd = NULL;
329 mark_stack_top_bd = NULL;
330 mark_sp = NULL;
331 }
332
333 /* -----------------------------------------------------------------------
334 * follow all the roots that we know about:
335 */
336
337 // the main thread is running: this prevents any other threads from
338 // exiting prematurely, so we can start them now.
339 // NB. do this after the mutable lists have been saved above, otherwise
340 // the other GC threads will be writing into the old mutable lists.
341 inc_running();
342 wakeup_gc_threads(gct->thread_index);
343
344 traceEventGcWork(gct->cap);
345
346 // scavenge the capability-private mutable lists. This isn't part
347 // of markCapability() because markCapability() can only
348 // call back into the GC via mark_root() (due to the gct register
349 // variable).
350 if (n_gc_threads == 1) {
351 for (n = 0; n < n_capabilities; n++) {
352 #if defined(THREADED_RTS)
353 scavenge_capability_mut_Lists1(capabilities[n]);
354 #else
355 scavenge_capability_mut_lists(capabilities[n]);
356 #endif
357 }
358 } else {
359 scavenge_capability_mut_lists(gct->cap);
360 for (n = 0; n < n_capabilities; n++) {
361 if (gc_threads[n]->idle) {
362 markCapability(mark_root, gct, capabilities[n],
363 rtsTrue/*don't mark sparks*/);
364 scavenge_capability_mut_lists(capabilities[n]);
365 }
366 }
367 }
368
369 // follow roots from the CAF list (used by GHCi)
370 gct->evac_gen_no = 0;
371 markCAFs(mark_root, gct);
372
373 // follow all the roots that the application knows about.
374 gct->evac_gen_no = 0;
375 if (n_gc_threads == 1) {
376 for (n = 0; n < n_capabilities; n++) {
377 markCapability(mark_root, gct, capabilities[n],
378 rtsTrue/*don't mark sparks*/);
379 }
380 } else {
381 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
382 }
383
384 markScheduler(mark_root, gct);
385
386 #if defined(RTS_USER_SIGNALS)
387 // mark the signal handlers (signals should be already blocked)
388 markSignalHandlers(mark_root, gct);
389 #endif
390
391 // Mark the weak pointer list, and prepare to detect dead weak pointers.
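// (Summary of the weak-pointer handling used below: once the roots are
// marked we scavenge to a fixed point, calling traverseWeakPtrList()
// after each round; weak pointers whose keys turn out to be unreachable
// end up on dead_weak_ptr_list, and their finalizers are started via
// scheduleFinalizers() towards the end of this function.)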
392 markWeakPtrList();
393 initWeakForGC();
394
395 // Mark the stable pointer table.
396 markStableTables(mark_root, gct);
397
398 /* -------------------------------------------------------------------------
399 * Repeatedly scavenge all the areas we know about until there's no
400 * more scavenging to be done.
401 */
402 for (;;)
403 {
404 scavenge_until_all_done();
405 // The other threads are now stopped. We might recurse back to
406 // here, but from now on this is the only thread.
407
408 // must be last... invariant is that everything is fully
409 // scavenged at this point.
410 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
411 inc_running();
412 continue;
413 }
414
415 // If we get to here, there's really nothing left to do.
416 break;
417 }
418
419 shutdown_gc_threads(gct->thread_index);
420
421 // Now see which stable names are still alive.
422 gcStableTables();
423
424 #ifdef THREADED_RTS
425 if (n_gc_threads == 1) {
426 for (n = 0; n < n_capabilities; n++) {
427 pruneSparkQueue(capabilities[n]);
428 }
429 } else {
430 for (n = 0; n < n_capabilities; n++) {
431 if (n == cap->no || gc_threads[n]->idle) {
432 pruneSparkQueue(capabilities[n]);
433 }
434 }
435 }
436 #endif
437
438 #ifdef PROFILING
439 // We call processHeapClosureForDead() on every closure destroyed during
440 // the current garbage collection, so we invoke LdvCensusForDead().
441 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
442 || RtsFlags.ProfFlags.bioSelector != NULL) {
443 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
444 LdvCensusForDead(N);
445 ACQUIRE_SM_LOCK;
446 }
447 #endif
448
449 // NO MORE EVACUATION AFTER THIS POINT!
450
451 // Finally: compact or sweep the oldest generation.
452 if (major_gc && oldest_gen->mark) {
453 if (oldest_gen->compact)
454 compact(gct->scavenged_static_objects);
455 else
456 sweep(oldest_gen);
457 }
458
459 copied = 0;
460 par_max_copied = 0;
461 par_tot_copied = 0;
462 {
463 nat i;
464 for (i=0; i < n_gc_threads; i++) {
465 if (n_gc_threads > 1) {
466 debugTrace(DEBUG_gc,"thread %d:", i);
467 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
468 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
469 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
470 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
471 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
472 }
473 copied += gc_threads[i]->copied;
474 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
475 }
476 par_tot_copied = copied;
477 if (n_gc_threads == 1) {
478 par_max_copied = 0;
479 par_tot_copied = 0;
480 }
481 }
482
483 // Run through all the generations/steps and tidy up.
484 // We're going to:
485 // - count the amount of "live" data (live_words, live_blocks)
486 // - count the amount of "copied" data in this GC (copied)
487 // - free from-space
488 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
489 //
490 live_words = 0;
491 live_blocks = 0;
492
493 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
494
495 if (g == N) {
496 generations[g].collections++; // for stats
497 if (n_gc_threads > 1) generations[g].par_collections++;
498 }
499
500 // Count the mutable list as bytes "copied" for the purposes of
501 // stats. Every mutable list is copied during every GC.
502 if (g > 0) {
503 W_ mut_list_size = 0;
504 for (n = 0; n < n_capabilities; n++) {
505 mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
506 }
507 copied += mut_list_size;
508
509 debugTrace(DEBUG_gc,
510 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
511 (unsigned long)(mut_list_size * sizeof(W_)),
512 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
513 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
514 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
515 mutlist_ATOMIC_INVARIANT,
516 mutlist_INVARIANT_CHECK_QUEUE,
517 mutlist_OTHERS);
518 }
519
520 bdescr *next, *prev;
521 gen = &generations[g];
522
523 // for generations we collected...
524 if (g <= N) {
525
526 /* free old memory and shift to-space into from-space for all
527 * the collected steps (except the allocation area). These
528 * freed blocks will probably be quickly recycled.
529 */
530 if (gen->mark)
531 {
532 // tack the new blocks on the end of the existing blocks
533 if (gen->old_blocks != NULL) {
534
535 prev = NULL;
536 for (bd = gen->old_blocks; bd != NULL; bd = next) {
537
538 next = bd->link;
539
540 if (!(bd->flags & BF_MARKED))
541 {
542 if (prev == NULL) {
543 gen->old_blocks = next;
544 } else {
545 prev->link = next;
546 }
547 freeGroup(bd);
548 gen->n_old_blocks--;
549 }
550 else
551 {
552 gen->n_words += bd->free - bd->start;
553
554 // NB. this step might not be compacted next
555 // time, so reset the BF_MARKED flags.
556 // They are set before GC if we're going to
557 // compact (see prepare_collected_gen() below).
558 bd->flags &= ~BF_MARKED;
559
560 // between GCs, all blocks in the heap except
561 // for the nursery have the BF_EVACUATED flag set.
562 bd->flags |= BF_EVACUATED;
563
564 prev = bd;
565 }
566 }
567
568 if (prev != NULL) {
569 prev->link = gen->blocks;
570 gen->blocks = gen->old_blocks;
571 }
572 }
573 // add the new blocks to the block tally
574 gen->n_blocks += gen->n_old_blocks;
575 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
576 ASSERT(countOccupied(gen->blocks) == gen->n_words);
577 }
578 else // not compacted
579 {
580 freeChain(gen->old_blocks);
581 }
582
583 gen->old_blocks = NULL;
584 gen->n_old_blocks = 0;
585
586 /* LARGE OBJECTS. The current live large objects are chained on
587 * scavenged_large, having been moved during garbage
588 * collection from large_objects. Any objects left on the
589 * large_objects list are therefore dead, so we free them here.
590 */
591 freeChain(gen->large_objects);
592 gen->large_objects = gen->scavenged_large_objects;
593 gen->n_large_blocks = gen->n_scavenged_large_blocks;
594 gen->n_large_words = countOccupied(gen->large_objects);
595 gen->n_new_large_words = 0;
596 }
597 else // for generations > N
598 {
599 /* For older generations, we need to append the
600 * scavenged_large_object list (i.e. large objects that have been
601 * promoted during this GC) to the large_object list for that step.
602 */
603 for (bd = gen->scavenged_large_objects; bd; bd = next) {
604 next = bd->link;
605 dbl_link_onto(bd, &gen->large_objects);
606 gen->n_large_words += bd->free - bd->start;
607 }
608
609 // add the new blocks we promoted during this GC
610 gen->n_large_blocks += gen->n_scavenged_large_blocks;
611 }
612
613 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
614 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
615
616 gen->scavenged_large_objects = NULL;
617 gen->n_scavenged_large_blocks = 0;
618
619 // Count "live" data
620 live_words += genLiveWords(gen);
621 live_blocks += genLiveBlocks(gen);
622
623 // add in the partial blocks in the gen_workspaces, but ignore gen 0
624 // if this is a local GC (we can't count another capability's part_list)
625 {
626 nat i;
627 for (i = 0; i < n_capabilities; i++) {
628 live_words += gcThreadLiveWords(i, gen->no);
629 live_blocks += gcThreadLiveBlocks(i, gen->no);
630 }
631 }
632 } // for all generations
633
634 // update the max size of older generations after a major GC
635 resize_generations();
636
637 // Free the mark stack.
638 if (mark_stack_top_bd != NULL) {
639 debugTrace(DEBUG_gc, "mark stack: %d blocks",
640 countBlocks(mark_stack_top_bd));
641 freeChain(mark_stack_top_bd);
642 }
643
644 // Free any bitmaps.
645 for (g = 0; g <= N; g++) {
646 gen = &generations[g];
647 if (gen->bitmap != NULL) {
648 freeGroup(gen->bitmap);
649 gen->bitmap = NULL;
650 }
651 }
652
653 resize_nursery();
654
655 resetNurseries();
656
657 // mark the garbage collected CAFs as dead
658 #if defined(DEBUG)
659 if (major_gc) { gcCAFs(); }
660 #endif
661
662 // Update the stable pointer hash table.
663 updateStableTables(major_gc);
664
665 // unlock the StablePtr table. Must be before scheduleFinalizers(),
666 // because a finalizer may call hs_free_fun_ptr() or
667 // hs_free_stable_ptr(), both of which access the StablePtr table.
668 stableUnlock();
669
670 // Must be after stableUnlock(), because it might free stable ptrs.
671 if (major_gc) {
672 checkUnload (gct->scavenged_static_objects);
673 }
674
675 #ifdef PROFILING
676 // resetStaticObjectForRetainerProfiling() must be called before
677 // zeroing below.
678
679 // ToDo: fix the gct->scavenged_static_objects below
680 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
681 #endif
682
683 // Start any pending finalizers. Must be after
684 // updateStableTables() and stableUnlock() (see #4221).
685 RELEASE_SM_LOCK;
686 scheduleFinalizers(cap, dead_weak_ptr_list);
687 ACQUIRE_SM_LOCK;
688
689 // check sanity after GC
690 // before resurrectThreads(), because that might overwrite some
691 // closures, which will cause problems with THREADED where we don't
692 // fill slop.
693 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
694
695 // If a heap census is due, we need to do it before
696 // resurrectThreads(), for the same reason as checkSanity above:
697 // resurrectThreads() will overwrite some closures and leave slop
698 // behind.
699 if (do_heap_census) {
700 debugTrace(DEBUG_sched, "performing heap census");
701 RELEASE_SM_LOCK;
702 heapCensus(gct->gc_start_cpu);
703 ACQUIRE_SM_LOCK;
704 }
705
706 // send exceptions to any threads which were about to die
707 RELEASE_SM_LOCK;
708 resurrectThreads(resurrected_threads);
709 ACQUIRE_SM_LOCK;
710
711 if (major_gc) {
712 W_ need, got;
713 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
714 got = mblocks_allocated;
715 /* If the amount of data remains constant, next major GC we'll
716 require (F+1)*need. We leave (F+2)*need in order to reduce
717 repeated deallocation and reallocation. */
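// Worked example (illustrative numbers only): with the default F = 2
// and need = 100 megablocks currently allocated, we keep up to
// (2+2)*100 = 400 megablocks resident and return any excess to the OS.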
718 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
719 if (got > need) {
720 returnMemoryToOS(got - need);
721 }
722 }
723
724 // extra GC trace info
725 IF_DEBUG(gc, statDescribeGens());
726
727 #ifdef DEBUG
728 // symbol-table based profiling
729 /* heapCensus(to_blocks); */ /* ToDo */
730 #endif
731
732 // restore enclosing cost centre
733 #ifdef PROFILING
734 for (n = 0; n < n_capabilities; n++) {
735 capabilities[n]->r.rCCCS = save_CCS[n];
736 }
737 #endif
738
739 #ifdef DEBUG
740 // check for memory leaks if DEBUG is on
741 memInventory(DEBUG_gc);
742 #endif
743
744 // ok, GC over: tell the stats department what happened.
745 stat_endGC(cap, gct, live_words, copied,
746 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
747 N, n_gc_threads, par_max_copied, par_tot_copied);
748
749 #if defined(RTS_USER_SIGNALS)
750 if (RtsFlags.MiscFlags.install_signal_handlers) {
751 // unblock signals again
752 unblockUserSignals();
753 }
754 #endif
755
756 RELEASE_SM_LOCK;
757
758 SET_GCT(saved_gct);
759 }
760
761 /* -----------------------------------------------------------------------------
762 Initialise the gc_thread structures.
763 -------------------------------------------------------------------------- */
764
765 #define GC_THREAD_INACTIVE 0
766 #define GC_THREAD_STANDING_BY 1
767 #define GC_THREAD_RUNNING 2
768 #define GC_THREAD_WAITING_TO_CONTINUE 3
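// Summary of the worker life cycle as implemented below (see
// gcWorkerThread(), wakeup_gc_threads(), shutdown_gc_threads() and
// releaseGCThreads()):
//
//   INACTIVE -> STANDING_BY             worker parks on its gc_spin lock
//   STANDING_BY -> RUNNING              wakeup_gc_threads() releases gc_spin
//   RUNNING -> WAITING_TO_CONTINUE      worker finishes scavenging and
//                                       parks on its mut_spin lock
//   WAITING_TO_CONTINUE -> INACTIVE     releaseGCThreads() releases mut_spin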
769
770 static void
771 new_gc_thread (nat n, gc_thread *t)
772 {
773 nat g;
774 gen_workspace *ws;
775
776 t->cap = capabilities[n];
777
778 #ifdef THREADED_RTS
779 t->id = 0;
780 initSpinLock(&t->gc_spin);
781 initSpinLock(&t->mut_spin);
782 ACQUIRE_SPIN_LOCK(&t->gc_spin);
783 ACQUIRE_SPIN_LOCK(&t->mut_spin);
784 t->wakeup = GC_THREAD_INACTIVE; // starts true, so we can wait for the
785 // thread to start up, see wakeup_gc_threads
786 #endif
787
788 t->thread_index = n;
789 t->idle = rtsFalse;
790 t->free_blocks = NULL;
791 t->gc_count = 0;
792
793 init_gc_thread(t);
794
795 #ifdef USE_PAPI
796 t->papi_events = -1;
797 #endif
798
799 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
800 {
801 ws = &t->gens[g];
802 ws->gen = &generations[g];
803 ASSERT(g == ws->gen->no);
804 ws->my_gct = t;
805
806 // We want to call
807 // alloc_todo_block(ws,0);
808 // but can't, because it uses gct which isn't set up at this point.
809 // Hence, allocate a block for todo_bd manually:
810 {
811 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
812 initBdescr(bd, ws->gen, ws->gen->to);
813 bd->flags = BF_EVACUATED;
814 bd->u.scan = bd->free = bd->start;
815
816 ws->todo_bd = bd;
817 ws->todo_free = bd->free;
818 ws->todo_lim = bd->start + BLOCK_SIZE_W;
819 }
820
821 ws->todo_q = newWSDeque(128);
822 ws->todo_overflow = NULL;
823 ws->n_todo_overflow = 0;
824 ws->todo_large_objects = NULL;
825
826 ws->part_list = NULL;
827 ws->n_part_blocks = 0;
828
829 ws->scavd_list = NULL;
830 ws->n_scavd_blocks = 0;
831 }
832 }
833
834
835 void
836 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
837 {
838 #if defined(THREADED_RTS)
839 nat i;
840
841 if (from > 0) {
842 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
843 "initGcThreads");
844 } else {
845 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
846 "initGcThreads");
847 }
848
849 for (i = from; i < to; i++) {
850 gc_threads[i] =
851 stgMallocBytes(sizeof(gc_thread) +
852 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
853 "alloc_gc_threads");
854
855 new_gc_thread(i, gc_threads[i]);
856 }
857 #else
858 ASSERT(from == 0 && to == 1);
859 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
860 gc_threads[0] = gct;
861 new_gc_thread(0,gc_threads[0]);
862 #endif
863 }
864
865 void
866 freeGcThreads (void)
867 {
868 nat g;
869 if (gc_threads != NULL) {
870 #if defined(THREADED_RTS)
871 nat i;
872 for (i = 0; i < n_capabilities; i++) {
873 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
874 {
875 freeWSDeque(gc_threads[i]->gens[g].todo_q);
876 }
877 stgFree (gc_threads[i]);
878 }
879 stgFree (gc_threads);
880 #else
881 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
882 {
883 freeWSDeque(gc_threads[0]->gens[g].todo_q);
884 }
885 stgFree (gc_threads);
886 #endif
887 gc_threads = NULL;
888 }
889 }
890
891 /* ----------------------------------------------------------------------------
892 Start GC threads
893 ------------------------------------------------------------------------- */
894
895 static volatile StgWord gc_running_threads;
896
897 static StgWord
898 inc_running (void)
899 {
900 StgWord new;
901 new = atomic_inc(&gc_running_threads, 1);
902 ASSERT(new <= n_gc_threads);
903 return new;
904 }
905
906 static StgWord
907 dec_running (void)
908 {
909 ASSERT(gc_running_threads != 0);
910 return atomic_dec(&gc_running_threads);
911 }
912
913 static rtsBool
914 any_work (void)
915 {
916 int g;
917 gen_workspace *ws;
918
919 gct->any_work++;
920
921 write_barrier();
922
923 // scavenge objects in compacted generation
924 if (mark_stack_bd != NULL && !mark_stack_empty()) {
925 return rtsTrue;
926 }
927
928 // Check for global work in any step. We don't need to check for
929 // local work, because we have already exited scavenge_loop(),
930 // which means there is no local work for this thread.
931 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
932 ws = &gct->gens[g];
933 if (ws->todo_large_objects) return rtsTrue;
934 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
935 if (ws->todo_overflow) return rtsTrue;
936 }
937
938 #if defined(THREADED_RTS)
939 if (work_stealing) {
940 nat n;
941 // look for work to steal
942 for (n = 0; n < n_gc_threads; n++) {
943 if (n == gct->thread_index) continue;
944 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
945 ws = &gc_threads[n]->gens[g];
946 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
947 }
948 }
949 }
950 #endif
951
952 gct->no_work++;
953 #if defined(THREADED_RTS)
954 yieldThread();
955 #endif
956
957 return rtsFalse;
958 }
959
960 static void
961 scavenge_until_all_done (void)
962 {
963 DEBUG_ONLY( nat r );
964
965
966 loop:
967 #if defined(THREADED_RTS)
968 if (n_gc_threads > 1) {
969 scavenge_loop();
970 } else {
971 scavenge_loop1();
972 }
973 #else
974 scavenge_loop();
975 #endif
976
977 collect_gct_blocks();
978
979 // scavenge_loop() only exits when there's no work to do
980
981 #ifdef DEBUG
982 r = dec_running();
983 #else
984 dec_running();
985 #endif
986
987 traceEventGcIdle(gct->cap);
988
989 debugTrace(DEBUG_gc, "%d GC threads still running", r);
990
991 while (gc_running_threads != 0) {
992 // usleep(1);
993 if (any_work()) {
994 inc_running();
995 traceEventGcWork(gct->cap);
996 goto loop;
997 }
998 // any_work() does not remove the work from the queue, it
999 // just checks for the presence of work. If we find any,
1000 // then we increment gc_running_threads and go back to
1001 // scavenge_loop() to perform any pending work.
1002 }
1003
1004 traceEventGcDone(gct->cap);
1005 }
1006
1007 #if defined(THREADED_RTS)
1008
1009 void
1010 gcWorkerThread (Capability *cap)
1011 {
1012 gc_thread *saved_gct;
1013
1014 // necessary if we stole a callee-saves register for gct:
1015 saved_gct = gct;
1016
1017 SET_GCT(gc_threads[cap->no]);
1018 gct->id = osThreadId();
1019
1020 // Wait until we're told to wake up
1021 RELEASE_SPIN_LOCK(&gct->mut_spin);
1022 // yieldThread();
1023 // Strangely, adding a yieldThread() here makes the CPU time
1024 // measurements more accurate on Linux, perhaps because it syncs
1025 // the CPU time across the multiple cores. Without this, CPU time
1026 // is heavily skewed towards GC rather than MUT.
1027 gct->wakeup = GC_THREAD_STANDING_BY;
1028 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1029 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1030
1031 #ifdef USE_PAPI
1032 // start performance counters in this thread...
1033 if (gct->papi_events == -1) {
1034 papi_init_eventset(&gct->papi_events);
1035 }
1036 papi_thread_start_gc1_count(gct->papi_events);
1037 #endif
1038
1039 init_gc_thread(gct);
1040
1041 traceEventGcWork(gct->cap);
1042
1043 // Every thread evacuates some roots.
1044 gct->evac_gen_no = 0;
1045 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1046 scavenge_capability_mut_lists(cap);
1047
1048 scavenge_until_all_done();
1049
1050 #ifdef THREADED_RTS
1051 // Now that the whole heap is marked, we discard any sparks that
1052 // were found to be unreachable. The main GC thread is currently
1053 // marking heap reachable via weak pointers, so it is
1054 // non-deterministic whether a spark will be retained if it is
1055 // only reachable via weak pointers. To fix this problem would
1056 // require another GC barrier, which is too high a price.
1057 pruneSparkQueue(cap);
1058 #endif
1059
1060 #ifdef USE_PAPI
1061 // count events in this thread towards the GC totals
1062 papi_thread_stop_gc1_count(gct->papi_events);
1063 #endif
1064
1065 // Wait until we're told to continue
1066 RELEASE_SPIN_LOCK(&gct->gc_spin);
1067 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1068 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1069 gct->thread_index);
1070 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1071 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1072
1073 SET_GCT(saved_gct);
1074 }
1075
1076 #endif
1077
1078 #if defined(THREADED_RTS)
1079
1080 void
1081 waitForGcThreads (Capability *cap USED_IF_THREADS)
1082 {
1083 const nat n_threads = n_capabilities;
1084 const nat me = cap->no;
1085 nat i, j;
1086 rtsBool retry = rtsTrue;
1087
1088 stat_startGCSync(gc_threads[cap->no]);
1089
1090
1091 while(retry) {
1092 for (i=0; i < n_threads; i++) {
1093 if (i == me || gc_threads[i]->idle) continue;
1094 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1095 prodCapability(capabilities[i], cap->running_task);
1096 }
1097 }
1098 for (j=0; j < 10; j++) {
1099 retry = rtsFalse;
1100 for (i=0; i < n_threads; i++) {
1101 if (i == me || gc_threads[i]->idle) continue;
1102 write_barrier();
1103 interruptCapability(capabilities[i]);
1104 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1105 retry = rtsTrue;
1106 }
1107 }
1108 if (!retry) break;
1109 yieldThread();
1110 }
1111 }
1112 }
1113
1114 #endif // THREADED_RTS
1115
1116 static void
1117 start_gc_threads (void)
1118 {
1119 #if defined(THREADED_RTS)
1120 gc_running_threads = 0;
1121 #endif
1122 }
1123
1124 static void
1125 wakeup_gc_threads (nat me USED_IF_THREADS)
1126 {
1127 #if defined(THREADED_RTS)
1128 nat i;
1129
1130 if (n_gc_threads == 1) return;
1131
1132 for (i=0; i < n_gc_threads; i++) {
1133 if (i == me || gc_threads[i]->idle) continue;
1134 inc_running();
1135 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1136 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1137
1138 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1139 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1140 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1141 }
1142 #endif
1143 }
1144
1145 // After GC is complete, we must wait for all GC threads to enter the
1146 // standby state, otherwise they may still be executing inside
1147 // any_work(), and may even remain awake until the next GC starts.
1148 static void
1149 shutdown_gc_threads (nat me USED_IF_THREADS)
1150 {
1151 #if defined(THREADED_RTS)
1152 nat i;
1153
1154 if (n_gc_threads == 1) return;
1155
1156 for (i=0; i < n_gc_threads; i++) {
1157 if (i == me || gc_threads[i]->idle) continue;
1158 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
1159 busy_wait_nop();
1160 write_barrier();
1161 }
1162 }
1163 #endif
1164 }
1165
1166 #if defined(THREADED_RTS)
1167 void
1168 releaseGCThreads (Capability *cap USED_IF_THREADS)
1169 {
1170 const nat n_threads = n_capabilities;
1171 const nat me = cap->no;
1172 nat i;
1173 for (i=0; i < n_threads; i++) {
1174 if (i == me || gc_threads[i]->idle) continue;
1175 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1176 barf("releaseGCThreads");
1177
1178 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1179 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1180 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1181 }
1182 }
1183 #endif
1184
1185 /* ----------------------------------------------------------------------------
1186 Initialise a generation that is to be collected
1187 ------------------------------------------------------------------------- */
1188
1189 static void
1190 prepare_collected_gen (generation *gen)
1191 {
1192 nat i, g, n;
1193 gen_workspace *ws;
1194 bdescr *bd, *next;
1195
1196 // Throw away the current mutable list. Invariant: the mutable
1197 // list always has at least one block; this means we can avoid a
1198 // check for NULL in recordMutable().
1199 g = gen->no;
1200 if (g != 0) {
1201 for (i = 0; i < n_capabilities; i++) {
1202 freeChain(capabilities[i]->mut_lists[g]);
1203 capabilities[i]->mut_lists[g] = allocBlock();
1204 }
1205 }
1206
1207 gen = &generations[g];
1208 ASSERT(gen->no == g);
1209
1210 // we'll construct a new list of threads in this step
1211 // during GC, throw away the current list.
1212 gen->old_threads = gen->threads;
1213 gen->threads = END_TSO_QUEUE;
1214
1215 // deprecate the existing blocks
1216 gen->old_blocks = gen->blocks;
1217 gen->n_old_blocks = gen->n_blocks;
1218 gen->blocks = NULL;
1219 gen->n_blocks = 0;
1220 gen->n_words = 0;
1221 gen->live_estimate = 0;
1222
1223 // initialise the large object queues.
1224 ASSERT(gen->scavenged_large_objects == NULL);
1225 ASSERT(gen->n_scavenged_large_blocks == 0);
1226
1227 // grab all the partial blocks stashed in the gc_thread workspaces and
1228 // move them to the old_blocks list of this gen.
1229 for (n = 0; n < n_capabilities; n++) {
1230 ws = &gc_threads[n]->gens[gen->no];
1231
1232 for (bd = ws->part_list; bd != NULL; bd = next) {
1233 next = bd->link;
1234 bd->link = gen->old_blocks;
1235 gen->old_blocks = bd;
1236 gen->n_old_blocks += bd->blocks;
1237 }
1238 ws->part_list = NULL;
1239 ws->n_part_blocks = 0;
1240
1241 ASSERT(ws->scavd_list == NULL);
1242 ASSERT(ws->n_scavd_blocks == 0);
1243
1244 if (ws->todo_free != ws->todo_bd->start) {
1245 ws->todo_bd->free = ws->todo_free;
1246 ws->todo_bd->link = gen->old_blocks;
1247 gen->old_blocks = ws->todo_bd;
1248 gen->n_old_blocks += ws->todo_bd->blocks;
1249 alloc_todo_block(ws,0); // always has one block.
1250 }
1251 }
1252
1253 // mark the small objects as from-space
1254 for (bd = gen->old_blocks; bd; bd = bd->link) {
1255 bd->flags &= ~BF_EVACUATED;
1256 }
1257
1258 // mark the large objects as from-space
1259 for (bd = gen->large_objects; bd; bd = bd->link) {
1260 bd->flags &= ~BF_EVACUATED;
1261 }
1262
1263 // for a compacted generation, we need to allocate the bitmap
1264 if (gen->mark) {
1265 StgWord bitmap_size; // in bytes
1266 bdescr *bitmap_bdescr;
1267 StgWord *bitmap;
1268
1269 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
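// (One mark bit per word of from-space: e.g. with the default 4Kbyte
// blocks on a 64-bit platform each block holds 512 words, so it
// contributes 512 bits = 64 bytes of bitmap.)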
1270
1271 if (bitmap_size > 0) {
1272 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1273 / BLOCK_SIZE);
1274 gen->bitmap = bitmap_bdescr;
1275 bitmap = bitmap_bdescr->start;
1276
1277 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1278 bitmap_size, bitmap);
1279
1280 // don't forget to fill it with zeros!
1281 memset(bitmap, 0, bitmap_size);
1282
1283 // For each block in this step, point to its bitmap from the
1284 // block descriptor.
1285 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1286 bd->u.bitmap = bitmap;
1287 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1288
1289 // Also at this point we set the BF_MARKED flag
1290 // for this block. The invariant is that
1291 // BF_MARKED is always unset, except during GC
1292 // when it is set on those blocks which will be
1293 // compacted.
1294 if (!(bd->flags & BF_FRAGMENTED)) {
1295 bd->flags |= BF_MARKED;
1296 }
1297
1298 // BF_SWEPT should be marked only for blocks that are being
1299 // collected in sweep()
1300 bd->flags &= ~BF_SWEPT;
1301 }
1302 }
1303 }
1304 }
1305
1306
1307 /* ----------------------------------------------------------------------------
1308 Save the mutable lists in saved_mut_lists
1309 ------------------------------------------------------------------------- */
1310
1311 static void
1312 stash_mut_list (Capability *cap, nat gen_no)
1313 {
1314 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1315 cap->mut_lists[gen_no] = allocBlock_sync();
1316 }
1317
1318 /* ----------------------------------------------------------------------------
1319 Initialise a generation that is *not* to be collected
1320 ------------------------------------------------------------------------- */
1321
1322 static void
1323 prepare_uncollected_gen (generation *gen)
1324 {
1325 nat i;
1326
1327
1328 ASSERT(gen->no > 0);
1329
1330 // save the current mutable lists for this generation, and
1331 // allocate a fresh block for each one. We'll traverse these
1332 // mutable lists as roots early on in the GC.
1333 for (i = 0; i < n_capabilities; i++) {
1334 stash_mut_list(capabilities[i], gen->no);
1335 }
1336
1337 ASSERT(gen->scavenged_large_objects == NULL);
1338 ASSERT(gen->n_scavenged_large_blocks == 0);
1339 }
1340
1341 /* -----------------------------------------------------------------------------
1342 Collect the completed blocks from a GC thread and attach them to
1343 the generation.
1344 -------------------------------------------------------------------------- */
1345
1346 static void
1347 collect_gct_blocks (void)
1348 {
1349 nat g;
1350 gen_workspace *ws;
1351 bdescr *bd, *prev;
1352
1353 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1354 ws = &gct->gens[g];
1355
1356 // there may still be a block attached to ws->todo_bd;
1357 // leave it there to use next time.
1358
1359 if (ws->scavd_list != NULL) {
1360 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1361
1362 ASSERT(gct->scan_bd == NULL);
1363 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1364
1365 prev = NULL;
1366 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1367 ws->gen->n_words += bd->free - bd->start;
1368 prev = bd;
1369 }
1370 if (prev != NULL) {
1371 prev->link = ws->gen->blocks;
1372 ws->gen->blocks = ws->scavd_list;
1373 }
1374 ws->gen->n_blocks += ws->n_scavd_blocks;
1375
1376 ws->scavd_list = NULL;
1377 ws->n_scavd_blocks = 0;
1378
1379 RELEASE_SPIN_LOCK(&ws->gen->sync);
1380 }
1381 }
1382 }
1383
1384 /* -----------------------------------------------------------------------------
1385 During mutation, any blocks that are filled by allocatePinned() are
1386 stashed on the local pinned_object_blocks list, to avoid needing to
1387 take a global lock. Here we collect those blocks from the
1388 cap->pinned_object_blocks lists and put them on the
1389 main g0->large_objects list.
1390
1391 (Nothing is returned; from this point on the blocks are treated
1392 like any other large objects in g0.)
1393 -------------------------------------------------------------------------- */
1394
1395 static void
1396 collect_pinned_object_blocks (void)
1397 {
1398 nat n;
1399 bdescr *bd, *prev;
1400
1401 for (n = 0; n < n_capabilities; n++) {
1402 prev = NULL;
1403 for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
1404 prev = bd;
1405 }
1406 if (prev != NULL) {
1407 prev->link = g0->large_objects;
1408 if (g0->large_objects != NULL) {
1409 g0->large_objects->u.back = prev;
1410 }
1411 g0->large_objects = capabilities[n]->pinned_object_blocks;
1412 capabilities[n]->pinned_object_blocks = 0;
1413 }
1414 }
1415 }
1416
1417 /* -----------------------------------------------------------------------------
1418 Initialise a gc_thread before GC
1419 -------------------------------------------------------------------------- */
1420
1421 static void
1422 init_gc_thread (gc_thread *t)
1423 {
1424 t->static_objects = END_OF_STATIC_OBJECT_LIST;
1425 t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST;
1426 t->scan_bd = NULL;
1427 t->mut_lists = t->cap->mut_lists;
1428 t->evac_gen_no = 0;
1429 t->failed_to_evac = rtsFalse;
1430 t->eager_promotion = rtsTrue;
1431 t->thunk_selector_depth = 0;
1432 t->copied = 0;
1433 t->scanned = 0;
1434 t->any_work = 0;
1435 t->no_work = 0;
1436 t->scav_find_work = 0;
1437 }
1438
1439 /* -----------------------------------------------------------------------------
1440 Function we pass to evacuate roots.
1441 -------------------------------------------------------------------------- */
1442
1443 static void
1444 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1445 {
1446 // we stole a register for gct, but this function is called from
1447 // *outside* the GC where the register variable is not in effect,
1448 // so we need to save and restore it here. NB. only call
1449 // mark_root() from the main GC thread, otherwise gct will be
1450 // incorrect.
1451 #if defined(THREADED_RTS)
1452 gc_thread *saved_gct;
1453 saved_gct = gct;
1454 #endif
1455 SET_GCT(user);
1456
1457 evacuate(root);
1458
1459 SET_GCT(saved_gct);
1460 }
1461
1462 /* ----------------------------------------------------------------------------
1463 Reset the sizes of the older generations when we do a major
1464 collection.
1465
1466 CURRENT STRATEGY: make all generations except zero the same size.
1467 We have to stay within the maximum heap size, and leave a certain
1468 percentage of the maximum heap size available to allocate into.
1469 ------------------------------------------------------------------------- */
1470
1471 static void
1472 resize_generations (void)
1473 {
1474 nat g;
1475
1476 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1477 W_ live, size, min_alloc, words;
1478 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1479 const W_ gens = RtsFlags.GcFlags.generations;
1480
1481 // live in the oldest generations
1482 if (oldest_gen->live_estimate != 0) {
1483 words = oldest_gen->live_estimate;
1484 } else {
1485 words = oldest_gen->n_words;
1486 }
1487 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1488 oldest_gen->n_large_blocks;
1489
1490 // default max size for all generations except zero
1491 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1492 RtsFlags.GcFlags.minOldGenSize);
1493
1494 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1495 if (max > 0) {
1496 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1497 } else {
1498 RtsFlags.GcFlags.heapSizeSuggestion = size;
1499 }
1500 }
1501
1502 // minimum size for generation zero
1503 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1504 RtsFlags.GcFlags.minAllocAreaSize);
1505
1506 // Auto-enable compaction when the residency reaches a
1507 // certain percentage of the maximum heap size (default: 30%).
1508 if (RtsFlags.GcFlags.compact ||
1509 (max > 0 &&
1510 oldest_gen->n_blocks >
1511 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1512 oldest_gen->mark = 1;
1513 oldest_gen->compact = 1;
1514 // debugBelch("compaction: on\n", live);
1515 } else {
1516 oldest_gen->mark = 0;
1517 oldest_gen->compact = 0;
1518 // debugBelch("compaction: off\n", live);
1519 }
1520
1521 if (RtsFlags.GcFlags.sweep) {
1522 oldest_gen->mark = 1;
1523 }
1524
1525 // if we're going to go over the maximum heap size, reduce the
1526 // size of the generations accordingly. The calculation is
1527 // different if compaction is turned on, because we don't need
1528 // to double the space required to collect the old generation.
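// (Illustrative reading of the formulas below: with copying collection
// each old generation may need up to 2*size during a collection, so
// (gens-1) old generations plus the allocation area must satisfy
// size*(gens-1)*2 + min_alloc <= max; the compacting case only doubles
// the space for the generations that are still copied.)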
1529 if (max != 0) {
1530
1531 // this test is necessary to ensure that the calculations
1532 // below don't have any negative results - we're working
1533 // with unsigned values here.
1534 if (max < min_alloc) {
1535 heapOverflow();
1536 }
1537
1538 if (oldest_gen->compact) {
1539 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1540 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1541 }
1542 } else {
1543 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1544 size = (max - min_alloc) / ((gens - 1) * 2);
1545 }
1546 }
1547
1548 if (size < live) {
1549 heapOverflow();
1550 }
1551 }
1552
1553 #if 0
1554 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1555 min_alloc, size, max);
1556 #endif
1557
1558 for (g = 0; g < gens; g++) {
1559 generations[g].max_blocks = size;
1560 }
1561 }
1562 }
1563
1564 /* -----------------------------------------------------------------------------
1565 Calculate the new size of the nursery, and resize it.
1566 -------------------------------------------------------------------------- */
1567
1568 static void
1569 resize_nursery (void)
1570 {
1571 const StgWord min_nursery =
1572 RtsFlags.GcFlags.minAllocAreaSize * (StgWord)n_capabilities;
1573
1574 if (RtsFlags.GcFlags.generations == 1)
1575 { // Two-space collector:
1576 W_ blocks;
1577
1578 /* set up a new nursery. Allocate a nursery size based on a
1579 * function of the amount of live data (by default a factor of 2)
1580 * Use the blocks from the old nursery if possible, freeing up any
1581 * left over blocks.
1582 *
1583 * If we get near the maximum heap size, then adjust our nursery
1584 * size accordingly. If the nursery is the same size as the live
1585 * data (L), then we need 3L bytes. We can reduce the size of the
1586 * nursery to bring the required memory down near 2L bytes.
1587 *
1588 * A normal 2-space collector would need 4L bytes to give the same
1589 * performance we get from 3L bytes, reducing to the same
1590 * performance at 2L bytes.
1591 */
1592 blocks = generations[0].n_blocks;
1593
1594 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1595 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1596 RtsFlags.GcFlags.maxHeapSize )
1597 {
1598 long adjusted_blocks; // signed on purpose
1599 int pc_free;
1600
1601 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1602
1603 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1604 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1605
1606 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1607 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1608 {
1609 heapOverflow();
1610 }
1611 blocks = adjusted_blocks;
1612 }
1613 else
1614 {
1615 blocks *= RtsFlags.GcFlags.oldGenFactor;
1616 if (blocks < min_nursery)
1617 {
1618 blocks = min_nursery;
1619 }
1620 }
1621 resizeNurseries(blocks);
1622 }
1623 else // Generational collector
1624 {
1625 /*
1626 * If the user has given us a suggested heap size, adjust our
1627 * allocation area to make best use of the memory available.
1628 */
1629 if (RtsFlags.GcFlags.heapSizeSuggestion)
1630 {
1631 long blocks;
1632 StgWord needed;
1633
1634 calcNeeded(rtsFalse, &needed); // approx blocks needed at next GC
1635
1636 /* Guess how much will be live in generation 0 step 0 next time.
1637 * A good approximation is obtained by finding the
1638 * percentage of g0 that was live at the last minor GC.
1639 *
1640 * We have an accurate figure for the amount of copied data in
1641 * 'copied', but we must convert this to a number of blocks, with
1642 * a small adjustment for estimated slop at the end of a block
1643 * (- 10 words).
1644 */
1645 if (N == 0)
1646 {
1647 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1648 / countNurseryBlocks();
1649 }
1650
1651 /* Estimate a size for the allocation area based on the
1652 * information available. We might end up going slightly under
1653 * or over the suggested heap size, but we should be pretty
1654 * close on average.
1655 *
1656 * Formula: suggested - needed
1657 * ----------------------------
1658 * 1 + g0_pcnt_kept/100
1659 *
1660 * where 'needed' is the amount of memory needed at the next
1661 * collection for collecting all gens except g0.
1662 */
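// Worked example (illustrative numbers only): with a suggested heap of
// 10000 blocks, needed = 4000 blocks and g0_pcnt_kept = 25, the
// allocation area is sized at (10000-4000)*100/125 = 4800 blocks.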
1663 blocks =
1664 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1665 (100 + (long)g0_pcnt_kept);
1666
1667 if (blocks < (long)min_nursery) {
1668 blocks = min_nursery;
1669 }
1670
1671 resizeNurseries((W_)blocks);
1672 }
1673 else
1674 {
1675 // we might have added extra blocks to the nursery, so
1676 // resize back to the original size again.
1677 resizeNurseriesFixed();
1678 }
1679 }
1680 }
1681
1682 /* -----------------------------------------------------------------------------
1683 Sanity code for CAF garbage collection.
1684
1685 With DEBUG turned on, we manage a CAF list in addition to the SRT
1686 mechanism. After GC, we run down the CAF list and blackhole any
1687 CAFs which have been garbage collected. This means we get an error
1688 whenever the program tries to enter a garbage collected CAF.
1689
1690 Any garbage collected CAFs are taken off the CAF list at the same
1691 time.
1692 -------------------------------------------------------------------------- */
1693
1694 #if defined(DEBUG)
1695
1696 static void gcCAFs(void)
1697 {
1698 StgIndStatic *p, *prev;
1699
1700 const StgInfoTable *info;
1701 nat i;
1702
1703 i = 0;
1704 p = debug_caf_list;
1705 prev = NULL;
1706
1707 for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST;
1708 p = (StgIndStatic*)p->saved_info) {
1709
1710 info = get_itbl((StgClosure*)p);
1711 ASSERT(info->type == IND_STATIC);
1712
1713 if (p->static_link == NULL) {
1714 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%p", p);
1715 SET_INFO((StgClosure*)p,&stg_GCD_CAF_info); // stub it
1716 if (prev == NULL) {
1717 debug_caf_list = (StgIndStatic*)p->saved_info;
1718 } else {
1719 prev->saved_info = p->saved_info;
1720 }
1721 } else {
1722 prev = p;
1723 i++;
1724 }
1725 }
1726
1727 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1728 }
1729 #endif