New flag +RTS -qi<n>, avoid waking up idle Capabilities to do parallel GC
[ghc.git] / rts / sm / GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "GCTDecl.h"
44 #include "Compact.h"
45 #include "Evac.h"
46 #include "Scav.h"
47 #include "GCUtils.h"
48 #include "MarkStack.h"
49 #include "MarkWeak.h"
50 #include "Sparks.h"
51 #include "Sweep.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
93 */
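/* Illustrative sketch only: walking one of these lists follows the same
 * pattern as zero_static_object_list() further down in this file, using
 * the END_OF_STATIC_LIST sentinel rather than NULL as the terminator:
 *
 *     for (p = list; p != END_OF_STATIC_LIST; p = link) {
 *         info = get_itbl(p);
 *         link = *STATIC_LINK(info, p);
 *         // ... process p ...
 *     }
 */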
94
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
100 nat N;
101 rtsBool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #ifdef DEBUG
109 nat mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_OTHERS;
113 #endif
114
115 /* Thread-local data for each GC thread
116 */
117 gc_thread **gc_threads = NULL;
118
119 #if !defined(THREADED_RTS)
120 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
121 #endif
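// In the non-threaded RTS there is exactly one GC "thread", allocated
// statically above; the extra bytes leave room for the gen_workspace
// array that follows a gc_thread (see the gens[] member used below),
// sized here for what is presumably an upper bound of 64 generations.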
122
123 // Number of threads running in *this* GC. Affects how many
124 // step->todos[] lists we have to look in to find work.
125 nat n_gc_threads;
126
127 // For stats:
128 long copied; // *words* copied & scavenged during this GC
129
130 rtsBool work_stealing;
131
132 DECLARE_GCT
133
134 /* -----------------------------------------------------------------------------
135 Static function declarations
136 -------------------------------------------------------------------------- */
137
138 static void mark_root (void *user, StgClosure **root);
139 static void zero_static_object_list (StgClosure* first_static);
140 static nat initialise_N (rtsBool force_major_gc);
141 static void prepare_collected_gen (generation *gen);
142 static void prepare_uncollected_gen (generation *gen);
143 static void init_gc_thread (gc_thread *t);
144 static void resize_generations (void);
145 static void resize_nursery (void);
146 static void start_gc_threads (void);
147 static void scavenge_until_all_done (void);
148 static StgWord inc_running (void);
149 static StgWord dec_running (void);
150 static void wakeup_gc_threads (nat me);
151 static void shutdown_gc_threads (nat me);
152 static void collect_gct_blocks (void);
153
154 #if 0 && defined(DEBUG)
155 static void gcCAFs (void);
156 #endif
157
158 /* -----------------------------------------------------------------------------
159 The mark stack.
160 -------------------------------------------------------------------------- */
161
162 bdescr *mark_stack_top_bd; // topmost block in the mark stack
163 bdescr *mark_stack_bd; // current block in the mark stack
164 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
165
166 /* -----------------------------------------------------------------------------
167 GarbageCollect: the main entry point to the garbage collector.
168
169 Locks held: all capabilities are held throughout GarbageCollect().
170 -------------------------------------------------------------------------- */
171
172 void
173 GarbageCollect (rtsBool force_major_gc,
174 rtsBool do_heap_census,
175 nat gc_type USED_IF_THREADS,
176 Capability *cap)
177 {
178 bdescr *bd;
179 generation *gen;
180 lnat live_blocks, live_words, allocated, max_copied, avg_copied;
181 #if defined(THREADED_RTS)
182 gc_thread *saved_gct;
183 #endif
184 nat g, n;
185
186 // necessary if we stole a callee-saves register for gct:
187 #if defined(THREADED_RTS)
188 saved_gct = gct;
189 #endif
190
191 #ifdef PROFILING
192 CostCentreStack *save_CCS[n_capabilities];
193 #endif
194
195 ACQUIRE_SM_LOCK;
196
197 #if defined(RTS_USER_SIGNALS)
198 if (RtsFlags.MiscFlags.install_signal_handlers) {
199 // block signals
200 blockUserSignals();
201 }
202 #endif
203
204 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
205 // otherwise adjust the padding in gen_workspace.
206
207 // this is the main thread
208 SET_GCT(gc_threads[cap->no]);
209
210 // tell the stats department that we've started a GC
211 stat_startGC(gct);
212
213 // lock the StablePtr table
214 stablePtrPreGC();
215
216 #ifdef DEBUG
217 mutlist_MUTVARS = 0;
218 mutlist_MUTARRS = 0;
219 mutlist_OTHERS = 0;
220 #endif
221
222 // attribute any costs to CCS_GC
223 #ifdef PROFILING
224 for (n = 0; n < n_capabilities; n++) {
225 save_CCS[n] = capabilities[n].r.rCCCS;
226 capabilities[n].r.rCCCS = CCS_GC;
227 }
228 #endif
229
230 /* Approximate how much we allocated.
231 * Todo: only when generating stats?
232 */
233 allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
234
235 /* Figure out which generation to collect
236 */
237 n = initialise_N(force_major_gc);
238
239 #if defined(THREADED_RTS)
240 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
241 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
242 // It's not always a good idea to do load balancing in parallel
243 // GC. In particular, for a parallel program we don't want to
244 // lose locality by moving cached data into another CPU's cache
245 // (this effect can be quite significant).
246 //
 247 // We could have a more complex way to determine whether to do
248 // work stealing or not, e.g. it might be a good idea to do it
249 // if the heap is big. For now, we just turn it on or off with
250 // a flag.
251 #endif
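// Example (illustrative, based on the flags used above): the threshold
// comes from +RTS -qb<gen>, which sets parGcLoadBalancingGen. If that
// is 1 (presumably the default), a minor GC (N == 0) runs without work
// stealing to preserve locality, while -qb0 would enable stealing for
// every collection.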
252
253 /* Start threads, so they can be spinning up while we finish initialisation.
254 */
255 start_gc_threads();
256
257 #if defined(THREADED_RTS)
258 /* How many threads will be participating in this GC?
259 * We don't try to parallelise minor GCs (unless the user asks for
 260 * it with +RTS -qg0), or mark/compact/sweep GC.
261 */
262 if (gc_type == SYNC_GC_PAR) {
263 n_gc_threads = n_capabilities;
264 } else {
265 n_gc_threads = 1;
266 }
267 #else
268 n_gc_threads = 1;
269 #endif
270
271 debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
272 N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
273
274 #ifdef RTS_GTK_FRONTPANEL
275 if (RtsFlags.GcFlags.frontpanel) {
276 updateFrontPanelBeforeGC(N);
277 }
278 #endif
279
280 #ifdef DEBUG
281 // check for memory leaks if DEBUG is on
282 memInventory(DEBUG_gc);
283 #endif
284
285 // check sanity *before* GC
286 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
287
288 // Initialise all the generations/steps that we're collecting.
289 for (g = 0; g <= N; g++) {
290 prepare_collected_gen(&generations[g]);
291 }
292 // Initialise all the generations/steps that we're *not* collecting.
293 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
294 prepare_uncollected_gen(&generations[g]);
295 }
296
297 // Prepare this gc_thread
298 init_gc_thread(gct);
299
300 /* Allocate a mark stack if we're doing a major collection.
301 */
302 if (major_gc && oldest_gen->mark) {
303 mark_stack_bd = allocBlock();
304 mark_stack_top_bd = mark_stack_bd;
305 mark_stack_bd->link = NULL;
306 mark_stack_bd->u.back = NULL;
307 mark_sp = mark_stack_bd->start;
308 } else {
309 mark_stack_bd = NULL;
310 mark_stack_top_bd = NULL;
311 mark_sp = NULL;
312 }
313
314 /* -----------------------------------------------------------------------
315 * follow all the roots that we know about:
316 */
317
318 // the main thread is running: this prevents any other threads from
319 // exiting prematurely, so we can start them now.
320 // NB. do this after the mutable lists have been saved above, otherwise
321 // the other GC threads will be writing into the old mutable lists.
322 inc_running();
323 wakeup_gc_threads(gct->thread_index);
324
325 traceEventGcWork(gct->cap);
326
327 // scavenge the capability-private mutable lists. This isn't part
328 // of markSomeCapabilities() because markSomeCapabilities() can only
329 // call back into the GC via mark_root() (due to the gct register
330 // variable).
331 if (n_gc_threads == 1) {
332 for (n = 0; n < n_capabilities; n++) {
333 #if defined(THREADED_RTS)
334 scavenge_capability_mut_Lists1(&capabilities[n]);
335 #else
336 scavenge_capability_mut_lists(&capabilities[n]);
337 #endif
338 }
339 } else {
340 scavenge_capability_mut_lists(gct->cap);
341 for (n = 0; n < n_capabilities; n++) {
342 if (gc_threads[n]->idle) {
343 markCapability(mark_root, gct, &capabilities[n],
344 rtsTrue/*don't mark sparks*/);
345 scavenge_capability_mut_lists(&capabilities[n]);
346 }
347 }
348 }
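// Note: a capability whose gc_threads[n]->idle flag is set has no GC
// worker taking part in this collection (its Capability was not woken
// up, cf. the +RTS -qi behaviour), so the thread running here marks its
// roots and scavenges its mutable lists on its behalf. Non-idle
// capabilities do their own in gcWorkerThread().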
349
350 // follow roots from the CAF list (used by GHCi)
351 gct->evac_gen_no = 0;
352 markCAFs(mark_root, gct);
353
354 // follow all the roots that the application knows about.
355 gct->evac_gen_no = 0;
356 if (n_gc_threads == 1) {
357 for (n = 0; n < n_capabilities; n++) {
358 markCapability(mark_root, gct, &capabilities[n],
359 rtsTrue/*don't mark sparks*/);
360 }
361 } else {
362 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
363 }
364
365 markScheduler(mark_root, gct);
366
367 #if defined(RTS_USER_SIGNALS)
368 // mark the signal handlers (signals should be already blocked)
369 markSignalHandlers(mark_root, gct);
370 #endif
371
372 // Mark the weak pointer list, and prepare to detect dead weak pointers.
373 markWeakPtrList();
374 initWeakForGC();
375
376 // Mark the stable pointer table.
377 markStablePtrTable(mark_root, gct);
378
379 /* -------------------------------------------------------------------------
380 * Repeatedly scavenge all the areas we know about until there's no
381 * more scavenging to be done.
382 */
383 for (;;)
384 {
385 scavenge_until_all_done();
386 // The other threads are now stopped. We might recurse back to
387 // here, but from now on this is the only thread.
388
389 // must be last... invariant is that everything is fully
390 // scavenged at this point.
391 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
392 inc_running();
393 continue;
394 }
395
396 // If we get to here, there's really nothing left to do.
397 break;
398 }
399
400 shutdown_gc_threads(gct->thread_index);
401
402 // Now see which stable names are still alive.
403 gcStablePtrTable();
404
405 #ifdef THREADED_RTS
406 if (n_gc_threads == 1) {
407 for (n = 0; n < n_capabilities; n++) {
408 pruneSparkQueue(&capabilities[n]);
409 }
410 } else {
411 for (n = 0; n < n_capabilities; n++) {
412 if (n == cap->no || gc_threads[n]->idle) {
413 pruneSparkQueue(&capabilities[n]);
414 }
415 }
416 }
417 #endif
418
419 #ifdef PROFILING
420 // We call processHeapClosureForDead() on every closure destroyed during
421 // the current garbage collection, so we invoke LdvCensusForDead().
422 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
423 || RtsFlags.ProfFlags.bioSelector != NULL) {
424 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
425 LdvCensusForDead(N);
426 ACQUIRE_SM_LOCK;
427 }
428 #endif
429
430 // NO MORE EVACUATION AFTER THIS POINT!
431
432 // Finally: compact or sweep the oldest generation.
433 if (major_gc && oldest_gen->mark) {
434 if (oldest_gen->compact)
435 compact(gct->scavenged_static_objects);
436 else
437 sweep(oldest_gen);
438 }
439
440 copied = 0;
441 max_copied = 0;
442 avg_copied = 0;
443 {
444 nat i;
445 for (i=0; i < n_gc_threads; i++) {
446 if (n_gc_threads > 1) {
447 debugTrace(DEBUG_gc,"thread %d:", i);
448 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
449 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
450 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
451 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
452 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
453 }
454 copied += gc_threads[i]->copied;
455 max_copied = stg_max(gc_threads[i]->copied, max_copied);
456 }
457 if (n_gc_threads == 1) {
458 max_copied = 0;
459 avg_copied = 0;
460 } else {
461 avg_copied = copied;
462 }
463 }
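// These figures are handed to stat_endGC() below: copied is the total
// number of words copied by all GC threads, and max_copied the most
// copied by any single thread; avg_copied (here just the raw total) is
// presumably normalised later when reporting how well balanced the
// parallel GC was in the +RTS -s output.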
464
465 // Run through all the generations/steps and tidy up.
466 // We're going to:
467 // - count the amount of "live" data (live_words, live_blocks)
468 // - count the amount of "copied" data in this GC (copied)
469 // - free from-space
470 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
471 //
472 live_words = 0;
473 live_blocks = 0;
474
475 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
476
477 if (g == N) {
478 generations[g].collections++; // for stats
479 if (n_gc_threads > 1) generations[g].par_collections++;
480 }
481
 482 // Count the mutable list as words "copied" for the purposes of
483 // stats. Every mutable list is copied during every GC.
484 if (g > 0) {
485 nat mut_list_size = 0;
486 for (n = 0; n < n_capabilities; n++) {
487 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
488 }
489 copied += mut_list_size;
490
491 debugTrace(DEBUG_gc,
492 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
493 (unsigned long)(mut_list_size * sizeof(W_)),
494 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
495 }
496
497 bdescr *next, *prev;
498 gen = &generations[g];
499
500 // for generations we collected...
501 if (g <= N) {
502
503 /* free old memory and shift to-space into from-space for all
504 * the collected steps (except the allocation area). These
 505 * freed blocks will probably be quickly recycled.
506 */
507 if (gen->mark)
508 {
509 // tack the new blocks on the end of the existing blocks
510 if (gen->old_blocks != NULL) {
511
512 prev = NULL;
513 for (bd = gen->old_blocks; bd != NULL; bd = next) {
514
515 next = bd->link;
516
517 if (!(bd->flags & BF_MARKED))
518 {
519 if (prev == NULL) {
520 gen->old_blocks = next;
521 } else {
522 prev->link = next;
523 }
524 freeGroup(bd);
525 gen->n_old_blocks--;
526 }
527 else
528 {
529 gen->n_words += bd->free - bd->start;
530
531 // NB. this step might not be compacted next
532 // time, so reset the BF_MARKED flags.
533 // They are set before GC if we're going to
534 // compact. (search for BF_MARKED above).
535 bd->flags &= ~BF_MARKED;
536
537 // between GCs, all blocks in the heap except
538 // for the nursery have the BF_EVACUATED flag set.
539 bd->flags |= BF_EVACUATED;
540
541 prev = bd;
542 }
543 }
544
545 if (prev != NULL) {
546 prev->link = gen->blocks;
547 gen->blocks = gen->old_blocks;
548 }
549 }
550 // add the new blocks to the block tally
551 gen->n_blocks += gen->n_old_blocks;
552 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
553 ASSERT(countOccupied(gen->blocks) == gen->n_words);
554 }
 555 else // not compacted
556 {
557 freeChain(gen->old_blocks);
558 }
559
560 gen->old_blocks = NULL;
561 gen->n_old_blocks = 0;
562
563 /* LARGE OBJECTS. The current live large objects are chained on
564 * scavenged_large, having been moved during garbage
565 * collection from large_objects. Any objects left on the
566 * large_objects list are therefore dead, so we free them here.
567 */
568 freeChain(gen->large_objects);
569 gen->large_objects = gen->scavenged_large_objects;
570 gen->n_large_blocks = gen->n_scavenged_large_blocks;
571 gen->n_new_large_words = 0;
572 }
573 else // for generations > N
574 {
575 /* For older generations, we need to append the
576 * scavenged_large_object list (i.e. large objects that have been
577 * promoted during this GC) to the large_object list for that step.
578 */
579 for (bd = gen->scavenged_large_objects; bd; bd = next) {
580 next = bd->link;
581 dbl_link_onto(bd, &gen->large_objects);
582 }
583
584 // add the new blocks we promoted during this GC
585 gen->n_large_blocks += gen->n_scavenged_large_blocks;
586 }
587
588 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
589
590 gen->scavenged_large_objects = NULL;
591 gen->n_scavenged_large_blocks = 0;
592
593 // Count "live" data
594 live_words += genLiveWords(gen);
595 live_blocks += genLiveBlocks(gen);
596
597 // add in the partial blocks in the gen_workspaces, but ignore gen 0
598 // if this is a local GC (we can't count another capability's part_list)
599 {
600 nat i;
601 for (i = 0; i < n_capabilities; i++) {
602 live_words += gcThreadLiveWords(i, gen->no);
603 live_blocks += gcThreadLiveBlocks(i, gen->no);
604 }
605 }
606 } // for all generations
607
608 // update the max size of older generations after a major GC
609 resize_generations();
610
611 // Free the mark stack.
612 if (mark_stack_top_bd != NULL) {
613 debugTrace(DEBUG_gc, "mark stack: %d blocks",
614 countBlocks(mark_stack_top_bd));
615 freeChain(mark_stack_top_bd);
616 }
617
618 // Free any bitmaps.
619 for (g = 0; g <= N; g++) {
620 gen = &generations[g];
621 if (gen->bitmap != NULL) {
622 freeGroup(gen->bitmap);
623 gen->bitmap = NULL;
624 }
625 }
626
627 // Reset the nursery: make the blocks empty
628 allocated += clearNurseries();
629
630 resize_nursery();
631
632 resetNurseries();
633
634 // mark the garbage collected CAFs as dead
635 #if 0 && defined(DEBUG) // doesn't work at the moment
636 if (major_gc) { gcCAFs(); }
637 #endif
638
639 #ifdef PROFILING
640 // resetStaticObjectForRetainerProfiling() must be called before
641 // zeroing below.
642
643 // ToDo: fix the gct->scavenged_static_objects below
644 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
645 #endif
646
647 // zero the scavenged static object list
648 if (major_gc) {
649 nat i;
650 if (n_gc_threads == 1) {
651 zero_static_object_list(gct->scavenged_static_objects);
652 } else {
653 for (i = 0; i < n_gc_threads; i++) {
654 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
655 }
656 }
657 }
658
659 // Update the stable pointer hash table.
660 updateStablePtrTable(major_gc);
661
662 // unlock the StablePtr table. Must be before scheduleFinalizers(),
663 // because a finalizer may call hs_free_fun_ptr() or
664 // hs_free_stable_ptr(), both of which access the StablePtr table.
665 stablePtrPostGC();
666
667 // Start any pending finalizers. Must be after
668 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
669 RELEASE_SM_LOCK;
670 scheduleFinalizers(cap, old_weak_ptr_list);
671 ACQUIRE_SM_LOCK;
672
673 // check sanity after GC
674 // before resurrectThreads(), because that might overwrite some
675 // closures, which will cause problems with THREADED where we don't
676 // fill slop.
677 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
678
679 // If a heap census is due, we need to do it before
680 // resurrectThreads(), for the same reason as checkSanity above:
681 // resurrectThreads() will overwrite some closures and leave slop
682 // behind.
683 if (do_heap_census) {
684 debugTrace(DEBUG_sched, "performing heap census");
685 RELEASE_SM_LOCK;
686 heapCensus(gct->gc_start_cpu);
687 ACQUIRE_SM_LOCK;
688 }
689
690 // send exceptions to any threads which were about to die
691 RELEASE_SM_LOCK;
692 resurrectThreads(resurrected_threads);
693 ACQUIRE_SM_LOCK;
694
695 if (major_gc) {
696 nat need, got;
697 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
698 got = mblocks_allocated;
699 /* If the amount of data remains constant, next major GC we'll
700 require (F+1)*need. We leave (F+2)*need in order to reduce
701 repeated deallocation and reallocation. */
702 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
703 if (got > need) {
704 returnMemoryToOS(got - need);
705 }
706 }
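// Worked example (illustrative numbers): with the default -F2
// (oldGenFactor == 2) and need == 100 megablocks, the threshold is
// (2 + 2) * 100 == 400 megablocks; if got == 500, the surplus 100
// megablocks are returned to the OS.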
707
708 // extra GC trace info
709 IF_DEBUG(gc, statDescribeGens());
710
711 #ifdef DEBUG
712 // symbol-table based profiling
713 /* heapCensus(to_blocks); */ /* ToDo */
714 #endif
715
716 // restore enclosing cost centre
717 #ifdef PROFILING
718 for (n = 0; n < n_capabilities; n++) {
719 capabilities[n].r.rCCCS = save_CCS[n];
720 }
721 #endif
722
723 #ifdef DEBUG
724 // check for memory leaks if DEBUG is on
725 memInventory(DEBUG_gc);
726 #endif
727
728 #ifdef RTS_GTK_FRONTPANEL
729 if (RtsFlags.GcFlags.frontpanel) {
 730 updateFrontPanelAfterGC( N, live_words );
731 }
732 #endif
733
734 // ok, GC over: tell the stats department what happened.
735 stat_endGC(gct, allocated, live_words,
736 copied, N, max_copied, avg_copied,
737 live_blocks * BLOCK_SIZE_W - live_words /* slop */);
738
739 // Guess which generation we'll collect *next* time
740 initialise_N(force_major_gc);
741
742 #if defined(RTS_USER_SIGNALS)
743 if (RtsFlags.MiscFlags.install_signal_handlers) {
744 // unblock signals again
745 unblockUserSignals();
746 }
747 #endif
748
749 RELEASE_SM_LOCK;
750
751 SET_GCT(saved_gct);
752 }
753
754 /* -----------------------------------------------------------------------------
755 Figure out which generation to collect, initialise N and major_gc.
756
757 Also returns the total number of blocks in generations that will be
758 collected.
759 -------------------------------------------------------------------------- */
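/* Example (illustrative): with +RTS -G3, if generations 0 and 1 are at
 * or over their max_blocks but generation 2 is not, the loop below sets
 * N = 1 and major_gc comes out false; a forced collection (e.g. via
 * performMajorGC) sets N to the oldest generation directly.
 */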
760
761 static nat
762 initialise_N (rtsBool force_major_gc)
763 {
764 int g;
765 nat blocks, blocks_total;
766
767 blocks = 0;
768 blocks_total = 0;
769
770 if (force_major_gc) {
771 N = RtsFlags.GcFlags.generations - 1;
772 } else {
773 N = 0;
774 }
775
776 for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
777
778 blocks = generations[g].n_words / BLOCK_SIZE_W
779 + generations[g].n_large_blocks;
780
781 if (blocks >= generations[g].max_blocks) {
782 N = stg_max(N,g);
783 }
784 if ((nat)g <= N) {
785 blocks_total += blocks;
786 }
787 }
788
789 blocks_total += countNurseryBlocks();
790
791 major_gc = (N == RtsFlags.GcFlags.generations-1);
792 return blocks_total;
793 }
794
795 /* -----------------------------------------------------------------------------
796 Initialise the gc_thread structures.
797 -------------------------------------------------------------------------- */
798
799 #define GC_THREAD_INACTIVE 0
800 #define GC_THREAD_STANDING_BY 1
801 #define GC_THREAD_RUNNING 2
802 #define GC_THREAD_WAITING_TO_CONTINUE 3
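// Worker lifecycle, as implemented below: a thread starts INACTIVE;
// gcWorkerThread() advances it to STANDING_BY once it is parked trying
// to acquire its gc_spin lock; wakeup_gc_threads() sets RUNNING and
// releases gc_spin; when a worker runs out of work it sets
// WAITING_TO_CONTINUE and parks on mut_spin, which shutdown_gc_threads()
// waits to observe; releaseGCThreads() finally resets it to INACTIVE.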
803
804 static void
805 new_gc_thread (nat n, gc_thread *t)
806 {
807 nat g;
808 gen_workspace *ws;
809
810 t->cap = &capabilities[n];
811
812 #ifdef THREADED_RTS
813 t->id = 0;
814 initSpinLock(&t->gc_spin);
815 initSpinLock(&t->mut_spin);
816 ACQUIRE_SPIN_LOCK(&t->gc_spin);
 817 t->wakeup = GC_THREAD_INACTIVE; // the worker sets this to STANDING_BY
 818 // once it has started up, see gcWorkerThread and waitForGcThreads
819 #endif
820
821 t->thread_index = n;
822 t->idle = rtsFalse;
823 t->free_blocks = NULL;
824 t->gc_count = 0;
825
826 init_gc_thread(t);
827
828 #ifdef USE_PAPI
829 t->papi_events = -1;
830 #endif
831
832 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
833 {
834 ws = &t->gens[g];
835 ws->gen = &generations[g];
836 ASSERT(g == ws->gen->no);
837 ws->my_gct = t;
838
839 // We want to call
840 // alloc_todo_block(ws,0);
841 // but can't, because it uses gct which isn't set up at this point.
842 // Hence, allocate a block for todo_bd manually:
843 {
844 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
845 initBdescr(bd, ws->gen, ws->gen->to);
846 bd->flags = BF_EVACUATED;
847 bd->u.scan = bd->free = bd->start;
848
849 ws->todo_bd = bd;
850 ws->todo_free = bd->free;
851 ws->todo_lim = bd->start + BLOCK_SIZE_W;
852 }
853
854 ws->todo_q = newWSDeque(128);
855 ws->todo_overflow = NULL;
856 ws->n_todo_overflow = 0;
857 ws->todo_large_objects = NULL;
858
859 ws->part_list = NULL;
860 ws->n_part_blocks = 0;
861
862 ws->scavd_list = NULL;
863 ws->n_scavd_blocks = 0;
864 }
865 }
866
867
868 void
869 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
870 {
871 #if defined(THREADED_RTS)
872 nat i;
873
874 if (from > 0) {
875 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
876 "initGcThreads");
877 } else {
878 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
879 "initGcThreads");
880 }
881
882 // We have to update the gct->cap pointers to point to the new
883 // Capability array now.
884 for (i = 0; i < from; i++) {
885 gc_threads[i]->cap = &capabilities[gc_threads[i]->cap->no];
886 }
887
888 for (i = from; i < to; i++) {
889 gc_threads[i] =
890 stgMallocBytes(sizeof(gc_thread) +
891 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
892 "alloc_gc_threads");
893
894 new_gc_thread(i, gc_threads[i]);
895 }
896 #else
897 ASSERT(from == 0 && to == 1);
898 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
899 gc_threads[0] = gct;
900 new_gc_thread(0,gc_threads[0]);
901 #endif
902 }
903
904 void
905 freeGcThreads (void)
906 {
907 nat g;
908 if (gc_threads != NULL) {
909 #if defined(THREADED_RTS)
910 nat i;
911 for (i = 0; i < n_capabilities; i++) {
912 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
913 {
914 freeWSDeque(gc_threads[i]->gens[g].todo_q);
915 }
916 stgFree (gc_threads[i]);
917 }
918 stgFree (gc_threads);
919 #else
920 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
921 {
922 freeWSDeque(gc_threads[0]->gens[g].todo_q);
923 }
924 stgFree (gc_threads);
925 #endif
926 gc_threads = NULL;
927 }
928 }
929
930 /* ----------------------------------------------------------------------------
931 Start GC threads
932 ------------------------------------------------------------------------- */
933
934 static volatile StgWord gc_running_threads;
935
936 static StgWord
937 inc_running (void)
938 {
939 StgWord new;
940 new = atomic_inc(&gc_running_threads);
941 ASSERT(new <= n_gc_threads);
942 return new;
943 }
944
945 static StgWord
946 dec_running (void)
947 {
948 ASSERT(gc_running_threads != 0);
949 return atomic_dec(&gc_running_threads);
950 }
951
952 static rtsBool
953 any_work (void)
954 {
955 int g;
956 gen_workspace *ws;
957
958 gct->any_work++;
959
960 write_barrier();
961
962 // scavenge objects in compacted generation
963 if (mark_stack_bd != NULL && !mark_stack_empty()) {
964 return rtsTrue;
965 }
966
967 // Check for global work in any step. We don't need to check for
968 // local work, because we have already exited scavenge_loop(),
969 // which means there is no local work for this thread.
970 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
971 ws = &gct->gens[g];
972 if (ws->todo_large_objects) return rtsTrue;
973 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
974 if (ws->todo_overflow) return rtsTrue;
975 }
976
977 #if defined(THREADED_RTS)
978 if (work_stealing) {
979 nat n;
980 // look for work to steal
981 for (n = 0; n < n_gc_threads; n++) {
982 if (n == gct->thread_index) continue;
983 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
984 ws = &gc_threads[n]->gens[g];
985 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
986 }
987 }
988 }
989 #endif
990
991 gct->no_work++;
992 #if defined(THREADED_RTS)
993 yieldThread();
994 #endif
995
996 return rtsFalse;
997 }
998
999 static void
1000 scavenge_until_all_done (void)
1001 {
1002 DEBUG_ONLY( nat r );
1003
1004
1005 loop:
1006 #if defined(THREADED_RTS)
1007 if (n_gc_threads > 1) {
1008 scavenge_loop();
1009 } else {
1010 scavenge_loop1();
1011 }
1012 #else
1013 scavenge_loop();
1014 #endif
1015
1016 collect_gct_blocks();
1017
1018 // scavenge_loop() only exits when there's no work to do
1019
1020 #ifdef DEBUG
1021 r = dec_running();
1022 #else
1023 dec_running();
1024 #endif
1025
1026 traceEventGcIdle(gct->cap);
1027
1028 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1029
1030 while (gc_running_threads != 0) {
1031 // usleep(1);
1032 if (any_work()) {
1033 inc_running();
1034 traceEventGcWork(gct->cap);
1035 goto loop;
1036 }
1037 // any_work() does not remove the work from the queue, it
1038 // just checks for the presence of work. If we find any,
1039 // then we increment gc_running_threads and go back to
1040 // scavenge_loop() to perform any pending work.
1041 }
1042
1043 traceEventGcDone(gct->cap);
1044 }
1045
1046 #if defined(THREADED_RTS)
1047
1048 void
1049 gcWorkerThread (Capability *cap)
1050 {
1051 gc_thread *saved_gct;
1052
1053 // necessary if we stole a callee-saves register for gct:
1054 saved_gct = gct;
1055
1056 SET_GCT(gc_threads[cap->no]);
1057 gct->id = osThreadId();
1058
1059 stat_gcWorkerThreadStart(gct);
1060
1061 // Wait until we're told to wake up
1062 RELEASE_SPIN_LOCK(&gct->mut_spin);
1063 gct->wakeup = GC_THREAD_STANDING_BY;
1064 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1065 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1066
1067 #ifdef USE_PAPI
1068 // start performance counters in this thread...
1069 if (gct->papi_events == -1) {
1070 papi_init_eventset(&gct->papi_events);
1071 }
1072 papi_thread_start_gc1_count(gct->papi_events);
1073 #endif
1074
1075 init_gc_thread(gct);
1076
1077 traceEventGcWork(gct->cap);
1078
1079 // Every thread evacuates some roots.
1080 gct->evac_gen_no = 0;
1081 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1082 scavenge_capability_mut_lists(cap);
1083
1084 scavenge_until_all_done();
1085
1086 #ifdef THREADED_RTS
1087 // Now that the whole heap is marked, we discard any sparks that
1088 // were found to be unreachable. The main GC thread is currently
1089 // marking heap reachable via weak pointers, so it is
1090 // non-deterministic whether a spark will be retained if it is
1091 // only reachable via weak pointers. To fix this problem would
1092 // require another GC barrier, which is too high a price.
1093 pruneSparkQueue(cap);
1094 #endif
1095
1096 #ifdef USE_PAPI
1097 // count events in this thread towards the GC totals
1098 papi_thread_stop_gc1_count(gct->papi_events);
1099 #endif
1100
1101 // Wait until we're told to continue
1102 RELEASE_SPIN_LOCK(&gct->gc_spin);
1103 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1104 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1105 gct->thread_index);
1106 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1107 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1108
1109 // record the time spent doing GC in the Task structure
1110 stat_gcWorkerThreadDone(gct);
1111
1112 SET_GCT(saved_gct);
1113 }
1114
1115 #endif
1116
1117 #if defined(THREADED_RTS)
1118
1119 void
1120 waitForGcThreads (Capability *cap USED_IF_THREADS)
1121 {
1122 const nat n_threads = n_capabilities;
1123 const nat me = cap->no;
1124 nat i, j;
1125 rtsBool retry = rtsTrue;
1126
1127 while(retry) {
1128 for (i=0; i < n_threads; i++) {
1129 if (i == me || gc_threads[i]->idle) continue;
1130 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1131 prodCapability(&capabilities[i], cap->running_task);
1132 }
1133 }
1134 for (j=0; j < 10; j++) {
1135 retry = rtsFalse;
1136 for (i=0; i < n_threads; i++) {
1137 if (i == me || gc_threads[i]->idle) continue;
1138 write_barrier();
1139 interruptCapability(&capabilities[i]);
1140 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1141 retry = rtsTrue;
1142 }
1143 }
1144 if (!retry) break;
1145 yieldThread();
1146 }
1147 }
1148 }
1149
1150 #endif // THREADED_RTS
1151
1152 static void
1153 start_gc_threads (void)
1154 {
1155 #if defined(THREADED_RTS)
1156 gc_running_threads = 0;
1157 #endif
1158 }
1159
1160 static void
1161 wakeup_gc_threads (nat me USED_IF_THREADS)
1162 {
1163 #if defined(THREADED_RTS)
1164 nat i;
1165
1166 if (n_gc_threads == 1) return;
1167
1168 for (i=0; i < n_gc_threads; i++) {
1169 if (i == me || gc_threads[i]->idle) continue;
1170 inc_running();
1171 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1172 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1173
1174 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1175 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1176 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1177 }
1178 #endif
1179 }
1180
 1181 // After GC is complete, we must wait for all GC threads to reach the
 1182 // GC_THREAD_WAITING_TO_CONTINUE state, otherwise they may still be
 1183 // executing inside any_work(), and remain awake until the next GC starts.
1184 static void
1185 shutdown_gc_threads (nat me USED_IF_THREADS)
1186 {
1187 #if defined(THREADED_RTS)
1188 nat i;
1189
1190 if (n_gc_threads == 1) return;
1191
1192 for (i=0; i < n_gc_threads; i++) {
1193 if (i == me || gc_threads[i]->idle) continue;
1194 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1195 }
1196 #endif
1197 }
1198
1199 #if defined(THREADED_RTS)
1200 void
1201 releaseGCThreads (Capability *cap USED_IF_THREADS)
1202 {
1203 const nat n_threads = n_capabilities;
1204 const nat me = cap->no;
1205 nat i;
1206 for (i=0; i < n_threads; i++) {
1207 if (i == me || gc_threads[i]->idle) continue;
1208 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1209 barf("releaseGCThreads");
1210
1211 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1212 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1213 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1214 }
1215 }
1216 #endif
1217
1218 /* ----------------------------------------------------------------------------
1219 Initialise a generation that is to be collected
1220 ------------------------------------------------------------------------- */
1221
1222 static void
1223 prepare_collected_gen (generation *gen)
1224 {
1225 nat i, g, n;
1226 gen_workspace *ws;
1227 bdescr *bd, *next;
1228
1229 // Throw away the current mutable list. Invariant: the mutable
1230 // list always has at least one block; this means we can avoid a
1231 // check for NULL in recordMutable().
1232 g = gen->no;
1233 if (g != 0) {
1234 for (i = 0; i < n_capabilities; i++) {
1235 freeChain(capabilities[i].mut_lists[g]);
1236 capabilities[i].mut_lists[g] = allocBlock();
1237 }
1238 }
1239
1240 gen = &generations[g];
1241 ASSERT(gen->no == g);
1242
 1243 // we'll construct a new list of threads in this generation
 1244 // during GC, so throw away the current list.
1245 gen->old_threads = gen->threads;
1246 gen->threads = END_TSO_QUEUE;
1247
1248 // deprecate the existing blocks
1249 gen->old_blocks = gen->blocks;
1250 gen->n_old_blocks = gen->n_blocks;
1251 gen->blocks = NULL;
1252 gen->n_blocks = 0;
1253 gen->n_words = 0;
1254 gen->live_estimate = 0;
1255
1256 // initialise the large object queues.
1257 ASSERT(gen->scavenged_large_objects == NULL);
1258 ASSERT(gen->n_scavenged_large_blocks == 0);
1259
1260 // grab all the partial blocks stashed in the gc_thread workspaces and
1261 // move them to the old_blocks list of this gen.
1262 for (n = 0; n < n_capabilities; n++) {
1263 ws = &gc_threads[n]->gens[gen->no];
1264
1265 for (bd = ws->part_list; bd != NULL; bd = next) {
1266 next = bd->link;
1267 bd->link = gen->old_blocks;
1268 gen->old_blocks = bd;
1269 gen->n_old_blocks += bd->blocks;
1270 }
1271 ws->part_list = NULL;
1272 ws->n_part_blocks = 0;
1273
1274 ASSERT(ws->scavd_list == NULL);
1275 ASSERT(ws->n_scavd_blocks == 0);
1276
1277 if (ws->todo_free != ws->todo_bd->start) {
1278 ws->todo_bd->free = ws->todo_free;
1279 ws->todo_bd->link = gen->old_blocks;
1280 gen->old_blocks = ws->todo_bd;
1281 gen->n_old_blocks += ws->todo_bd->blocks;
1282 alloc_todo_block(ws,0); // always has one block.
1283 }
1284 }
1285
1286 // mark the small objects as from-space
1287 for (bd = gen->old_blocks; bd; bd = bd->link) {
1288 bd->flags &= ~BF_EVACUATED;
1289 }
1290
1291 // mark the large objects as from-space
1292 for (bd = gen->large_objects; bd; bd = bd->link) {
1293 bd->flags &= ~BF_EVACUATED;
1294 }
1295
1296 // for a compacted generation, we need to allocate the bitmap
1297 if (gen->mark) {
1298 lnat bitmap_size; // in bytes
1299 bdescr *bitmap_bdescr;
1300 StgWord *bitmap;
1301
1302 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
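// i.e. one bit per word of from-space: with the default 4k block size
// and 8-byte words (illustrative figures), BLOCK_SIZE_W == 512, so
// each old block contributes 512 bits == 64 bytes of bitmap.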
1303
1304 if (bitmap_size > 0) {
1305 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1306 / BLOCK_SIZE);
1307 gen->bitmap = bitmap_bdescr;
1308 bitmap = bitmap_bdescr->start;
1309
1310 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1311 bitmap_size, bitmap);
1312
1313 // don't forget to fill it with zeros!
1314 memset(bitmap, 0, bitmap_size);
1315
1316 // For each block in this step, point to its bitmap from the
1317 // block descriptor.
1318 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1319 bd->u.bitmap = bitmap;
1320 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1321
1322 // Also at this point we set the BF_MARKED flag
1323 // for this block. The invariant is that
1324 // BF_MARKED is always unset, except during GC
1325 // when it is set on those blocks which will be
1326 // compacted.
1327 if (!(bd->flags & BF_FRAGMENTED)) {
1328 bd->flags |= BF_MARKED;
1329 }
1330
1331 // BF_SWEPT should be marked only for blocks that are being
1332 // collected in sweep()
1333 bd->flags &= ~BF_SWEPT;
1334 }
1335 }
1336 }
1337 }
1338
1339
1340 /* ----------------------------------------------------------------------------
1341 Save the mutable lists in saved_mut_lists
1342 ------------------------------------------------------------------------- */
1343
1344 static void
1345 stash_mut_list (Capability *cap, nat gen_no)
1346 {
1347 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1348 cap->mut_lists[gen_no] = allocBlock_sync();
1349 }
1350
1351 /* ----------------------------------------------------------------------------
1352 Initialise a generation that is *not* to be collected
1353 ------------------------------------------------------------------------- */
1354
1355 static void
1356 prepare_uncollected_gen (generation *gen)
1357 {
1358 nat i;
1359
1360
1361 ASSERT(gen->no > 0);
1362
1363 // save the current mutable lists for this generation, and
1364 // allocate a fresh block for each one. We'll traverse these
1365 // mutable lists as roots early on in the GC.
1366 for (i = 0; i < n_capabilities; i++) {
1367 stash_mut_list(&capabilities[i], gen->no);
1368 }
1369
1370 ASSERT(gen->scavenged_large_objects == NULL);
1371 ASSERT(gen->n_scavenged_large_blocks == 0);
1372 }
1373
1374 /* -----------------------------------------------------------------------------
1375 Collect the completed blocks from a GC thread and attach them to
1376 the generation.
1377 -------------------------------------------------------------------------- */
1378
1379 static void
1380 collect_gct_blocks (void)
1381 {
1382 nat g;
1383 gen_workspace *ws;
1384 bdescr *bd, *prev;
1385
1386 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1387 ws = &gct->gens[g];
1388
1389 // there may still be a block attached to ws->todo_bd;
1390 // leave it there to use next time.
1391
1392 if (ws->scavd_list != NULL) {
1393 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1394
1395 ASSERT(gct->scan_bd == NULL);
1396 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1397
1398 prev = NULL;
1399 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1400 ws->gen->n_words += bd->free - bd->start;
1401 prev = bd;
1402 }
1403 if (prev != NULL) {
1404 prev->link = ws->gen->blocks;
1405 ws->gen->blocks = ws->scavd_list;
1406 }
1407 ws->gen->n_blocks += ws->n_scavd_blocks;
1408
1409 ws->scavd_list = NULL;
1410 ws->n_scavd_blocks = 0;
1411
1412 RELEASE_SPIN_LOCK(&ws->gen->sync);
1413 }
1414 }
1415 }
1416
1417 /* -----------------------------------------------------------------------------
1418 Initialise a gc_thread before GC
1419 -------------------------------------------------------------------------- */
1420
1421 static void
1422 init_gc_thread (gc_thread *t)
1423 {
1424 t->static_objects = END_OF_STATIC_LIST;
1425 t->scavenged_static_objects = END_OF_STATIC_LIST;
1426 t->scan_bd = NULL;
1427 t->mut_lists = t->cap->mut_lists;
1428 t->evac_gen_no = 0;
1429 t->failed_to_evac = rtsFalse;
1430 t->eager_promotion = rtsTrue;
1431 t->thunk_selector_depth = 0;
1432 t->copied = 0;
1433 t->scanned = 0;
1434 t->any_work = 0;
1435 t->no_work = 0;
1436 t->scav_find_work = 0;
1437 }
1438
1439 /* -----------------------------------------------------------------------------
1440 Function we pass to evacuate roots.
1441 -------------------------------------------------------------------------- */
1442
1443 static void
1444 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1445 {
1446 // we stole a register for gct, but this function is called from
1447 // *outside* the GC where the register variable is not in effect,
1448 // so we need to save and restore it here. NB. only call
1449 // mark_root() from the main GC thread, otherwise gct will be
1450 // incorrect.
1451 #if defined(THREADED_RTS)
1452 gc_thread *saved_gct;
1453 saved_gct = gct;
1454 #endif
1455 SET_GCT(user);
1456
1457 evacuate(root);
1458
1459 SET_GCT(saved_gct);
1460 }
1461
1462 /* -----------------------------------------------------------------------------
1463 Initialising the static object & mutable lists
1464 -------------------------------------------------------------------------- */
1465
1466 static void
1467 zero_static_object_list(StgClosure* first_static)
1468 {
1469 StgClosure* p;
1470 StgClosure* link;
1471 const StgInfoTable *info;
1472
1473 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1474 info = get_itbl(p);
1475 link = *STATIC_LINK(info, p);
1476 *STATIC_LINK(info,p) = NULL;
1477 }
1478 }
1479
1480 /* ----------------------------------------------------------------------------
1481 Reset the sizes of the older generations when we do a major
1482 collection.
1483
1484 CURRENT STRATEGY: make all generations except zero the same size.
1485 We have to stay within the maximum heap size, and leave a certain
1486 percentage of the maximum heap size available to allocate into.
1487 ------------------------------------------------------------------------- */
1488
1489 static void
1490 resize_generations (void)
1491 {
1492 nat g;
1493
1494 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1495 nat live, size, min_alloc, words;
1496 const nat max = RtsFlags.GcFlags.maxHeapSize;
1497 const nat gens = RtsFlags.GcFlags.generations;
1498
1499 // live in the oldest generations
1500 if (oldest_gen->live_estimate != 0) {
1501 words = oldest_gen->live_estimate;
1502 } else {
1503 words = oldest_gen->n_words;
1504 }
1505 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1506 oldest_gen->n_large_blocks;
1507
1508 // default max size for all generations except zero
1509 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1510 RtsFlags.GcFlags.minOldGenSize);
1511
1512 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1513 RtsFlags.GcFlags.heapSizeSuggestion = size;
1514 }
1515
1516 // minimum size for generation zero
1517 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1518 RtsFlags.GcFlags.minAllocAreaSize);
1519
1520 // Auto-enable compaction when the residency reaches a
1521 // certain percentage of the maximum heap size (default: 30%).
1522 if (RtsFlags.GcFlags.compact ||
1523 (max > 0 &&
1524 oldest_gen->n_blocks >
1525 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1526 oldest_gen->mark = 1;
1527 oldest_gen->compact = 1;
1528 // debugBelch("compaction: on\n", live);
1529 } else {
1530 oldest_gen->mark = 0;
1531 oldest_gen->compact = 0;
1532 // debugBelch("compaction: off\n", live);
1533 }
1534
1535 if (RtsFlags.GcFlags.sweep) {
1536 oldest_gen->mark = 1;
1537 }
1538
1539 // if we're going to go over the maximum heap size, reduce the
1540 // size of the generations accordingly. The calculation is
1541 // different if compaction is turned on, because we don't need
1542 // to double the space required to collect the old generation.
1543 if (max != 0) {
1544
1545 // this test is necessary to ensure that the calculations
1546 // below don't have any negative results - we're working
1547 // with unsigned values here.
1548 if (max < min_alloc) {
1549 heapOverflow();
1550 }
1551
1552 if (oldest_gen->compact) {
1553 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1554 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1555 }
1556 } else {
1557 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1558 size = (max - min_alloc) / ((gens - 1) * 2);
1559 }
1560 }
1561
1562 if (size < live) {
1563 heapOverflow();
1564 }
1565 }
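// Illustrative example: with +RTS -G2 and no compaction there is a
// single old generation, and collecting it by copying temporarily
// needs twice its size; so if size * 2 + min_alloc would exceed the
// -M limit, size is cut back to (max - min_alloc) / 2, leaving room
// for from-space, to-space and the allocation area.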
1566
1567 #if 0
1568 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1569 min_alloc, size, max);
1570 #endif
1571
1572 for (g = 0; g < gens; g++) {
1573 generations[g].max_blocks = size;
1574 }
1575 }
1576 }
1577
1578 /* -----------------------------------------------------------------------------
1579 Calculate the new size of the nursery, and resize it.
1580 -------------------------------------------------------------------------- */
1581
1582 static void
1583 resize_nursery (void)
1584 {
1585 const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1586
1587 if (RtsFlags.GcFlags.generations == 1)
1588 { // Two-space collector:
1589 nat blocks;
1590
1591 /* set up a new nursery. Allocate a nursery size based on a
1592 * function of the amount of live data (by default a factor of 2)
1593 * Use the blocks from the old nursery if possible, freeing up any
1594 * left over blocks.
1595 *
1596 * If we get near the maximum heap size, then adjust our nursery
1597 * size accordingly. If the nursery is the same size as the live
1598 * data (L), then we need 3L bytes. We can reduce the size of the
1599 * nursery to bring the required memory down near 2L bytes.
1600 *
1601 * A normal 2-space collector would need 4L bytes to give the same
1602 * performance we get from 3L bytes, reducing to the same
1603 * performance at 2L bytes.
1604 */
1605 blocks = generations[0].n_blocks;
1606
1607 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1608 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1609 RtsFlags.GcFlags.maxHeapSize )
1610 {
1611 long adjusted_blocks; // signed on purpose
1612 int pc_free;
1613
1614 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1615
1616 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1617 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1618
1619 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
 1620 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1621 {
1622 heapOverflow();
1623 }
1624 blocks = adjusted_blocks;
1625 }
1626 else
1627 {
1628 blocks *= RtsFlags.GcFlags.oldGenFactor;
1629 if (blocks < min_nursery)
1630 {
1631 blocks = min_nursery;
1632 }
1633 }
1634 resizeNurseries(blocks);
1635 }
1636 else // Generational collector
1637 {
1638 /*
1639 * If the user has given us a suggested heap size, adjust our
1640 * allocation area to make best use of the memory available.
1641 */
1642 if (RtsFlags.GcFlags.heapSizeSuggestion)
1643 {
1644 long blocks;
1645 const nat needed = calcNeeded(); // approx blocks needed at next GC
1646
1647 /* Guess how much will be live in generation 0 step 0 next time.
1648 * A good approximation is obtained by finding the
1649 * percentage of g0 that was live at the last minor GC.
1650 *
1651 * We have an accurate figure for the amount of copied data in
1652 * 'copied', but we must convert this to a number of blocks, with
1653 * a small adjustment for estimated slop at the end of a block
1654 * (- 10 words).
1655 */
1656 if (N == 0)
1657 {
1658 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1659 / countNurseryBlocks();
1660 }
1661
1662 /* Estimate a size for the allocation area based on the
1663 * information available. We might end up going slightly under
1664 * or over the suggested heap size, but we should be pretty
1665 * close on average.
1666 *
1667 * Formula: suggested - needed
1668 * ----------------------------
1669 * 1 + g0_pcnt_kept/100
1670 *
1671 * where 'needed' is the amount of memory needed at the next
1672 * collection for collecting all gens except g0.
1673 */
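/* Worked example (illustrative numbers): if the suggested heap size
 * comes to 100000 blocks, needed == 22000 blocks and g0_pcnt_kept == 30,
 * then blocks = (100000 - 22000) * 100 / 130 == 60000 blocks for the
 * allocation area.
 */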
1674 blocks =
1675 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1676 (100 + (long)g0_pcnt_kept);
1677
1678 if (blocks < (long)min_nursery) {
1679 blocks = min_nursery;
1680 }
1681
1682 resizeNurseries((nat)blocks);
1683 }
1684 else
1685 {
1686 // we might have added extra large blocks to the nursery, so
1687 // resize back to minAllocAreaSize again.
1688 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1689 }
1690 }
1691 }
1692
1693 /* -----------------------------------------------------------------------------
1694 Sanity code for CAF garbage collection.
1695
1696 With DEBUG turned on, we manage a CAF list in addition to the SRT
1697 mechanism. After GC, we run down the CAF list and blackhole any
1698 CAFs which have been garbage collected. This means we get an error
1699 whenever the program tries to enter a garbage collected CAF.
1700
1701 Any garbage collected CAFs are taken off the CAF list at the same
1702 time.
1703 -------------------------------------------------------------------------- */
1704
1705 #if 0 && defined(DEBUG)
1706
1707 static void
1708 gcCAFs(void)
1709 {
1710 StgClosure* p;
1711 StgClosure** pp;
1712 const StgInfoTable *info;
1713 nat i;
1714
1715 i = 0;
1716 p = caf_list;
1717 pp = &caf_list;
1718
1719 while (p != NULL) {
1720
1721 info = get_itbl(p);
1722
1723 ASSERT(info->type == IND_STATIC);
1724
1725 if (STATIC_LINK(info,p) == NULL) {
1726 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1727 // black hole it
1728 SET_INFO(p,&stg_BLACKHOLE_info);
1729 p = STATIC_LINK2(info,p);
1730 *pp = p;
1731 }
1732 else {
1733 pp = &STATIC_LINK2(info,p);
1734 p = *pp;
1735 i++;
1736 }
1737
1738 }
1739
1740 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1741 }
1742 #endif