GC refactoring and cleanup
[ghc.git] / rts / sm / GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "Compact.h"
44 #include "Evac.h"
45 #include "Scav.h"
46 #include "GCUtils.h"
47 #include "MarkStack.h"
48 #include "MarkWeak.h"
49 #include "Sparks.h"
50 #include "Sweep.h"
51
52 #include <string.h> // for memset()
53 #include <unistd.h>
54
55 /* -----------------------------------------------------------------------------
56 Global variables
57 -------------------------------------------------------------------------- */
58
59 /* STATIC OBJECT LIST.
60 *
61 * During GC:
62 * We maintain a linked list of static objects that are still live.
63 * The requirements for this list are:
64 *
65 * - we need to scan the list while adding to it, in order to
66 * scavenge all the static objects (in the same way that
67 * breadth-first scavenging works for dynamic objects).
68 *
69 * - we need to be able to tell whether an object is already on
70 * the list, to break loops.
71 *
72 * Each static object has a "static link field", which we use for
73 * linking objects on to the list. We use a stack-type list, consing
74 * objects on the front as they are added (this means that the
75 * scavenge phase is depth-first, not breadth-first, but that
76 * shouldn't matter).
77 *
78 * A separate list is kept for objects that have been scavenged
79 * already - this is so that we can zero all the marks afterwards.
80 *
81 * An object is on the list if its static link field is non-zero; this
82 * means that we have to mark the end of the list with the non-NULL
83 * sentinel END_OF_STATIC_LIST ('1'), not NULL.
84 * Extra notes for generational GC:
85 *
86 * Each generation has a static object list associated with it. When
87 * collecting generations up to N, we treat the static object lists
88 * from generations > N as roots.
89 *
90 * We build up a static object list while collecting generations 0..N,
91 * which is then appended to the static object list of generation N+1.
92 */
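/* A minimal sketch of the consing idiom described above (illustrative
 * only, and guarded out; the real logic lives in the evacuate code in
 * Evac.c and may differ in detail). Here q stands for the static
 * closure being added and info for its info table:
 */
#if 0
    if (*STATIC_LINK(info, q) == NULL) {        // not yet on any list
        *STATIC_LINK(info, q) = gct->static_objects;
        gct->static_objects = (StgClosure *)q;  // cons onto the front
    }
#endif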
93
94 /* N is the oldest generation being collected, where the generations
95 * are numbered starting at 0. A major GC (indicated by the major_gc
96 * flag) is when we're collecting all generations. We only attempt to
97 * deal with static objects and GC CAFs when doing a major GC.
98 */
99 nat N;
100 rtsBool major_gc;
101
102 /* Data used for allocation area sizing.
103 */
104 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
105
106 /* Mut-list stats */
107 #ifdef DEBUG
108 nat mutlist_MUTVARS,
109 mutlist_MUTARRS,
110 mutlist_MVARS,
111 mutlist_OTHERS;
112 #endif
113
114 /* Thread-local data for each GC thread
115 */
116 gc_thread **gc_threads = NULL;
117
118 #if !defined(THREADED_RTS)
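// room for the gc_thread structure itself plus its trailing array of
// gen_workspaces, sized for up to 64 generations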
119 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
120 #endif
121
122 // Number of threads running in *this* GC. Affects how many
123 // gen_workspace todo lists we have to look in to find work.
124 nat n_gc_threads;
125
126 // For stats:
127 long copied; // *words* copied & scavenged during this GC
128
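// rtsTrue if idle GC threads should steal work from other threads'
// workspaces; set at the start of GarbageCollect() from the parallel
// GC flags and consulted when looking for work (see any_work() below).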
129 rtsBool work_stealing;
130
131 DECLARE_GCT
132
133 /* -----------------------------------------------------------------------------
134 Static function declarations
135 -------------------------------------------------------------------------- */
136
137 static void mark_root (void *user, StgClosure **root);
138 static void zero_static_object_list (StgClosure* first_static);
139 static nat initialise_N (rtsBool force_major_gc);
140 static void prepare_collected_gen (generation *gen);
141 static void prepare_uncollected_gen (generation *gen);
142 static void init_gc_thread (gc_thread *t);
143 static void resize_generations (void);
144 static void resize_nursery (void);
145 static void start_gc_threads (void);
146 static void scavenge_until_all_done (void);
147 static StgWord inc_running (void);
148 static StgWord dec_running (void);
149 static void wakeup_gc_threads (nat n_threads, nat me);
150 static void shutdown_gc_threads (nat n_threads, nat me);
151 static void collect_gct_blocks (void);
152
153 #if 0 && defined(DEBUG)
154 static void gcCAFs (void);
155 #endif
156
157 /* -----------------------------------------------------------------------------
158 The mark stack.
159 -------------------------------------------------------------------------- */
160
161 bdescr *mark_stack_top_bd; // topmost block in the mark stack
162 bdescr *mark_stack_bd; // current block in the mark stack
163 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
164
165 /* -----------------------------------------------------------------------------
166 GarbageCollect: the main entry point to the garbage collector.
167
168 Locks held: all capabilities are held throughout GarbageCollect().
169 -------------------------------------------------------------------------- */
170
171 void
172 GarbageCollect (rtsBool force_major_gc,
173 nat gc_type USED_IF_THREADS,
174 Capability *cap)
175 {
176 bdescr *bd;
177 generation *gen;
178 lnat live_blocks, live_words, allocated, max_copied, avg_copied;
179 gc_thread *saved_gct;
180 nat g, t, n;
181
182 // necessary if we stole a callee-saves register for gct:
183 saved_gct = gct;
184
185 #ifdef PROFILING
186 CostCentreStack *prev_CCS;
187 #endif
188
189 ACQUIRE_SM_LOCK;
190
191 #if defined(RTS_USER_SIGNALS)
192 if (RtsFlags.MiscFlags.install_signal_handlers) {
193 // block signals
194 blockUserSignals();
195 }
196 #endif
197
198 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
199 // otherwise adjust the padding in gen_workspace.
200
201 // tell the stats department that we've started a GC
202 stat_startGC();
203
204 // tell the STM to discard any cached closures it's hoping to re-use
205 stmPreGCHook();
206
207 // lock the StablePtr table
208 stablePtrPreGC();
209
210 #ifdef DEBUG
211 mutlist_MUTVARS = 0;
212 mutlist_MUTARRS = 0;
213 mutlist_OTHERS = 0;
214 #endif
215
216 // attribute any costs to CCS_GC
217 #ifdef PROFILING
218 prev_CCS = CCCS;
219 CCCS = CCS_GC;
220 #endif
221
222 /* Approximate how much we allocated.
223 * Todo: only when generating stats?
224 */
225 allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
226
227 /* Figure out which generation to collect
228 */
229 n = initialise_N(force_major_gc);
230
231 #if defined(THREADED_RTS)
232 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
233 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
234 // It's not always a good idea to do load balancing in parallel
235 // GC. In particular, for a parallel program we don't want to
236 // lose locality by moving cached data into another CPU's cache
237 // (this effect can be quite significant).
238 //
239 // We could have a more complex way to determine whether to do
240 // work stealing or not, e.g. it might be a good idea to do it
241 // if the heap is big. For now, we just turn it on or off with
242 // a flag.
243 #endif
244
245 /* Start threads, so they can be spinning up while we finish initialisation.
246 */
247 start_gc_threads();
248
249 #if defined(THREADED_RTS)
250 /* How many threads will be participating in this GC?
251 * We don't try to parallelise minor GCs (unless the user asks for
252 * it with +RTS -gn0), or mark/compact/sweep GC.
253 */
254 if (gc_type == PENDING_GC_PAR) {
255 n_gc_threads = RtsFlags.ParFlags.nNodes;
256 } else {
257 n_gc_threads = 1;
258 }
259 #else
260 n_gc_threads = 1;
261 #endif
262
263 debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
264 N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
265
266 #ifdef RTS_GTK_FRONTPANEL
267 if (RtsFlags.GcFlags.frontpanel) {
268 updateFrontPanelBeforeGC(N);
269 }
270 #endif
271
272 #ifdef DEBUG
273 // check for memory leaks if DEBUG is on
274 memInventory(DEBUG_gc);
275 #endif
276
277 // check sanity *before* GC
278 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
279
280 // Initialise all our gc_thread structures
281 for (t = 0; t < n_gc_threads; t++) {
282 init_gc_thread(gc_threads[t]);
283 }
284
285 // Initialise all the generations/steps that we're collecting.
286 for (g = 0; g <= N; g++) {
287 prepare_collected_gen(&generations[g]);
288 }
289 // Initialise all the generations/steps that we're *not* collecting.
290 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
291 prepare_uncollected_gen(&generations[g]);
292 }
293
294 /* Allocate a mark stack if we're doing a major collection.
295 */
296 if (major_gc && oldest_gen->mark) {
297 mark_stack_bd = allocBlock();
298 mark_stack_top_bd = mark_stack_bd;
299 mark_stack_bd->link = NULL;
300 mark_stack_bd->u.back = NULL;
301 mark_sp = mark_stack_bd->start;
302 } else {
303 mark_stack_bd = NULL;
304 mark_stack_top_bd = NULL;
305 mark_sp = NULL;
306 }
307
308 // this is the main thread
309 #ifdef THREADED_RTS
310 if (n_gc_threads == 1) {
311 SET_GCT(gc_threads[0]);
312 } else {
313 SET_GCT(gc_threads[cap->no]);
314 }
315 #else
316 SET_GCT(gc_threads[0]);
317 #endif
318
319 /* -----------------------------------------------------------------------
320 * follow all the roots that we know about:
321 */
322
323 // the main thread is running: this prevents any other threads from
324 // exiting prematurely, so we can start them now.
325 // NB. do this after the mutable lists have been saved above, otherwise
326 // the other GC threads will be writing into the old mutable lists.
327 inc_running();
328 wakeup_gc_threads(n_gc_threads, gct->thread_index);
329
330 // scavenge the capability-private mutable lists. This isn't part
331 // of markSomeCapabilities() because markSomeCapabilities() can only
332 // call back into the GC via mark_root() (due to the gct register
333 // variable).
334 if (n_gc_threads == 1) {
335 for (n = 0; n < n_capabilities; n++) {
336 #if defined(THREADED_RTS)
337 scavenge_capability_mut_Lists1(&capabilities[n]);
338 #else
339 scavenge_capability_mut_lists(&capabilities[n]);
340 #endif
341 }
342 } else {
343 scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
344 }
345
346 // follow roots from the CAF list (used by GHCi)
347 gct->evac_gen_no = 0;
348 markCAFs(mark_root, gct);
349
350 // follow all the roots that the application knows about.
351 gct->evac_gen_no = 0;
352 markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
353 rtsTrue/*prune sparks*/);
354
355 #if defined(RTS_USER_SIGNALS)
356 // mark the signal handlers (signals should be already blocked)
357 markSignalHandlers(mark_root, gct);
358 #endif
359
360 // Mark the weak pointer list, and prepare to detect dead weak pointers.
361 markWeakPtrList();
362 initWeakForGC();
363
364 // Mark the stable pointer table.
365 markStablePtrTable(mark_root, gct);
366
367 /* -------------------------------------------------------------------------
368 * Repeatedly scavenge all the areas we know about until there's no
369 * more scavenging to be done.
370 */
371 for (;;)
372 {
373 scavenge_until_all_done();
374 // The other threads are now stopped. We might recurse back to
375 // here, but from now on this is the only thread.
376
377 // must be last... invariant is that everything is fully
378 // scavenged at this point.
379 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
380 inc_running();
381 continue;
382 }
383
384 // If we get to here, there's really nothing left to do.
385 break;
386 }
387
388 shutdown_gc_threads(n_gc_threads, gct->thread_index);
389
390 // Now see which stable names are still alive.
391 gcStablePtrTable();
392
393 #ifdef THREADED_RTS
394 if (n_gc_threads == 1) {
395 for (n = 0; n < n_capabilities; n++) {
396 pruneSparkQueue(&capabilities[n]);
397 }
398 } else {
399 pruneSparkQueue(&capabilities[gct->thread_index]);
400 }
401 #endif
402
403 #ifdef PROFILING
404 // We call processHeapClosureForDead() on every closure destroyed during
405 // the current garbage collection, so we invoke LdvCensusForDead().
406 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
407 || RtsFlags.ProfFlags.bioSelector != NULL)
408 LdvCensusForDead(N);
409 #endif
410
411 // NO MORE EVACUATION AFTER THIS POINT!
412
413 // Two-space collector: free the old to-space.
414 // g0->old_blocks is the old nursery
415 // g0->blocks is to-space from the previous GC
416 if (RtsFlags.GcFlags.generations == 1) {
417 if (g0->blocks != NULL) {
418 freeChain(g0->blocks);
419 g0->blocks = NULL;
420 }
421 }
422
423 // Finally: compact or sweep the oldest generation.
424 if (major_gc && oldest_gen->mark) {
425 if (oldest_gen->compact)
426 compact(gct->scavenged_static_objects);
427 else
428 sweep(oldest_gen);
429 }
430
431 copied = 0;
432 max_copied = 0;
433 avg_copied = 0;
434 {
435 nat i;
436 for (i=0; i < n_gc_threads; i++) {
437 if (n_gc_threads > 1) {
438 debugTrace(DEBUG_gc,"thread %d:", i);
439 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
440 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
441 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
442 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
443 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
444 }
445 copied += gc_threads[i]->copied;
446 max_copied = stg_max(gc_threads[i]->copied, max_copied);
447 }
448 if (n_gc_threads == 1) {
449 max_copied = 0;
450 avg_copied = 0;
451 } else {
452 avg_copied = copied;
453 }
454 }
455
456 // Run through all the generations/steps and tidy up.
457 // We're going to:
458 // - count the amount of "live" data (live_words, live_blocks)
459 // - count the amount of "copied" data in this GC (copied)
460 // - free from-space
461 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
462 //
463 live_words = 0;
464 live_blocks = 0;
465
466 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
467
468 if (g == N) {
469 generations[g].collections++; // for stats
470 if (n_gc_threads > 1) generations[g].par_collections++;
471 }
472
473 // Count the mutable list as words "copied" for the purposes of
474 // stats. Every mutable list is copied during every GC.
475 if (g > 0) {
476 nat mut_list_size = 0;
477 for (n = 0; n < n_capabilities; n++) {
478 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
479 }
480 copied += mut_list_size;
481
482 debugTrace(DEBUG_gc,
483 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
484 (unsigned long)(mut_list_size * sizeof(W_)),
485 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
486 }
487
488 bdescr *next, *prev;
489 gen = &generations[g];
490
491 // for generations we collected...
492 if (g <= N) {
493
494 /* free old memory and shift to-space into from-space for all
495 * the collected steps (except the allocation area). These
496 * freed blocks will probably be quickly recycled.
497 */
498 if (gen->mark)
499 {
500 // tack the new blocks on the end of the existing blocks
501 if (gen->old_blocks != NULL) {
502
503 prev = NULL;
504 for (bd = gen->old_blocks; bd != NULL; bd = next) {
505
506 next = bd->link;
507
508 if (!(bd->flags & BF_MARKED))
509 {
510 if (prev == NULL) {
511 gen->old_blocks = next;
512 } else {
513 prev->link = next;
514 }
515 freeGroup(bd);
516 gen->n_old_blocks--;
517 }
518 else
519 {
520 gen->n_words += bd->free - bd->start;
521
522 // NB. this step might not be compacted next
523 // time, so reset the BF_MARKED flags.
524 // They are set before GC if we're going to
525 // compact. (search for BF_MARKED above).
526 bd->flags &= ~BF_MARKED;
527
528 // between GCs, all blocks in the heap except
529 // for the nursery have the BF_EVACUATED flag set.
530 bd->flags |= BF_EVACUATED;
531
532 prev = bd;
533 }
534 }
535
536 if (prev != NULL) {
537 prev->link = gen->blocks;
538 gen->blocks = gen->old_blocks;
539 }
540 }
541 // add the new blocks to the block tally
542 gen->n_blocks += gen->n_old_blocks;
543 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
544 ASSERT(countOccupied(gen->blocks) == gen->n_words);
545 }
546 else // not compacted
547 {
548 freeChain(gen->old_blocks);
549 }
550
551 gen->old_blocks = NULL;
552 gen->n_old_blocks = 0;
553
554 /* LARGE OBJECTS. The current live large objects are chained on
555 * scavenged_large_objects, having been moved during garbage
556 * collection from large_objects. Any objects left on the
557 * large_objects list are therefore dead, so we free them here.
558 */
559 freeChain(gen->large_objects);
560 gen->large_objects = gen->scavenged_large_objects;
561 gen->n_large_blocks = gen->n_scavenged_large_blocks;
562 gen->n_new_large_words = 0;
563 }
564 else // for generations > N
565 {
566 /* For older generations, we need to append the
567 * scavenged_large_object list (i.e. large objects that have been
568 * promoted during this GC) to the large_object list for that step.
569 */
570 for (bd = gen->scavenged_large_objects; bd; bd = next) {
571 next = bd->link;
572 dbl_link_onto(bd, &gen->large_objects);
573 }
574
575 // add the new blocks we promoted during this GC
576 gen->n_large_blocks += gen->n_scavenged_large_blocks;
577 }
578
579 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
580
581 gen->scavenged_large_objects = NULL;
582 gen->n_scavenged_large_blocks = 0;
583
584 // Count "live" data
585 live_words += genLiveWords(gen);
586 live_blocks += genLiveBlocks(gen);
587
588 // add in the partial blocks in the gen_workspaces, but ignore gen 0
589 // if this is a local GC (we can't count another capability's part_list)
590 {
591 nat i;
592 for (i = 0; i < n_capabilities; i++) {
593 live_words += gcThreadLiveWords(i, gen->no);
594 live_blocks += gcThreadLiveBlocks(i, gen->no);
595 }
596 }
597 } // for all generations
598
599 // update the max size of older generations after a major GC
600 resize_generations();
601
602 // Start a new pinned_object_block
603 for (n = 0; n < n_capabilities; n++) {
604 capabilities[n].pinned_object_block = NULL;
605 }
606
607 // Free the mark stack.
608 if (mark_stack_top_bd != NULL) {
609 debugTrace(DEBUG_gc, "mark stack: %d blocks",
610 countBlocks(mark_stack_top_bd));
611 freeChain(mark_stack_top_bd);
612 }
613
614 // Free any bitmaps.
615 for (g = 0; g <= N; g++) {
616 gen = &generations[g];
617 if (gen->bitmap != NULL) {
618 freeGroup(gen->bitmap);
619 gen->bitmap = NULL;
620 }
621 }
622
623 // Reset the nursery: make the blocks empty
624 allocated += clearNurseries();
625
626 resize_nursery();
627
628 resetNurseries();
629
630 // mark the garbage collected CAFs as dead
631 #if 0 && defined(DEBUG) // doesn't work at the moment
632 if (major_gc) { gcCAFs(); }
633 #endif
634
635 #ifdef PROFILING
636 // resetStaticObjectForRetainerProfiling() must be called before
637 // zeroing below.
638 if (n_gc_threads > 1) {
639 barf("profiling is currently broken with multi-threaded GC");
640 // ToDo: fix the gct->scavenged_static_objects below
641 }
642 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
643 #endif
644
645 // zero the scavenged static object list
646 if (major_gc) {
647 nat i;
648 for (i = 0; i < n_gc_threads; i++) {
649 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
650 }
651 }
652
653 // Update the stable pointer hash table.
654 updateStablePtrTable(major_gc);
655
656 // unlock the StablePtr table. Must be before scheduleFinalizers(),
657 // because a finalizer may call hs_free_fun_ptr() or
658 // hs_free_stable_ptr(), both of which access the StablePtr table.
659 stablePtrPostGC();
660
661 // Start any pending finalizers. Must be after
662 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
663 RELEASE_SM_LOCK;
664 scheduleFinalizers(cap, old_weak_ptr_list);
665 ACQUIRE_SM_LOCK;
666
667 // check sanity after GC
668 // before resurrectThreads(), because that might overwrite some
669 // closures, which will cause problems with THREADED where we don't
670 // fill slop.
671 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
672
673 // send exceptions to any threads which were about to die
674 RELEASE_SM_LOCK;
675 resurrectThreads(resurrected_threads);
676 ACQUIRE_SM_LOCK;
677
678 if (major_gc) {
679 nat need, got;
680 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
681 got = mblocks_allocated;
682 /* If the amount of data remains constant, next major GC we'll
683 require (F+1)*need. We leave (F+2)*need in order to reduce
684 repeated deallocation and reallocation. */
685 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
686 if (got > need) {
687 returnMemoryToOS(got - need);
688 }
689 }
690
691 // extra GC trace info
692 IF_DEBUG(gc, statDescribeGens());
693
694 #ifdef DEBUG
695 // symbol-table based profiling
696 /* heapCensus(to_blocks); */ /* ToDo */
697 #endif
698
699 // restore enclosing cost centre
700 #ifdef PROFILING
701 CCCS = prev_CCS;
702 #endif
703
704 #ifdef DEBUG
705 // check for memory leaks if DEBUG is on
706 memInventory(DEBUG_gc);
707 #endif
708
709 #ifdef RTS_GTK_FRONTPANEL
710 if (RtsFlags.GcFlags.frontpanel) {
711 updateFrontPanelAfterGC( N, live_words );
712 }
713 #endif
714
715 // ok, GC over: tell the stats department what happened.
716 stat_endGC(allocated, live_words, copied, N, max_copied, avg_copied,
717 live_blocks * BLOCK_SIZE_W - live_words /* slop */);
718
719 // Guess which generation we'll collect *next* time
720 initialise_N(force_major_gc);
721
722 #if defined(RTS_USER_SIGNALS)
723 if (RtsFlags.MiscFlags.install_signal_handlers) {
724 // unblock signals again
725 unblockUserSignals();
726 }
727 #endif
728
729 RELEASE_SM_LOCK;
730
731 SET_GCT(saved_gct);
732 }
733
734 /* -----------------------------------------------------------------------------
735 Figure out which generation to collect, initialise N and major_gc.
736
737 Also returns the total number of blocks in generations that will be
738 collected.
739 -------------------------------------------------------------------------- */
740
741 static nat
742 initialise_N (rtsBool force_major_gc)
743 {
744 int g;
745 nat blocks, blocks_total;
746
747 blocks = 0;
748 blocks_total = 0;
749
750 if (force_major_gc) {
751 N = RtsFlags.GcFlags.generations - 1;
752 } else {
753 N = 0;
754 }
755
756 for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
757
758 blocks = generations[g].n_words / BLOCK_SIZE_W
759 + generations[g].n_large_blocks;
760
761 if (blocks >= generations[g].max_blocks) {
762 N = stg_max(N,g);
763 }
764 if ((nat)g <= N) {
765 blocks_total += blocks;
766 }
767 }
768
769 blocks_total += countNurseryBlocks();
770
771 major_gc = (N == RtsFlags.GcFlags.generations-1);
772 return blocks_total;
773 }
774
775 /* -----------------------------------------------------------------------------
776 Initialise the gc_thread structures.
777 -------------------------------------------------------------------------- */
778
779 #define GC_THREAD_INACTIVE 0
780 #define GC_THREAD_STANDING_BY 1
781 #define GC_THREAD_RUNNING 2
782 #define GC_THREAD_WAITING_TO_CONTINUE 3
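/* Summary of the worker/main-thread handshake implemented by
 * gcWorkerThread(), waitForGcThreads(), wakeup_gc_threads(),
 * shutdown_gc_threads() and releaseGCThreads() below. Each worker owns
 * two spin locks, gc_spin and mut_spin:
 *
 *   INACTIVE            initial state; the creating thread holds gc_spin
 *   STANDING_BY         the worker has released mut_spin and is blocked
 *                       acquiring gc_spin, waiting to be woken up
 *   RUNNING             wakeup_gc_threads() takes mut_spin and releases
 *                       gc_spin, letting the worker scavenge
 *   WAITING_TO_CONTINUE the worker has released gc_spin and is blocked
 *                       acquiring mut_spin; releaseGCThreads() takes
 *                       gc_spin back, releases mut_spin and resets the
 *                       state to INACTIVE ready for the next GC
 */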
783
784 static void
785 new_gc_thread (nat n, gc_thread *t)
786 {
787 nat g;
788 gen_workspace *ws;
789
790 #ifdef THREADED_RTS
791 t->id = 0;
792 initSpinLock(&t->gc_spin);
793 initSpinLock(&t->mut_spin);
794 ACQUIRE_SPIN_LOCK(&t->gc_spin);
795 t->wakeup = GC_THREAD_INACTIVE; // starts INACTIVE, so we can wait for
796 // the thread to start up; see wakeup_gc_threads
797 #endif
798
799 t->thread_index = n;
800 t->free_blocks = NULL;
801 t->gc_count = 0;
802
803 init_gc_thread(t);
804
805 #ifdef USE_PAPI
806 t->papi_events = -1;
807 #endif
808
809 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
810 {
811 ws = &t->gens[g];
812 ws->gen = &generations[g];
813 ASSERT(g == ws->gen->no);
814 ws->my_gct = t;
815
816 // We want to call
817 // alloc_todo_block(ws,0);
818 // but can't, because it uses gct which isn't set up at this point.
819 // Hence, allocate a block for todo_bd manually:
820 {
821 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
822 initBdescr(bd, ws->gen, ws->gen->to);
823 bd->flags = BF_EVACUATED;
824 bd->u.scan = bd->free = bd->start;
825
826 ws->todo_bd = bd;
827 ws->todo_free = bd->free;
828 ws->todo_lim = bd->start + BLOCK_SIZE_W;
829 }
830
831 ws->todo_q = newWSDeque(128);
832 ws->todo_overflow = NULL;
833 ws->n_todo_overflow = 0;
834
835 ws->part_list = NULL;
836 ws->n_part_blocks = 0;
837
838 ws->scavd_list = NULL;
839 ws->n_scavd_blocks = 0;
840 }
841 }
842
843
844 void
845 initGcThreads (void)
846 {
847 if (gc_threads == NULL) {
848 #if defined(THREADED_RTS)
849 nat i;
850 gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
851 sizeof(gc_thread*),
852 "alloc_gc_threads");
853
854 for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
855 gc_threads[i] =
856 stgMallocBytes(sizeof(gc_thread) +
857 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
858 "alloc_gc_threads");
859
860 new_gc_thread(i, gc_threads[i]);
861 }
862 #else
863 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
864 gc_threads[0] = gct;
865 new_gc_thread(0,gc_threads[0]);
866 #endif
867 }
868 }
869
870 void
871 freeGcThreads (void)
872 {
873 nat g;
874 if (gc_threads != NULL) {
875 #if defined(THREADED_RTS)
876 nat i;
877 for (i = 0; i < n_capabilities; i++) {
878 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
879 {
880 freeWSDeque(gc_threads[i]->gens[g].todo_q);
881 }
882 stgFree (gc_threads[i]);
883 }
884 stgFree (gc_threads);
885 #else
886 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
887 {
888 freeWSDeque(gc_threads[0]->gens[g].todo_q);
889 }
890 stgFree (gc_threads);
891 #endif
892 gc_threads = NULL;
893 }
894 }
895
896 /* ----------------------------------------------------------------------------
897 Start GC threads
898 ------------------------------------------------------------------------- */
899
900 static volatile StgWord gc_running_threads;
901
902 static StgWord
903 inc_running (void)
904 {
905 StgWord new;
906 new = atomic_inc(&gc_running_threads);
907 ASSERT(new <= n_gc_threads);
908 return new;
909 }
910
911 static StgWord
912 dec_running (void)
913 {
914 ASSERT(gc_running_threads != 0);
915 return atomic_dec(&gc_running_threads);
916 }
917
918 static rtsBool
919 any_work (void)
920 {
921 int g;
922 gen_workspace *ws;
923
924 gct->any_work++;
925
926 write_barrier();
927
928 // scavenge objects in compacted generation
929 if (mark_stack_bd != NULL && !mark_stack_empty()) {
930 return rtsTrue;
931 }
932
933 // Check for global work in any step. We don't need to check for
934 // local work, because we have already exited scavenge_loop(),
935 // which means there is no local work for this thread.
936 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
937 ws = &gct->gens[g];
938 if (ws->todo_large_objects) return rtsTrue;
939 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
940 if (ws->todo_overflow) return rtsTrue;
941 }
942
943 #if defined(THREADED_RTS)
944 if (work_stealing) {
945 nat n;
946 // look for work to steal
947 for (n = 0; n < n_gc_threads; n++) {
948 if (n == gct->thread_index) continue;
949 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
950 ws = &gc_threads[n]->gens[g];
951 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
952 }
953 }
954 }
955 #endif
956
957 gct->no_work++;
958 #if defined(THREADED_RTS)
959 yieldThread();
960 #endif
961
962 return rtsFalse;
963 }
964
965 static void
966 scavenge_until_all_done (void)
967 {
968 nat r;
969
970
971 loop:
972 traceEventGcWork(&capabilities[gct->thread_index]);
973
974 #if defined(THREADED_RTS)
975 if (n_gc_threads > 1) {
976 scavenge_loop();
977 } else {
978 scavenge_loop1();
979 }
980 #else
981 scavenge_loop();
982 #endif
983
984 collect_gct_blocks();
985
986 // scavenge_loop() only exits when there's no work to do
987 r = dec_running();
988
989 traceEventGcIdle(&capabilities[gct->thread_index]);
990
991 debugTrace(DEBUG_gc, "%d GC threads still running", r);
992
993 while (gc_running_threads != 0) {
994 // usleep(1);
995 if (any_work()) {
996 inc_running();
997 goto loop;
998 }
999 // any_work() does not remove the work from the queue, it
1000 // just checks for the presence of work. If we find any,
1001 // then we increment gc_running_threads and go back to
1002 // scavenge_loop() to perform any pending work.
1003 }
1004
1005 traceEventGcDone(&capabilities[gct->thread_index]);
1006 }
1007
1008 #if defined(THREADED_RTS)
1009
1010 void
1011 gcWorkerThread (Capability *cap)
1012 {
1013 gc_thread *saved_gct;
1014
1015 // necessary if we stole a callee-saves register for gct:
1016 saved_gct = gct;
1017
1018 gct = gc_threads[cap->no];
1019 gct->id = osThreadId();
1020
1021 // Wait until we're told to wake up
1022 RELEASE_SPIN_LOCK(&gct->mut_spin);
1023 gct->wakeup = GC_THREAD_STANDING_BY;
1024 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1025 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1026
1027 #ifdef USE_PAPI
1028 // start performance counters in this thread...
1029 if (gct->papi_events == -1) {
1030 papi_init_eventset(&gct->papi_events);
1031 }
1032 papi_thread_start_gc1_count(gct->papi_events);
1033 #endif
1034
1035 // Every thread evacuates some roots.
1036 gct->evac_gen_no = 0;
1037 markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1038 rtsTrue/*prune sparks*/);
1039 scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1040
1041 scavenge_until_all_done();
1042
1043 #ifdef THREADED_RTS
1044 // Now that the whole heap is marked, we discard any sparks that
1045 // were found to be unreachable. The main GC thread is currently
1046 // marking heap reachable via weak pointers, so it is
1047 // non-deterministic whether a spark will be retained if it is
1048 // only reachable via weak pointers. To fix this problem would
1049 // require another GC barrier, which is too high a price.
1050 pruneSparkQueue(cap);
1051 #endif
1052
1053 #ifdef USE_PAPI
1054 // count events in this thread towards the GC totals
1055 papi_thread_stop_gc1_count(gct->papi_events);
1056 #endif
1057
1058 // Wait until we're told to continue
1059 RELEASE_SPIN_LOCK(&gct->gc_spin);
1060 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1061 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1062 gct->thread_index);
1063 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1064 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1065
1066 SET_GCT(saved_gct);
1067 }
1068
1069 #endif
1070
1071 #if defined(THREADED_RTS)
1072
1073 void
1074 waitForGcThreads (Capability *cap USED_IF_THREADS)
1075 {
1076 const nat n_threads = RtsFlags.ParFlags.nNodes;
1077 const nat me = cap->no;
1078 nat i, j;
1079 rtsBool retry = rtsTrue;
1080
1081 while(retry) {
1082 for (i=0; i < n_threads; i++) {
1083 if (i == me) continue;
1084 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1085 prodCapability(&capabilities[i], cap->running_task);
1086 }
1087 }
1088 for (j=0; j < 10; j++) {
1089 retry = rtsFalse;
1090 for (i=0; i < n_threads; i++) {
1091 if (i == me) continue;
1092 write_barrier();
1093 setContextSwitches();
1094 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1095 retry = rtsTrue;
1096 }
1097 }
1098 if (!retry) break;
1099 yieldThread();
1100 }
1101 }
1102 }
1103
1104 #endif // THREADED_RTS
1105
1106 static void
1107 start_gc_threads (void)
1108 {
1109 #if defined(THREADED_RTS)
1110 gc_running_threads = 0;
1111 #endif
1112 }
1113
1114 static void
1115 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1116 {
1117 #if defined(THREADED_RTS)
1118 nat i;
1119 for (i=0; i < n_threads; i++) {
1120 if (i == me) continue;
1121 inc_running();
1122 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1123 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1124
1125 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1126 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1127 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1128 }
1129 #endif
1130 }
1131
1132 // After GC is complete, we must wait for all GC threads to enter the
1133 // standby state, otherwise they may still be executing inside
1134 // any_work(), and may even remain awake until the next GC starts.
1135 static void
1136 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1137 {
1138 #if defined(THREADED_RTS)
1139 nat i;
1140 for (i=0; i < n_threads; i++) {
1141 if (i == me) continue;
1142 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1143 }
1144 #endif
1145 }
1146
1147 #if defined(THREADED_RTS)
1148 void
1149 releaseGCThreads (Capability *cap USED_IF_THREADS)
1150 {
1151 const nat n_threads = RtsFlags.ParFlags.nNodes;
1152 const nat me = cap->no;
1153 nat i;
1154 for (i=0; i < n_threads; i++) {
1155 if (i == me) continue;
1156 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1157 barf("releaseGCThreads");
1158
1159 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1160 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1161 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1162 }
1163 }
1164 #endif
1165
1166 /* ----------------------------------------------------------------------------
1167 Initialise a generation that is to be collected
1168 ------------------------------------------------------------------------- */
1169
1170 static void
1171 prepare_collected_gen (generation *gen)
1172 {
1173 nat i, g, n;
1174 gen_workspace *ws;
1175 bdescr *bd, *next;
1176
1177 // Throw away the current mutable list. Invariant: the mutable
1178 // list always has at least one block; this means we can avoid a
1179 // check for NULL in recordMutable().
1180 g = gen->no;
1181 if (g != 0) {
1182 for (i = 0; i < n_capabilities; i++) {
1183 freeChain(capabilities[i].mut_lists[g]);
1184 capabilities[i].mut_lists[g] = allocBlock();
1185 }
1186 }
1187
1188 gen = &generations[g];
1189 ASSERT(gen->no == g);
1190
1191 // we'll construct a new list of threads in this step
1192 // during GC, throw away the current list.
1193 gen->old_threads = gen->threads;
1194 gen->threads = END_TSO_QUEUE;
1195
1196 // deprecate the existing blocks
1197 gen->old_blocks = gen->blocks;
1198 gen->n_old_blocks = gen->n_blocks;
1199 gen->blocks = NULL;
1200 gen->n_blocks = 0;
1201 gen->n_words = 0;
1202 gen->live_estimate = 0;
1203
1204 // initialise the large object queues.
1205 ASSERT(gen->scavenged_large_objects == NULL);
1206 ASSERT(gen->n_scavenged_large_blocks == 0);
1207
1208 // grab all the partial blocks stashed in the gc_thread workspaces and
1209 // move them to the old_blocks list of this gen.
1210 for (n = 0; n < n_capabilities; n++) {
1211 ws = &gc_threads[n]->gens[gen->no];
1212
1213 for (bd = ws->part_list; bd != NULL; bd = next) {
1214 next = bd->link;
1215 bd->link = gen->old_blocks;
1216 gen->old_blocks = bd;
1217 gen->n_old_blocks += bd->blocks;
1218 }
1219 ws->part_list = NULL;
1220 ws->n_part_blocks = 0;
1221
1222 ASSERT(ws->scavd_list == NULL);
1223 ASSERT(ws->n_scavd_blocks == 0);
1224
1225 if (ws->todo_free != ws->todo_bd->start) {
1226 ws->todo_bd->free = ws->todo_free;
1227 ws->todo_bd->link = gen->old_blocks;
1228 gen->old_blocks = ws->todo_bd;
1229 gen->n_old_blocks += ws->todo_bd->blocks;
1230 alloc_todo_block(ws,0); // always has one block.
1231 }
1232 }
1233
1234 // mark the small objects as from-space
1235 for (bd = gen->old_blocks; bd; bd = bd->link) {
1236 bd->flags &= ~BF_EVACUATED;
1237 }
1238
1239 // mark the large objects as from-space
1240 for (bd = gen->large_objects; bd; bd = bd->link) {
1241 bd->flags &= ~BF_EVACUATED;
1242 }
1243
1244 // for a compacted generation, we need to allocate the bitmap
1245 if (gen->mark) {
1246 nat bitmap_size; // in bytes
1247 bdescr *bitmap_bdescr;
1248 StgWord *bitmap;
1249
1250 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1251
1252 if (bitmap_size > 0) {
1253 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1254 / BLOCK_SIZE);
1255 gen->bitmap = bitmap_bdescr;
1256 bitmap = bitmap_bdescr->start;
1257
1258 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1259 bitmap_size, bitmap);
1260
1261 // don't forget to fill it with zeros!
1262 memset(bitmap, 0, bitmap_size);
1263
1264 // For each block in this step, point to its bitmap from the
1265 // block descriptor.
1266 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1267 bd->u.bitmap = bitmap;
1268 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1269
1270 // Also at this point we set the BF_MARKED flag
1271 // for this block. The invariant is that
1272 // BF_MARKED is always unset, except during GC
1273 // when it is set on those blocks which will be
1274 // compacted.
1275 if (!(bd->flags & BF_FRAGMENTED)) {
1276 bd->flags |= BF_MARKED;
1277 }
1278
1279 // BF_SWEPT should be marked only for blocks that are being
1280 // collected in sweep()
1281 bd->flags &= ~BF_SWEPT;
1282 }
1283 }
1284 }
1285 }
1286
1287
1288 /* ----------------------------------------------------------------------------
1289 Save the mutable lists in saved_mut_lists
1290 ------------------------------------------------------------------------- */
1291
1292 static void
1293 stash_mut_list (Capability *cap, nat gen_no)
1294 {
1295 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1296 cap->mut_lists[gen_no] = allocBlock_sync();
1297 }
1298
1299 /* ----------------------------------------------------------------------------
1300 Initialise a generation that is *not* to be collected
1301 ------------------------------------------------------------------------- */
1302
1303 static void
1304 prepare_uncollected_gen (generation *gen)
1305 {
1306 nat i;
1307
1308
1309 ASSERT(gen->no > 0);
1310
1311 // save the current mutable lists for this generation, and
1312 // allocate a fresh block for each one. We'll traverse these
1313 // mutable lists as roots early on in the GC.
1314 for (i = 0; i < n_capabilities; i++) {
1315 stash_mut_list(&capabilities[i], gen->no);
1316 }
1317
1318 ASSERT(gen->scavenged_large_objects == NULL);
1319 ASSERT(gen->n_scavenged_large_blocks == 0);
1320 }
1321
1322 /* -----------------------------------------------------------------------------
1323 Collect the completed blocks from a GC thread and attach them to
1324 the generation.
1325 -------------------------------------------------------------------------- */
1326
1327 static void
1328 collect_gct_blocks (void)
1329 {
1330 nat g;
1331 gen_workspace *ws;
1332 bdescr *bd, *prev;
1333
1334 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1335 ws = &gct->gens[g];
1336
1337 // there may still be a block attached to ws->todo_bd;
1338 // leave it there to use next time.
1339
1340 if (ws->scavd_list != NULL) {
1341 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1342
1343 ASSERT(gct->scan_bd == NULL);
1344 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1345
1346 prev = NULL;
1347 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1348 ws->gen->n_words += bd->free - bd->start;
1349 prev = bd;
1350 }
1351 if (prev != NULL) {
1352 prev->link = ws->gen->blocks;
1353 ws->gen->blocks = ws->scavd_list;
1354 }
1355 ws->gen->n_blocks += ws->n_scavd_blocks;
1356
1357 ws->scavd_list = NULL;
1358 ws->n_scavd_blocks = 0;
1359
1360 RELEASE_SPIN_LOCK(&ws->gen->sync);
1361 }
1362 }
1363 }
1364
1365 /* -----------------------------------------------------------------------------
1366 Initialise a gc_thread before GC
1367 -------------------------------------------------------------------------- */
1368
1369 static void
1370 init_gc_thread (gc_thread *t)
1371 {
1372 t->static_objects = END_OF_STATIC_LIST;
1373 t->scavenged_static_objects = END_OF_STATIC_LIST;
1374 t->scan_bd = NULL;
1375 t->mut_lists = capabilities[t->thread_index].mut_lists;
1376 t->evac_gen_no = 0;
1377 t->failed_to_evac = rtsFalse;
1378 t->eager_promotion = rtsTrue;
1379 t->thunk_selector_depth = 0;
1380 t->copied = 0;
1381 t->scanned = 0;
1382 t->any_work = 0;
1383 t->no_work = 0;
1384 t->scav_find_work = 0;
1385 }
1386
1387 /* -----------------------------------------------------------------------------
1388 Function we pass to evacuate roots.
1389 -------------------------------------------------------------------------- */
1390
1391 static void
1392 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1393 {
1394 // we stole a register for gct, but this function is called from
1395 // *outside* the GC where the register variable is not in effect,
1396 // so we need to save and restore it here. NB. only call
1397 // mark_root() from the main GC thread, otherwise gct will be
1398 // incorrect.
1399 gc_thread *saved_gct;
1400 saved_gct = gct;
1401 SET_GCT(user);
1402
1403 evacuate(root);
1404
1405 SET_GCT(saved_gct);
1406 }
1407
1408 /* -----------------------------------------------------------------------------
1409 Initialising the static object & mutable lists
1410 -------------------------------------------------------------------------- */
1411
1412 static void
1413 zero_static_object_list(StgClosure* first_static)
1414 {
1415 StgClosure* p;
1416 StgClosure* link;
1417 const StgInfoTable *info;
1418
1419 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1420 info = get_itbl(p);
1421 link = *STATIC_LINK(info, p);
1422 *STATIC_LINK(info,p) = NULL;
1423 }
1424 }
1425
1426 /* ----------------------------------------------------------------------------
1427 Reset the sizes of the older generations when we do a major
1428 collection.
1429
1430 CURRENT STRATEGY: make all generations except zero the same size.
1431 We have to stay within the maximum heap size, and leave a certain
1432 percentage of the maximum heap size available to allocate into.
1433 ------------------------------------------------------------------------- */
1434
1435 static void
1436 resize_generations (void)
1437 {
1438 nat g;
1439
1440 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1441 nat live, size, min_alloc, words;
1442 const nat max = RtsFlags.GcFlags.maxHeapSize;
1443 const nat gens = RtsFlags.GcFlags.generations;
1444
1445 // live in the oldest generations
1446 if (oldest_gen->live_estimate != 0) {
1447 words = oldest_gen->live_estimate;
1448 } else {
1449 words = oldest_gen->n_words;
1450 }
1451 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1452 oldest_gen->n_large_blocks;
1453
1454 // default max size for all generations except zero
1455 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1456 RtsFlags.GcFlags.minOldGenSize);
1457
1458 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1459 RtsFlags.GcFlags.heapSizeSuggestion = size;
1460 }
1461
1462 // minimum size for generation zero
1463 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1464 RtsFlags.GcFlags.minAllocAreaSize);
1465
1466 // Auto-enable compaction when the residency reaches a
1467 // certain percentage of the maximum heap size (default: 30%).
1468 if (RtsFlags.GcFlags.compact ||
1469 (max > 0 &&
1470 oldest_gen->n_blocks >
1471 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1472 oldest_gen->mark = 1;
1473 oldest_gen->compact = 1;
1474 // debugBelch("compaction: on\n", live);
1475 } else {
1476 oldest_gen->mark = 0;
1477 oldest_gen->compact = 0;
1478 // debugBelch("compaction: off\n", live);
1479 }
1480
1481 if (RtsFlags.GcFlags.sweep) {
1482 oldest_gen->mark = 1;
1483 }
1484
1485 // if we're going to go over the maximum heap size, reduce the
1486 // size of the generations accordingly. The calculation is
1487 // different if compaction is turned on, because we don't need
1488 // to double the space required to collect the old generation.
1489 if (max != 0) {
1490
1491 // this test is necessary to ensure that the calculations
1492 // below don't have any negative results - we're working
1493 // with unsigned values here.
1494 if (max < min_alloc) {
1495 heapOverflow();
1496 }
1497
1498 if (oldest_gen->compact) {
1499 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1500 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1501 }
1502 } else {
1503 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1504 size = (max - min_alloc) / ((gens - 1) * 2);
1505 }
1506 }
1507
1508 if (size < live) {
1509 heapOverflow();
1510 }
1511 }
1512
1513 #if 0
1514 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1515 min_alloc, size, max);
1516 #endif
1517
1518 for (g = 0; g < gens; g++) {
1519 generations[g].max_blocks = size;
1520 }
1521 }
1522 }
1523
1524 /* -----------------------------------------------------------------------------
1525 Calculate the new size of the nursery, and resize it.
1526 -------------------------------------------------------------------------- */
1527
1528 static void
1529 resize_nursery (void)
1530 {
1531 const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1532
1533 if (RtsFlags.GcFlags.generations == 1)
1534 { // Two-space collector:
1535 nat blocks;
1536
1537 /* set up a new nursery. Allocate a nursery size based on a
1538 * function of the amount of live data (by default a factor of 2)
1539 * Use the blocks from the old nursery if possible, freeing up any
1540 * left over blocks.
1541 *
1542 * If we get near the maximum heap size, then adjust our nursery
1543 * size accordingly. If the nursery is the same size as the live
1544 * data (L), then we need 3L bytes. We can reduce the size of the
1545 * nursery to bring the required memory down near 2L bytes.
1546 *
1547 * A normal 2-space collector would need 4L bytes to give the same
1548 * performance we get from 3L bytes, reducing to the same
1549 * performance at 2L bytes.
1550 */
1551 blocks = generations[0].n_blocks;
1552
1553 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1554 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1555 RtsFlags.GcFlags.maxHeapSize )
1556 {
1557 long adjusted_blocks; // signed on purpose
1558 int pc_free;
1559
1560 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1561
1562 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1563 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1564
1565 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1566 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1567 {
1568 heapOverflow();
1569 }
1570 blocks = adjusted_blocks;
1571 }
1572 else
1573 {
1574 blocks *= RtsFlags.GcFlags.oldGenFactor;
1575 if (blocks < min_nursery)
1576 {
1577 blocks = min_nursery;
1578 }
1579 }
1580 resizeNurseries(blocks);
1581 }
1582 else // Generational collector
1583 {
1584 /*
1585 * If the user has given us a suggested heap size, adjust our
1586 * allocation area to make best use of the memory available.
1587 */
1588 if (RtsFlags.GcFlags.heapSizeSuggestion)
1589 {
1590 long blocks;
1591 const nat needed = calcNeeded(); // approx blocks needed at next GC
1592
1593 /* Guess how much will be live in generation 0 step 0 next time.
1594 * A good approximation is obtained by finding the
1595 * percentage of g0 that was live at the last minor GC.
1596 *
1597 * We have an accurate figure for the amount of copied data in
1598 * 'copied', but we must convert this to a number of blocks, with
1599 * a small adjustment for estimated slop at the end of a block
1600 * (- 10 words).
1601 */
1602 if (N == 0)
1603 {
1604 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1605 / countNurseryBlocks();
1606 }
1607
1608 /* Estimate a size for the allocation area based on the
1609 * information available. We might end up going slightly under
1610 * or over the suggested heap size, but we should be pretty
1611 * close on average.
1612 *
1613 * Formula: suggested - needed
1614 * ----------------------------
1615 * 1 + g0_pcnt_kept/100
1616 *
1617 * where 'needed' is the amount of memory needed at the next
1618 * collection for collecting all gens except g0.
1619 */
1620 blocks =
1621 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1622 (100 + (long)g0_pcnt_kept);
1623
1624 if (blocks < (long)min_nursery) {
1625 blocks = min_nursery;
1626 }
1627
1628 resizeNurseries((nat)blocks);
1629 }
1630 else
1631 {
1632 // we might have added extra large blocks to the nursery, so
1633 // resize back to minAllocAreaSize again.
1634 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1635 }
1636 }
1637 }
1638
1639 /* -----------------------------------------------------------------------------
1640 Sanity code for CAF garbage collection.
1641
1642 With DEBUG turned on, we manage a CAF list in addition to the SRT
1643 mechanism. After GC, we run down the CAF list and blackhole any
1644 CAFs which have been garbage collected. This means we get an error
1645 whenever the program tries to enter a garbage collected CAF.
1646
1647 Any garbage collected CAFs are taken off the CAF list at the same
1648 time.
1649 -------------------------------------------------------------------------- */
1650
1651 #if 0 && defined(DEBUG)
1652
1653 static void
1654 gcCAFs(void)
1655 {
1656 StgClosure* p;
1657 StgClosure** pp;
1658 const StgInfoTable *info;
1659 nat i;
1660
1661 i = 0;
1662 p = caf_list;
1663 pp = &caf_list;
1664
1665 while (p != NULL) {
1666
1667 info = get_itbl(p);
1668
1669 ASSERT(info->type == IND_STATIC);
1670
1671 if (STATIC_LINK(info,p) == NULL) {
1672 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1673 // black hole it
1674 SET_INFO(p,&stg_BLACKHOLE_info);
1675 p = STATIC_LINK2(info,p);
1676 *pp = p;
1677 }
1678 else {
1679 pp = &STATIC_LINK2(info,p);
1680 p = *pp;
1681 i++;
1682 }
1683
1684 }
1685
1686 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1687 }
1688 #endif