[ghc.git] rts/sm/GC.c (at commit: "fix an integer overflow (#5086), and pre-emptively avoid more of these")
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "GCTDecl.h"
44 #include "Compact.h"
45 #include "Evac.h"
46 #include "Scav.h"
47 #include "GCUtils.h"
48 #include "MarkStack.h"
49 #include "MarkWeak.h"
50 #include "Sparks.h"
51 #include "Sweep.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
93 */
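
/* Illustrative sketch (not part of the collector): how an object is consed
 * onto the static list via its static link field, following the scheme
 * described above.  The helper name is hypothetical; the real handling is
 * in evacuate() in Evac.c and also consults the closure's info table,
 * which is omitted here.
 */
#if 0
static void
link_static_object_sketch (gc_thread *t, StgClosure *p)
{
    const StgInfoTable *info = get_itbl(p);

    // A NULL static link means "not yet on any list"; anything non-zero
    // (including the end-of-list marker '1') means the object is already
    // linked, which is how cycles are broken.
    if (*STATIC_LINK(info, p) == NULL) {
        *STATIC_LINK(info, p) = t->static_objects;  // cons onto the front
        t->static_objects = p;
    }
}
#endif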
94
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
100 nat N;
101 rtsBool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #ifdef DEBUG
109 nat mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_OTHERS;
113 #endif
114
115 /* Thread-local data for each GC thread
116 */
117 gc_thread **gc_threads = NULL;
118
119 #if !defined(THREADED_RTS)
120 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
121 #endif
122
123 // Number of threads running in *this* GC. Affects how many
124 // gen_workspace todo lists we have to look in to find work.
125 nat n_gc_threads;
126
127 // For stats:
128 long copied; // *words* copied & scavenged during this GC
129
130 rtsBool work_stealing;
131
132 DECLARE_GCT
133
134 /* -----------------------------------------------------------------------------
135 Static function declarations
136 -------------------------------------------------------------------------- */
137
138 static void mark_root (void *user, StgClosure **root);
139 static void zero_static_object_list (StgClosure* first_static);
140 static nat initialise_N (rtsBool force_major_gc);
141 static void prepare_collected_gen (generation *gen);
142 static void prepare_uncollected_gen (generation *gen);
143 static void init_gc_thread (gc_thread *t);
144 static void resize_generations (void);
145 static void resize_nursery (void);
146 static void start_gc_threads (void);
147 static void scavenge_until_all_done (void);
148 static StgWord inc_running (void);
149 static StgWord dec_running (void);
150 static void wakeup_gc_threads (nat me);
151 static void shutdown_gc_threads (nat me);
152 static void collect_gct_blocks (void);
153
154 #if 0 && defined(DEBUG)
155 static void gcCAFs (void);
156 #endif
157
158 /* -----------------------------------------------------------------------------
159 The mark stack.
160 -------------------------------------------------------------------------- */
161
162 bdescr *mark_stack_top_bd; // topmost block in the mark stack
163 bdescr *mark_stack_bd; // current block in the mark stack
164 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
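
/* Illustrative sketch (not part of the collector): pushing onto the
 * block-structured mark stack.  The real push_mark_stack()/pop_mark_stack()
 * live in MarkStack.h and also recycle previously-allocated blocks; this
 * hypothetical version ignores that and any locking.
 */
#if 0
static void
push_mark_stack_sketch (StgPtr p)
{
    *mark_sp++ = (StgWord)p;

    if (mark_sp == mark_stack_bd->start + BLOCK_SIZE_W) {
        // the current block is full: chain a fresh block on top and
        // continue pushing into that one
        bdescr *bd = allocBlock();
        bd->link = mark_stack_bd;
        bd->u.back = NULL;
        mark_stack_bd->u.back = bd;
        mark_stack_top_bd = bd;
        mark_stack_bd = bd;
        mark_sp = bd->start;
    }
}
#endif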
165
166 /* -----------------------------------------------------------------------------
167 GarbageCollect: the main entry point to the garbage collector.
168
169 Locks held: all capabilities are held throughout GarbageCollect().
170 -------------------------------------------------------------------------- */
171
172 void
173 GarbageCollect (rtsBool force_major_gc,
174 nat gc_type USED_IF_THREADS,
175 Capability *cap)
176 {
177 bdescr *bd;
178 generation *gen;
179 lnat live_blocks, live_words, allocated, max_copied, avg_copied;
180 gc_thread *saved_gct;
181 nat g, n;
182
183 // necessary if we stole a callee-saves register for gct:
184 saved_gct = gct;
185
186 #ifdef PROFILING
187 CostCentreStack *prev_CCS;
188 #endif
189
190 ACQUIRE_SM_LOCK;
191
192 #if defined(RTS_USER_SIGNALS)
193 if (RtsFlags.MiscFlags.install_signal_handlers) {
194 // block signals
195 blockUserSignals();
196 }
197 #endif
198
199 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
200 // otherwise adjust the padding in gen_workspace.
201
202 // this is the main thread
203 SET_GCT(gc_threads[cap->no]);
204
205 // tell the stats department that we've started a GC
206 stat_startGC(gct);
207
208 // lock the StablePtr table
209 stablePtrPreGC();
210
211 #ifdef DEBUG
212 mutlist_MUTVARS = 0;
213 mutlist_MUTARRS = 0;
    mutlist_MVARS = 0;
214 mutlist_OTHERS = 0;
215 #endif
216
217 // attribute any costs to CCS_GC
218 #ifdef PROFILING
219 prev_CCS = CCCS;
220 CCCS = CCS_GC;
221 #endif
222
223 /* Approximate how much we allocated.
224 * Todo: only when generating stats?
225 */
226 allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
227
228 /* Figure out which generation to collect
229 */
230 n = initialise_N(force_major_gc);
231
232 #if defined(THREADED_RTS)
233 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
234 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
235 // It's not always a good idea to do load balancing in parallel
236 // GC. In particular, for a parallel program we don't want to
237 // lose locality by moving cached data into another CPU's cache
238 // (this effect can be quite significant).
239 //
240 // We could have a more complex way to determine whether to do
241 // work stealing or not, e.g. it might be a good idea to do it
242 // if the heap is big. For now, we just turn it on or off with
243 // a flag.
244 #endif
245
246 /* Start threads, so they can be spinning up while we finish initialisation.
247 */
248 start_gc_threads();
249
250 #if defined(THREADED_RTS)
251 /* How many threads will be participating in this GC?
252 * We don't try to parallelise minor GCs (unless the user asks for
253 * it with +RTS -gn0), or mark/compact/sweep GC.
254 */
255 if (gc_type == PENDING_GC_PAR) {
256 n_gc_threads = RtsFlags.ParFlags.nNodes;
257 } else {
258 n_gc_threads = 1;
259 }
260 #else
261 n_gc_threads = 1;
262 #endif
263
264 debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
265 N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
266
267 #ifdef RTS_GTK_FRONTPANEL
268 if (RtsFlags.GcFlags.frontpanel) {
269 updateFrontPanelBeforeGC(N);
270 }
271 #endif
272
273 #ifdef DEBUG
274 // check for memory leaks if DEBUG is on
275 memInventory(DEBUG_gc);
276 #endif
277
278 // check sanity *before* GC
279 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
280
281 // Initialise all the generations/steps that we're collecting.
282 for (g = 0; g <= N; g++) {
283 prepare_collected_gen(&generations[g]);
284 }
285 // Initialise all the generations/steps that we're *not* collecting.
286 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
287 prepare_uncollected_gen(&generations[g]);
288 }
289
290 // Prepare this gc_thread
291 init_gc_thread(gct);
292
293 /* Allocate a mark stack if we're doing a major collection.
294 */
295 if (major_gc && oldest_gen->mark) {
296 mark_stack_bd = allocBlock();
297 mark_stack_top_bd = mark_stack_bd;
298 mark_stack_bd->link = NULL;
299 mark_stack_bd->u.back = NULL;
300 mark_sp = mark_stack_bd->start;
301 } else {
302 mark_stack_bd = NULL;
303 mark_stack_top_bd = NULL;
304 mark_sp = NULL;
305 }
306
307 /* -----------------------------------------------------------------------
308 * follow all the roots that we know about:
309 */
310
311 // the main thread is running: this prevents any other threads from
312 // exiting prematurely, so we can start them now.
313 // NB. do this after the mutable lists have been saved above, otherwise
314 // the other GC threads will be writing into the old mutable lists.
315 inc_running();
316 wakeup_gc_threads(gct->thread_index);
317
318 traceEventGcWork(gct->cap);
319
320 // scavenge the capability-private mutable lists. This isn't part
321 // of markCapability() because markCapability() can only
322 // call back into the GC via mark_root() (due to the gct register
323 // variable).
324 if (n_gc_threads == 1) {
325 for (n = 0; n < n_capabilities; n++) {
326 #if defined(THREADED_RTS)
327 scavenge_capability_mut_Lists1(&capabilities[n]);
328 #else
329 scavenge_capability_mut_lists(&capabilities[n]);
330 #endif
331 }
332 } else {
333 scavenge_capability_mut_lists(gct->cap);
334 }
335
336 // follow roots from the CAF list (used by GHCi)
337 gct->evac_gen_no = 0;
338 markCAFs(mark_root, gct);
339
340 // follow all the roots that the application knows about.
341 gct->evac_gen_no = 0;
342 if (n_gc_threads == 1) {
343 for (n = 0; n < n_capabilities; n++) {
344 markCapability(mark_root, gct, &capabilities[n],
345 rtsTrue/*don't mark sparks*/);
346 }
347 } else {
348 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
349 }
350
351 markScheduler(mark_root, gct);
352
353 #if defined(RTS_USER_SIGNALS)
354 // mark the signal handlers (signals should be already blocked)
355 markSignalHandlers(mark_root, gct);
356 #endif
357
358 // Mark the weak pointer list, and prepare to detect dead weak pointers.
359 markWeakPtrList();
360 initWeakForGC();
361
362 // Mark the stable pointer table.
363 markStablePtrTable(mark_root, gct);
364
365 /* -------------------------------------------------------------------------
366 * Repeatedly scavenge all the areas we know about until there's no
367 * more scavenging to be done.
368 */
369 for (;;)
370 {
371 scavenge_until_all_done();
372 // The other threads are now stopped. We might recurse back to
373 // here, but from now on this is the only thread.
374
375 // must be last... invariant is that everything is fully
376 // scavenged at this point.
377 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
378 inc_running();
379 continue;
380 }
381
382 // If we get to here, there's really nothing left to do.
383 break;
384 }
385
386 shutdown_gc_threads(gct->thread_index);
387
388 // Now see which stable names are still alive.
389 gcStablePtrTable();
390
391 #ifdef THREADED_RTS
392 if (n_gc_threads == 1) {
393 for (n = 0; n < n_capabilities; n++) {
394 pruneSparkQueue(&capabilities[n]);
395 }
396 } else {
397 pruneSparkQueue(gct->cap);
398 }
399 #endif
400
401 #ifdef PROFILING
402 // We call processHeapClosureForDead() on every closure destroyed during
403 // the current garbage collection, so we invoke LdvCensusForDead().
404 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
405 || RtsFlags.ProfFlags.bioSelector != NULL)
406 LdvCensusForDead(N);
407 #endif
408
409 // NO MORE EVACUATION AFTER THIS POINT!
410
411 // Finally: compact or sweep the oldest generation.
412 if (major_gc && oldest_gen->mark) {
413 if (oldest_gen->compact)
414 compact(gct->scavenged_static_objects);
415 else
416 sweep(oldest_gen);
417 }
418
419 copied = 0;
420 max_copied = 0;
421 avg_copied = 0;
422 {
423 nat i;
424 for (i=0; i < n_gc_threads; i++) {
425 if (n_gc_threads > 1) {
426 debugTrace(DEBUG_gc,"thread %d:", i);
427 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
428 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
429 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
430 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
431 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
432 }
433 copied += gc_threads[i]->copied;
434 max_copied = stg_max(gc_threads[i]->copied, max_copied);
435 }
436 if (n_gc_threads == 1) {
437 max_copied = 0;
438 avg_copied = 0;
439 } else {
440 avg_copied = copied;
441 }
442 }
443
444 // Run through all the generations/steps and tidy up.
445 // We're going to:
446 // - count the amount of "live" data (live_words, live_blocks)
447 // - count the amount of "copied" data in this GC (copied)
448 // - free from-space
449 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
450 //
451 live_words = 0;
452 live_blocks = 0;
453
454 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
455
456 if (g == N) {
457 generations[g].collections++; // for stats
458 if (n_gc_threads > 1) generations[g].par_collections++;
459 }
460
461 // Count the mutable list as bytes "copied" for the purposes of
462 // stats. Every mutable list is copied during every GC.
463 if (g > 0) {
464 nat mut_list_size = 0;
465 for (n = 0; n < n_capabilities; n++) {
466 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
467 }
468 copied += mut_list_size;
469
470 debugTrace(DEBUG_gc,
471 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
472 (unsigned long)(mut_list_size * sizeof(W_)),
473 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
474 }
475
476 bdescr *next, *prev;
477 gen = &generations[g];
478
479 // for generations we collected...
480 if (g <= N) {
481
482 /* free old memory and shift to-space into from-space for all
483 * the collected steps (except the allocation area). These
484 * freed blocks will probably be quickly recycled.
485 */
486 if (gen->mark)
487 {
488 // tack the new blocks on the end of the existing blocks
489 if (gen->old_blocks != NULL) {
490
491 prev = NULL;
492 for (bd = gen->old_blocks; bd != NULL; bd = next) {
493
494 next = bd->link;
495
496 if (!(bd->flags & BF_MARKED))
497 {
498 if (prev == NULL) {
499 gen->old_blocks = next;
500 } else {
501 prev->link = next;
502 }
503 freeGroup(bd);
504 gen->n_old_blocks--;
505 }
506 else
507 {
508 gen->n_words += bd->free - bd->start;
509
510 // NB. this step might not be compacted next
511 // time, so reset the BF_MARKED flags.
512 // They are set before GC if we're going to
513 // compact. (search for BF_MARKED above).
514 bd->flags &= ~BF_MARKED;
515
516 // between GCs, all blocks in the heap except
517 // for the nursery have the BF_EVACUATED flag set.
518 bd->flags |= BF_EVACUATED;
519
520 prev = bd;
521 }
522 }
523
524 if (prev != NULL) {
525 prev->link = gen->blocks;
526 gen->blocks = gen->old_blocks;
527 }
528 }
529 // add the new blocks to the block tally
530 gen->n_blocks += gen->n_old_blocks;
531 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
532 ASSERT(countOccupied(gen->blocks) == gen->n_words);
533 }
534 else // not compacted
535 {
536 freeChain(gen->old_blocks);
537 }
538
539 gen->old_blocks = NULL;
540 gen->n_old_blocks = 0;
541
542 /* LARGE OBJECTS. The current live large objects are chained on
543 * scavenged_large, having been moved during garbage
544 * collection from large_objects. Any objects left on the
545 * large_objects list are therefore dead, so we free them here.
546 */
547 freeChain(gen->large_objects);
548 gen->large_objects = gen->scavenged_large_objects;
549 gen->n_large_blocks = gen->n_scavenged_large_blocks;
550 gen->n_new_large_words = 0;
551 }
552 else // for generations > N
553 {
554 /* For older generations, we need to append the
555 * scavenged_large_object list (i.e. large objects that have been
556 * promoted during this GC) to the large_object list for that step.
557 */
558 for (bd = gen->scavenged_large_objects; bd; bd = next) {
559 next = bd->link;
560 dbl_link_onto(bd, &gen->large_objects);
561 }
562
563 // add the new blocks we promoted during this GC
564 gen->n_large_blocks += gen->n_scavenged_large_blocks;
565 }
566
567 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
568
569 gen->scavenged_large_objects = NULL;
570 gen->n_scavenged_large_blocks = 0;
571
572 // Count "live" data
573 live_words += genLiveWords(gen);
574 live_blocks += genLiveBlocks(gen);
575
576 // add in the partial blocks in the gen_workspaces, but ignore gen 0
577 // if this is a local GC (we can't count another capability's part_list)
578 {
579 nat i;
580 for (i = 0; i < n_capabilities; i++) {
581 live_words += gcThreadLiveWords(i, gen->no);
582 live_blocks += gcThreadLiveBlocks(i, gen->no);
583 }
584 }
585 } // for all generations
586
587 // update the max size of older generations after a major GC
588 resize_generations();
589
590 // Free the mark stack.
591 if (mark_stack_top_bd != NULL) {
592 debugTrace(DEBUG_gc, "mark stack: %d blocks",
593 countBlocks(mark_stack_top_bd));
594 freeChain(mark_stack_top_bd);
595 }
596
597 // Free any bitmaps.
598 for (g = 0; g <= N; g++) {
599 gen = &generations[g];
600 if (gen->bitmap != NULL) {
601 freeGroup(gen->bitmap);
602 gen->bitmap = NULL;
603 }
604 }
605
606 // Reset the nursery: make the blocks empty
607 allocated += clearNurseries();
608
609 resize_nursery();
610
611 resetNurseries();
612
613 // mark the garbage collected CAFs as dead
614 #if 0 && defined(DEBUG) // doesn't work at the moment
615 if (major_gc) { gcCAFs(); }
616 #endif
617
618 #ifdef PROFILING
619 // resetStaticObjectForRetainerProfiling() must be called before
620 // zeroing below.
621 if (n_gc_threads > 1) {
622 barf("profiling is currently broken with multi-threaded GC");
623 // ToDo: fix the gct->scavenged_static_objects below
624 }
625 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
626 #endif
627
628 // zero the scavenged static object list
629 if (major_gc) {
630 nat i;
631 if (n_gc_threads == 1) {
632 zero_static_object_list(gct->scavenged_static_objects);
633 } else {
634 for (i = 0; i < n_gc_threads; i++) {
635 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
636 }
637 }
638 }
639
640 // Update the stable pointer hash table.
641 updateStablePtrTable(major_gc);
642
643 // unlock the StablePtr table. Must be before scheduleFinalizers(),
644 // because a finalizer may call hs_free_fun_ptr() or
645 // hs_free_stable_ptr(), both of which access the StablePtr table.
646 stablePtrPostGC();
647
648 // Start any pending finalizers. Must be after
649 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
650 RELEASE_SM_LOCK;
651 scheduleFinalizers(cap, old_weak_ptr_list);
652 ACQUIRE_SM_LOCK;
653
654 // check sanity after GC
655 // before resurrectThreads(), because that might overwrite some
656 // closures, which will cause problems with THREADED where we don't
657 // fill slop.
658 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
659
660 // send exceptions to any threads which were about to die
661 RELEASE_SM_LOCK;
662 resurrectThreads(resurrected_threads);
663 ACQUIRE_SM_LOCK;
664
665 if (major_gc) {
666 nat need, got;
667 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
668 got = mblocks_allocated;
669 /* If the amount of data remains constant, next major GC we'll
670 require (F+1)*need. We leave (F+2)*need in order to reduce
671 repeated deallocation and reallocation. */
672 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
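    /* For example (illustrative numbers): with the default oldGenFactor
       F = 2 and need = 100 mblocks, we keep up to (2+2)*100 = 400 mblocks
       and return anything above that to the OS. */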
673 if (got > need) {
674 returnMemoryToOS(got - need);
675 }
676 }
677
678 // extra GC trace info
679 IF_DEBUG(gc, statDescribeGens());
680
681 #ifdef DEBUG
682 // symbol-table based profiling
683 /* heapCensus(to_blocks); */ /* ToDo */
684 #endif
685
686 // restore enclosing cost centre
687 #ifdef PROFILING
688 CCCS = prev_CCS;
689 #endif
690
691 #ifdef DEBUG
692 // check for memory leaks if DEBUG is on
693 memInventory(DEBUG_gc);
694 #endif
695
696 #ifdef RTS_GTK_FRONTPANEL
697 if (RtsFlags.GcFlags.frontpanel) {
698 updateFrontPanelAfterGC( N, live_words );
699 }
700 #endif
701
702 // ok, GC over: tell the stats department what happened.
703 stat_endGC(gct, allocated, live_words,
704 copied, N, max_copied, avg_copied,
705 live_blocks * BLOCK_SIZE_W - live_words /* slop */);
706
707 // Guess which generation we'll collect *next* time
708 initialise_N(force_major_gc);
709
710 #if defined(RTS_USER_SIGNALS)
711 if (RtsFlags.MiscFlags.install_signal_handlers) {
712 // unblock signals again
713 unblockUserSignals();
714 }
715 #endif
716
717 RELEASE_SM_LOCK;
718
719 SET_GCT(saved_gct);
720 }
721
722 /* -----------------------------------------------------------------------------
723 Figure out which generation to collect, initialise N and major_gc.
724
725 Also returns the total number of blocks in generations that will be
726 collected.
727 -------------------------------------------------------------------------- */
728
729 static nat
730 initialise_N (rtsBool force_major_gc)
731 {
732 int g;
733 nat blocks, blocks_total;
734
735 blocks = 0;
736 blocks_total = 0;
737
738 if (force_major_gc) {
739 N = RtsFlags.GcFlags.generations - 1;
740 } else {
741 N = 0;
742 }
743
744 for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
745
746 blocks = generations[g].n_words / BLOCK_SIZE_W
747 + generations[g].n_large_blocks;
748
749 if (blocks >= generations[g].max_blocks) {
750 N = stg_max(N,g);
751 }
752 if ((nat)g <= N) {
753 blocks_total += blocks;
754 }
755 }
756
757 blocks_total += countNurseryBlocks();
758
759 major_gc = (N == RtsFlags.GcFlags.generations-1);
760 return blocks_total;
761 }
762
763 /* -----------------------------------------------------------------------------
764 Initialise the gc_thread structures.
765 -------------------------------------------------------------------------- */
766
767 #define GC_THREAD_INACTIVE 0
768 #define GC_THREAD_STANDING_BY 1
769 #define GC_THREAD_RUNNING 2
770 #define GC_THREAD_WAITING_TO_CONTINUE 3
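
/* Summary of the handshake these states drive (see gcWorkerThread(),
 * wakeup_gc_threads() and releaseGCThreads() below):
 *
 *  - a worker releases its mut_spin lock, sets wakeup to STANDING_BY and
 *    blocks acquiring its gc_spin lock;
 *  - wakeup_gc_threads() sets RUNNING, acquires mut_spin and releases
 *    gc_spin, which lets the worker start scavenging;
 *  - when GC is finished the worker releases gc_spin, sets
 *    WAITING_TO_CONTINUE and blocks acquiring mut_spin;
 *  - releaseGCThreads() resets the state to INACTIVE, re-acquires gc_spin
 *    and releases mut_spin, letting the worker return to the mutator.
 */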
771
772 static void
773 new_gc_thread (nat n, gc_thread *t)
774 {
775 nat g;
776 gen_workspace *ws;
777
778 t->cap = &capabilities[n];
779
780 #ifdef THREADED_RTS
781 t->id = 0;
782 initSpinLock(&t->gc_spin);
783 initSpinLock(&t->mut_spin);
784 ACQUIRE_SPIN_LOCK(&t->gc_spin);
785 t->wakeup = GC_THREAD_INACTIVE; // starts INACTIVE (gc_spin is held), so we
786 // can wait for the thread to reach STANDING_BY; see wakeup_gc_threads
787 #endif
788
789 t->thread_index = n;
790 t->free_blocks = NULL;
791 t->gc_count = 0;
792
793 init_gc_thread(t);
794
795 #ifdef USE_PAPI
796 t->papi_events = -1;
797 #endif
798
799 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
800 {
801 ws = &t->gens[g];
802 ws->gen = &generations[g];
803 ASSERT(g == ws->gen->no);
804 ws->my_gct = t;
805
806 // We want to call
807 // alloc_todo_block(ws,0);
808 // but can't, because it uses gct which isn't set up at this point.
809 // Hence, allocate a block for todo_bd manually:
810 {
811 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
812 initBdescr(bd, ws->gen, ws->gen->to);
813 bd->flags = BF_EVACUATED;
814 bd->u.scan = bd->free = bd->start;
815
816 ws->todo_bd = bd;
817 ws->todo_free = bd->free;
818 ws->todo_lim = bd->start + BLOCK_SIZE_W;
819 }
820
821 ws->todo_q = newWSDeque(128);
822 ws->todo_overflow = NULL;
823 ws->n_todo_overflow = 0;
824 ws->todo_large_objects = NULL;
825
826 ws->part_list = NULL;
827 ws->n_part_blocks = 0;
828
829 ws->scavd_list = NULL;
830 ws->n_scavd_blocks = 0;
831 }
832 }
833
834
835 void
836 initGcThreads (void)
837 {
838 if (gc_threads == NULL) {
839 #if defined(THREADED_RTS)
840 nat i;
841 gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
842 sizeof(gc_thread*),
843 "alloc_gc_threads");
844
845 for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
846 gc_threads[i] =
847 stgMallocBytes(sizeof(gc_thread) +
848 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
849 "alloc_gc_threads");
850
851 new_gc_thread(i, gc_threads[i]);
852 }
853 #else
854 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
855 gc_threads[0] = gct;
856 new_gc_thread(0,gc_threads[0]);
857 #endif
858 }
859 }
860
861 void
862 freeGcThreads (void)
863 {
864 nat g;
865 if (gc_threads != NULL) {
866 #if defined(THREADED_RTS)
867 nat i;
868 for (i = 0; i < n_capabilities; i++) {
869 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
870 {
871 freeWSDeque(gc_threads[i]->gens[g].todo_q);
872 }
873 stgFree (gc_threads[i]);
874 }
875 stgFree (gc_threads);
876 #else
877 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
878 {
879 freeWSDeque(gc_threads[0]->gens[g].todo_q);
880 }
881 stgFree (gc_threads);
882 #endif
883 gc_threads = NULL;
884 }
885 }
886
887 /* ----------------------------------------------------------------------------
888 Start GC threads
889 ------------------------------------------------------------------------- */
890
891 static volatile StgWord gc_running_threads;
892
893 static StgWord
894 inc_running (void)
895 {
896 StgWord new;
897 new = atomic_inc(&gc_running_threads);
898 ASSERT(new <= n_gc_threads);
899 return new;
900 }
901
902 static StgWord
903 dec_running (void)
904 {
905 ASSERT(gc_running_threads != 0);
906 return atomic_dec(&gc_running_threads);
907 }
908
909 static rtsBool
910 any_work (void)
911 {
912 int g;
913 gen_workspace *ws;
914
915 gct->any_work++;
916
917 write_barrier();
918
919 // scavenge objects in compacted generation
920 if (mark_stack_bd != NULL && !mark_stack_empty()) {
921 return rtsTrue;
922 }
923
924 // Check for global work in any step. We don't need to check for
925 // local work, because we have already exited scavenge_loop(),
926 // which means there is no local work for this thread.
927 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
928 ws = &gct->gens[g];
929 if (ws->todo_large_objects) return rtsTrue;
930 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
931 if (ws->todo_overflow) return rtsTrue;
932 }
933
934 #if defined(THREADED_RTS)
935 if (work_stealing) {
936 nat n;
937 // look for work to steal
938 for (n = 0; n < n_gc_threads; n++) {
939 if (n == gct->thread_index) continue;
940 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
941 ws = &gc_threads[n]->gens[g];
942 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
943 }
944 }
945 }
946 #endif
947
948 gct->no_work++;
949 #if defined(THREADED_RTS)
950 yieldThread();
951 #endif
952
953 return rtsFalse;
954 }
955
956 static void
957 scavenge_until_all_done (void)
958 {
959 nat r;
960
961
962 loop:
963 #if defined(THREADED_RTS)
964 if (n_gc_threads > 1) {
965 scavenge_loop();
966 } else {
967 scavenge_loop1();
968 }
969 #else
970 scavenge_loop();
971 #endif
972
973 collect_gct_blocks();
974
975 // scavenge_loop() only exits when there's no work to do
976 r = dec_running();
977
978 traceEventGcIdle(gct->cap);
979
980 debugTrace(DEBUG_gc, "%d GC threads still running", r);
981
982 while (gc_running_threads != 0) {
983 // usleep(1);
984 if (any_work()) {
985 inc_running();
986 traceEventGcWork(gct->cap);
987 goto loop;
988 }
989 // any_work() does not remove the work from the queue, it
990 // just checks for the presence of work. If we find any,
991 // then we increment gc_running_threads and go back to
992 // scavenge_loop() to perform any pending work.
993 }
994
995 traceEventGcDone(gct->cap);
996 }
997
998 #if defined(THREADED_RTS)
999
1000 void
1001 gcWorkerThread (Capability *cap)
1002 {
1003 gc_thread *saved_gct;
1004
1005 // necessary if we stole a callee-saves register for gct:
1006 saved_gct = gct;
1007
1008 gct = gc_threads[cap->no];
1009 gct->id = osThreadId();
1010
1011 stat_gcWorkerThreadStart(gct);
1012
1013 // Wait until we're told to wake up
1014 RELEASE_SPIN_LOCK(&gct->mut_spin);
1015 gct->wakeup = GC_THREAD_STANDING_BY;
1016 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1017 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1018
1019 #ifdef USE_PAPI
1020 // start performance counters in this thread...
1021 if (gct->papi_events == -1) {
1022 papi_init_eventset(&gct->papi_events);
1023 }
1024 papi_thread_start_gc1_count(gct->papi_events);
1025 #endif
1026
1027 init_gc_thread(gct);
1028
1029 traceEventGcWork(gct->cap);
1030
1031 // Every thread evacuates some roots.
1032 gct->evac_gen_no = 0;
1033 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1034 scavenge_capability_mut_lists(cap);
1035
1036 scavenge_until_all_done();
1037
1038 #ifdef THREADED_RTS
1039 // Now that the whole heap is marked, we discard any sparks that
1040 // were found to be unreachable. The main GC thread is currently
1041 // marking the heap reachable via weak pointers, so it is
1042 // non-deterministic whether a spark will be retained if it is
1043 // only reachable via weak pointers. To fix this problem would
1044 // require another GC barrier, which is too high a price.
1045 pruneSparkQueue(cap);
1046 #endif
1047
1048 #ifdef USE_PAPI
1049 // count events in this thread towards the GC totals
1050 papi_thread_stop_gc1_count(gct->papi_events);
1051 #endif
1052
1053 // Wait until we're told to continue
1054 RELEASE_SPIN_LOCK(&gct->gc_spin);
1055 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1056 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1057 gct->thread_index);
1058 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1059 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1060
1061 // record the time spent doing GC in the Task structure
1062 stat_gcWorkerThreadDone(gct);
1063
1064 SET_GCT(saved_gct);
1065 }
1066
1067 #endif
1068
1069 #if defined(THREADED_RTS)
1070
1071 void
1072 waitForGcThreads (Capability *cap USED_IF_THREADS)
1073 {
1074 const nat n_threads = RtsFlags.ParFlags.nNodes;
1075 const nat me = cap->no;
1076 nat i, j;
1077 rtsBool retry = rtsTrue;
1078
1079 while(retry) {
1080 for (i=0; i < n_threads; i++) {
1081 if (i == me) continue;
1082 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1083 prodCapability(&capabilities[i], cap->running_task);
1084 }
1085 }
1086 for (j=0; j < 10; j++) {
1087 retry = rtsFalse;
1088 for (i=0; i < n_threads; i++) {
1089 if (i == me) continue;
1090 write_barrier();
1091 setContextSwitches();
1092 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1093 retry = rtsTrue;
1094 }
1095 }
1096 if (!retry) break;
1097 yieldThread();
1098 }
1099 }
1100 }
1101
1102 #endif // THREADED_RTS
1103
1104 static void
1105 start_gc_threads (void)
1106 {
1107 #if defined(THREADED_RTS)
1108 gc_running_threads = 0;
1109 #endif
1110 }
1111
1112 static void
1113 wakeup_gc_threads (nat me USED_IF_THREADS)
1114 {
1115 #if defined(THREADED_RTS)
1116 nat i;
1117
1118 if (n_gc_threads == 1) return;
1119
1120 for (i=0; i < n_gc_threads; i++) {
1121 if (i == me) continue;
1122 inc_running();
1123 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1124 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1125
1126 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1127 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1128 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1129 }
1130 #endif
1131 }
1132
1133 // After GC is complete, we must wait for all GC threads to enter the
1134 // standby state, otherwise they may still be executing inside
1135 // any_work(), and may even remain awake until the next GC starts.
1136 static void
1137 shutdown_gc_threads (nat me USED_IF_THREADS)
1138 {
1139 #if defined(THREADED_RTS)
1140 nat i;
1141
1142 if (n_gc_threads == 1) return;
1143
1144 for (i=0; i < n_gc_threads; i++) {
1145 if (i == me) continue;
1146 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1147 }
1148 #endif
1149 }
1150
1151 #if defined(THREADED_RTS)
1152 void
1153 releaseGCThreads (Capability *cap USED_IF_THREADS)
1154 {
1155 const nat n_threads = RtsFlags.ParFlags.nNodes;
1156 const nat me = cap->no;
1157 nat i;
1158 for (i=0; i < n_threads; i++) {
1159 if (i == me) continue;
1160 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1161 barf("releaseGCThreads");
1162
1163 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1164 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1165 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1166 }
1167 }
1168 #endif
1169
1170 /* ----------------------------------------------------------------------------
1171 Initialise a generation that is to be collected
1172 ------------------------------------------------------------------------- */
1173
1174 static void
1175 prepare_collected_gen (generation *gen)
1176 {
1177 nat i, g, n;
1178 gen_workspace *ws;
1179 bdescr *bd, *next;
1180
1181 // Throw away the current mutable list. Invariant: the mutable
1182 // list always has at least one block; this means we can avoid a
1183 // check for NULL in recordMutable().
1184 g = gen->no;
1185 if (g != 0) {
1186 for (i = 0; i < n_capabilities; i++) {
1187 freeChain(capabilities[i].mut_lists[g]);
1188 capabilities[i].mut_lists[g] = allocBlock();
1189 }
1190 }
1191
1192 gen = &generations[g];
1193 ASSERT(gen->no == g);
1194
1195 // we'll construct a new list of threads in this step
1196 // during GC, throw away the current list.
1197 gen->old_threads = gen->threads;
1198 gen->threads = END_TSO_QUEUE;
1199
1200 // deprecate the existing blocks
1201 gen->old_blocks = gen->blocks;
1202 gen->n_old_blocks = gen->n_blocks;
1203 gen->blocks = NULL;
1204 gen->n_blocks = 0;
1205 gen->n_words = 0;
1206 gen->live_estimate = 0;
1207
1208 // initialise the large object queues.
1209 ASSERT(gen->scavenged_large_objects == NULL);
1210 ASSERT(gen->n_scavenged_large_blocks == 0);
1211
1212 // grab all the partial blocks stashed in the gc_thread workspaces and
1213 // move them to the old_blocks list of this gen.
1214 for (n = 0; n < n_capabilities; n++) {
1215 ws = &gc_threads[n]->gens[gen->no];
1216
1217 for (bd = ws->part_list; bd != NULL; bd = next) {
1218 next = bd->link;
1219 bd->link = gen->old_blocks;
1220 gen->old_blocks = bd;
1221 gen->n_old_blocks += bd->blocks;
1222 }
1223 ws->part_list = NULL;
1224 ws->n_part_blocks = 0;
1225
1226 ASSERT(ws->scavd_list == NULL);
1227 ASSERT(ws->n_scavd_blocks == 0);
1228
1229 if (ws->todo_free != ws->todo_bd->start) {
1230 ws->todo_bd->free = ws->todo_free;
1231 ws->todo_bd->link = gen->old_blocks;
1232 gen->old_blocks = ws->todo_bd;
1233 gen->n_old_blocks += ws->todo_bd->blocks;
1234 alloc_todo_block(ws,0); // always has one block.
1235 }
1236 }
1237
1238 // mark the small objects as from-space
1239 for (bd = gen->old_blocks; bd; bd = bd->link) {
1240 bd->flags &= ~BF_EVACUATED;
1241 }
1242
1243 // mark the large objects as from-space
1244 for (bd = gen->large_objects; bd; bd = bd->link) {
1245 bd->flags &= ~BF_EVACUATED;
1246 }
1247
1248 // for a compacted generation, we need to allocate the bitmap
1249 if (gen->mark) {
1250 lnat bitmap_size; // in bytes
1251 bdescr *bitmap_bdescr;
1252 StgWord *bitmap;
1253
1254 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
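        // e.g. on a 64-bit platform (sizeof(W_) == 8) with 4096-byte blocks
        // this is 64 bytes of bitmap per block, i.e. one bit per word of
        // old heap (illustrative figures; BLOCK_SIZE is configurable).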
1255
1256 if (bitmap_size > 0) {
1257 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1258 / BLOCK_SIZE);
1259 gen->bitmap = bitmap_bdescr;
1260 bitmap = bitmap_bdescr->start;
1261
1262 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1263 bitmap_size, bitmap);
1264
1265 // don't forget to fill it with zeros!
1266 memset(bitmap, 0, bitmap_size);
1267
1268 // For each block in this step, point to its bitmap from the
1269 // block descriptor.
1270 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1271 bd->u.bitmap = bitmap;
1272 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1273
1274 // Also at this point we set the BF_MARKED flag
1275 // for this block. The invariant is that
1276 // BF_MARKED is always unset, except during GC
1277 // when it is set on those blocks which will be
1278 // compacted.
1279 if (!(bd->flags & BF_FRAGMENTED)) {
1280 bd->flags |= BF_MARKED;
1281 }
1282
1283 // BF_SWEPT should be marked only for blocks that are being
1284 // collected in sweep()
1285 bd->flags &= ~BF_SWEPT;
1286 }
1287 }
1288 }
1289 }
1290
1291
1292 /* ----------------------------------------------------------------------------
1293 Save the mutable lists in saved_mut_lists
1294 ------------------------------------------------------------------------- */
1295
1296 static void
1297 stash_mut_list (Capability *cap, nat gen_no)
1298 {
1299 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1300 cap->mut_lists[gen_no] = allocBlock_sync();
1301 }
1302
1303 /* ----------------------------------------------------------------------------
1304 Initialise a generation that is *not* to be collected
1305 ------------------------------------------------------------------------- */
1306
1307 static void
1308 prepare_uncollected_gen (generation *gen)
1309 {
1310 nat i;
1311
1312
1313 ASSERT(gen->no > 0);
1314
1315 // save the current mutable lists for this generation, and
1316 // allocate a fresh block for each one. We'll traverse these
1317 // mutable lists as roots early on in the GC.
1318 for (i = 0; i < n_capabilities; i++) {
1319 stash_mut_list(&capabilities[i], gen->no);
1320 }
1321
1322 ASSERT(gen->scavenged_large_objects == NULL);
1323 ASSERT(gen->n_scavenged_large_blocks == 0);
1324 }
1325
1326 /* -----------------------------------------------------------------------------
1327 Collect the completed blocks from a GC thread and attach them to
1328 the generation.
1329 -------------------------------------------------------------------------- */
1330
1331 static void
1332 collect_gct_blocks (void)
1333 {
1334 nat g;
1335 gen_workspace *ws;
1336 bdescr *bd, *prev;
1337
1338 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1339 ws = &gct->gens[g];
1340
1341 // there may still be a block attached to ws->todo_bd;
1342 // leave it there to use next time.
1343
1344 if (ws->scavd_list != NULL) {
1345 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1346
1347 ASSERT(gct->scan_bd == NULL);
1348 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1349
1350 prev = NULL;
1351 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1352 ws->gen->n_words += bd->free - bd->start;
1353 prev = bd;
1354 }
1355 if (prev != NULL) {
1356 prev->link = ws->gen->blocks;
1357 ws->gen->blocks = ws->scavd_list;
1358 }
1359 ws->gen->n_blocks += ws->n_scavd_blocks;
1360
1361 ws->scavd_list = NULL;
1362 ws->n_scavd_blocks = 0;
1363
1364 RELEASE_SPIN_LOCK(&ws->gen->sync);
1365 }
1366 }
1367 }
1368
1369 /* -----------------------------------------------------------------------------
1370 Initialise a gc_thread before GC
1371 -------------------------------------------------------------------------- */
1372
1373 static void
1374 init_gc_thread (gc_thread *t)
1375 {
1376 t->static_objects = END_OF_STATIC_LIST;
1377 t->scavenged_static_objects = END_OF_STATIC_LIST;
1378 t->scan_bd = NULL;
1379 t->mut_lists = t->cap->mut_lists;
1380 t->evac_gen_no = 0;
1381 t->failed_to_evac = rtsFalse;
1382 t->eager_promotion = rtsTrue;
1383 t->thunk_selector_depth = 0;
1384 t->copied = 0;
1385 t->scanned = 0;
1386 t->any_work = 0;
1387 t->no_work = 0;
1388 t->scav_find_work = 0;
1389 }
1390
1391 /* -----------------------------------------------------------------------------
1392 Function we pass to evacuate roots.
1393 -------------------------------------------------------------------------- */
1394
1395 static void
1396 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1397 {
1398 // we stole a register for gct, but this function is called from
1399 // *outside* the GC where the register variable is not in effect,
1400 // so we need to save and restore it here. NB. only call
1401 // mark_root() from the main GC thread, otherwise gct will be
1402 // incorrect.
1403 gc_thread *saved_gct;
1404 saved_gct = gct;
1405 SET_GCT(user);
1406
1407 evacuate(root);
1408
1409 SET_GCT(saved_gct);
1410 }
1411
1412 /* -----------------------------------------------------------------------------
1413 Initialising the static object & mutable lists
1414 -------------------------------------------------------------------------- */
1415
1416 static void
1417 zero_static_object_list(StgClosure* first_static)
1418 {
1419 StgClosure* p;
1420 StgClosure* link;
1421 const StgInfoTable *info;
1422
1423 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1424 info = get_itbl(p);
1425 link = *STATIC_LINK(info, p);
1426 *STATIC_LINK(info,p) = NULL;
1427 }
1428 }
1429
1430 /* ----------------------------------------------------------------------------
1431 Reset the sizes of the older generations when we do a major
1432 collection.
1433
1434 CURRENT STRATEGY: make all generations except zero the same size.
1435 We have to stay within the maximum heap size, and leave a certain
1436 percentage of the maximum heap size available to allocate into.
1437 ------------------------------------------------------------------------- */
1438
1439 static void
1440 resize_generations (void)
1441 {
1442 nat g;
1443
1444 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1445 nat live, size, min_alloc, words;
1446 const nat max = RtsFlags.GcFlags.maxHeapSize;
1447 const nat gens = RtsFlags.GcFlags.generations;
1448
1449 // live in the oldest generations
1450 if (oldest_gen->live_estimate != 0) {
1451 words = oldest_gen->live_estimate;
1452 } else {
1453 words = oldest_gen->n_words;
1454 }
1455 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1456 oldest_gen->n_large_blocks;
1457
1458 // default max size for all generations except zero
1459 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1460 RtsFlags.GcFlags.minOldGenSize);
1461
1462 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1463 RtsFlags.GcFlags.heapSizeSuggestion = size;
1464 }
1465
1466 // minimum size for generation zero
1467 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1468 RtsFlags.GcFlags.minAllocAreaSize);
1469
1470 // Auto-enable compaction when the residency reaches a
1471 // certain percentage of the maximum heap size (default: 30%).
1472 if (RtsFlags.GcFlags.compact ||
1473 (max > 0 &&
1474 oldest_gen->n_blocks >
1475 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1476 oldest_gen->mark = 1;
1477 oldest_gen->compact = 1;
1478 // debugBelch("compaction: on\n", live);
1479 } else {
1480 oldest_gen->mark = 0;
1481 oldest_gen->compact = 0;
1482 // debugBelch("compaction: off\n", live);
1483 }
1484
1485 if (RtsFlags.GcFlags.sweep) {
1486 oldest_gen->mark = 1;
1487 }
1488
1489 // if we're going to go over the maximum heap size, reduce the
1490 // size of the generations accordingly. The calculation is
1491 // different if compaction is turned on, because we don't need
1492 // to double the space required to collect the old generation.
1493 if (max != 0) {
1494
1495 // this test is necessary to ensure that the calculations
1496 // below don't have any negative results - we're working
1497 // with unsigned values here.
1498 if (max < min_alloc) {
1499 heapOverflow();
1500 }
1501
1502 if (oldest_gen->compact) {
1503 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1504 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1505 }
1506 } else {
1507 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1508 size = (max - min_alloc) / ((gens - 1) * 2);
1509 }
1510 }
1511
1512 if (size < live) {
1513 heapOverflow();
1514 }
1515 }
1516
1517 #if 0
1518 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1519 min_alloc, size, max);
1520 #endif
1521
1522 for (g = 0; g < gens; g++) {
1523 generations[g].max_blocks = size;
1524 }
1525 }
1526 }
1527
1528 /* -----------------------------------------------------------------------------
1529 Calculate the new size of the nursery, and resize it.
1530 -------------------------------------------------------------------------- */
1531
1532 static void
1533 resize_nursery (void)
1534 {
1535 const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1536
1537 if (RtsFlags.GcFlags.generations == 1)
1538 { // Two-space collector:
1539 nat blocks;
1540
1541 /* set up a new nursery. Allocate a nursery size based on a
1542 * function of the amount of live data (by default a factor of 2)
1543 * Use the blocks from the old nursery if possible, freeing up any
1544 * left over blocks.
1545 *
1546 * If we get near the maximum heap size, then adjust our nursery
1547 * size accordingly. If the nursery is the same size as the live
1548 * data (L), then we need 3L bytes. We can reduce the size of the
1549 * nursery to bring the required memory down near 2L bytes.
1550 *
1551 * A normal 2-space collector would need 4L bytes to give the same
1552 * performance we get from 3L bytes, reducing to the same
1553 * performance at 2L bytes.
1554 */
1555 blocks = generations[0].n_blocks;
1556
1557 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1558 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1559 RtsFlags.GcFlags.maxHeapSize )
1560 {
1561 long adjusted_blocks; // signed on purpose
1562 int pc_free;
1563
1564 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1565
1566 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1567 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1568
1569 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1570 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1571 {
1572 heapOverflow();
1573 }
1574 blocks = adjusted_blocks;
1575 }
1576 else
1577 {
1578 blocks *= RtsFlags.GcFlags.oldGenFactor;
1579 if (blocks < min_nursery)
1580 {
1581 blocks = min_nursery;
1582 }
1583 }
1584 resizeNurseries(blocks);
1585 }
1586 else // Generational collector
1587 {
1588 /*
1589 * If the user has given us a suggested heap size, adjust our
1590 * allocation area to make best use of the memory available.
1591 */
1592 if (RtsFlags.GcFlags.heapSizeSuggestion)
1593 {
1594 long blocks;
1595 const nat needed = calcNeeded(); // approx blocks needed at next GC
1596
1597 /* Guess how much will be live in generation 0 step 0 next time.
1598 * A good approximation is obtained by finding the
1599 * percentage of g0 that was live at the last minor GC.
1600 *
1601 * We have an accurate figure for the amount of copied data in
1602 * 'copied', but we must convert this to a number of blocks, with
1603 * a small adjustment for estimated slop at the end of a block
1604 * (- 10 words).
1605 */
1606 if (N == 0)
1607 {
1608 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1609 / countNurseryBlocks();
1610 }
1611
1612 /* Estimate a size for the allocation area based on the
1613 * information available. We might end up going slightly under
1614 * or over the suggested heap size, but we should be pretty
1615 * close on average.
1616 *
1617 * Formula: suggested - needed
1618 * ----------------------------
1619 * 1 + g0_pcnt_kept/100
1620 *
1621 * where 'needed' is the amount of memory needed at the next
1622 * collection for collecting all gens except g0.
1623 */
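            /* Worked example (illustrative numbers): with a suggested heap
             * of 100000 blocks, needed = 20000 blocks and g0_pcnt_kept = 30,
             * this gives (100000 - 20000) * 100 / 130 ~= 61538 blocks for
             * the allocation area.
             */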
1624 blocks =
1625 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1626 (100 + (long)g0_pcnt_kept);
1627
1628 if (blocks < (long)min_nursery) {
1629 blocks = min_nursery;
1630 }
1631
1632 resizeNurseries((nat)blocks);
1633 }
1634 else
1635 {
1636 // we might have added extra large blocks to the nursery, so
1637 // resize back to minAllocAreaSize again.
1638 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1639 }
1640 }
1641 }
1642
1643 /* -----------------------------------------------------------------------------
1644 Sanity code for CAF garbage collection.
1645
1646 With DEBUG turned on, we manage a CAF list in addition to the SRT
1647 mechanism. After GC, we run down the CAF list and blackhole any
1648 CAFs which have been garbage collected. This means we get an error
1649 whenever the program tries to enter a garbage collected CAF.
1650
1651 Any garbage collected CAFs are taken off the CAF list at the same
1652 time.
1653 -------------------------------------------------------------------------- */
1654
1655 #if 0 && defined(DEBUG)
1656
1657 static void
1658 gcCAFs(void)
1659 {
1660 StgClosure* p;
1661 StgClosure** pp;
1662 const StgInfoTable *info;
1663 nat i;
1664
1665 i = 0;
1666 p = caf_list;
1667 pp = &caf_list;
1668
1669 while (p != NULL) {
1670
1671 info = get_itbl(p);
1672
1673 ASSERT(info->type == IND_STATIC);
1674
1675 if (STATIC_LINK(info,p) == NULL) {
1676 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1677 // black hole it
1678 SET_INFO(p,&stg_BLACKHOLE_info);
1679 p = STATIC_LINK2(info,p);
1680 *pp = p;
1681 }
1682 else {
1683 pp = &STATIC_LINK2(info,p);
1684 p = *pp;
1685 i++;
1686 }
1687
1688 }
1689
1690 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1691 }
1692 #endif