Fix a scheduling bug in the threaded RTS
rts/sm/GC.c  (ghc.git)
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "GCTDecl.h"
44 #include "Compact.h"
45 #include "Evac.h"
46 #include "Scav.h"
47 #include "GCUtils.h"
48 #include "MarkStack.h"
49 #include "MarkWeak.h"
50 #include "Sparks.h"
51 #include "Sweep.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
93 */
94
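/* A minimal sketch (illustration only, never compiled) of how an object is
 * consed onto the thread-local static object list during evacuation; the
 * real code lives in Evac.c and Scav.c.
 */
#if 0
static void sketch_push_static_object (StgClosure *q)
{
    const StgInfoTable *info = get_itbl(q);

    // A NULL static link field means "not on any list"; the end of a list
    // is marked with END_OF_STATIC_LIST (i.e. 1) rather than NULL, so list
    // membership can be tested cheaply.
    if (*STATIC_LINK(info, q) == NULL) {
        *STATIC_LINK(info, q) = gct->static_objects;   // cons onto the front
        gct->static_objects = q;
    }
}
#endif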
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
100 nat N;
101 rtsBool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #ifdef DEBUG
109 nat mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_OTHERS;
113 #endif
114
115 /* Thread-local data for each GC thread
116 */
117 gc_thread **gc_threads = NULL;
118
119 #if !defined(THREADED_RTS)
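// In the non-threaded RTS there is exactly one gc_thread, allocated
// statically here; the extra 64 * sizeof(gen_workspace) presumably leaves
// room for the per-generation workspace array at the end of the gc_thread
// (cf. the allocation in initGcThreads() below), i.e. up to 64 generations.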
120 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
121 #endif
122
123 // Number of threads running in *this* GC. Affects how many
124 // step->todos[] lists we have to look in to find work.
125 nat n_gc_threads;
126
127 // For stats:
128 long copied; // *words* copied & scavenged during this GC
129
130 rtsBool work_stealing;
131
132 DECLARE_GCT
133
134 /* -----------------------------------------------------------------------------
135 Static function declarations
136 -------------------------------------------------------------------------- */
137
138 static void mark_root (void *user, StgClosure **root);
139 static void zero_static_object_list (StgClosure* first_static);
140 static nat initialise_N (rtsBool force_major_gc);
141 static void prepare_collected_gen (generation *gen);
142 static void prepare_uncollected_gen (generation *gen);
143 static void init_gc_thread (gc_thread *t);
144 static void resize_generations (void);
145 static void resize_nursery (void);
146 static void start_gc_threads (void);
147 static void scavenge_until_all_done (void);
148 static StgWord inc_running (void);
149 static StgWord dec_running (void);
150 static void wakeup_gc_threads (nat me);
151 static void shutdown_gc_threads (nat me);
152 static void collect_gct_blocks (void);
153
154 #if 0 && defined(DEBUG)
155 static void gcCAFs (void);
156 #endif
157
158 /* -----------------------------------------------------------------------------
159 The mark stack.
160 -------------------------------------------------------------------------- */
161
162 bdescr *mark_stack_top_bd; // topmost block in the mark stack
163 bdescr *mark_stack_bd; // current block in the mark stack
164 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
165
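/* A simplified sketch (illustration only, never compiled) of how the mark
 * stack grows: entries are pushed at mark_sp, and when the current block
 * fills up a fresh block is chained on at the top. The real push/pop code
 * lives in MarkStack.h.
 */
#if 0
static void sketch_push_mark_stack (StgPtr p)
{
    *mark_sp++ = (StgWord)p;

    if (mark_sp == mark_stack_bd->start + BLOCK_SIZE_W) {
        // the current block is full: chain a new block on at the top
        bdescr *bd = allocBlock();
        bd->link = mark_stack_bd;       // newer block points at older one
        bd->u.back = NULL;
        mark_stack_bd->u.back = bd;
        mark_stack_top_bd = bd;
        mark_stack_bd = bd;
        mark_sp = bd->start;
    }
}
#endif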
166 /* -----------------------------------------------------------------------------
167 GarbageCollect: the main entry point to the garbage collector.
168
169 Locks held: all capabilities are held throughout GarbageCollect().
170 -------------------------------------------------------------------------- */
171
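/* Rough phase structure of GarbageCollect(), as implemented below (the code
 * is authoritative):
 *
 *   1. choose N (initialise_N) and prepare the collected and uncollected
 *      generations
 *   2. evacuate the roots: capability mutable lists, CAFs, capabilities,
 *      the scheduler, signal handlers, the weak pointer list and the
 *      stable pointer table
 *   3. scavenge until no work remains, iterating with traverseWeakPtrList()
 *   4. compact or sweep the oldest generation (major GC only)
 *   5. tidy up each generation, then resize the generations and the nursery
 *   6. run finalizers and the heap census, resurrect threads, and return
 *      surplus memory to the OS after a major GC
 */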
172 void
173 GarbageCollect (rtsBool force_major_gc,
174 rtsBool do_heap_census,
175 nat gc_type USED_IF_THREADS,
176 Capability *cap)
177 {
178 bdescr *bd;
179 generation *gen;
180 lnat live_blocks, live_words, allocated, max_copied, avg_copied;
181 #if defined(THREADED_RTS)
182 gc_thread *saved_gct;
183 #endif
184 nat g, n;
185
186 // necessary if we stole a callee-saves register for gct:
187 #if defined(THREADED_RTS)
188 saved_gct = gct;
189 #endif
190
191 #ifdef PROFILING
192 CostCentreStack *save_CCS[n_capabilities];
193 #endif
194
195 ACQUIRE_SM_LOCK;
196
197 #if defined(RTS_USER_SIGNALS)
198 if (RtsFlags.MiscFlags.install_signal_handlers) {
199 // block signals
200 blockUserSignals();
201 }
202 #endif
203
204 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
205 // otherwise adjust the padding in gen_workspace.
206
207 // this is the main thread
208 SET_GCT(gc_threads[cap->no]);
209
210 // tell the stats department that we've started a GC
211 stat_startGC(gct);
212
213 // lock the StablePtr table
214 stablePtrPreGC();
215
216 #ifdef DEBUG
217 mutlist_MUTVARS = 0;
218 mutlist_MUTARRS = 0;
219 mutlist_OTHERS = 0;
220 #endif
221
222 // attribute any costs to CCS_GC
223 #ifdef PROFILING
224 for (n = 0; n < n_capabilities; n++) {
225 save_CCS[n] = capabilities[n].r.rCCCS;
226 capabilities[n].r.rCCCS = CCS_GC;
227 }
228 #endif
229
230 /* Approximate how much we allocated.
231 * Todo: only when generating stats?
232 */
233 allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
234
235 /* Figure out which generation to collect
236 */
237 n = initialise_N(force_major_gc);
238
239 #if defined(THREADED_RTS)
240 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
241 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
242 // It's not always a good idea to do load balancing in parallel
243 // GC. In particular, for a parallel program we don't want to
244 // lose locality by moving cached data into another CPU's cache
245 // (this effect can be quite significant).
246 //
247 // We could have a more complex way to determine whether to do
248 // work stealing or not, e.g. it might be a good idea to do it
249 // if the heap is big. For now, we just turn it on or off with
250 // a flag.
251 #endif
252
253 /* Start threads, so they can be spinning up while we finish initialisation.
254 */
255 start_gc_threads();
256
257 #if defined(THREADED_RTS)
258 /* How many threads will be participating in this GC?
259 * We don't try to parallelise minor GCs (unless the user asks for
260 * it with +RTS -gn0), or mark/compact/sweep GC.
261 */
262 if (gc_type == PENDING_GC_PAR) {
263 n_gc_threads = RtsFlags.ParFlags.nNodes;
264 } else {
265 n_gc_threads = 1;
266 }
267 #else
268 n_gc_threads = 1;
269 #endif
270
271 debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
272 N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
273
274 #ifdef RTS_GTK_FRONTPANEL
275 if (RtsFlags.GcFlags.frontpanel) {
276 updateFrontPanelBeforeGC(N);
277 }
278 #endif
279
280 #ifdef DEBUG
281 // check for memory leaks if DEBUG is on
282 memInventory(DEBUG_gc);
283 #endif
284
285 // check sanity *before* GC
286 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
287
288 // Initialise all the generations/steps that we're collecting.
289 for (g = 0; g <= N; g++) {
290 prepare_collected_gen(&generations[g]);
291 }
292 // Initialise all the generations/steps that we're *not* collecting.
293 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
294 prepare_uncollected_gen(&generations[g]);
295 }
296
297 // Prepare this gc_thread
298 init_gc_thread(gct);
299
300 /* Allocate a mark stack if we're doing a major collection.
301 */
302 if (major_gc && oldest_gen->mark) {
303 mark_stack_bd = allocBlock();
304 mark_stack_top_bd = mark_stack_bd;
305 mark_stack_bd->link = NULL;
306 mark_stack_bd->u.back = NULL;
307 mark_sp = mark_stack_bd->start;
308 } else {
309 mark_stack_bd = NULL;
310 mark_stack_top_bd = NULL;
311 mark_sp = NULL;
312 }
313
314 /* -----------------------------------------------------------------------
315 * follow all the roots that we know about:
316 */
317
318 // the main thread is running: this prevents any other threads from
319 // exiting prematurely, so we can start them now.
320 // NB. do this after the mutable lists have been saved above, otherwise
321 // the other GC threads will be writing into the old mutable lists.
322 inc_running();
323 wakeup_gc_threads(gct->thread_index);
324
325 traceEventGcWork(gct->cap);
326
327 // scavenge the capability-private mutable lists. This isn't part
328 // of markSomeCapabilities() because markSomeCapabilities() can only
329 // call back into the GC via mark_root() (due to the gct register
330 // variable).
331 if (n_gc_threads == 1) {
332 for (n = 0; n < n_capabilities; n++) {
333 #if defined(THREADED_RTS)
334 scavenge_capability_mut_Lists1(&capabilities[n]);
335 #else
336 scavenge_capability_mut_lists(&capabilities[n]);
337 #endif
338 }
339 } else {
340 scavenge_capability_mut_lists(gct->cap);
341 }
342
343 // follow roots from the CAF list (used by GHCi)
344 gct->evac_gen_no = 0;
345 markCAFs(mark_root, gct);
346
347 // follow all the roots that the application knows about.
348 gct->evac_gen_no = 0;
349 if (n_gc_threads == 1) {
350 for (n = 0; n < n_capabilities; n++) {
351 markCapability(mark_root, gct, &capabilities[n],
352 rtsTrue/*don't mark sparks*/);
353 }
354 } else {
355 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
356 }
357
358 markScheduler(mark_root, gct);
359
360 #if defined(RTS_USER_SIGNALS)
361 // mark the signal handlers (signals should be already blocked)
362 markSignalHandlers(mark_root, gct);
363 #endif
364
365 // Mark the weak pointer list, and prepare to detect dead weak pointers.
366 markWeakPtrList();
367 initWeakForGC();
368
369 // Mark the stable pointer table.
370 markStablePtrTable(mark_root, gct);
371
372 /* -------------------------------------------------------------------------
373 * Repeatedly scavenge all the areas we know about until there's no
374 * more scavenging to be done.
375 */
376 for (;;)
377 {
378 scavenge_until_all_done();
379 // The other threads are now stopped. We might recurse back to
380 // here, but from now on this is the only thread.
381
382 // must be last... invariant is that everything is fully
383 // scavenged at this point.
384 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
385 inc_running();
386 continue;
387 }
388
389 // If we get to here, there's really nothing left to do.
390 break;
391 }
392
393 shutdown_gc_threads(gct->thread_index);
394
395 // Now see which stable names are still alive.
396 gcStablePtrTable();
397
398 #ifdef THREADED_RTS
399 if (n_gc_threads == 1) {
400 for (n = 0; n < n_capabilities; n++) {
401 pruneSparkQueue(&capabilities[n]);
402 }
403 } else {
404 pruneSparkQueue(gct->cap);
405 }
406 #endif
407
408 #ifdef PROFILING
409 // We call processHeapClosureForDead() on every closure destroyed during
410 // the current garbage collection, so we invoke LdvCensusForDead().
411 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
412 || RtsFlags.ProfFlags.bioSelector != NULL) {
413 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
414 LdvCensusForDead(N);
415 ACQUIRE_SM_LOCK;
416 }
417 #endif
418
419 // NO MORE EVACUATION AFTER THIS POINT!
420
421 // Finally: compact or sweep the oldest generation.
422 if (major_gc && oldest_gen->mark) {
423 if (oldest_gen->compact)
424 compact(gct->scavenged_static_objects);
425 else
426 sweep(oldest_gen);
427 }
428
429 copied = 0;
430 max_copied = 0;
431 avg_copied = 0;
432 {
433 nat i;
434 for (i=0; i < n_gc_threads; i++) {
435 if (n_gc_threads > 1) {
436 debugTrace(DEBUG_gc,"thread %d:", i);
437 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
438 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
439 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
440 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
441 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
442 }
443 copied += gc_threads[i]->copied;
444 max_copied = stg_max(gc_threads[i]->copied, max_copied);
445 }
446 if (n_gc_threads == 1) {
447 max_copied = 0;
448 avg_copied = 0;
449 } else {
450 avg_copied = copied;
451 }
452 }
453
454 // Run through all the generations/steps and tidy up.
455 // We're going to:
456 // - count the amount of "live" data (live_words, live_blocks)
457 // - count the amount of "copied" data in this GC (copied)
458 // - free from-space
459 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
460 //
461 live_words = 0;
462 live_blocks = 0;
463
464 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
465
466 if (g == N) {
467 generations[g].collections++; // for stats
468 if (n_gc_threads > 1) generations[g].par_collections++;
469 }
470
471 // Count the mutable list as bytes "copied" for the purposes of
472 // stats. Every mutable list is copied during every GC.
473 if (g > 0) {
474 nat mut_list_size = 0;
475 for (n = 0; n < n_capabilities; n++) {
476 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
477 }
478 copied += mut_list_size;
479
480 debugTrace(DEBUG_gc,
481 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
482 (unsigned long)(mut_list_size * sizeof(W_)),
483 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
484 }
485
486 bdescr *next, *prev;
487 gen = &generations[g];
488
489 // for generations we collected...
490 if (g <= N) {
491
492 /* free old memory and shift to-space into from-space for all
493 * the collected steps (except the allocation area). These
494 * freed blocks will probably be quickly recycled.
495 */
496 if (gen->mark)
497 {
498 // tack the new blocks on the end of the existing blocks
499 if (gen->old_blocks != NULL) {
500
501 prev = NULL;
502 for (bd = gen->old_blocks; bd != NULL; bd = next) {
503
504 next = bd->link;
505
506 if (!(bd->flags & BF_MARKED))
507 {
508 if (prev == NULL) {
509 gen->old_blocks = next;
510 } else {
511 prev->link = next;
512 }
513 freeGroup(bd);
514 gen->n_old_blocks--;
515 }
516 else
517 {
518 gen->n_words += bd->free - bd->start;
519
520 // NB. this step might not be compacted next
521 // time, so reset the BF_MARKED flags.
522 // They are set before GC if we're going to
523 // compact. (search for BF_MARKED above).
524 bd->flags &= ~BF_MARKED;
525
526 // between GCs, all blocks in the heap except
527 // for the nursery have the BF_EVACUATED flag set.
528 bd->flags |= BF_EVACUATED;
529
530 prev = bd;
531 }
532 }
533
534 if (prev != NULL) {
535 prev->link = gen->blocks;
536 gen->blocks = gen->old_blocks;
537 }
538 }
539 // add the new blocks to the block tally
540 gen->n_blocks += gen->n_old_blocks;
541 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
542 ASSERT(countOccupied(gen->blocks) == gen->n_words);
543 }
544 else // not compacted
545 {
546 freeChain(gen->old_blocks);
547 }
548
549 gen->old_blocks = NULL;
550 gen->n_old_blocks = 0;
551
552 /* LARGE OBJECTS. The current live large objects are chained on
553 * scavenged_large, having been moved during garbage
554 * collection from large_objects. Any objects left on the
555 * large_objects list are therefore dead, so we free them here.
556 */
557 freeChain(gen->large_objects);
558 gen->large_objects = gen->scavenged_large_objects;
559 gen->n_large_blocks = gen->n_scavenged_large_blocks;
560 gen->n_new_large_words = 0;
561 }
562 else // for generations > N
563 {
564 /* For older generations, we need to append the
565 * scavenged_large_object list (i.e. large objects that have been
566 * promoted during this GC) to the large_object list for that step.
567 */
568 for (bd = gen->scavenged_large_objects; bd; bd = next) {
569 next = bd->link;
570 dbl_link_onto(bd, &gen->large_objects);
571 }
572
573 // add the new blocks we promoted during this GC
574 gen->n_large_blocks += gen->n_scavenged_large_blocks;
575 }
576
577 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
578
579 gen->scavenged_large_objects = NULL;
580 gen->n_scavenged_large_blocks = 0;
581
582 // Count "live" data
583 live_words += genLiveWords(gen);
584 live_blocks += genLiveBlocks(gen);
585
586 // add in the partial blocks in the gen_workspaces, but ignore gen 0
587 // if this is a local GC (we can't count another capability's part_list)
588 {
589 nat i;
590 for (i = 0; i < n_capabilities; i++) {
591 live_words += gcThreadLiveWords(i, gen->no);
592 live_blocks += gcThreadLiveBlocks(i, gen->no);
593 }
594 }
595 } // for all generations
596
597 // update the max size of older generations after a major GC
598 resize_generations();
599
600 // Free the mark stack.
601 if (mark_stack_top_bd != NULL) {
602 debugTrace(DEBUG_gc, "mark stack: %d blocks",
603 countBlocks(mark_stack_top_bd));
604 freeChain(mark_stack_top_bd);
605 }
606
607 // Free any bitmaps.
608 for (g = 0; g <= N; g++) {
609 gen = &generations[g];
610 if (gen->bitmap != NULL) {
611 freeGroup(gen->bitmap);
612 gen->bitmap = NULL;
613 }
614 }
615
616 // Reset the nursery: make the blocks empty
617 allocated += clearNurseries();
618
619 resize_nursery();
620
621 resetNurseries();
622
623 // mark the garbage collected CAFs as dead
624 #if 0 && defined(DEBUG) // doesn't work at the moment
625 if (major_gc) { gcCAFs(); }
626 #endif
627
628 #ifdef PROFILING
629 // resetStaticObjectForRetainerProfiling() must be called before
630 // zeroing below.
631
632 // ToDo: fix the gct->scavenged_static_objects below
633 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
634 #endif
635
636 // zero the scavenged static object list
637 if (major_gc) {
638 nat i;
639 if (n_gc_threads == 1) {
640 zero_static_object_list(gct->scavenged_static_objects);
641 } else {
642 for (i = 0; i < n_gc_threads; i++) {
643 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
644 }
645 }
646 }
647
648 // Update the stable pointer hash table.
649 updateStablePtrTable(major_gc);
650
651 // unlock the StablePtr table. Must be before scheduleFinalizers(),
652 // because a finalizer may call hs_free_fun_ptr() or
653 // hs_free_stable_ptr(), both of which access the StablePtr table.
654 stablePtrPostGC();
655
656 // Start any pending finalizers. Must be after
657 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
658 RELEASE_SM_LOCK;
659 scheduleFinalizers(cap, old_weak_ptr_list);
660 ACQUIRE_SM_LOCK;
661
662 // check sanity after GC
663 // before resurrectThreads(), because that might overwrite some
664 // closures, which will cause problems with THREADED where we don't
665 // fill slop.
666 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
667
668 // If a heap census is due, we need to do it before
669 // resurrectThreads(), for the same reason as checkSanity above:
670 // resurrectThreads() will overwrite some closures and leave slop
671 // behind.
672 if (do_heap_census) {
673 debugTrace(DEBUG_sched, "performing heap census");
674 RELEASE_SM_LOCK;
675 heapCensus(gct->gc_start_cpu);
676 ACQUIRE_SM_LOCK;
677 }
678
679 // send exceptions to any threads which were about to die
680 RELEASE_SM_LOCK;
681 resurrectThreads(resurrected_threads);
682 ACQUIRE_SM_LOCK;
683
684 if (major_gc) {
685 nat need, got;
686 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
687 got = mblocks_allocated;
688 /* If the amount of data remains constant, next major GC we'll
689 require (F+1)*need. We leave (F+2)*need in order to reduce
690 repeated deallocation and reallocation. */
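      /* For example (illustrative figures only): with the default
         F == oldGenFactor == 2 and need == 100 megablocks of live data,
         we keep up to (2+2)*100 = 400 megablocks mapped, and anything
         above that is returned to the OS. */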
691 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
692 if (got > need) {
693 returnMemoryToOS(got - need);
694 }
695 }
696
697 // extra GC trace info
698 IF_DEBUG(gc, statDescribeGens());
699
700 #ifdef DEBUG
701 // symbol-table based profiling
702 /* heapCensus(to_blocks); */ /* ToDo */
703 #endif
704
705 // restore enclosing cost centre
706 #ifdef PROFILING
707 for (n = 0; n < n_capabilities; n++) {
708 capabilities[n].r.rCCCS = save_CCS[n];
709 }
710 #endif
711
712 #ifdef DEBUG
713 // check for memory leaks if DEBUG is on
714 memInventory(DEBUG_gc);
715 #endif
716
717 #ifdef RTS_GTK_FRONTPANEL
718 if (RtsFlags.GcFlags.frontpanel) {
719 updateFrontPanelAfterGC( N, live_words );
720 }
721 #endif
722
723 // ok, GC over: tell the stats department what happened.
724 stat_endGC(gct, allocated, live_words,
725 copied, N, max_copied, avg_copied,
726 live_blocks * BLOCK_SIZE_W - live_words /* slop */);
727
728 // Guess which generation we'll collect *next* time
729 initialise_N(force_major_gc);
730
731 #if defined(RTS_USER_SIGNALS)
732 if (RtsFlags.MiscFlags.install_signal_handlers) {
733 // unblock signals again
734 unblockUserSignals();
735 }
736 #endif
737
738 RELEASE_SM_LOCK;
739
740 SET_GCT(saved_gct);
741 }
742
743 /* -----------------------------------------------------------------------------
744 Figure out which generation to collect, initialise N and major_gc.
745
746 Also returns the total number of blocks in generations that will be
747 collected.
748 -------------------------------------------------------------------------- */
749
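/* For example (illustrative only): with 3 generations and no forced major
 * GC, if generation 1 has grown past its max_blocks but generation 2 has
 * not, then N = 1, major_gc is false, and the returned total counts the
 * blocks of generations 0 and 1 plus the nursery.
 */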
750 static nat
751 initialise_N (rtsBool force_major_gc)
752 {
753 int g;
754 nat blocks, blocks_total;
755
756 blocks = 0;
757 blocks_total = 0;
758
759 if (force_major_gc) {
760 N = RtsFlags.GcFlags.generations - 1;
761 } else {
762 N = 0;
763 }
764
765 for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
766
767 blocks = generations[g].n_words / BLOCK_SIZE_W
768 + generations[g].n_large_blocks;
769
770 if (blocks >= generations[g].max_blocks) {
771 N = stg_max(N,g);
772 }
773 if ((nat)g <= N) {
774 blocks_total += blocks;
775 }
776 }
777
778 blocks_total += countNurseryBlocks();
779
780 major_gc = (N == RtsFlags.GcFlags.generations-1);
781 return blocks_total;
782 }
783
784 /* -----------------------------------------------------------------------------
785 Initialise the gc_thread structures.
786 -------------------------------------------------------------------------- */
787
788 #define GC_THREAD_INACTIVE 0
789 #define GC_THREAD_STANDING_BY 1
790 #define GC_THREAD_RUNNING 2
791 #define GC_THREAD_WAITING_TO_CONTINUE 3
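
// Each GC worker thread cycles through these states:
//
//   INACTIVE -> STANDING_BY -> RUNNING -> WAITING_TO_CONTINUE -> INACTIVE
//
// gcWorkerThread() advertises STANDING_BY and then blocks acquiring its
// gc_spin lock; wakeup_gc_threads() sets RUNNING, grabs mut_spin and
// releases gc_spin to let the worker run. When the worker has finished it
// advertises WAITING_TO_CONTINUE and blocks acquiring mut_spin, which
// releaseGCThreads() releases after setting the state back to INACTIVE.
// shutdown_gc_threads() simply spins until every worker has reached
// WAITING_TO_CONTINUE.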
792
793 static void
794 new_gc_thread (nat n, gc_thread *t)
795 {
796 nat g;
797 gen_workspace *ws;
798
799 t->cap = &capabilities[n];
800
801 #ifdef THREADED_RTS
802 t->id = 0;
803 initSpinLock(&t->gc_spin);
804 initSpinLock(&t->mut_spin);
805 ACQUIRE_SPIN_LOCK(&t->gc_spin);
806 t->wakeup = GC_THREAD_INACTIVE; // starts true, so we can wait for the
807 // thread to start up, see wakeup_gc_threads
808 #endif
809
810 t->thread_index = n;
811 t->free_blocks = NULL;
812 t->gc_count = 0;
813
814 init_gc_thread(t);
815
816 #ifdef USE_PAPI
817 t->papi_events = -1;
818 #endif
819
820 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
821 {
822 ws = &t->gens[g];
823 ws->gen = &generations[g];
824 ASSERT(g == ws->gen->no);
825 ws->my_gct = t;
826
827 // We want to call
828 // alloc_todo_block(ws,0);
829 // but can't, because it uses gct which isn't set up at this point.
830 // Hence, allocate a block for todo_bd manually:
831 {
832 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
833 initBdescr(bd, ws->gen, ws->gen->to);
834 bd->flags = BF_EVACUATED;
835 bd->u.scan = bd->free = bd->start;
836
837 ws->todo_bd = bd;
838 ws->todo_free = bd->free;
839 ws->todo_lim = bd->start + BLOCK_SIZE_W;
840 }
841
842 ws->todo_q = newWSDeque(128);
843 ws->todo_overflow = NULL;
844 ws->n_todo_overflow = 0;
845 ws->todo_large_objects = NULL;
846
847 ws->part_list = NULL;
848 ws->n_part_blocks = 0;
849
850 ws->scavd_list = NULL;
851 ws->n_scavd_blocks = 0;
852 }
853 }
854
855
856 void
857 initGcThreads (void)
858 {
859 if (gc_threads == NULL) {
860 #if defined(THREADED_RTS)
861 nat i;
862 gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
863 sizeof(gc_thread*),
864 "alloc_gc_threads");
865
866 for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
867 gc_threads[i] =
868 stgMallocBytes(sizeof(gc_thread) +
869 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
870 "alloc_gc_threads");
871
872 new_gc_thread(i, gc_threads[i]);
873 }
874 #else
875 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
876 gc_threads[0] = gct;
877 new_gc_thread(0,gc_threads[0]);
878 #endif
879 }
880 }
881
882 void
883 freeGcThreads (void)
884 {
885 nat g;
886 if (gc_threads != NULL) {
887 #if defined(THREADED_RTS)
888 nat i;
889 for (i = 0; i < n_capabilities; i++) {
890 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
891 {
892 freeWSDeque(gc_threads[i]->gens[g].todo_q);
893 }
894 stgFree (gc_threads[i]);
895 }
896 stgFree (gc_threads);
897 #else
898 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
899 {
900 freeWSDeque(gc_threads[0]->gens[g].todo_q);
901 }
902 stgFree (gc_threads);
903 #endif
904 gc_threads = NULL;
905 }
906 }
907
908 /* ----------------------------------------------------------------------------
909 Start GC threads
910 ------------------------------------------------------------------------- */
911
912 static volatile StgWord gc_running_threads;
913
914 static StgWord
915 inc_running (void)
916 {
917 StgWord new;
918 new = atomic_inc(&gc_running_threads);
919 ASSERT(new <= n_gc_threads);
920 return new;
921 }
922
923 static StgWord
924 dec_running (void)
925 {
926 ASSERT(gc_running_threads != 0);
927 return atomic_dec(&gc_running_threads);
928 }
929
930 static rtsBool
931 any_work (void)
932 {
933 int g;
934 gen_workspace *ws;
935
936 gct->any_work++;
937
938 write_barrier();
939
940 // scavenge objects in compacted generation
941 if (mark_stack_bd != NULL && !mark_stack_empty()) {
942 return rtsTrue;
943 }
944
945 // Check for global work in any step. We don't need to check for
946 // local work, because we have already exited scavenge_loop(),
947 // which means there is no local work for this thread.
948 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
949 ws = &gct->gens[g];
950 if (ws->todo_large_objects) return rtsTrue;
951 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
952 if (ws->todo_overflow) return rtsTrue;
953 }
954
955 #if defined(THREADED_RTS)
956 if (work_stealing) {
957 nat n;
958 // look for work to steal
959 for (n = 0; n < n_gc_threads; n++) {
960 if (n == gct->thread_index) continue;
961 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
962 ws = &gc_threads[n]->gens[g];
963 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
964 }
965 }
966 }
967 #endif
968
969 gct->no_work++;
970 #if defined(THREADED_RTS)
971 yieldThread();
972 #endif
973
974 return rtsFalse;
975 }
976
977 static void
978 scavenge_until_all_done (void)
979 {
980 DEBUG_ONLY( nat r );
981
982
983 loop:
984 #if defined(THREADED_RTS)
985 if (n_gc_threads > 1) {
986 scavenge_loop();
987 } else {
988 scavenge_loop1();
989 }
990 #else
991 scavenge_loop();
992 #endif
993
994 collect_gct_blocks();
995
996 // scavenge_loop() only exits when there's no work to do
997
998 #ifdef DEBUG
999 r = dec_running();
1000 #else
1001 dec_running();
1002 #endif
1003
1004 traceEventGcIdle(gct->cap);
1005
1006 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1007
1008 while (gc_running_threads != 0) {
1009 // usleep(1);
1010 if (any_work()) {
1011 inc_running();
1012 traceEventGcWork(gct->cap);
1013 goto loop;
1014 }
1015 // any_work() does not remove the work from the queue, it
1016 // just checks for the presence of work. If we find any,
1017 // then we increment gc_running_threads and go back to
1018 // scavenge_loop() to perform any pending work.
1019 }
1020
1021 traceEventGcDone(gct->cap);
1022 }
1023
1024 #if defined(THREADED_RTS)
1025
1026 void
1027 gcWorkerThread (Capability *cap)
1028 {
1029 gc_thread *saved_gct;
1030
1031 // necessary if we stole a callee-saves register for gct:
1032 saved_gct = gct;
1033
1034 SET_GCT(gc_threads[cap->no]);
1035 gct->id = osThreadId();
1036
1037 stat_gcWorkerThreadStart(gct);
1038
1039 // Wait until we're told to wake up
1040 RELEASE_SPIN_LOCK(&gct->mut_spin);
1041 gct->wakeup = GC_THREAD_STANDING_BY;
1042 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1043 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1044
1045 #ifdef USE_PAPI
1046 // start performance counters in this thread...
1047 if (gct->papi_events == -1) {
1048 papi_init_eventset(&gct->papi_events);
1049 }
1050 papi_thread_start_gc1_count(gct->papi_events);
1051 #endif
1052
1053 init_gc_thread(gct);
1054
1055 traceEventGcWork(gct->cap);
1056
1057 // Every thread evacuates some roots.
1058 gct->evac_gen_no = 0;
1059 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1060 scavenge_capability_mut_lists(cap);
1061
1062 scavenge_until_all_done();
1063
1064 #ifdef THREADED_RTS
1065 // Now that the whole heap is marked, we discard any sparks that
1066 // were found to be unreachable. The main GC thread is currently
1067 // marking heap reachable via weak pointers, so it is
1068 // non-deterministic whether a spark will be retained if it is
1069 // only reachable via weak pointers. To fix this problem would
1070 // require another GC barrier, which is too high a price.
1071 pruneSparkQueue(cap);
1072 #endif
1073
1074 #ifdef USE_PAPI
1075 // count events in this thread towards the GC totals
1076 papi_thread_stop_gc1_count(gct->papi_events);
1077 #endif
1078
1079 // Wait until we're told to continue
1080 RELEASE_SPIN_LOCK(&gct->gc_spin);
1081 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1082 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1083 gct->thread_index);
1084 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1085 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1086
1087 // record the time spent doing GC in the Task structure
1088 stat_gcWorkerThreadDone(gct);
1089
1090 SET_GCT(saved_gct);
1091 }
1092
1093 #endif
1094
1095 #if defined(THREADED_RTS)
1096
1097 void
1098 waitForGcThreads (Capability *cap USED_IF_THREADS)
1099 {
1100 const nat n_threads = RtsFlags.ParFlags.nNodes;
1101 const nat me = cap->no;
1102 nat i, j;
1103 rtsBool retry = rtsTrue;
1104
1105 while(retry) {
1106 for (i=0; i < n_threads; i++) {
1107 if (i == me) continue;
1108 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1109 prodCapability(&capabilities[i], cap->running_task);
1110 }
1111 }
1112 for (j=0; j < 10; j++) {
1113 retry = rtsFalse;
1114 for (i=0; i < n_threads; i++) {
1115 if (i == me) continue;
1116 write_barrier();
1117 interruptAllCapabilities();
1118 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1119 retry = rtsTrue;
1120 }
1121 }
1122 if (!retry) break;
1123 yieldThread();
1124 }
1125 }
1126 }
1127
1128 #endif // THREADED_RTS
1129
1130 static void
1131 start_gc_threads (void)
1132 {
1133 #if defined(THREADED_RTS)
1134 gc_running_threads = 0;
1135 #endif
1136 }
1137
1138 static void
1139 wakeup_gc_threads (nat me USED_IF_THREADS)
1140 {
1141 #if defined(THREADED_RTS)
1142 nat i;
1143
1144 if (n_gc_threads == 1) return;
1145
1146 for (i=0; i < n_gc_threads; i++) {
1147 if (i == me) continue;
1148 inc_running();
1149 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1150 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1151
1152 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1153 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1154 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1155 }
1156 #endif
1157 }
1158
1159 // After GC is complete, we must wait for all GC threads to enter the
1160 // standby state, otherwise they may still be executing inside
1161 // any_work(), and may even remain awake until the next GC starts.
1162 static void
1163 shutdown_gc_threads (nat me USED_IF_THREADS)
1164 {
1165 #if defined(THREADED_RTS)
1166 nat i;
1167
1168 if (n_gc_threads == 1) return;
1169
1170 for (i=0; i < n_gc_threads; i++) {
1171 if (i == me) continue;
1172 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1173 }
1174 #endif
1175 }
1176
1177 #if defined(THREADED_RTS)
1178 void
1179 releaseGCThreads (Capability *cap USED_IF_THREADS)
1180 {
1181 const nat n_threads = RtsFlags.ParFlags.nNodes;
1182 const nat me = cap->no;
1183 nat i;
1184 for (i=0; i < n_threads; i++) {
1185 if (i == me) continue;
1186 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1187 barf("releaseGCThreads");
1188
1189 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1190 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1191 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1192 }
1193 }
1194 #endif
1195
1196 /* ----------------------------------------------------------------------------
1197 Initialise a generation that is to be collected
1198 ------------------------------------------------------------------------- */
1199
1200 static void
1201 prepare_collected_gen (generation *gen)
1202 {
1203 nat i, g, n;
1204 gen_workspace *ws;
1205 bdescr *bd, *next;
1206
1207 // Throw away the current mutable list. Invariant: the mutable
1208 // list always has at least one block; this means we can avoid a
1209 // check for NULL in recordMutable().
1210 g = gen->no;
1211 if (g != 0) {
1212 for (i = 0; i < n_capabilities; i++) {
1213 freeChain(capabilities[i].mut_lists[g]);
1214 capabilities[i].mut_lists[g] = allocBlock();
1215 }
1216 }
1217
1218 gen = &generations[g];
1219 ASSERT(gen->no == g);
1220
1221 // we'll construct a new list of threads in this step
1222 // during GC, throw away the current list.
1223 gen->old_threads = gen->threads;
1224 gen->threads = END_TSO_QUEUE;
1225
1226 // deprecate the existing blocks
1227 gen->old_blocks = gen->blocks;
1228 gen->n_old_blocks = gen->n_blocks;
1229 gen->blocks = NULL;
1230 gen->n_blocks = 0;
1231 gen->n_words = 0;
1232 gen->live_estimate = 0;
1233
1234 // initialise the large object queues.
1235 ASSERT(gen->scavenged_large_objects == NULL);
1236 ASSERT(gen->n_scavenged_large_blocks == 0);
1237
1238 // grab all the partial blocks stashed in the gc_thread workspaces and
1239 // move them to the old_blocks list of this gen.
1240 for (n = 0; n < n_capabilities; n++) {
1241 ws = &gc_threads[n]->gens[gen->no];
1242
1243 for (bd = ws->part_list; bd != NULL; bd = next) {
1244 next = bd->link;
1245 bd->link = gen->old_blocks;
1246 gen->old_blocks = bd;
1247 gen->n_old_blocks += bd->blocks;
1248 }
1249 ws->part_list = NULL;
1250 ws->n_part_blocks = 0;
1251
1252 ASSERT(ws->scavd_list == NULL);
1253 ASSERT(ws->n_scavd_blocks == 0);
1254
1255 if (ws->todo_free != ws->todo_bd->start) {
1256 ws->todo_bd->free = ws->todo_free;
1257 ws->todo_bd->link = gen->old_blocks;
1258 gen->old_blocks = ws->todo_bd;
1259 gen->n_old_blocks += ws->todo_bd->blocks;
1260 alloc_todo_block(ws,0); // always has one block.
1261 }
1262 }
1263
1264 // mark the small objects as from-space
1265 for (bd = gen->old_blocks; bd; bd = bd->link) {
1266 bd->flags &= ~BF_EVACUATED;
1267 }
1268
1269 // mark the large objects as from-space
1270 for (bd = gen->large_objects; bd; bd = bd->link) {
1271 bd->flags &= ~BF_EVACUATED;
1272 }
1273
1274 // for a compacted generation, we need to allocate the bitmap
1275 if (gen->mark) {
1276 lnat bitmap_size; // in bytes
1277 bdescr *bitmap_bdescr;
1278 StgWord *bitmap;
1279
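        // One bit per word of heap: e.g. with 4096-byte blocks and 8-byte
        // words, each old block contributes 512 bits = 64 bytes of bitmap.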
1280 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1281
1282 if (bitmap_size > 0) {
1283 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1284 / BLOCK_SIZE);
1285 gen->bitmap = bitmap_bdescr;
1286 bitmap = bitmap_bdescr->start;
1287
1288 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1289 bitmap_size, bitmap);
1290
1291 // don't forget to fill it with zeros!
1292 memset(bitmap, 0, bitmap_size);
1293
1294 // For each block in this step, point to its bitmap from the
1295 // block descriptor.
1296 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1297 bd->u.bitmap = bitmap;
1298 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1299
1300 // Also at this point we set the BF_MARKED flag
1301 // for this block. The invariant is that
1302 // BF_MARKED is always unset, except during GC
1303 // when it is set on those blocks which will be
1304 // compacted.
1305 if (!(bd->flags & BF_FRAGMENTED)) {
1306 bd->flags |= BF_MARKED;
1307 }
1308
1309 // BF_SWEPT should be marked only for blocks that are being
1310 // collected in sweep()
1311 bd->flags &= ~BF_SWEPT;
1312 }
1313 }
1314 }
1315 }
1316
1317
1318 /* ----------------------------------------------------------------------------
1319 Save the mutable lists in saved_mut_lists
1320 ------------------------------------------------------------------------- */
1321
1322 static void
1323 stash_mut_list (Capability *cap, nat gen_no)
1324 {
1325 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1326 cap->mut_lists[gen_no] = allocBlock_sync();
1327 }
1328
1329 /* ----------------------------------------------------------------------------
1330 Initialise a generation that is *not* to be collected
1331 ------------------------------------------------------------------------- */
1332
1333 static void
1334 prepare_uncollected_gen (generation *gen)
1335 {
1336 nat i;
1337
1338
1339 ASSERT(gen->no > 0);
1340
1341 // save the current mutable lists for this generation, and
1342 // allocate a fresh block for each one. We'll traverse these
1343 // mutable lists as roots early on in the GC.
1344 for (i = 0; i < n_capabilities; i++) {
1345 stash_mut_list(&capabilities[i], gen->no);
1346 }
1347
1348 ASSERT(gen->scavenged_large_objects == NULL);
1349 ASSERT(gen->n_scavenged_large_blocks == 0);
1350 }
1351
1352 /* -----------------------------------------------------------------------------
1353 Collect the completed blocks from a GC thread and attach them to
1354 the generation.
1355 -------------------------------------------------------------------------- */
1356
1357 static void
1358 collect_gct_blocks (void)
1359 {
1360 nat g;
1361 gen_workspace *ws;
1362 bdescr *bd, *prev;
1363
1364 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1365 ws = &gct->gens[g];
1366
1367 // there may still be a block attached to ws->todo_bd;
1368 // leave it there to use next time.
1369
1370 if (ws->scavd_list != NULL) {
1371 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1372
1373 ASSERT(gct->scan_bd == NULL);
1374 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1375
1376 prev = NULL;
1377 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1378 ws->gen->n_words += bd->free - bd->start;
1379 prev = bd;
1380 }
1381 if (prev != NULL) {
1382 prev->link = ws->gen->blocks;
1383 ws->gen->blocks = ws->scavd_list;
1384 }
1385 ws->gen->n_blocks += ws->n_scavd_blocks;
1386
1387 ws->scavd_list = NULL;
1388 ws->n_scavd_blocks = 0;
1389
1390 RELEASE_SPIN_LOCK(&ws->gen->sync);
1391 }
1392 }
1393 }
1394
1395 /* -----------------------------------------------------------------------------
1396 Initialise a gc_thread before GC
1397 -------------------------------------------------------------------------- */
1398
1399 static void
1400 init_gc_thread (gc_thread *t)
1401 {
1402 t->static_objects = END_OF_STATIC_LIST;
1403 t->scavenged_static_objects = END_OF_STATIC_LIST;
1404 t->scan_bd = NULL;
1405 t->mut_lists = t->cap->mut_lists;
1406 t->evac_gen_no = 0;
1407 t->failed_to_evac = rtsFalse;
1408 t->eager_promotion = rtsTrue;
1409 t->thunk_selector_depth = 0;
1410 t->copied = 0;
1411 t->scanned = 0;
1412 t->any_work = 0;
1413 t->no_work = 0;
1414 t->scav_find_work = 0;
1415 }
1416
1417 /* -----------------------------------------------------------------------------
1418 Function we pass to evacuate roots.
1419 -------------------------------------------------------------------------- */
1420
1421 static void
1422 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1423 {
1424 // we stole a register for gct, but this function is called from
1425 // *outside* the GC where the register variable is not in effect,
1426 // so we need to save and restore it here. NB. only call
1427 // mark_root() from the main GC thread, otherwise gct will be
1428 // incorrect.
1429 #if defined(THREADED_RTS)
1430 gc_thread *saved_gct;
1431 saved_gct = gct;
1432 #endif
1433 SET_GCT(user);
1434
1435 evacuate(root);
1436
1437 SET_GCT(saved_gct);
1438 }
1439
1440 /* -----------------------------------------------------------------------------
1441 Initialising the static object & mutable lists
1442 -------------------------------------------------------------------------- */
1443
1444 static void
1445 zero_static_object_list(StgClosure* first_static)
1446 {
1447 StgClosure* p;
1448 StgClosure* link;
1449 const StgInfoTable *info;
1450
1451 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1452 info = get_itbl(p);
1453 link = *STATIC_LINK(info, p);
1454 *STATIC_LINK(info,p) = NULL;
1455 }
1456 }
1457
1458 /* ----------------------------------------------------------------------------
1459 Reset the sizes of the older generations when we do a major
1460 collection.
1461
1462 CURRENT STRATEGY: make all generations except zero the same size.
1463 We have to stay within the maximum heap size, and leave a certain
1464 percentage of the maximum heap size available to allocate into.
1465 ------------------------------------------------------------------------- */
1466
1467 static void
1468 resize_generations (void)
1469 {
1470 nat g;
1471
1472 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1473 nat live, size, min_alloc, words;
1474 const nat max = RtsFlags.GcFlags.maxHeapSize;
1475 const nat gens = RtsFlags.GcFlags.generations;
1476
1477 // live in the oldest generations
1478 if (oldest_gen->live_estimate != 0) {
1479 words = oldest_gen->live_estimate;
1480 } else {
1481 words = oldest_gen->n_words;
1482 }
1483 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1484 oldest_gen->n_large_blocks;
1485
1486 // default max size for all generations except zero
1487 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1488 RtsFlags.GcFlags.minOldGenSize);
1489
1490 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1491 RtsFlags.GcFlags.heapSizeSuggestion = size;
1492 }
1493
1494 // minimum size for generation zero
1495 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1496 RtsFlags.GcFlags.minAllocAreaSize);
1497
1498 // Auto-enable compaction when the residency reaches a
1499 // certain percentage of the maximum heap size (default: 30%).
1500 if (RtsFlags.GcFlags.compact ||
1501 (max > 0 &&
1502 oldest_gen->n_blocks >
1503 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1504 oldest_gen->mark = 1;
1505 oldest_gen->compact = 1;
1506 // debugBelch("compaction: on\n", live);
1507 } else {
1508 oldest_gen->mark = 0;
1509 oldest_gen->compact = 0;
1510 // debugBelch("compaction: off\n", live);
1511 }
1512
1513 if (RtsFlags.GcFlags.sweep) {
1514 oldest_gen->mark = 1;
1515 }
1516
1517 // if we're going to go over the maximum heap size, reduce the
1518 // size of the generations accordingly. The calculation is
1519 // different if compaction is turned on, because we don't need
1520 // to double the space required to collect the old generation.
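      // For example (illustrative only), with 2 generations and no
      // compaction the constraint below is size*(gens-1)*2 + min_alloc <= max,
      // i.e. 2*size + min_alloc <= max, because copying collection needs
      // both from-space and to-space for the old generation; with
      // compaction the doubling applies only to the younger generations,
      // so the old generation can be allowed to grow larger.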
1521 if (max != 0) {
1522
1523 // this test is necessary to ensure that the calculations
1524 // below don't have any negative results - we're working
1525 // with unsigned values here.
1526 if (max < min_alloc) {
1527 heapOverflow();
1528 }
1529
1530 if (oldest_gen->compact) {
1531 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1532 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1533 }
1534 } else {
1535 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1536 size = (max - min_alloc) / ((gens - 1) * 2);
1537 }
1538 }
1539
1540 if (size < live) {
1541 heapOverflow();
1542 }
1543 }
1544
1545 #if 0
1546 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1547 min_alloc, size, max);
1548 #endif
1549
1550 for (g = 0; g < gens; g++) {
1551 generations[g].max_blocks = size;
1552 }
1553 }
1554 }
1555
1556 /* -----------------------------------------------------------------------------
1557 Calculate the new size of the nursery, and resize it.
1558 -------------------------------------------------------------------------- */
1559
1560 static void
1561 resize_nursery (void)
1562 {
1563 const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1564
1565 if (RtsFlags.GcFlags.generations == 1)
1566 { // Two-space collector:
1567 nat blocks;
1568
1569 /* set up a new nursery. Allocate a nursery size based on a
1570 * function of the amount of live data (by default a factor of 2)
1571 * Use the blocks from the old nursery if possible, freeing up any
1572 * left over blocks.
1573 *
1574 * If we get near the maximum heap size, then adjust our nursery
1575 * size accordingly. If the nursery is the same size as the live
1576 * data (L), then we need 3L bytes. We can reduce the size of the
1577 * nursery to bring the required memory down near 2L bytes.
1578 *
1579 * A normal 2-space collector would need 4L bytes to give the same
1580 * performance we get from 3L bytes, reducing to the same
1581 * performance at 2L bytes.
1582 */
1583 blocks = generations[0].n_blocks;
1584
1585 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1586 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1587 RtsFlags.GcFlags.maxHeapSize )
1588 {
1589 long adjusted_blocks; // signed on purpose
1590 int pc_free;
1591
1592 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1593
1594 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1595 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1596
1597 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1598 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1599 {
1600 heapOverflow();
1601 }
1602 blocks = adjusted_blocks;
1603 }
1604 else
1605 {
1606 blocks *= RtsFlags.GcFlags.oldGenFactor;
1607 if (blocks < min_nursery)
1608 {
1609 blocks = min_nursery;
1610 }
1611 }
1612 resizeNurseries(blocks);
1613 }
1614 else // Generational collector
1615 {
1616 /*
1617 * If the user has given us a suggested heap size, adjust our
1618 * allocation area to make best use of the memory available.
1619 */
1620 if (RtsFlags.GcFlags.heapSizeSuggestion)
1621 {
1622 long blocks;
1623 const nat needed = calcNeeded(); // approx blocks needed at next GC
1624
1625 /* Guess how much will be live in generation 0 step 0 next time.
1626 * A good approximation is obtained by finding the
1627 * percentage of g0 that was live at the last minor GC.
1628 *
1629 * We have an accurate figure for the amount of copied data in
1630 * 'copied', but we must convert this to a number of blocks, with
1631 * a small adjustment for estimated slop at the end of a block
1632 * (- 10 words).
1633 */
1634 if (N == 0)
1635 {
1636 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1637 / countNurseryBlocks();
1638 }
1639
1640 /* Estimate a size for the allocation area based on the
1641 * information available. We might end up going slightly under
1642 * or over the suggested heap size, but we should be pretty
1643 * close on average.
1644 *
1645 * Formula: suggested - needed
1646 * ----------------------------
1647 * 1 + g0_pcnt_kept/100
1648 *
1649 * where 'needed' is the amount of memory needed at the next
1650 * collection for collecting all gens except g0.
1651 */
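        /* For example (illustrative figures only): with a suggested heap
         * size of 10000 blocks, needed = 4000 blocks and g0_pcnt_kept = 25,
         * this gives (10000 - 4000) * 100 / 125 = 4800 blocks of
         * allocation area. */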
1652 blocks =
1653 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1654 (100 + (long)g0_pcnt_kept);
1655
1656 if (blocks < (long)min_nursery) {
1657 blocks = min_nursery;
1658 }
1659
1660 resizeNurseries((nat)blocks);
1661 }
1662 else
1663 {
1664 // we might have added extra large blocks to the nursery, so
1665 // resize back to minAllocAreaSize again.
1666 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1667 }
1668 }
1669 }
1670
1671 /* -----------------------------------------------------------------------------
1672 Sanity code for CAF garbage collection.
1673
1674 With DEBUG turned on, we manage a CAF list in addition to the SRT
1675 mechanism. After GC, we run down the CAF list and blackhole any
1676 CAFs which have been garbage collected. This means we get an error
1677 whenever the program tries to enter a garbage collected CAF.
1678
1679 Any garbage collected CAFs are taken off the CAF list at the same
1680 time.
1681 -------------------------------------------------------------------------- */
1682
1683 #if 0 && defined(DEBUG)
1684
1685 static void
1686 gcCAFs(void)
1687 {
1688 StgClosure* p;
1689 StgClosure** pp;
1690 const StgInfoTable *info;
1691 nat i;
1692
1693 i = 0;
1694 p = caf_list;
1695 pp = &caf_list;
1696
1697 while (p != NULL) {
1698
1699 info = get_itbl(p);
1700
1701 ASSERT(info->type == IND_STATIC);
1702
1703 if (STATIC_LINK(info,p) == NULL) {
1704 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1705 // black hole it
1706 SET_INFO(p,&stg_BLACKHOLE_info);
1707 p = STATIC_LINK2(info,p);
1708 *pp = p;
1709 }
1710 else {
1711 pp = &STATIC_LINK2(info,p);
1712 p = *pp;
1713 i++;
1714 }
1715
1716 }
1717
1718 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1719 }
1720 #endif