1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "GCTDecl.h"
44 #include "Compact.h"
45 #include "Evac.h"
46 #include "Scav.h"
47 #include "GCUtils.h"
48 #include "MarkStack.h"
49 #include "MarkWeak.h"
50 #include "Sparks.h"
51 #include "Sweep.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
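 *
 * (In the code below the end-of-list sentinel appears as
 * END_OF_STATIC_LIST; see init_gc_thread() and zero_static_object_list().)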
93 */
94
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
100 nat N;
101 rtsBool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #ifdef DEBUG
109 nat mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_OTHERS;
113 #endif
114
115 /* Thread-local data for each GC thread
116 */
117 gc_thread **gc_threads = NULL;
118
119 #if !defined(THREADED_RTS)
120 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
121 #endif
122
123 // Number of threads running in *this* GC. Affects how many
124 // step->todos[] lists we have to look in to find work.
125 nat n_gc_threads;
126
127 // For stats:
128 long copied; // *words* copied & scavenged during this GC
129
130 rtsBool work_stealing;
131
132 DECLARE_GCT
133
134 /* -----------------------------------------------------------------------------
135 Static function declarations
136 -------------------------------------------------------------------------- */
137
138 static void mark_root (void *user, StgClosure **root);
139 static void zero_static_object_list (StgClosure* first_static);
140 static nat initialise_N (rtsBool force_major_gc);
141 static void prepare_collected_gen (generation *gen);
142 static void prepare_uncollected_gen (generation *gen);
143 static void init_gc_thread (gc_thread *t);
144 static void resize_generations (void);
145 static void resize_nursery (void);
146 static void start_gc_threads (void);
147 static void scavenge_until_all_done (void);
148 static StgWord inc_running (void);
149 static StgWord dec_running (void);
150 static void wakeup_gc_threads (nat me);
151 static void shutdown_gc_threads (nat me);
152 static void collect_gct_blocks (void);
153
154 #if 0 && defined(DEBUG)
155 static void gcCAFs (void);
156 #endif
157
158 /* -----------------------------------------------------------------------------
159 The mark stack.
160 -------------------------------------------------------------------------- */
161
162 bdescr *mark_stack_top_bd; // topmost block in the mark stack
163 bdescr *mark_stack_bd; // current block in the mark stack
164 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
165
166 /* -----------------------------------------------------------------------------
167 GarbageCollect: the main entry point to the garbage collector.
168
169 Locks held: all capabilities are held throughout GarbageCollect().
170 -------------------------------------------------------------------------- */
171
172 void
173 GarbageCollect (rtsBool force_major_gc,
174 rtsBool do_heap_census,
175 nat gc_type USED_IF_THREADS,
176 Capability *cap)
177 {
178 bdescr *bd;
179 generation *gen;
180 lnat live_blocks, live_words, allocated, max_copied, avg_copied;
181 #if defined(THREADED_RTS)
182 gc_thread *saved_gct;
183 #endif
184 nat g, n;
185
186 // necessary if we stole a callee-saves register for gct:
187 #if defined(THREADED_RTS)
188 saved_gct = gct;
189 #endif
190
191 #ifdef PROFILING
192 CostCentreStack *prev_CCS;
193 #endif
194
195 ACQUIRE_SM_LOCK;
196
197 #if defined(RTS_USER_SIGNALS)
198 if (RtsFlags.MiscFlags.install_signal_handlers) {
199 // block signals
200 blockUserSignals();
201 }
202 #endif
203
204 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
205 // otherwise adjust the padding in gen_workspace.
206
207 // this is the main thread
208 SET_GCT(gc_threads[cap->no]);
209
210 // tell the stats department that we've started a GC
211 stat_startGC(gct);
212
213 // lock the StablePtr table
214 stablePtrPreGC();
215
216 #ifdef DEBUG
217 mutlist_MUTVARS = 0;
218 mutlist_MUTARRS = 0;
219 mutlist_OTHERS = 0;
mutlist_MVARS = 0; // reset this counter too; it is reported per-GC below
220 #endif
221
222 // attribute any costs to CCS_GC
223 #ifdef PROFILING
224 prev_CCS = CCCS;
225 CCCS = CCS_GC;
226 #endif
227
228 /* Approximate how much we allocated.
229 * Todo: only when generating stats?
230 */
231 allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
232
233 /* Figure out which generation to collect
234 */
235 n = initialise_N(force_major_gc);
236
237 #if defined(THREADED_RTS)
238 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
239 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
240 // It's not always a good idea to do load balancing in parallel
241 // GC. In particular, for a parallel program we don't want to
242 // lose locality by moving cached data into another CPU's cache
243 // (this effect can be quite significant).
244 //
245 // We could have a more complex way to determine whether to do
246 // work stealing or not, e.g. it might be a good idea to do it
247 // if the heap is big. For now, we just turn it on or off with
248 // a flag.
249 #endif
250
251 /* Start threads, so they can be spinning up while we finish initialisation.
252 */
253 start_gc_threads();
254
255 #if defined(THREADED_RTS)
256 /* How many threads will be participating in this GC?
257 * We don't try to parallelise minor GCs (unless the user asks for
258 * it with +RTS -gn0), or mark/compact/sweep GC.
259 */
260 if (gc_type == PENDING_GC_PAR) {
261 n_gc_threads = RtsFlags.ParFlags.nNodes;
262 } else {
263 n_gc_threads = 1;
264 }
265 #else
266 n_gc_threads = 1;
267 #endif
268
269 debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
270 N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
271
272 #ifdef RTS_GTK_FRONTPANEL
273 if (RtsFlags.GcFlags.frontpanel) {
274 updateFrontPanelBeforeGC(N);
275 }
276 #endif
277
278 #ifdef DEBUG
279 // check for memory leaks if DEBUG is on
280 memInventory(DEBUG_gc);
281 #endif
282
283 // check sanity *before* GC
284 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
285
286 // Initialise all the generations/steps that we're collecting.
287 for (g = 0; g <= N; g++) {
288 prepare_collected_gen(&generations[g]);
289 }
290 // Initialise all the generations/steps that we're *not* collecting.
291 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
292 prepare_uncollected_gen(&generations[g]);
293 }
294
295 // Prepare this gc_thread
296 init_gc_thread(gct);
297
298 /* Allocate a mark stack if we're doing a major collection.
299 */
300 if (major_gc && oldest_gen->mark) {
301 mark_stack_bd = allocBlock();
302 mark_stack_top_bd = mark_stack_bd;
303 mark_stack_bd->link = NULL;
304 mark_stack_bd->u.back = NULL;
305 mark_sp = mark_stack_bd->start;
306 } else {
307 mark_stack_bd = NULL;
308 mark_stack_top_bd = NULL;
309 mark_sp = NULL;
310 }
311
312 /* -----------------------------------------------------------------------
313 * follow all the roots that we know about:
314 */
315
316 // the main thread is running: this prevents any other threads from
317 // exiting prematurely, so we can start them now.
318 // NB. do this after the mutable lists have been saved above, otherwise
319 // the other GC threads will be writing into the old mutable lists.
320 inc_running();
321 wakeup_gc_threads(gct->thread_index);
322
323 traceEventGcWork(gct->cap);
324
325 // scavenge the capability-private mutable lists. This isn't part
326 // of markCapability() because markCapability() can only
327 // call back into the GC via mark_root() (due to the gct register
328 // variable).
329 if (n_gc_threads == 1) {
330 for (n = 0; n < n_capabilities; n++) {
331 #if defined(THREADED_RTS)
332 scavenge_capability_mut_Lists1(&capabilities[n]);
333 #else
334 scavenge_capability_mut_lists(&capabilities[n]);
335 #endif
336 }
337 } else {
338 scavenge_capability_mut_lists(gct->cap);
339 }
340
341 // follow roots from the CAF list (used by GHCi)
342 gct->evac_gen_no = 0;
343 markCAFs(mark_root, gct);
344
345 // follow all the roots that the application knows about.
346 gct->evac_gen_no = 0;
347 if (n_gc_threads == 1) {
348 for (n = 0; n < n_capabilities; n++) {
349 markCapability(mark_root, gct, &capabilities[n],
350 rtsTrue/*don't mark sparks*/);
351 }
352 } else {
353 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
354 }
355
356 markScheduler(mark_root, gct);
357
358 #if defined(RTS_USER_SIGNALS)
359 // mark the signal handlers (signals should be already blocked)
360 markSignalHandlers(mark_root, gct);
361 #endif
362
363 // Mark the weak pointer list, and prepare to detect dead weak pointers.
364 markWeakPtrList();
365 initWeakForGC();
366
367 // Mark the stable pointer table.
368 markStablePtrTable(mark_root, gct);
369
370 /* -------------------------------------------------------------------------
371 * Repeatedly scavenge all the areas we know about until there's no
372 * more scavenging to be done.
373 */
374 for (;;)
375 {
376 scavenge_until_all_done();
377 // The other threads are now stopped. We might recurse back to
378 // here, but from now on this is the only thread.
379
380 // must be last... invariant is that everything is fully
381 // scavenged at this point.
382 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
383 inc_running();
384 continue;
385 }
386
387 // If we get to here, there's really nothing left to do.
388 break;
389 }
390
391 shutdown_gc_threads(gct->thread_index);
392
393 // Now see which stable names are still alive.
394 gcStablePtrTable();
395
396 #ifdef THREADED_RTS
397 if (n_gc_threads == 1) {
398 for (n = 0; n < n_capabilities; n++) {
399 pruneSparkQueue(&capabilities[n]);
400 }
401 } else {
402 pruneSparkQueue(gct->cap);
403 }
404 #endif
405
406 #ifdef PROFILING
407 // We call processHeapClosureForDead() on every closure destroyed during
408 // the current garbage collection, so we invoke LdvCensusForDead().
409 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
410 || RtsFlags.ProfFlags.bioSelector != NULL)
411 LdvCensusForDead(N);
412 #endif
413
414 // NO MORE EVACUATION AFTER THIS POINT!
415
416 // Finally: compact or sweep the oldest generation.
417 if (major_gc && oldest_gen->mark) {
418 if (oldest_gen->compact)
419 compact(gct->scavenged_static_objects);
420 else
421 sweep(oldest_gen);
422 }
423
424 copied = 0;
425 max_copied = 0;
426 avg_copied = 0;
427 {
428 nat i;
429 for (i=0; i < n_gc_threads; i++) {
430 if (n_gc_threads > 1) {
431 debugTrace(DEBUG_gc,"thread %d:", i);
432 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
433 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
434 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
435 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
436 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
437 }
438 copied += gc_threads[i]->copied;
439 max_copied = stg_max(gc_threads[i]->copied, max_copied);
440 }
441 if (n_gc_threads == 1) {
442 max_copied = 0;
443 avg_copied = 0;
444 } else {
445 avg_copied = copied;
446 }
447 }
448
449 // Run through all the generations/steps and tidy up.
450 // We're going to:
451 // - count the amount of "live" data (live_words, live_blocks)
452 // - count the amount of "copied" data in this GC (copied)
453 // - free from-space
454 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
455 //
456 live_words = 0;
457 live_blocks = 0;
458
459 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
460
461 if (g == N) {
462 generations[g].collections++; // for stats
463 if (n_gc_threads > 1) generations[g].par_collections++;
464 }
465
466 // Count the mutable list as bytes "copied" for the purposes of
467 // stats. Every mutable list is copied during every GC.
468 if (g > 0) {
469 nat mut_list_size = 0;
470 for (n = 0; n < n_capabilities; n++) {
471 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
472 }
473 copied += mut_list_size;
474
475 debugTrace(DEBUG_gc,
476 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
477 (unsigned long)(mut_list_size * sizeof(W_)),
478 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
479 }
480
481 bdescr *next, *prev;
482 gen = &generations[g];
483
484 // for generations we collected...
485 if (g <= N) {
486
487 /* free old memory and shift to-space into from-space for all
488 * the collected steps (except the allocation area). These
489 * freed blocks will probably be quickly recycled.
490 */
491 if (gen->mark)
492 {
493 // tack the new blocks on the end of the existing blocks
494 if (gen->old_blocks != NULL) {
495
496 prev = NULL;
497 for (bd = gen->old_blocks; bd != NULL; bd = next) {
498
499 next = bd->link;
500
501 if (!(bd->flags & BF_MARKED))
502 {
503 if (prev == NULL) {
504 gen->old_blocks = next;
505 } else {
506 prev->link = next;
507 }
508 freeGroup(bd);
509 gen->n_old_blocks--;
510 }
511 else
512 {
513 gen->n_words += bd->free - bd->start;
514
515 // NB. this step might not be compacted next
516 // time, so reset the BF_MARKED flags.
517 // They are set before GC if we're going to
518 // compact. (search for BF_MARKED above).
519 bd->flags &= ~BF_MARKED;
520
521 // between GCs, all blocks in the heap except
522 // for the nursery have the BF_EVACUATED flag set.
523 bd->flags |= BF_EVACUATED;
524
525 prev = bd;
526 }
527 }
528
529 if (prev != NULL) {
530 prev->link = gen->blocks;
531 gen->blocks = gen->old_blocks;
532 }
533 }
534 // add the new blocks to the block tally
535 gen->n_blocks += gen->n_old_blocks;
536 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
537 ASSERT(countOccupied(gen->blocks) == gen->n_words);
538 }
539 else // not compacted
540 {
541 freeChain(gen->old_blocks);
542 }
543
544 gen->old_blocks = NULL;
545 gen->n_old_blocks = 0;
546
547 /* LARGE OBJECTS. The current live large objects are chained on
548 * scavenged_large, having been moved during garbage
549 * collection from large_objects. Any objects left on the
550 * large_objects list are therefore dead, so we free them here.
551 */
552 freeChain(gen->large_objects);
553 gen->large_objects = gen->scavenged_large_objects;
554 gen->n_large_blocks = gen->n_scavenged_large_blocks;
555 gen->n_new_large_words = 0;
556 }
557 else // for generations > N
558 {
559 /* For older generations, we need to append the
560 * scavenged_large_object list (i.e. large objects that have been
561 * promoted during this GC) to the large_object list for that step.
562 */
563 for (bd = gen->scavenged_large_objects; bd; bd = next) {
564 next = bd->link;
565 dbl_link_onto(bd, &gen->large_objects);
566 }
567
568 // add the new blocks we promoted during this GC
569 gen->n_large_blocks += gen->n_scavenged_large_blocks;
570 }
571
572 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
573
574 gen->scavenged_large_objects = NULL;
575 gen->n_scavenged_large_blocks = 0;
576
577 // Count "live" data
578 live_words += genLiveWords(gen);
579 live_blocks += genLiveBlocks(gen);
580
581 // add in the partial blocks in the gen_workspaces, but ignore gen 0
582 // if this is a local GC (we can't count another capability's part_list)
583 {
584 nat i;
585 for (i = 0; i < n_capabilities; i++) {
586 live_words += gcThreadLiveWords(i, gen->no);
587 live_blocks += gcThreadLiveBlocks(i, gen->no);
588 }
589 }
590 } // for all generations
591
592 // update the max size of older generations after a major GC
593 resize_generations();
594
595 // Free the mark stack.
596 if (mark_stack_top_bd != NULL) {
597 debugTrace(DEBUG_gc, "mark stack: %d blocks",
598 countBlocks(mark_stack_top_bd));
599 freeChain(mark_stack_top_bd);
600 }
601
602 // Free any bitmaps.
603 for (g = 0; g <= N; g++) {
604 gen = &generations[g];
605 if (gen->bitmap != NULL) {
606 freeGroup(gen->bitmap);
607 gen->bitmap = NULL;
608 }
609 }
610
611 // Reset the nursery: make the blocks empty
612 allocated += clearNurseries();
613
614 resize_nursery();
615
616 resetNurseries();
617
618 // mark the garbage collected CAFs as dead
619 #if 0 && defined(DEBUG) // doesn't work at the moment
620 if (major_gc) { gcCAFs(); }
621 #endif
622
623 #ifdef PROFILING
624 // resetStaticObjectForRetainerProfiling() must be called before
625 // zeroing below.
626 if (n_gc_threads > 1) {
627 barf("profiling is currently broken with multi-threaded GC");
628 // ToDo: fix the gct->scavenged_static_objects below
629 }
630 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
631 #endif
632
633 // zero the scavenged static object list
634 if (major_gc) {
635 nat i;
636 if (n_gc_threads == 1) {
637 zero_static_object_list(gct->scavenged_static_objects);
638 } else {
639 for (i = 0; i < n_gc_threads; i++) {
640 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
641 }
642 }
643 }
644
645 // Update the stable pointer hash table.
646 updateStablePtrTable(major_gc);
647
648 // unlock the StablePtr table. Must be before scheduleFinalizers(),
649 // because a finalizer may call hs_free_fun_ptr() or
650 // hs_free_stable_ptr(), both of which access the StablePtr table.
651 stablePtrPostGC();
652
653 // Start any pending finalizers. Must be after
654 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
655 RELEASE_SM_LOCK;
656 scheduleFinalizers(cap, old_weak_ptr_list);
657 ACQUIRE_SM_LOCK;
658
659 // check sanity after GC
660 // before resurrectThreads(), because that might overwrite some
661 // closures, which will cause problems with THREADED where we don't
662 // fill slop.
663 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
664
665 // If a heap census is due, we need to do it before
666 // resurrectThreads(), for the same reason as checkSanity above:
667 // resurrectThreads() will overwrite some closures and leave slop
668 // behind.
669 if (do_heap_census) {
670 debugTrace(DEBUG_sched, "performing heap census");
671 RELEASE_SM_LOCK;
672 heapCensus(gct->gc_start_cpu);
673 ACQUIRE_SM_LOCK;
674 }
675
676 // send exceptions to any threads which were about to die
677 RELEASE_SM_LOCK;
678 resurrectThreads(resurrected_threads);
679 ACQUIRE_SM_LOCK;
680
681 if (major_gc) {
682 nat need, got;
683 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
684 got = mblocks_allocated;
685 /* If the amount of data remains constant, next major GC we'll
686 require (F+1)*need. We leave (F+2)*need in order to reduce
687 repeated deallocation and reallocation. */
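// A worked example of the policy above: with F = 2 and need = 100
// mblocks we keep (2+2)*100 = 400 mblocks resident, and return anything
// beyond that to the OS.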
688 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
689 if (got > need) {
690 returnMemoryToOS(got - need);
691 }
692 }
693
694 // extra GC trace info
695 IF_DEBUG(gc, statDescribeGens());
696
697 #ifdef DEBUG
698 // symbol-table based profiling
699 /* heapCensus(to_blocks); */ /* ToDo */
700 #endif
701
702 // restore enclosing cost centre
703 #ifdef PROFILING
704 CCCS = prev_CCS;
705 #endif
706
707 #ifdef DEBUG
708 // check for memory leaks if DEBUG is on
709 memInventory(DEBUG_gc);
710 #endif
711
712 #ifdef RTS_GTK_FRONTPANEL
713 if (RtsFlags.GcFlags.frontpanel) {
714 updateFrontPanelAfterGC( N, live_words ); // live word count tallied above
715 }
716 #endif
717
718 // ok, GC over: tell the stats department what happened.
719 stat_endGC(gct, allocated, live_words,
720 copied, N, max_copied, avg_copied,
721 live_blocks * BLOCK_SIZE_W - live_words /* slop */);
722
723 // Guess which generation we'll collect *next* time
724 initialise_N(force_major_gc);
725
726 #if defined(RTS_USER_SIGNALS)
727 if (RtsFlags.MiscFlags.install_signal_handlers) {
728 // unblock signals again
729 unblockUserSignals();
730 }
731 #endif
732
733 RELEASE_SM_LOCK;
734
735 SET_GCT(saved_gct);
736 }
737
738 /* -----------------------------------------------------------------------------
739 Figure out which generation to collect, initialise N and major_gc.
740
741 Also returns the total number of blocks in generations that will be
742 collected.
743 -------------------------------------------------------------------------- */
744
745 static nat
746 initialise_N (rtsBool force_major_gc)
747 {
748 int g;
749 nat blocks, blocks_total;
750
751 blocks = 0;
752 blocks_total = 0;
753
754 if (force_major_gc) {
755 N = RtsFlags.GcFlags.generations - 1;
756 } else {
757 N = 0;
758 }
759
760 for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
761
762 blocks = generations[g].n_words / BLOCK_SIZE_W
763 + generations[g].n_large_blocks;
764
765 if (blocks >= generations[g].max_blocks) {
766 N = stg_max(N,g);
767 }
768 if ((nat)g <= N) {
769 blocks_total += blocks;
770 }
771 }
772
773 blocks_total += countNurseryBlocks();
774
775 major_gc = (N == RtsFlags.GcFlags.generations-1);
776 return blocks_total;
777 }
778
779 /* -----------------------------------------------------------------------------
780 Initialise the gc_thread structures.
781 -------------------------------------------------------------------------- */
782
783 #define GC_THREAD_INACTIVE 0
784 #define GC_THREAD_STANDING_BY 1
785 #define GC_THREAD_RUNNING 2
786 #define GC_THREAD_WAITING_TO_CONTINUE 3
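// A GC worker cycles through these states: INACTIVE between GCs;
// STANDING_BY once gcWorkerThread() is blocked acquiring its gc_spin lock;
// RUNNING after wakeup_gc_threads() releases gc_spin; and
// WAITING_TO_CONTINUE when it has finished scavenging and is blocked on
// mut_spin until releaseGCThreads() sets it back to INACTIVE.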
787
788 static void
789 new_gc_thread (nat n, gc_thread *t)
790 {
791 nat g;
792 gen_workspace *ws;
793
794 t->cap = &capabilities[n];
795
796 #ifdef THREADED_RTS
797 t->id = 0;
798 initSpinLock(&t->gc_spin);
799 initSpinLock(&t->mut_spin);
800 ACQUIRE_SPIN_LOCK(&t->gc_spin);
801 t->wakeup = GC_THREAD_INACTIVE; // starts INACTIVE; the worker sets itself to
802 // STANDING_BY once it is up, see gcWorkerThread and waitForGcThreads
803 #endif
804
805 t->thread_index = n;
806 t->free_blocks = NULL;
807 t->gc_count = 0;
808
809 init_gc_thread(t);
810
811 #ifdef USE_PAPI
812 t->papi_events = -1;
813 #endif
814
815 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
816 {
817 ws = &t->gens[g];
818 ws->gen = &generations[g];
819 ASSERT(g == ws->gen->no);
820 ws->my_gct = t;
821
822 // We want to call
823 // alloc_todo_block(ws,0);
824 // but can't, because it uses gct which isn't set up at this point.
825 // Hence, allocate a block for todo_bd manually:
826 {
827 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
828 initBdescr(bd, ws->gen, ws->gen->to);
829 bd->flags = BF_EVACUATED;
830 bd->u.scan = bd->free = bd->start;
831
832 ws->todo_bd = bd;
833 ws->todo_free = bd->free;
834 ws->todo_lim = bd->start + BLOCK_SIZE_W;
835 }
836
837 ws->todo_q = newWSDeque(128);
838 ws->todo_overflow = NULL;
839 ws->n_todo_overflow = 0;
840 ws->todo_large_objects = NULL;
841
842 ws->part_list = NULL;
843 ws->n_part_blocks = 0;
844
845 ws->scavd_list = NULL;
846 ws->n_scavd_blocks = 0;
847 }
848 }
849
850
851 void
852 initGcThreads (void)
853 {
854 if (gc_threads == NULL) {
855 #if defined(THREADED_RTS)
856 nat i;
857 gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
858 sizeof(gc_thread*),
859 "alloc_gc_threads");
860
861 for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
862 gc_threads[i] =
863 stgMallocBytes(sizeof(gc_thread) +
864 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
865 "alloc_gc_threads");
866
867 new_gc_thread(i, gc_threads[i]);
868 }
869 #else
870 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
871 gc_threads[0] = gct;
872 new_gc_thread(0,gc_threads[0]);
873 #endif
874 }
875 }
876
877 void
878 freeGcThreads (void)
879 {
880 nat g;
881 if (gc_threads != NULL) {
882 #if defined(THREADED_RTS)
883 nat i;
884 for (i = 0; i < n_capabilities; i++) {
885 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
886 {
887 freeWSDeque(gc_threads[i]->gens[g].todo_q);
888 }
889 stgFree (gc_threads[i]);
890 }
891 stgFree (gc_threads);
892 #else
893 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
894 {
895 freeWSDeque(gc_threads[0]->gens[g].todo_q);
896 }
897 stgFree (gc_threads);
898 #endif
899 gc_threads = NULL;
900 }
901 }
902
903 /* ----------------------------------------------------------------------------
904 Start GC threads
905 ------------------------------------------------------------------------- */
906
907 static volatile StgWord gc_running_threads;
908
909 static StgWord
910 inc_running (void)
911 {
912 StgWord new;
913 new = atomic_inc(&gc_running_threads);
914 ASSERT(new <= n_gc_threads);
915 return new;
916 }
917
918 static StgWord
919 dec_running (void)
920 {
921 ASSERT(gc_running_threads != 0);
922 return atomic_dec(&gc_running_threads);
923 }
924
925 static rtsBool
926 any_work (void)
927 {
928 int g;
929 gen_workspace *ws;
930
931 gct->any_work++;
932
933 write_barrier();
934
935 // scavenge objects in compacted generation
936 if (mark_stack_bd != NULL && !mark_stack_empty()) {
937 return rtsTrue;
938 }
939
940 // Check for global work in any step. We don't need to check for
941 // local work, because we have already exited scavenge_loop(),
942 // which means there is no local work for this thread.
943 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
944 ws = &gct->gens[g];
945 if (ws->todo_large_objects) return rtsTrue;
946 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
947 if (ws->todo_overflow) return rtsTrue;
948 }
949
950 #if defined(THREADED_RTS)
951 if (work_stealing) {
952 nat n;
953 // look for work to steal
954 for (n = 0; n < n_gc_threads; n++) {
955 if (n == gct->thread_index) continue;
956 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
957 ws = &gc_threads[n]->gens[g];
958 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
959 }
960 }
961 }
962 #endif
963
964 gct->no_work++;
965 #if defined(THREADED_RTS)
966 yieldThread();
967 #endif
968
969 return rtsFalse;
970 }
971
972 static void
973 scavenge_until_all_done (void)
974 {
975 DEBUG_ONLY( nat r );
976
977
978 loop:
979 #if defined(THREADED_RTS)
980 if (n_gc_threads > 1) {
981 scavenge_loop();
982 } else {
983 scavenge_loop1();
984 }
985 #else
986 scavenge_loop();
987 #endif
988
989 collect_gct_blocks();
990
991 // scavenge_loop() only exits when there's no work to do
992
993 #ifdef DEBUG
994 r = dec_running();
995 #else
996 dec_running();
997 #endif
998
999 traceEventGcIdle(gct->cap);
1000
1001 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1002
1003 while (gc_running_threads != 0) {
1004 // usleep(1);
1005 if (any_work()) {
1006 inc_running();
1007 traceEventGcWork(gct->cap);
1008 goto loop;
1009 }
1010 // any_work() does not remove the work from the queue, it
1011 // just checks for the presence of work. If we find any,
1012 // then we increment gc_running_threads and go back to
1013 // scavenge_loop() to perform any pending work.
1014 }
1015
1016 traceEventGcDone(gct->cap);
1017 }
1018
1019 #if defined(THREADED_RTS)
1020
1021 void
1022 gcWorkerThread (Capability *cap)
1023 {
1024 gc_thread *saved_gct;
1025
1026 // necessary if we stole a callee-saves register for gct:
1027 saved_gct = gct;
1028
1029 gct = gc_threads[cap->no];
1030 gct->id = osThreadId();
1031
1032 stat_gcWorkerThreadStart(gct);
1033
1034 // Wait until we're told to wake up
1035 RELEASE_SPIN_LOCK(&gct->mut_spin);
1036 gct->wakeup = GC_THREAD_STANDING_BY;
1037 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1038 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1039
1040 #ifdef USE_PAPI
1041 // start performance counters in this thread...
1042 if (gct->papi_events == -1) {
1043 papi_init_eventset(&gct->papi_events);
1044 }
1045 papi_thread_start_gc1_count(gct->papi_events);
1046 #endif
1047
1048 init_gc_thread(gct);
1049
1050 traceEventGcWork(gct->cap);
1051
1052 // Every thread evacuates some roots.
1053 gct->evac_gen_no = 0;
1054 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1055 scavenge_capability_mut_lists(cap);
1056
1057 scavenge_until_all_done();
1058
1059 #ifdef THREADED_RTS
1060 // Now that the whole heap is marked, we discard any sparks that
1061 // were found to be unreachable. The main GC thread is currently
1062 // marking heap reachable via weak pointers, so it is
1063 // non-deterministic whether a spark will be retained if it is
1064 // only reachable via weak pointers. To fix this problem would
1065 // require another GC barrier, which is too high a price.
1066 pruneSparkQueue(cap);
1067 #endif
1068
1069 #ifdef USE_PAPI
1070 // count events in this thread towards the GC totals
1071 papi_thread_stop_gc1_count(gct->papi_events);
1072 #endif
1073
1074 // Wait until we're told to continue
1075 RELEASE_SPIN_LOCK(&gct->gc_spin);
1076 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1077 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1078 gct->thread_index);
1079 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1080 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1081
1082 // record the time spent doing GC in the Task structure
1083 stat_gcWorkerThreadDone(gct);
1084
1085 SET_GCT(saved_gct);
1086 }
1087
1088 #endif
1089
1090 #if defined(THREADED_RTS)
1091
1092 void
1093 waitForGcThreads (Capability *cap USED_IF_THREADS)
1094 {
1095 const nat n_threads = RtsFlags.ParFlags.nNodes;
1096 const nat me = cap->no;
1097 nat i, j;
1098 rtsBool retry = rtsTrue;
1099
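// Prod the other capabilities and spin (yielding between attempts) until
// every other GC thread has reached GC_THREAD_STANDING_BY.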
1100 while(retry) {
1101 for (i=0; i < n_threads; i++) {
1102 if (i == me) continue;
1103 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1104 prodCapability(&capabilities[i], cap->running_task);
1105 }
1106 }
1107 for (j=0; j < 10; j++) {
1108 retry = rtsFalse;
1109 for (i=0; i < n_threads; i++) {
1110 if (i == me) continue;
1111 write_barrier();
1112 setContextSwitches();
1113 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1114 retry = rtsTrue;
1115 }
1116 }
1117 if (!retry) break;
1118 yieldThread();
1119 }
1120 }
1121 }
1122
1123 #endif // THREADED_RTS
1124
1125 static void
1126 start_gc_threads (void)
1127 {
1128 #if defined(THREADED_RTS)
1129 gc_running_threads = 0;
1130 #endif
1131 }
1132
1133 static void
1134 wakeup_gc_threads (nat me USED_IF_THREADS)
1135 {
1136 #if defined(THREADED_RTS)
1137 nat i;
1138
1139 if (n_gc_threads == 1) return;
1140
1141 for (i=0; i < n_gc_threads; i++) {
1142 if (i == me) continue;
1143 inc_running();
1144 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1145 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1146
1147 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1148 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1149 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1150 }
1151 #endif
1152 }
1153
1154 // After GC is complete, we must wait for all GC threads to enter the
1155 // standby state, otherwise they may still be executing inside
1156 // any_work(), and may even remain awake until the next GC starts.
1157 static void
1158 shutdown_gc_threads (nat me USED_IF_THREADS)
1159 {
1160 #if defined(THREADED_RTS)
1161 nat i;
1162
1163 if (n_gc_threads == 1) return;
1164
1165 for (i=0; i < n_gc_threads; i++) {
1166 if (i == me) continue;
1167 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1168 }
1169 #endif
1170 }
1171
1172 #if defined(THREADED_RTS)
1173 void
1174 releaseGCThreads (Capability *cap USED_IF_THREADS)
1175 {
1176 const nat n_threads = RtsFlags.ParFlags.nNodes;
1177 const nat me = cap->no;
1178 nat i;
1179 for (i=0; i < n_threads; i++) {
1180 if (i == me) continue;
1181 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1182 barf("releaseGCThreads");
1183
1184 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1185 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1186 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1187 }
1188 }
1189 #endif
1190
1191 /* ----------------------------------------------------------------------------
1192 Initialise a generation that is to be collected
1193 ------------------------------------------------------------------------- */
1194
1195 static void
1196 prepare_collected_gen (generation *gen)
1197 {
1198 nat i, g, n;
1199 gen_workspace *ws;
1200 bdescr *bd, *next;
1201
1202 // Throw away the current mutable list. Invariant: the mutable
1203 // list always has at least one block; this means we can avoid a
1204 // check for NULL in recordMutable().
1205 g = gen->no;
1206 if (g != 0) {
1207 for (i = 0; i < n_capabilities; i++) {
1208 freeChain(capabilities[i].mut_lists[g]);
1209 capabilities[i].mut_lists[g] = allocBlock();
1210 }
1211 }
1212
1213 gen = &generations[g];
1214 ASSERT(gen->no == g);
1215
1216 // we'll construct a new list of threads in this step
1217 // during GC, throw away the current list.
1218 gen->old_threads = gen->threads;
1219 gen->threads = END_TSO_QUEUE;
1220
1221 // deprecate the existing blocks
1222 gen->old_blocks = gen->blocks;
1223 gen->n_old_blocks = gen->n_blocks;
1224 gen->blocks = NULL;
1225 gen->n_blocks = 0;
1226 gen->n_words = 0;
1227 gen->live_estimate = 0;
1228
1229 // initialise the large object queues.
1230 ASSERT(gen->scavenged_large_objects == NULL);
1231 ASSERT(gen->n_scavenged_large_blocks == 0);
1232
1233 // grab all the partial blocks stashed in the gc_thread workspaces and
1234 // move them to the old_blocks list of this gen.
1235 for (n = 0; n < n_capabilities; n++) {
1236 ws = &gc_threads[n]->gens[gen->no];
1237
1238 for (bd = ws->part_list; bd != NULL; bd = next) {
1239 next = bd->link;
1240 bd->link = gen->old_blocks;
1241 gen->old_blocks = bd;
1242 gen->n_old_blocks += bd->blocks;
1243 }
1244 ws->part_list = NULL;
1245 ws->n_part_blocks = 0;
1246
1247 ASSERT(ws->scavd_list == NULL);
1248 ASSERT(ws->n_scavd_blocks == 0);
1249
1250 if (ws->todo_free != ws->todo_bd->start) {
1251 ws->todo_bd->free = ws->todo_free;
1252 ws->todo_bd->link = gen->old_blocks;
1253 gen->old_blocks = ws->todo_bd;
1254 gen->n_old_blocks += ws->todo_bd->blocks;
1255 alloc_todo_block(ws,0); // always has one block.
1256 }
1257 }
1258
1259 // mark the small objects as from-space
1260 for (bd = gen->old_blocks; bd; bd = bd->link) {
1261 bd->flags &= ~BF_EVACUATED;
1262 }
1263
1264 // mark the large objects as from-space
1265 for (bd = gen->large_objects; bd; bd = bd->link) {
1266 bd->flags &= ~BF_EVACUATED;
1267 }
1268
1269 // for a compacted generation, we need to allocate the bitmap
1270 if (gen->mark) {
1271 lnat bitmap_size; // in bytes
1272 bdescr *bitmap_bdescr;
1273 StgWord *bitmap;
1274
1275 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1276
1277 if (bitmap_size > 0) {
1278 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1279 / BLOCK_SIZE);
1280 gen->bitmap = bitmap_bdescr;
1281 bitmap = bitmap_bdescr->start;
1282
1283 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1284 bitmap_size, bitmap);
1285
1286 // don't forget to fill it with zeros!
1287 memset(bitmap, 0, bitmap_size);
1288
1289 // For each block in this step, point to its bitmap from the
1290 // block descriptor.
1291 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1292 bd->u.bitmap = bitmap;
1293 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1294
1295 // Also at this point we set the BF_MARKED flag
1296 // for this block. The invariant is that
1297 // BF_MARKED is always unset, except during GC
1298 // when it is set on those blocks which will be
1299 // compacted.
1300 if (!(bd->flags & BF_FRAGMENTED)) {
1301 bd->flags |= BF_MARKED;
1302 }
1303
1304 // BF_SWEPT should be marked only for blocks that are being
1305 // collected in sweep()
1306 bd->flags &= ~BF_SWEPT;
1307 }
1308 }
1309 }
1310 }
1311
1312
1313 /* ----------------------------------------------------------------------------
1314 Save the mutable lists in saved_mut_lists
1315 ------------------------------------------------------------------------- */
1316
1317 static void
1318 stash_mut_list (Capability *cap, nat gen_no)
1319 {
1320 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1321 cap->mut_lists[gen_no] = allocBlock_sync();
1322 }
1323
1324 /* ----------------------------------------------------------------------------
1325 Initialise a generation that is *not* to be collected
1326 ------------------------------------------------------------------------- */
1327
1328 static void
1329 prepare_uncollected_gen (generation *gen)
1330 {
1331 nat i;
1332
1333
1334 ASSERT(gen->no > 0);
1335
1336 // save the current mutable lists for this generation, and
1337 // allocate a fresh block for each one. We'll traverse these
1338 // mutable lists as roots early on in the GC.
1339 for (i = 0; i < n_capabilities; i++) {
1340 stash_mut_list(&capabilities[i], gen->no);
1341 }
1342
1343 ASSERT(gen->scavenged_large_objects == NULL);
1344 ASSERT(gen->n_scavenged_large_blocks == 0);
1345 }
1346
1347 /* -----------------------------------------------------------------------------
1348 Collect the completed blocks from a GC thread and attach them to
1349 the generation.
1350 -------------------------------------------------------------------------- */
1351
1352 static void
1353 collect_gct_blocks (void)
1354 {
1355 nat g;
1356 gen_workspace *ws;
1357 bdescr *bd, *prev;
1358
1359 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1360 ws = &gct->gens[g];
1361
1362 // there may still be a block attached to ws->todo_bd;
1363 // leave it there to use next time.
1364
1365 if (ws->scavd_list != NULL) {
1366 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1367
1368 ASSERT(gct->scan_bd == NULL);
1369 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1370
1371 prev = NULL;
1372 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1373 ws->gen->n_words += bd->free - bd->start;
1374 prev = bd;
1375 }
1376 if (prev != NULL) {
1377 prev->link = ws->gen->blocks;
1378 ws->gen->blocks = ws->scavd_list;
1379 }
1380 ws->gen->n_blocks += ws->n_scavd_blocks;
1381
1382 ws->scavd_list = NULL;
1383 ws->n_scavd_blocks = 0;
1384
1385 RELEASE_SPIN_LOCK(&ws->gen->sync);
1386 }
1387 }
1388 }
1389
1390 /* -----------------------------------------------------------------------------
1391 Initialise a gc_thread before GC
1392 -------------------------------------------------------------------------- */
1393
1394 static void
1395 init_gc_thread (gc_thread *t)
1396 {
1397 t->static_objects = END_OF_STATIC_LIST;
1398 t->scavenged_static_objects = END_OF_STATIC_LIST;
1399 t->scan_bd = NULL;
1400 t->mut_lists = t->cap->mut_lists;
1401 t->evac_gen_no = 0;
1402 t->failed_to_evac = rtsFalse;
1403 t->eager_promotion = rtsTrue;
1404 t->thunk_selector_depth = 0;
1405 t->copied = 0;
1406 t->scanned = 0;
1407 t->any_work = 0;
1408 t->no_work = 0;
1409 t->scav_find_work = 0;
1410 }
1411
1412 /* -----------------------------------------------------------------------------
1413 Function we pass to evacuate roots.
1414 -------------------------------------------------------------------------- */
1415
1416 static void
1417 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1418 {
1419 // we stole a register for gct, but this function is called from
1420 // *outside* the GC where the register variable is not in effect,
1421 // so we need to save and restore it here. NB. only call
1422 // mark_root() from the main GC thread, otherwise gct will be
1423 // incorrect.
1424 #if defined(THREADED_RTS)
1425 gc_thread *saved_gct;
1426 saved_gct = gct;
1427 #endif
1428 SET_GCT(user);
1429
1430 evacuate(root);
1431
1432 SET_GCT(saved_gct);
1433 }
1434
1435 /* -----------------------------------------------------------------------------
1436 Initialising the static object & mutable lists
1437 -------------------------------------------------------------------------- */
1438
1439 static void
1440 zero_static_object_list(StgClosure* first_static)
1441 {
1442 StgClosure* p;
1443 StgClosure* link;
1444 const StgInfoTable *info;
1445
1446 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1447 info = get_itbl(p);
1448 link = *STATIC_LINK(info, p);
1449 *STATIC_LINK(info,p) = NULL;
1450 }
1451 }
1452
1453 /* ----------------------------------------------------------------------------
1454 Reset the sizes of the older generations when we do a major
1455 collection.
1456
1457 CURRENT STRATEGY: make all generations except zero the same size.
1458 We have to stay within the maximum heap size, and leave a certain
1459 percentage of the maximum heap size available to allocate into.
1460 ------------------------------------------------------------------------- */
1461
1462 static void
1463 resize_generations (void)
1464 {
1465 nat g;
1466
1467 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1468 nat live, size, min_alloc, words;
1469 const nat max = RtsFlags.GcFlags.maxHeapSize;
1470 const nat gens = RtsFlags.GcFlags.generations;
1471
1472 // live in the oldest generations
1473 if (oldest_gen->live_estimate != 0) {
1474 words = oldest_gen->live_estimate;
1475 } else {
1476 words = oldest_gen->n_words;
1477 }
1478 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1479 oldest_gen->n_large_blocks;
1480
1481 // default max size for all generations except zero
1482 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1483 RtsFlags.GcFlags.minOldGenSize);
1484
1485 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1486 RtsFlags.GcFlags.heapSizeSuggestion = size;
1487 }
1488
1489 // minimum size for generation zero
1490 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1491 RtsFlags.GcFlags.minAllocAreaSize);
1492
1493 // Auto-enable compaction when the residency reaches a
1494 // certain percentage of the maximum heap size (default: 30%).
1495 if (RtsFlags.GcFlags.compact ||
1496 (max > 0 &&
1497 oldest_gen->n_blocks >
1498 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1499 oldest_gen->mark = 1;
1500 oldest_gen->compact = 1;
1501 // debugBelch("compaction: on\n", live);
1502 } else {
1503 oldest_gen->mark = 0;
1504 oldest_gen->compact = 0;
1505 // debugBelch("compaction: off\n", live);
1506 }
1507
1508 if (RtsFlags.GcFlags.sweep) {
1509 oldest_gen->mark = 1;
1510 }
1511
1512 // if we're going to go over the maximum heap size, reduce the
1513 // size of the generations accordingly. The calculation is
1514 // different if compaction is turned on, because we don't need
1515 // to double the space required to collect the old generation.
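// A worked example: with gens = 2, max = 1000 blocks and min_alloc = 200,
// the copying case below needs size*2 + 200 <= max, clamping size to 400
// blocks, whereas the compacting case only needs size + 200 <= max,
// allowing size up to 800 blocks.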
1516 if (max != 0) {
1517
1518 // this test is necessary to ensure that the calculations
1519 // below don't have any negative results - we're working
1520 // with unsigned values here.
1521 if (max < min_alloc) {
1522 heapOverflow();
1523 }
1524
1525 if (oldest_gen->compact) {
1526 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1527 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1528 }
1529 } else {
1530 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1531 size = (max - min_alloc) / ((gens - 1) * 2);
1532 }
1533 }
1534
1535 if (size < live) {
1536 heapOverflow();
1537 }
1538 }
1539
1540 #if 0
1541 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1542 min_alloc, size, max);
1543 #endif
1544
1545 for (g = 0; g < gens; g++) {
1546 generations[g].max_blocks = size;
1547 }
1548 }
1549 }
1550
1551 /* -----------------------------------------------------------------------------
1552 Calculate the new size of the nursery, and resize it.
1553 -------------------------------------------------------------------------- */
1554
1555 static void
1556 resize_nursery (void)
1557 {
1558 const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1559
1560 if (RtsFlags.GcFlags.generations == 1)
1561 { // Two-space collector:
1562 nat blocks;
1563
1564 /* set up a new nursery. Allocate a nursery size based on a
1565 * function of the amount of live data (by default a factor of 2)
1566 * Use the blocks from the old nursery if possible, freeing up any
1567 * left over blocks.
1568 *
1569 * If we get near the maximum heap size, then adjust our nursery
1570 * size accordingly. If the nursery is the same size as the live
1571 * data (L), then we need 3L bytes. We can reduce the size of the
1572 * nursery to bring the required memory down near 2L bytes.
1573 *
1574 * A normal 2-space collector would need 4L bytes to give the same
1575 * performance we get from 3L bytes, reducing to the same
1576 * performance at 2L bytes.
1577 */
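// For example: with L = 1000 blocks live in generation 0 and the factor
// of 2 mentioned above, the nursery is resized to 2000 blocks, unless
// 2*2*1000 blocks would exceed maxHeapSize; in that case we shrink the
// nursery towards (maxHeapSize - 2*L) and call heapOverflow() if that
// leaves less than pcFreeHeap% of the heap free.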
1578 blocks = generations[0].n_blocks;
1579
1580 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1581 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1582 RtsFlags.GcFlags.maxHeapSize )
1583 {
1584 long adjusted_blocks; // signed on purpose
1585 int pc_free;
1586
1587 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1588
1589 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1590 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1591
1592 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1593 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1594 {
1595 heapOverflow();
1596 }
1597 blocks = adjusted_blocks;
1598 }
1599 else
1600 {
1601 blocks *= RtsFlags.GcFlags.oldGenFactor;
1602 if (blocks < min_nursery)
1603 {
1604 blocks = min_nursery;
1605 }
1606 }
1607 resizeNurseries(blocks);
1608 }
1609 else // Generational collector
1610 {
1611 /*
1612 * If the user has given us a suggested heap size, adjust our
1613 * allocation area to make best use of the memory available.
1614 */
1615 if (RtsFlags.GcFlags.heapSizeSuggestion)
1616 {
1617 long blocks;
1618 const nat needed = calcNeeded(); // approx blocks needed at next GC
1619
1620 /* Guess how much will be live in generation 0 step 0 next time.
1621 * A good approximation is obtained by finding the
1622 * percentage of g0 that was live at the last minor GC.
1623 *
1624 * We have an accurate figure for the amount of copied data in
1625 * 'copied', but we must convert this to a number of blocks, with
1626 * a small adjustment for estimated slop at the end of a block
1627 * (- 10 words).
1628 */
1629 if (N == 0)
1630 {
1631 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1632 / countNurseryBlocks();
1633 }
1634
1635 /* Estimate a size for the allocation area based on the
1636 * information available. We might end up going slightly under
1637 * or over the suggested heap size, but we should be pretty
1638 * close on average.
1639 *
1640 * Formula: suggested - needed
1641 * ----------------------------
1642 * 1 + g0_pcnt_kept/100
1643 *
1644 * where 'needed' is the amount of memory needed at the next
1645 * collection for collecting all gens except g0.
1646 */
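// For example: heapSizeSuggestion = 10000 blocks, needed = 2000 and
// g0_pcnt_kept = 60 gives (10000 - 2000) * 100 / 160 = 5000 blocks for
// the allocation area.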
1647 blocks =
1648 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1649 (100 + (long)g0_pcnt_kept);
1650
1651 if (blocks < (long)min_nursery) {
1652 blocks = min_nursery;
1653 }
1654
1655 resizeNurseries((nat)blocks);
1656 }
1657 else
1658 {
1659 // we might have added extra large blocks to the nursery, so
1660 // resize back to minAllocAreaSize again.
1661 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1662 }
1663 }
1664 }
1665
1666 /* -----------------------------------------------------------------------------
1667 Sanity code for CAF garbage collection.
1668
1669 With DEBUG turned on, we manage a CAF list in addition to the SRT
1670 mechanism. After GC, we run down the CAF list and blackhole any
1671 CAFs which have been garbage collected. This means we get an error
1672 whenever the program tries to enter a garbage collected CAF.
1673
1674 Any garbage collected CAFs are taken off the CAF list at the same
1675 time.
1676 -------------------------------------------------------------------------- */
1677
1678 #if 0 && defined(DEBUG)
1679
1680 static void
1681 gcCAFs(void)
1682 {
1683 StgClosure* p;
1684 StgClosure** pp;
1685 const StgInfoTable *info;
1686 nat i;
1687
1688 i = 0;
1689 p = caf_list;
1690 pp = &caf_list;
1691
1692 while (p != NULL) {
1693
1694 info = get_itbl(p);
1695
1696 ASSERT(info->type == IND_STATIC);
1697
1698 if (STATIC_LINK(info,p) == NULL) {
1699 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1700 // black hole it
1701 SET_INFO(p,&stg_BLACKHOLE_info);
1702 p = STATIC_LINK2(info,p);
1703 *pp = p;
1704 }
1705 else {
1706 pp = &STATIC_LINK2(info,p);
1707 p = *pp;
1708 i++;
1709 }
1710
1711 }
1712
1713 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1714 }
1715 #endif