Fix for a bug in +RTS -qi (crash in zero_static_object_list)
[ghc.git] / rts/sm/GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "GCTDecl.h"
44 #include "Compact.h"
45 #include "Evac.h"
46 #include "Scav.h"
47 #include "GCUtils.h"
48 #include "MarkStack.h"
49 #include "MarkWeak.h"
50 #include "Sparks.h"
51 #include "Sweep.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
93 */
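/* Illustrative sketch only (not part of the original file), showing the
 * list conventions described above. END_OF_STATIC_LIST is the non-NULL
 * sentinel terminating the list, and an object is already on a list iff
 * its static link field is non-NULL (the names static_list and q below
 * are hypothetical):
 *
 *   StgClosure *p, *link;
 *   for (p = static_list; p != END_OF_STATIC_LIST; p = link) {
 *       link = *STATIC_LINK(get_itbl(p), p);   // follow the chain
 *       ... scavenge p ...
 *   }
 *
 *   if (*STATIC_LINK(get_itbl(q), q) != NULL) {
 *       ... q is already on a list, don't add it again ...
 *   }
 */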
94
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
100 nat N;
101 rtsBool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #ifdef DEBUG
109 nat mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_OTHERS;
113 #endif
114
115 /* Thread-local data for each GC thread
116 */
117 gc_thread **gc_threads = NULL;
118
119 #if !defined(THREADED_RTS)
120 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
121 #endif
122
123 // Number of threads running in *this* GC. Affects how many
124 // step->todos[] lists we have to look in to find work.
125 nat n_gc_threads;
126
127 // For stats:
128 long copied; // *words* copied & scavenged during this GC
129
130 rtsBool work_stealing;
131
132 DECLARE_GCT
133
134 /* -----------------------------------------------------------------------------
135 Static function declarations
136 -------------------------------------------------------------------------- */
137
138 static void mark_root (void *user, StgClosure **root);
139 static void zero_static_object_list (StgClosure* first_static);
140 static nat initialise_N (rtsBool force_major_gc);
141 static void prepare_collected_gen (generation *gen);
142 static void prepare_uncollected_gen (generation *gen);
143 static void init_gc_thread (gc_thread *t);
144 static void resize_generations (void);
145 static void resize_nursery (void);
146 static void start_gc_threads (void);
147 static void scavenge_until_all_done (void);
148 static StgWord inc_running (void);
149 static StgWord dec_running (void);
150 static void wakeup_gc_threads (nat me);
151 static void shutdown_gc_threads (nat me);
152 static void collect_gct_blocks (void);
153
154 #if 0 && defined(DEBUG)
155 static void gcCAFs (void);
156 #endif
157
158 /* -----------------------------------------------------------------------------
159 The mark stack.
160 -------------------------------------------------------------------------- */
161
162 bdescr *mark_stack_top_bd; // topmost block in the mark stack
163 bdescr *mark_stack_bd; // current block in the mark stack
164 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
165
166 /* -----------------------------------------------------------------------------
167 GarbageCollect: the main entry point to the garbage collector.
168
169 Locks held: all capabilities are held throughout GarbageCollect().
170 -------------------------------------------------------------------------- */
171
172 void
173 GarbageCollect (rtsBool force_major_gc,
174 rtsBool do_heap_census,
175 nat gc_type USED_IF_THREADS,
176 Capability *cap)
177 {
178 bdescr *bd;
179 generation *gen;
180 lnat live_blocks, live_words, allocated, max_copied, avg_copied;
181 #if defined(THREADED_RTS)
182 gc_thread *saved_gct;
183 #endif
184 nat g, n;
185
186 // necessary if we stole a callee-saves register for gct:
187 #if defined(THREADED_RTS)
188 saved_gct = gct;
189 #endif
190
191 #ifdef PROFILING
192 CostCentreStack *save_CCS[n_capabilities];
193 #endif
194
195 ACQUIRE_SM_LOCK;
196
197 #if defined(RTS_USER_SIGNALS)
198 if (RtsFlags.MiscFlags.install_signal_handlers) {
199 // block signals
200 blockUserSignals();
201 }
202 #endif
203
204 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
205 // otherwise adjust the padding in gen_workspace.
206
207 // this is the main thread
208 SET_GCT(gc_threads[cap->no]);
209
210 // tell the stats department that we've started a GC
211 stat_startGC(gct);
212
213 // lock the StablePtr table
214 stablePtrPreGC();
215
216 #ifdef DEBUG
217 mutlist_MUTVARS = 0;
218 mutlist_MUTARRS = 0;
219 mutlist_MVARS = 0;
    mutlist_OTHERS = 0;
220 #endif
221
222 // attribute any costs to CCS_GC
223 #ifdef PROFILING
224 for (n = 0; n < n_capabilities; n++) {
225 save_CCS[n] = capabilities[n].r.rCCCS;
226 capabilities[n].r.rCCCS = CCS_GC;
227 }
228 #endif
229
230 /* Approximate how much we allocated.
231 * Todo: only when generating stats?
232 */
233 allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
234
235 /* Figure out which generation to collect
236 */
237 n = initialise_N(force_major_gc);
238
239 #if defined(THREADED_RTS)
240 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
241 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
242 // It's not always a good idea to do load balancing in parallel
243 // GC. In particular, for a parallel program we don't want to
244 // lose locality by moving cached data into another CPU's cache
245 // (this effect can be quite significant).
246 //
247 // We could have a more complex way to determine whether to do
248 // work stealing or not, e.g. it might be a good idea to do it
249 // if the heap is big. For now, we just turn it on or off with
250 // a flag.
251 #endif
252
253 /* Start threads, so they can be spinning up while we finish initialisation.
254 */
255 start_gc_threads();
256
257 #if defined(THREADED_RTS)
258 /* How many threads will be participating in this GC?
259 * We don't try to parallelise minor GCs (unless the user asks for
260 * it with +RTS -qg0), or mark/compact/sweep GC.
261 */
262 if (gc_type == SYNC_GC_PAR) {
263 n_gc_threads = n_capabilities;
264 } else {
265 n_gc_threads = 1;
266 }
267 #else
268 n_gc_threads = 1;
269 #endif
270
271 debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
272 N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
273
274 #ifdef RTS_GTK_FRONTPANEL
275 if (RtsFlags.GcFlags.frontpanel) {
276 updateFrontPanelBeforeGC(N);
277 }
278 #endif
279
280 #ifdef DEBUG
281 // check for memory leaks if DEBUG is on
282 memInventory(DEBUG_gc);
283 #endif
284
285 // check sanity *before* GC
286 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
287
288 // Initialise all the generations/steps that we're collecting.
289 for (g = 0; g <= N; g++) {
290 prepare_collected_gen(&generations[g]);
291 }
292 // Initialise all the generations/steps that we're *not* collecting.
293 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
294 prepare_uncollected_gen(&generations[g]);
295 }
296
297 // Prepare this gc_thread
298 init_gc_thread(gct);
299
300 /* Allocate a mark stack if we're doing a major collection.
301 */
302 if (major_gc && oldest_gen->mark) {
303 mark_stack_bd = allocBlock();
304 mark_stack_top_bd = mark_stack_bd;
305 mark_stack_bd->link = NULL;
306 mark_stack_bd->u.back = NULL;
307 mark_sp = mark_stack_bd->start;
308 } else {
309 mark_stack_bd = NULL;
310 mark_stack_top_bd = NULL;
311 mark_sp = NULL;
312 }
313
314 /* -----------------------------------------------------------------------
315 * follow all the roots that we know about:
316 */
317
318 // the main thread is running: this prevents any other threads from
319 // exiting prematurely, so we can start them now.
320 // NB. do this after the mutable lists have been saved above, otherwise
321 // the other GC threads will be writing into the old mutable lists.
322 inc_running();
323 wakeup_gc_threads(gct->thread_index);
324
325 traceEventGcWork(gct->cap);
326
327 // scavenge the capability-private mutable lists. This isn't part
328 // of markCapability() because markCapability() can only
329 // call back into the GC via mark_root() (due to the gct register
330 // variable).
331 if (n_gc_threads == 1) {
332 for (n = 0; n < n_capabilities; n++) {
333 #if defined(THREADED_RTS)
334 scavenge_capability_mut_Lists1(&capabilities[n]);
335 #else
336 scavenge_capability_mut_lists(&capabilities[n]);
337 #endif
338 }
339 } else {
340 scavenge_capability_mut_lists(gct->cap);
341 for (n = 0; n < n_capabilities; n++) {
342 if (gc_threads[n]->idle) {
343 markCapability(mark_root, gct, &capabilities[n],
344 rtsTrue/*don't mark sparks*/);
345 scavenge_capability_mut_lists(&capabilities[n]);
346 }
347 }
348 }
349
350 // follow roots from the CAF list (used by GHCi)
351 gct->evac_gen_no = 0;
352 markCAFs(mark_root, gct);
353
354 // follow all the roots that the application knows about.
355 gct->evac_gen_no = 0;
356 if (n_gc_threads == 1) {
357 for (n = 0; n < n_capabilities; n++) {
358 markCapability(mark_root, gct, &capabilities[n],
359 rtsTrue/*don't mark sparks*/);
360 }
361 } else {
362 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
363 }
364
365 markScheduler(mark_root, gct);
366
367 #if defined(RTS_USER_SIGNALS)
368 // mark the signal handlers (signals should be already blocked)
369 markSignalHandlers(mark_root, gct);
370 #endif
371
372 // Mark the weak pointer list, and prepare to detect dead weak pointers.
373 markWeakPtrList();
374 initWeakForGC();
375
376 // Mark the stable pointer table.
377 markStablePtrTable(mark_root, gct);
378
379 /* -------------------------------------------------------------------------
380 * Repeatedly scavenge all the areas we know about until there's no
381 * more scavenging to be done.
382 */
383 for (;;)
384 {
385 scavenge_until_all_done();
386 // The other threads are now stopped. We might recurse back to
387 // here, but from now on this is the only thread.
388
389 // must be last... invariant is that everything is fully
390 // scavenged at this point.
391 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
392 inc_running();
393 continue;
394 }
395
396 // If we get to here, there's really nothing left to do.
397 break;
398 }
399
400 shutdown_gc_threads(gct->thread_index);
401
402 // Now see which stable names are still alive.
403 gcStablePtrTable();
404
405 #ifdef THREADED_RTS
406 if (n_gc_threads == 1) {
407 for (n = 0; n < n_capabilities; n++) {
408 pruneSparkQueue(&capabilities[n]);
409 }
410 } else {
411 for (n = 0; n < n_capabilities; n++) {
412 if (n == cap->no || gc_threads[n]->idle) {
413 pruneSparkQueue(&capabilities[n]);
414 }
415 }
416 }
417 #endif
418
419 #ifdef PROFILING
420 // We call processHeapClosureForDead() on every closure destroyed during
421 // the current garbage collection, so we invoke LdvCensusForDead().
422 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
423 || RtsFlags.ProfFlags.bioSelector != NULL) {
424 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
425 LdvCensusForDead(N);
426 ACQUIRE_SM_LOCK;
427 }
428 #endif
429
430 // NO MORE EVACUATION AFTER THIS POINT!
431
432 // Finally: compact or sweep the oldest generation.
433 if (major_gc && oldest_gen->mark) {
434 if (oldest_gen->compact)
435 compact(gct->scavenged_static_objects);
436 else
437 sweep(oldest_gen);
438 }
439
440 copied = 0;
441 max_copied = 0;
442 avg_copied = 0;
443 {
444 nat i;
445 for (i=0; i < n_gc_threads; i++) {
446 if (n_gc_threads > 1) {
447 debugTrace(DEBUG_gc,"thread %d:", i);
448 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
449 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
450 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
451 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
452 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
453 }
454 copied += gc_threads[i]->copied;
455 max_copied = stg_max(gc_threads[i]->copied, max_copied);
456 }
457 if (n_gc_threads == 1) {
458 max_copied = 0;
459 avg_copied = 0;
460 } else {
461 avg_copied = copied;
462 }
463 }
464
465 // Run through all the generations/steps and tidy up.
466 // We're going to:
467 // - count the amount of "live" data (live_words, live_blocks)
468 // - count the amount of "copied" data in this GC (copied)
469 // - free from-space
470 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
471 //
472 live_words = 0;
473 live_blocks = 0;
474
475 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
476
477 if (g == N) {
478 generations[g].collections++; // for stats
479 if (n_gc_threads > 1) generations[g].par_collections++;
480 }
481
482 // Count the mutable list as bytes "copied" for the purposes of
483 // stats. Every mutable list is copied during every GC.
484 if (g > 0) {
485 nat mut_list_size = 0;
486 for (n = 0; n < n_capabilities; n++) {
487 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
488 }
489 copied += mut_list_size;
490
491 debugTrace(DEBUG_gc,
492 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
493 (unsigned long)(mut_list_size * sizeof(W_)),
494 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
495 }
496
497 bdescr *next, *prev;
498 gen = &generations[g];
499
500 // for generations we collected...
501 if (g <= N) {
502
503 /* free old memory and shift to-space into from-space for all
504 * the collected steps (except the allocation area). These
505 * freed blocks will probably be quickly recycled.
506 */
507 if (gen->mark)
508 {
509 // tack the new blocks on the end of the existing blocks
510 if (gen->old_blocks != NULL) {
511
512 prev = NULL;
513 for (bd = gen->old_blocks; bd != NULL; bd = next) {
514
515 next = bd->link;
516
517 if (!(bd->flags & BF_MARKED))
518 {
519 if (prev == NULL) {
520 gen->old_blocks = next;
521 } else {
522 prev->link = next;
523 }
524 freeGroup(bd);
525 gen->n_old_blocks--;
526 }
527 else
528 {
529 gen->n_words += bd->free - bd->start;
530
531 // NB. this step might not be compacted next
532 // time, so reset the BF_MARKED flags.
533 // They are set before GC if we're going to
534 // compact. (search for BF_MARKED above).
535 bd->flags &= ~BF_MARKED;
536
537 // between GCs, all blocks in the heap except
538 // for the nursery have the BF_EVACUATED flag set.
539 bd->flags |= BF_EVACUATED;
540
541 prev = bd;
542 }
543 }
544
545 if (prev != NULL) {
546 prev->link = gen->blocks;
547 gen->blocks = gen->old_blocks;
548 }
549 }
550 // add the new blocks to the block tally
551 gen->n_blocks += gen->n_old_blocks;
552 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
553 ASSERT(countOccupied(gen->blocks) == gen->n_words);
554 }
555 else // not compacted
556 {
557 freeChain(gen->old_blocks);
558 }
559
560 gen->old_blocks = NULL;
561 gen->n_old_blocks = 0;
562
563 /* LARGE OBJECTS. The current live large objects are chained on
564 * scavenged_large, having been moved during garbage
565 * collection from large_objects. Any objects left on the
566 * large_objects list are therefore dead, so we free them here.
567 */
568 freeChain(gen->large_objects);
569 gen->large_objects = gen->scavenged_large_objects;
570 gen->n_large_blocks = gen->n_scavenged_large_blocks;
571 gen->n_new_large_words = 0;
572 }
573 else // for generations > N
574 {
575 /* For older generations, we need to append the
576 * scavenged_large_object list (i.e. large objects that have been
577 * promoted during this GC) to the large_object list for that step.
578 */
579 for (bd = gen->scavenged_large_objects; bd; bd = next) {
580 next = bd->link;
581 dbl_link_onto(bd, &gen->large_objects);
582 }
583
584 // add the new blocks we promoted during this GC
585 gen->n_large_blocks += gen->n_scavenged_large_blocks;
586 }
587
588 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
589
590 gen->scavenged_large_objects = NULL;
591 gen->n_scavenged_large_blocks = 0;
592
593 // Count "live" data
594 live_words += genLiveWords(gen);
595 live_blocks += genLiveBlocks(gen);
596
597 // add in the partial blocks in the gen_workspaces, but ignore gen 0
598 // if this is a local GC (we can't count another capability's part_list)
599 {
600 nat i;
601 for (i = 0; i < n_capabilities; i++) {
602 live_words += gcThreadLiveWords(i, gen->no);
603 live_blocks += gcThreadLiveBlocks(i, gen->no);
604 }
605 }
606 } // for all generations
607
608 // update the max size of older generations after a major GC
609 resize_generations();
610
611 // Free the mark stack.
612 if (mark_stack_top_bd != NULL) {
613 debugTrace(DEBUG_gc, "mark stack: %d blocks",
614 countBlocks(mark_stack_top_bd));
615 freeChain(mark_stack_top_bd);
616 }
617
618 // Free any bitmaps.
619 for (g = 0; g <= N; g++) {
620 gen = &generations[g];
621 if (gen->bitmap != NULL) {
622 freeGroup(gen->bitmap);
623 gen->bitmap = NULL;
624 }
625 }
626
627 // Reset the nursery: make the blocks empty
628 allocated += clearNurseries();
629
630 resize_nursery();
631
632 resetNurseries();
633
634 // mark the garbage collected CAFs as dead
635 #if 0 && defined(DEBUG) // doesn't work at the moment
636 if (major_gc) { gcCAFs(); }
637 #endif
638
639 #ifdef PROFILING
640 // resetStaticObjectForRetainerProfiling() must be called before
641 // zeroing below.
642
643 // ToDo: fix the gct->scavenged_static_objects below
644 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
645 #endif
646
647 // zero the scavenged static object list
648 if (major_gc) {
649 nat i;
650 if (n_gc_threads == 1) {
651 zero_static_object_list(gct->scavenged_static_objects);
652 } else {
653 for (i = 0; i < n_gc_threads; i++) {
654 if (!gc_threads[i]->idle) {
655 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
656 }
657 }
658 }
659 }
660
661 // Update the stable pointer hash table.
662 updateStablePtrTable(major_gc);
663
664 // unlock the StablePtr table. Must be before scheduleFinalizers(),
665 // because a finalizer may call hs_free_fun_ptr() or
666 // hs_free_stable_ptr(), both of which access the StablePtr table.
667 stablePtrPostGC();
668
669 // Start any pending finalizers. Must be after
670 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
671 RELEASE_SM_LOCK;
672 scheduleFinalizers(cap, old_weak_ptr_list);
673 ACQUIRE_SM_LOCK;
674
675 // check sanity after GC
676 // before resurrectThreads(), because that might overwrite some
677 // closures, which will cause problems with THREADED where we don't
678 // fill slop.
679 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
680
681 // If a heap census is due, we need to do it before
682 // resurrectThreads(), for the same reason as checkSanity above:
683 // resurrectThreads() will overwrite some closures and leave slop
684 // behind.
685 if (do_heap_census) {
686 debugTrace(DEBUG_sched, "performing heap census");
687 RELEASE_SM_LOCK;
688 heapCensus(gct->gc_start_cpu);
689 ACQUIRE_SM_LOCK;
690 }
691
692 // send exceptions to any threads which were about to die
693 RELEASE_SM_LOCK;
694 resurrectThreads(resurrected_threads);
695 ACQUIRE_SM_LOCK;
696
697 if (major_gc) {
698 nat need, got;
699 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
700 got = mblocks_allocated;
701 /* If the amount of data remains constant, next major GC we'll
702 require (F+1)*need. We leave (F+2)*need in order to reduce
703 repeated deallocation and reallocation. */
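/* Worked example (illustrative figures, not from the source): with the
 * default oldGenFactor F = 2 and need = 100 mblocks of live data, we keep
 * (F+2)*need = 400 mblocks and return anything above that to the OS,
 * rather than shrinking all the way to the (F+1)*need = 300 mblocks the
 * next major GC is expected to require. */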
704 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
705 if (got > need) {
706 returnMemoryToOS(got - need);
707 }
708 }
709
710 // extra GC trace info
711 IF_DEBUG(gc, statDescribeGens());
712
713 #ifdef DEBUG
714 // symbol-table based profiling
715 /* heapCensus(to_blocks); */ /* ToDo */
716 #endif
717
718 // restore enclosing cost centre
719 #ifdef PROFILING
720 for (n = 0; n < n_capabilities; n++) {
721 capabilities[n].r.rCCCS = save_CCS[n];
722 }
723 #endif
724
725 #ifdef DEBUG
726 // check for memory leaks if DEBUG is on
727 memInventory(DEBUG_gc);
728 #endif
729
730 #ifdef RTS_GTK_FRONTPANEL
731 if (RtsFlags.GcFlags.frontpanel) {
732 updateFrontPanelAfterGC( N, live_words );
733 }
734 #endif
735
736 // ok, GC over: tell the stats department what happened.
737 stat_endGC(gct, allocated, live_words,
738 copied, N, max_copied, avg_copied,
739 live_blocks * BLOCK_SIZE_W - live_words /* slop */);
740
741 // Guess which generation we'll collect *next* time
742 initialise_N(force_major_gc);
743
744 #if defined(RTS_USER_SIGNALS)
745 if (RtsFlags.MiscFlags.install_signal_handlers) {
746 // unblock signals again
747 unblockUserSignals();
748 }
749 #endif
750
751 RELEASE_SM_LOCK;
752
753 SET_GCT(saved_gct);
754 }
755
756 /* -----------------------------------------------------------------------------
757 Figure out which generation to collect, initialise N and major_gc.
758
759 Also returns the total number of blocks in generations that will be
760 collected.
761 -------------------------------------------------------------------------- */
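/* Example (illustrative): with 3 generations, if only generations 0 and 1
 * have grown beyond their max_blocks, the loop below leaves N = 1,
 * major_gc = rtsFalse (since N != generations-1), and the result is the
 * block count of generations 0 and 1 plus the nursery blocks.
 */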
762
763 static nat
764 initialise_N (rtsBool force_major_gc)
765 {
766 int g;
767 nat blocks, blocks_total;
768
769 blocks = 0;
770 blocks_total = 0;
771
772 if (force_major_gc) {
773 N = RtsFlags.GcFlags.generations - 1;
774 } else {
775 N = 0;
776 }
777
778 for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
779
780 blocks = generations[g].n_words / BLOCK_SIZE_W
781 + generations[g].n_large_blocks;
782
783 if (blocks >= generations[g].max_blocks) {
784 N = stg_max(N,g);
785 }
786 if ((nat)g <= N) {
787 blocks_total += blocks;
788 }
789 }
790
791 blocks_total += countNurseryBlocks();
792
793 major_gc = (N == RtsFlags.GcFlags.generations-1);
794 return blocks_total;
795 }
796
797 /* -----------------------------------------------------------------------------
798 Initialise the gc_thread structures.
799 -------------------------------------------------------------------------- */
800
801 #define GC_THREAD_INACTIVE 0
802 #define GC_THREAD_STANDING_BY 1
803 #define GC_THREAD_RUNNING 2
804 #define GC_THREAD_WAITING_TO_CONTINUE 3
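/* A sketch of the wakeup handshake implemented by the code below (added
 * summary, not from the original file):
 *
 *   worker (gcWorkerThread):  releases mut_spin, sets wakeup = STANDING_BY,
 *                             then blocks acquiring gc_spin.
 *   main (wakeup_gc_threads): checks STANDING_BY, sets wakeup = RUNNING,
 *                             acquires mut_spin, releases gc_spin
 *                             -> worker runs the GC.
 *   worker (after GC):        releases gc_spin, sets wakeup =
 *                             WAITING_TO_CONTINUE, blocks acquiring mut_spin.
 *   main (releaseGCThreads):  sets wakeup = INACTIVE, acquires gc_spin,
 *                             releases mut_spin -> worker returns to the
 *                             mutator.
 */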
805
806 static void
807 new_gc_thread (nat n, gc_thread *t)
808 {
809 nat g;
810 gen_workspace *ws;
811
812 t->cap = &capabilities[n];
813
814 #ifdef THREADED_RTS
815 t->id = 0;
816 initSpinLock(&t->gc_spin);
817 initSpinLock(&t->mut_spin);
818 ACQUIRE_SPIN_LOCK(&t->gc_spin);
819 t->wakeup = GC_THREAD_INACTIVE; // starts inactive, so we can wait for the
820 // thread to reach STANDING_BY; see waitForGcThreads/wakeup_gc_threads
821 #endif
822
823 t->thread_index = n;
824 t->idle = rtsFalse;
825 t->free_blocks = NULL;
826 t->gc_count = 0;
827
828 init_gc_thread(t);
829
830 #ifdef USE_PAPI
831 t->papi_events = -1;
832 #endif
833
834 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
835 {
836 ws = &t->gens[g];
837 ws->gen = &generations[g];
838 ASSERT(g == ws->gen->no);
839 ws->my_gct = t;
840
841 // We want to call
842 // alloc_todo_block(ws,0);
843 // but can't, because it uses gct which isn't set up at this point.
844 // Hence, allocate a block for todo_bd manually:
845 {
846 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
847 initBdescr(bd, ws->gen, ws->gen->to);
848 bd->flags = BF_EVACUATED;
849 bd->u.scan = bd->free = bd->start;
850
851 ws->todo_bd = bd;
852 ws->todo_free = bd->free;
853 ws->todo_lim = bd->start + BLOCK_SIZE_W;
854 }
855
856 ws->todo_q = newWSDeque(128);
857 ws->todo_overflow = NULL;
858 ws->n_todo_overflow = 0;
859 ws->todo_large_objects = NULL;
860
861 ws->part_list = NULL;
862 ws->n_part_blocks = 0;
863
864 ws->scavd_list = NULL;
865 ws->n_scavd_blocks = 0;
866 }
867 }
868
869
870 void
871 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
872 {
873 #if defined(THREADED_RTS)
874 nat i;
875
876 if (from > 0) {
877 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
878 "initGcThreads");
879 } else {
880 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
881 "initGcThreads");
882 }
883
884 // We have to update the gct->cap pointers to point to the new
885 // Capability array now.
886 for (i = 0; i < from; i++) {
887 gc_threads[i]->cap = &capabilities[gc_threads[i]->cap->no];
888 }
889
890 for (i = from; i < to; i++) {
891 gc_threads[i] =
892 stgMallocBytes(sizeof(gc_thread) +
893 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
894 "alloc_gc_threads");
895
896 new_gc_thread(i, gc_threads[i]);
897 }
898 #else
899 ASSERT(from == 0 && to == 1);
900 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
901 gc_threads[0] = gct;
902 new_gc_thread(0,gc_threads[0]);
903 #endif
904 }
905
906 void
907 freeGcThreads (void)
908 {
909 nat g;
910 if (gc_threads != NULL) {
911 #if defined(THREADED_RTS)
912 nat i;
913 for (i = 0; i < n_capabilities; i++) {
914 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
915 {
916 freeWSDeque(gc_threads[i]->gens[g].todo_q);
917 }
918 stgFree (gc_threads[i]);
919 }
920 stgFree (gc_threads);
921 #else
922 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
923 {
924 freeWSDeque(gc_threads[0]->gens[g].todo_q);
925 }
926 stgFree (gc_threads);
927 #endif
928 gc_threads = NULL;
929 }
930 }
931
932 /* ----------------------------------------------------------------------------
933 Start GC threads
934 ------------------------------------------------------------------------- */
935
936 static volatile StgWord gc_running_threads;
937
938 static StgWord
939 inc_running (void)
940 {
941 StgWord new;
942 new = atomic_inc(&gc_running_threads);
943 ASSERT(new <= n_gc_threads);
944 return new;
945 }
946
947 static StgWord
948 dec_running (void)
949 {
950 ASSERT(gc_running_threads != 0);
951 return atomic_dec(&gc_running_threads);
952 }
953
954 static rtsBool
955 any_work (void)
956 {
957 int g;
958 gen_workspace *ws;
959
960 gct->any_work++;
961
962 write_barrier();
963
964 // scavenge objects in compacted generation
965 if (mark_stack_bd != NULL && !mark_stack_empty()) {
966 return rtsTrue;
967 }
968
969 // Check for global work in any step. We don't need to check for
970 // local work, because we have already exited scavenge_loop(),
971 // which means there is no local work for this thread.
972 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
973 ws = &gct->gens[g];
974 if (ws->todo_large_objects) return rtsTrue;
975 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
976 if (ws->todo_overflow) return rtsTrue;
977 }
978
979 #if defined(THREADED_RTS)
980 if (work_stealing) {
981 nat n;
982 // look for work to steal
983 for (n = 0; n < n_gc_threads; n++) {
984 if (n == gct->thread_index) continue;
985 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
986 ws = &gc_threads[n]->gens[g];
987 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
988 }
989 }
990 }
991 #endif
992
993 gct->no_work++;
994 #if defined(THREADED_RTS)
995 yieldThread();
996 #endif
997
998 return rtsFalse;
999 }
1000
1001 static void
1002 scavenge_until_all_done (void)
1003 {
1004 DEBUG_ONLY( nat r );
1005
1006
1007 loop:
1008 #if defined(THREADED_RTS)
1009 if (n_gc_threads > 1) {
1010 scavenge_loop();
1011 } else {
1012 scavenge_loop1();
1013 }
1014 #else
1015 scavenge_loop();
1016 #endif
1017
1018 collect_gct_blocks();
1019
1020 // scavenge_loop() only exits when there's no work to do
1021
1022 #ifdef DEBUG
1023 r = dec_running();
1024 #else
1025 dec_running();
1026 #endif
1027
1028 traceEventGcIdle(gct->cap);
1029
1030 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1031
1032 while (gc_running_threads != 0) {
1033 // usleep(1);
1034 if (any_work()) {
1035 inc_running();
1036 traceEventGcWork(gct->cap);
1037 goto loop;
1038 }
1039 // any_work() does not remove the work from the queue, it
1040 // just checks for the presence of work. If we find any,
1041 // then we increment gc_running_threads and go back to
1042 // scavenge_loop() to perform any pending work.
1043 }
1044
1045 traceEventGcDone(gct->cap);
1046 }
1047
1048 #if defined(THREADED_RTS)
1049
1050 void
1051 gcWorkerThread (Capability *cap)
1052 {
1053 gc_thread *saved_gct;
1054
1055 // necessary if we stole a callee-saves register for gct:
1056 saved_gct = gct;
1057
1058 SET_GCT(gc_threads[cap->no]);
1059 gct->id = osThreadId();
1060
1061 stat_gcWorkerThreadStart(gct);
1062
1063 // Wait until we're told to wake up
1064 RELEASE_SPIN_LOCK(&gct->mut_spin);
1065 // yieldThread();
1066 // Strangely, adding a yieldThread() here makes the CPU time
1067 // measurements more accurate on Linux, perhaps because it syncs
1068 // the CPU time across the multiple cores. Without this, CPU time
1069 // is heavily skewed towards GC rather than MUT.
1070 gct->wakeup = GC_THREAD_STANDING_BY;
1071 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1072 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1073
1074 #ifdef USE_PAPI
1075 // start performance counters in this thread...
1076 if (gct->papi_events == -1) {
1077 papi_init_eventset(&gct->papi_events);
1078 }
1079 papi_thread_start_gc1_count(gct->papi_events);
1080 #endif
1081
1082 init_gc_thread(gct);
1083
1084 traceEventGcWork(gct->cap);
1085
1086 // Every thread evacuates some roots.
1087 gct->evac_gen_no = 0;
1088 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks: they are pruned below*/);
1089 scavenge_capability_mut_lists(cap);
1090
1091 scavenge_until_all_done();
1092
1093 #ifdef THREADED_RTS
1094 // Now that the whole heap is marked, we discard any sparks that
1095 // were found to be unreachable. The main GC thread is currently
1096 // marking heap reachable via weak pointers, so it is
1097 // non-deterministic whether a spark will be retained if it is
1098 // only reachable via weak pointers. To fix this problem would
1099 // require another GC barrier, which is too high a price.
1100 pruneSparkQueue(cap);
1101 #endif
1102
1103 #ifdef USE_PAPI
1104 // count events in this thread towards the GC totals
1105 papi_thread_stop_gc1_count(gct->papi_events);
1106 #endif
1107
1108 // Wait until we're told to continue
1109 RELEASE_SPIN_LOCK(&gct->gc_spin);
1110 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1111 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1112 gct->thread_index);
1113 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1114 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1115
1116 // record the time spent doing GC in the Task structure
1117 stat_gcWorkerThreadDone(gct);
1118
1119 SET_GCT(saved_gct);
1120 }
1121
1122 #endif
1123
1124 #if defined(THREADED_RTS)
1125
1126 void
1127 waitForGcThreads (Capability *cap USED_IF_THREADS)
1128 {
1129 const nat n_threads = n_capabilities;
1130 const nat me = cap->no;
1131 nat i, j;
1132 rtsBool retry = rtsTrue;
1133
1134 while(retry) {
1135 for (i=0; i < n_threads; i++) {
1136 if (i == me || gc_threads[i]->idle) continue;
1137 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1138 prodCapability(&capabilities[i], cap->running_task);
1139 }
1140 }
1141 for (j=0; j < 10; j++) {
1142 retry = rtsFalse;
1143 for (i=0; i < n_threads; i++) {
1144 if (i == me || gc_threads[i]->idle) continue;
1145 write_barrier();
1146 interruptCapability(&capabilities[i]);
1147 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1148 retry = rtsTrue;
1149 }
1150 }
1151 if (!retry) break;
1152 yieldThread();
1153 }
1154 }
1155 }
1156
1157 #endif // THREADED_RTS
1158
1159 static void
1160 start_gc_threads (void)
1161 {
1162 #if defined(THREADED_RTS)
1163 gc_running_threads = 0;
1164 #endif
1165 }
1166
1167 static void
1168 wakeup_gc_threads (nat me USED_IF_THREADS)
1169 {
1170 #if defined(THREADED_RTS)
1171 nat i;
1172
1173 if (n_gc_threads == 1) return;
1174
1175 for (i=0; i < n_gc_threads; i++) {
1176 if (i == me || gc_threads[i]->idle) continue;
1177 inc_running();
1178 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1179 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1180
1181 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1182 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1183 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1184 }
1185 #endif
1186 }
1187
1188 // After GC is complete, we must wait for all GC threads to enter the
1189 // standby state, otherwise they may still be executing inside
1190 // any_work(), and may even remain awake until the next GC starts.
1191 static void
1192 shutdown_gc_threads (nat me USED_IF_THREADS)
1193 {
1194 #if defined(THREADED_RTS)
1195 nat i;
1196
1197 if (n_gc_threads == 1) return;
1198
1199 for (i=0; i < n_gc_threads; i++) {
1200 if (i == me || gc_threads[i]->idle) continue;
1201 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1202 }
1203 #endif
1204 }
1205
1206 #if defined(THREADED_RTS)
1207 void
1208 releaseGCThreads (Capability *cap USED_IF_THREADS)
1209 {
1210 const nat n_threads = n_capabilities;
1211 const nat me = cap->no;
1212 nat i;
1213 for (i=0; i < n_threads; i++) {
1214 if (i == me || gc_threads[i]->idle) continue;
1215 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1216 barf("releaseGCThreads");
1217
1218 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1219 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1220 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1221 }
1222 }
1223 #endif
1224
1225 /* ----------------------------------------------------------------------------
1226 Initialise a generation that is to be collected
1227 ------------------------------------------------------------------------- */
1228
1229 static void
1230 prepare_collected_gen (generation *gen)
1231 {
1232 nat i, g, n;
1233 gen_workspace *ws;
1234 bdescr *bd, *next;
1235
1236 // Throw away the current mutable list. Invariant: the mutable
1237 // list always has at least one block; this means we can avoid a
1238 // check for NULL in recordMutable().
1239 g = gen->no;
1240 if (g != 0) {
1241 for (i = 0; i < n_capabilities; i++) {
1242 freeChain(capabilities[i].mut_lists[g]);
1243 capabilities[i].mut_lists[g] = allocBlock();
1244 }
1245 }
1246
1247 gen = &generations[g];
1248 ASSERT(gen->no == g);
1249
1250 // we'll construct a new list of threads in this step
1251 // during GC, throw away the current list.
1252 gen->old_threads = gen->threads;
1253 gen->threads = END_TSO_QUEUE;
1254
1255 // deprecate the existing blocks
1256 gen->old_blocks = gen->blocks;
1257 gen->n_old_blocks = gen->n_blocks;
1258 gen->blocks = NULL;
1259 gen->n_blocks = 0;
1260 gen->n_words = 0;
1261 gen->live_estimate = 0;
1262
1263 // initialise the large object queues.
1264 ASSERT(gen->scavenged_large_objects == NULL);
1265 ASSERT(gen->n_scavenged_large_blocks == 0);
1266
1267 // grab all the partial blocks stashed in the gc_thread workspaces and
1268 // move them to the old_blocks list of this gen.
1269 for (n = 0; n < n_capabilities; n++) {
1270 ws = &gc_threads[n]->gens[gen->no];
1271
1272 for (bd = ws->part_list; bd != NULL; bd = next) {
1273 next = bd->link;
1274 bd->link = gen->old_blocks;
1275 gen->old_blocks = bd;
1276 gen->n_old_blocks += bd->blocks;
1277 }
1278 ws->part_list = NULL;
1279 ws->n_part_blocks = 0;
1280
1281 ASSERT(ws->scavd_list == NULL);
1282 ASSERT(ws->n_scavd_blocks == 0);
1283
1284 if (ws->todo_free != ws->todo_bd->start) {
1285 ws->todo_bd->free = ws->todo_free;
1286 ws->todo_bd->link = gen->old_blocks;
1287 gen->old_blocks = ws->todo_bd;
1288 gen->n_old_blocks += ws->todo_bd->blocks;
1289 alloc_todo_block(ws,0); // always has one block.
1290 }
1291 }
1292
1293 // mark the small objects as from-space
1294 for (bd = gen->old_blocks; bd; bd = bd->link) {
1295 bd->flags &= ~BF_EVACUATED;
1296 }
1297
1298 // mark the large objects as from-space
1299 for (bd = gen->large_objects; bd; bd = bd->link) {
1300 bd->flags &= ~BF_EVACUATED;
1301 }
1302
1303 // for a compacted generation, we need to allocate the bitmap
1304 if (gen->mark) {
1305 lnat bitmap_size; // in bytes
1306 bdescr *bitmap_bdescr;
1307 StgWord *bitmap;
1308
1309 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
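// (One mark bit per heap word: each block has BLOCK_SIZE/sizeof(W_) words,
// so dividing again by BITS_PER_BYTE gives bytes of bitmap. E.g. with
// 4096-byte blocks and 8-byte words, that's 4096/8/8 = 64 bytes per block.)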
1310
1311 if (bitmap_size > 0) {
1312 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1313 / BLOCK_SIZE);
1314 gen->bitmap = bitmap_bdescr;
1315 bitmap = bitmap_bdescr->start;
1316
1317 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1318 bitmap_size, bitmap);
1319
1320 // don't forget to fill it with zeros!
1321 memset(bitmap, 0, bitmap_size);
1322
1323 // For each block in this step, point to its bitmap from the
1324 // block descriptor.
1325 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1326 bd->u.bitmap = bitmap;
1327 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1328
1329 // Also at this point we set the BF_MARKED flag
1330 // for this block. The invariant is that
1331 // BF_MARKED is always unset, except during GC
1332 // when it is set on those blocks which will be
1333 // compacted.
1334 if (!(bd->flags & BF_FRAGMENTED)) {
1335 bd->flags |= BF_MARKED;
1336 }
1337
1338 // BF_SWEPT should be marked only for blocks that are being
1339 // collected in sweep()
1340 bd->flags &= ~BF_SWEPT;
1341 }
1342 }
1343 }
1344 }
1345
1346
1347 /* ----------------------------------------------------------------------------
1348 Save the mutable lists in saved_mut_lists
1349 ------------------------------------------------------------------------- */
1350
1351 static void
1352 stash_mut_list (Capability *cap, nat gen_no)
1353 {
1354 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1355 cap->mut_lists[gen_no] = allocBlock_sync();
1356 }
1357
1358 /* ----------------------------------------------------------------------------
1359 Initialise a generation that is *not* to be collected
1360 ------------------------------------------------------------------------- */
1361
1362 static void
1363 prepare_uncollected_gen (generation *gen)
1364 {
1365 nat i;
1366
1367
1368 ASSERT(gen->no > 0);
1369
1370 // save the current mutable lists for this generation, and
1371 // allocate a fresh block for each one. We'll traverse these
1372 // mutable lists as roots early on in the GC.
1373 for (i = 0; i < n_capabilities; i++) {
1374 stash_mut_list(&capabilities[i], gen->no);
1375 }
1376
1377 ASSERT(gen->scavenged_large_objects == NULL);
1378 ASSERT(gen->n_scavenged_large_blocks == 0);
1379 }
1380
1381 /* -----------------------------------------------------------------------------
1382 Collect the completed blocks from a GC thread and attach them to
1383 the generation.
1384 -------------------------------------------------------------------------- */
1385
1386 static void
1387 collect_gct_blocks (void)
1388 {
1389 nat g;
1390 gen_workspace *ws;
1391 bdescr *bd, *prev;
1392
1393 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1394 ws = &gct->gens[g];
1395
1396 // there may still be a block attached to ws->todo_bd;
1397 // leave it there to use next time.
1398
1399 if (ws->scavd_list != NULL) {
1400 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1401
1402 ASSERT(gct->scan_bd == NULL);
1403 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1404
1405 prev = NULL;
1406 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1407 ws->gen->n_words += bd->free - bd->start;
1408 prev = bd;
1409 }
1410 if (prev != NULL) {
1411 prev->link = ws->gen->blocks;
1412 ws->gen->blocks = ws->scavd_list;
1413 }
1414 ws->gen->n_blocks += ws->n_scavd_blocks;
1415
1416 ws->scavd_list = NULL;
1417 ws->n_scavd_blocks = 0;
1418
1419 RELEASE_SPIN_LOCK(&ws->gen->sync);
1420 }
1421 }
1422 }
1423
1424 /* -----------------------------------------------------------------------------
1425 Initialise a gc_thread before GC
1426 -------------------------------------------------------------------------- */
1427
1428 static void
1429 init_gc_thread (gc_thread *t)
1430 {
1431 t->static_objects = END_OF_STATIC_LIST;
1432 t->scavenged_static_objects = END_OF_STATIC_LIST;
1433 t->scan_bd = NULL;
1434 t->mut_lists = t->cap->mut_lists;
1435 t->evac_gen_no = 0;
1436 t->failed_to_evac = rtsFalse;
1437 t->eager_promotion = rtsTrue;
1438 t->thunk_selector_depth = 0;
1439 t->copied = 0;
1440 t->scanned = 0;
1441 t->any_work = 0;
1442 t->no_work = 0;
1443 t->scav_find_work = 0;
1444 }
1445
1446 /* -----------------------------------------------------------------------------
1447 Function we pass to evacuate roots.
1448 -------------------------------------------------------------------------- */
1449
1450 static void
1451 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1452 {
1453 // we stole a register for gct, but this function is called from
1454 // *outside* the GC where the register variable is not in effect,
1455 // so we need to save and restore it here. NB. only call
1456 // mark_root() from the main GC thread, otherwise gct will be
1457 // incorrect.
1458 #if defined(THREADED_RTS)
1459 gc_thread *saved_gct;
1460 saved_gct = gct;
1461 #endif
1462 SET_GCT(user);
1463
1464 evacuate(root);
1465
1466 SET_GCT(saved_gct);
1467 }
1468
1469 /* -----------------------------------------------------------------------------
1470 Initialising the static object & mutable lists
1471 -------------------------------------------------------------------------- */
1472
1473 static void
1474 zero_static_object_list(StgClosure* first_static)
1475 {
1476 StgClosure* p;
1477 StgClosure* link;
1478 const StgInfoTable *info;
1479
1480 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1481 info = get_itbl(p);
1482 link = *STATIC_LINK(info, p);
1483 *STATIC_LINK(info,p) = NULL;
1484 }
1485 }
1486
1487 /* ----------------------------------------------------------------------------
1488 Reset the sizes of the older generations when we do a major
1489 collection.
1490
1491 CURRENT STRATEGY: make all generations except zero the same size.
1492 We have to stay within the maximum heap size, and leave a certain
1493 percentage of the maximum heap size available to allocate into.
1494 ------------------------------------------------------------------------- */
1495
1496 static void
1497 resize_generations (void)
1498 {
1499 nat g;
1500
1501 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1502 nat live, size, min_alloc, words;
1503 const nat max = RtsFlags.GcFlags.maxHeapSize;
1504 const nat gens = RtsFlags.GcFlags.generations;
1505
1506 // live in the oldest generations
1507 if (oldest_gen->live_estimate != 0) {
1508 words = oldest_gen->live_estimate;
1509 } else {
1510 words = oldest_gen->n_words;
1511 }
1512 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1513 oldest_gen->n_large_blocks;
1514
1515 // default max size for all generations except zero
1516 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1517 RtsFlags.GcFlags.minOldGenSize);
1518
1519 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1520 RtsFlags.GcFlags.heapSizeSuggestion = size;
1521 }
1522
1523 // minimum size for generation zero
1524 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1525 RtsFlags.GcFlags.minAllocAreaSize);
1526
1527 // Auto-enable compaction when the residency reaches a
1528 // certain percentage of the maximum heap size (default: 30%).
1529 if (RtsFlags.GcFlags.compact ||
1530 (max > 0 &&
1531 oldest_gen->n_blocks >
1532 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1533 oldest_gen->mark = 1;
1534 oldest_gen->compact = 1;
1535 // debugBelch("compaction: on\n", live);
1536 } else {
1537 oldest_gen->mark = 0;
1538 oldest_gen->compact = 0;
1539 // debugBelch("compaction: off\n", live);
1540 }
1541
1542 if (RtsFlags.GcFlags.sweep) {
1543 oldest_gen->mark = 1;
1544 }
1545
1546 // if we're going to go over the maximum heap size, reduce the
1547 // size of the generations accordingly. The calculation is
1548 // different if compaction is turned on, because we don't need
1549 // to double the space required to collect the old generation.
1550 if (max != 0) {
1551
1552 // this test is necessary to ensure that the calculations
1553 // below don't have any negative results - we're working
1554 // with unsigned values here.
1555 if (max < min_alloc) {
1556 heapOverflow();
1557 }
1558
1559 if (oldest_gen->compact) {
1560 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1561 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1562 }
1563 } else {
1564 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1565 size = (max - min_alloc) / ((gens - 1) * 2);
1566 }
1567 }
1568
1569 if (size < live) {
1570 heapOverflow();
1571 }
1572 }
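/* Worked example (illustrative numbers only): with gens = 2, compaction
 * off, max = 100000 blocks and min_alloc = 10000 blocks, the test above
 * caps size at (100000 - 10000) / ((2 - 1) * 2) = 45000 blocks: the old
 * generation may hold 45000 blocks, copying it at collection time needs
 * another 45000, and 10000 are left for the allocation area.
 */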
1573
1574 #if 0
1575 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1576 min_alloc, size, max);
1577 #endif
1578
1579 for (g = 0; g < gens; g++) {
1580 generations[g].max_blocks = size;
1581 }
1582 }
1583 }
1584
1585 /* -----------------------------------------------------------------------------
1586 Calculate the new size of the nursery, and resize it.
1587 -------------------------------------------------------------------------- */
1588
1589 static void
1590 resize_nursery (void)
1591 {
1592 const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1593
1594 if (RtsFlags.GcFlags.generations == 1)
1595 { // Two-space collector:
1596 nat blocks;
1597
1598 /* set up a new nursery. Allocate a nursery size based on a
1599 * function of the amount of live data (by default a factor of 2)
1600 * Use the blocks from the old nursery if possible, freeing up any
1601 * left over blocks.
1602 *
1603 * If we get near the maximum heap size, then adjust our nursery
1604 * size accordingly. If the nursery is the same size as the live
1605 * data (L), then we need 3L bytes. We can reduce the size of the
1606 * nursery to bring the required memory down near 2L bytes.
1607 *
1608 * A normal 2-space collector would need 4L bytes to give the same
1609 * performance we get from 3L bytes, reducing to the same
1610 * performance at 2L bytes.
1611 */
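/* For example (illustrative): if generation 0 holds L live blocks after
 * this collection, the code below normally sizes the nursery at
 * oldGenFactor * L blocks (2L by default); near maxHeapSize it is
 * limited to maxHeapSize - 2*L blocks instead.
 */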
1612 blocks = generations[0].n_blocks;
1613
1614 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1615 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1616 RtsFlags.GcFlags.maxHeapSize )
1617 {
1618 long adjusted_blocks; // signed on purpose
1619 int pc_free;
1620
1621 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1622
1623 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1624 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1625
1626 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1627 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1628 {
1629 heapOverflow();
1630 }
1631 blocks = adjusted_blocks;
1632 }
1633 else
1634 {
1635 blocks *= RtsFlags.GcFlags.oldGenFactor;
1636 if (blocks < min_nursery)
1637 {
1638 blocks = min_nursery;
1639 }
1640 }
1641 resizeNurseries(blocks);
1642 }
1643 else // Generational collector
1644 {
1645 /*
1646 * If the user has given us a suggested heap size, adjust our
1647 * allocation area to make best use of the memory available.
1648 */
1649 if (RtsFlags.GcFlags.heapSizeSuggestion)
1650 {
1651 long blocks;
1652 const nat needed = calcNeeded(); // approx blocks needed at next GC
1653
1654 /* Guess how much will be live in generation 0 step 0 next time.
1655 * A good approximation is obtained by finding the
1656 * percentage of g0 that was live at the last minor GC.
1657 *
1658 * We have an accurate figure for the amount of copied data in
1659 * 'copied', but we must convert this to a number of blocks, with
1660 * a small adjustment for estimated slop at the end of a block
1661 * (- 10 words).
1662 */
1663 if (N == 0)
1664 {
1665 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1666 / countNurseryBlocks();
1667 }
1668
1669 /* Estimate a size for the allocation area based on the
1670 * information available. We might end up going slightly under
1671 * or over the suggested heap size, but we should be pretty
1672 * close on average.
1673 *
1674 * Formula: suggested - needed
1675 * ----------------------------
1676 * 1 + g0_pcnt_kept/100
1677 *
1678 * where 'needed' is the amount of memory needed at the next
1679 * collection for collecting all gens except g0.
1680 */
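/* e.g. (illustrative numbers): suggested = 50000 blocks, needed = 10000
 * blocks, g0_pcnt_kept = 25 gives
 * blocks = (50000 - 10000) * 100 / 125 = 32000 blocks of allocation
 * area; the ~8000 blocks expected to survive minor GCs plus 'needed'
 * then keep the total near the suggestion.
 */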
1681 blocks =
1682 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1683 (100 + (long)g0_pcnt_kept);
1684
1685 if (blocks < (long)min_nursery) {
1686 blocks = min_nursery;
1687 }
1688
1689 resizeNurseries((nat)blocks);
1690 }
1691 else
1692 {
1693 // we might have added extra large blocks to the nursery, so
1694 // resize back to minAllocAreaSize again.
1695 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1696 }
1697 }
1698 }
1699
1700 /* -----------------------------------------------------------------------------
1701 Sanity code for CAF garbage collection.
1702
1703 With DEBUG turned on, we manage a CAF list in addition to the SRT
1704 mechanism. After GC, we run down the CAF list and blackhole any
1705 CAFs which have been garbage collected. This means we get an error
1706 whenever the program tries to enter a garbage collected CAF.
1707
1708 Any garbage collected CAFs are taken off the CAF list at the same
1709 time.
1710 -------------------------------------------------------------------------- */
1711
1712 #if 0 && defined(DEBUG)
1713
1714 static void
1715 gcCAFs(void)
1716 {
1717 StgClosure* p;
1718 StgClosure** pp;
1719 const StgInfoTable *info;
1720 nat i;
1721
1722 i = 0;
1723 p = caf_list;
1724 pp = &caf_list;
1725
1726 while (p != NULL) {
1727
1728 info = get_itbl(p);
1729
1730 ASSERT(info->type == IND_STATIC);
1731
1732 if (STATIC_LINK(info,p) == NULL) {
1733 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1734 // black hole it
1735 SET_INFO(p,&stg_BLACKHOLE_info);
1736 p = STATIC_LINK2(info,p);
1737 *pp = p;
1738 }
1739 else {
1740 pp = &STATIC_LINK2(info,p);
1741 p = *pp;
1742 i++;
1743 }
1744
1745 }
1746
1747 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1748 }
1749 #endif