1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "GCTDecl.h"
44 #include "Compact.h"
45 #include "Evac.h"
46 #include "Scav.h"
47 #include "GCUtils.h"
48 #include "MarkStack.h"
49 #include "MarkWeak.h"
50 #include "Sparks.h"
51 #include "Sweep.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
93 */
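/* A hedged sketch of the push/membership idiom described above. The real
 * logic lives in evacuate() in Evac.c; STATIC_LINK and END_OF_STATIC_LIST
 * are the same macros used later in this file:
 *
 *     if (*STATIC_LINK(info, p) == NULL) {             // not yet on any list
 *         *STATIC_LINK(info, p) = gct->static_objects; // cons onto the front
 *         gct->static_objects = p;                     // ends at END_OF_STATIC_LIST ('1')
 *     }
 */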
94
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
100 nat N;
101 rtsBool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #ifdef DEBUG
109 nat mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_OTHERS;
113 #endif
114
115 /* Thread-local data for each GC thread
116 */
117 gc_thread **gc_threads = NULL;
118
119 #if !defined(THREADED_RTS)
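// Room for the single gc_thread plus workspaces for up to 64 generations;
// the gens[] workspace array is allocated at the end of the gc_thread
// structure (see initGcThreads() below).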
120 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
121 #endif
122
123 // Number of threads running in *this* GC. Affects how many
124 // step->todos[] lists we have to look in to find work.
125 nat n_gc_threads;
126
127 // For stats:
128 long copied; // *words* copied & scavenged during this GC
129
130 rtsBool work_stealing;
131
132 DECLARE_GCT
133
134 /* -----------------------------------------------------------------------------
135 Static function declarations
136 -------------------------------------------------------------------------- */
137
138 static void mark_root (void *user, StgClosure **root);
139 static void zero_static_object_list (StgClosure* first_static);
140 static void prepare_collected_gen (generation *gen);
141 static void prepare_uncollected_gen (generation *gen);
142 static void init_gc_thread (gc_thread *t);
143 static void resize_generations (void);
144 static void resize_nursery (void);
145 static void start_gc_threads (void);
146 static void scavenge_until_all_done (void);
147 static StgWord inc_running (void);
148 static StgWord dec_running (void);
149 static void wakeup_gc_threads (nat me);
150 static void shutdown_gc_threads (nat me);
151 static void collect_gct_blocks (void);
152 static lnat collect_pinned_object_blocks (void);
153
154 #if 0 && defined(DEBUG)
155 static void gcCAFs (void);
156 #endif
157
158 /* -----------------------------------------------------------------------------
159 The mark stack.
160 -------------------------------------------------------------------------- */
161
162 bdescr *mark_stack_top_bd; // topmost block in the mark stack
163 bdescr *mark_stack_bd; // current block in the mark stack
164 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
165
166 /* -----------------------------------------------------------------------------
167 GarbageCollect: the main entry point to the garbage collector.
168
169    The collect_gen parameter is obtained by calling calcNeeded().
170
171 Locks held: all capabilities are held throughout GarbageCollect().
172 -------------------------------------------------------------------------- */
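/* Illustrative only (kept out of the build): roughly how a caller drives
 * this entry point, assuming calcNeeded() returns the generation to collect
 * as described above. The real call sites are in Schedule.c; SYNC_GC_SEQ
 * is the sequential counterpart of the SYNC_GC_PAR value tested below. */
#if 0
static void example_request_gc (Capability *cap)
{
    lnat blocks_needed;
    nat collect_gen = calcNeeded(rtsFalse /* not a forced major GC */,
                                 &blocks_needed);
    GarbageCollect(collect_gen, rtsFalse /* no heap census */,
                   SYNC_GC_SEQ /* single-threaded GC */, cap);
}
#endif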
173
174 void
175 GarbageCollect (nat collect_gen,
176 rtsBool do_heap_census,
177 nat gc_type USED_IF_THREADS,
178 Capability *cap)
179 {
180 bdescr *bd;
181 generation *gen;
182 lnat live_blocks, live_words, allocated, par_max_copied, par_tot_copied;
183 #if defined(THREADED_RTS)
184 gc_thread *saved_gct;
185 #endif
186 nat g, n;
187
188 // necessary if we stole a callee-saves register for gct:
189 #if defined(THREADED_RTS)
190 saved_gct = gct;
191 #endif
192
193 #ifdef PROFILING
194 CostCentreStack *save_CCS[n_capabilities];
195 #endif
196
197 ACQUIRE_SM_LOCK;
198
199 #if defined(RTS_USER_SIGNALS)
200 if (RtsFlags.MiscFlags.install_signal_handlers) {
201 // block signals
202 blockUserSignals();
203 }
204 #endif
205
206 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
207 // otherwise adjust the padding in gen_workspace.
208
209 // this is the main thread
210 SET_GCT(gc_threads[cap->no]);
211
212 // tell the stats department that we've started a GC
213 stat_startGC(cap, gct);
214
215 // lock the StablePtr table
216 stablePtrPreGC();
217
218 #ifdef DEBUG
219 mutlist_MUTVARS = 0;
220 mutlist_MUTARRS = 0;
221 mutlist_OTHERS = 0;
222 #endif
223
224 // attribute any costs to CCS_GC
225 #ifdef PROFILING
226 for (n = 0; n < n_capabilities; n++) {
227 save_CCS[n] = capabilities[n].r.rCCCS;
228 capabilities[n].r.rCCCS = CCS_GC;
229 }
230 #endif
231
232 /* Approximate how much we allocated.
233 * Todo: only when generating stats?
234 */
235 allocated = countLargeAllocated(); /* don't count the nursery yet */
236
237 /* Figure out which generation to collect
238 */
239 N = collect_gen;
240 major_gc = (N == RtsFlags.GcFlags.generations-1);
241
242 #if defined(THREADED_RTS)
243 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
244 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
245 // It's not always a good idea to do load balancing in parallel
246 // GC. In particular, for a parallel program we don't want to
247 // lose locality by moving cached data into another CPU's cache
248 // (this effect can be quite significant).
249 //
250     // We could have a more complex way to determine whether to do
251 // work stealing or not, e.g. it might be a good idea to do it
252 // if the heap is big. For now, we just turn it on or off with
253 // a flag.
254 #endif
255
256 /* Start threads, so they can be spinning up while we finish initialisation.
257 */
258 start_gc_threads();
259
260 #if defined(THREADED_RTS)
261 /* How many threads will be participating in this GC?
262 * We don't try to parallelise minor GCs (unless the user asks for
263      * it with +RTS -qg0), or mark/compact/sweep GC.
264 */
265 if (gc_type == SYNC_GC_PAR) {
266 n_gc_threads = n_capabilities;
267 } else {
268 n_gc_threads = 1;
269 }
270 #else
271 n_gc_threads = 1;
272 #endif
273
274 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
275 N, n_gc_threads);
276
277 #ifdef RTS_GTK_FRONTPANEL
278 if (RtsFlags.GcFlags.frontpanel) {
279 updateFrontPanelBeforeGC(N);
280 }
281 #endif
282
283 #ifdef DEBUG
284 // check for memory leaks if DEBUG is on
285 memInventory(DEBUG_gc);
286 #endif
287
288 // check sanity *before* GC
289 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
290
291 // gather blocks allocated using allocatePinned() from each capability
292   // and put them on the g0->large_objects list.
293 collect_pinned_object_blocks();
294
295 // Initialise all the generations/steps that we're collecting.
296 for (g = 0; g <= N; g++) {
297 prepare_collected_gen(&generations[g]);
298 }
299 // Initialise all the generations/steps that we're *not* collecting.
300 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
301 prepare_uncollected_gen(&generations[g]);
302 }
303
304 // Prepare this gc_thread
305 init_gc_thread(gct);
306
307 /* Allocate a mark stack if we're doing a major collection.
308 */
309 if (major_gc && oldest_gen->mark) {
310 mark_stack_bd = allocBlock();
311 mark_stack_top_bd = mark_stack_bd;
312 mark_stack_bd->link = NULL;
313 mark_stack_bd->u.back = NULL;
314 mark_sp = mark_stack_bd->start;
315 } else {
316 mark_stack_bd = NULL;
317 mark_stack_top_bd = NULL;
318 mark_sp = NULL;
319 }
320
321 /* -----------------------------------------------------------------------
322 * follow all the roots that we know about:
323 */
324
325 // the main thread is running: this prevents any other threads from
326 // exiting prematurely, so we can start them now.
327 // NB. do this after the mutable lists have been saved above, otherwise
328 // the other GC threads will be writing into the old mutable lists.
329 inc_running();
330 wakeup_gc_threads(gct->thread_index);
331
332 traceEventGcWork(gct->cap);
333
334 // scavenge the capability-private mutable lists. This isn't part
335   // of markCapability() because markCapability() can only
336 // call back into the GC via mark_root() (due to the gct register
337 // variable).
338 if (n_gc_threads == 1) {
339 for (n = 0; n < n_capabilities; n++) {
340 #if defined(THREADED_RTS)
341 scavenge_capability_mut_Lists1(&capabilities[n]);
342 #else
343 scavenge_capability_mut_lists(&capabilities[n]);
344 #endif
345 }
346 } else {
347 scavenge_capability_mut_lists(gct->cap);
348 for (n = 0; n < n_capabilities; n++) {
349 if (gc_threads[n]->idle) {
350 markCapability(mark_root, gct, &capabilities[n],
351 rtsTrue/*don't mark sparks*/);
352 scavenge_capability_mut_lists(&capabilities[n]);
353 }
354 }
355 }
356
357 // follow roots from the CAF list (used by GHCi)
358 gct->evac_gen_no = 0;
359 markCAFs(mark_root, gct);
360
361 // follow all the roots that the application knows about.
362 gct->evac_gen_no = 0;
363 if (n_gc_threads == 1) {
364 for (n = 0; n < n_capabilities; n++) {
365 markCapability(mark_root, gct, &capabilities[n],
366 rtsTrue/*don't mark sparks*/);
367 }
368 } else {
369 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
370 }
371
372 markScheduler(mark_root, gct);
373
374 #if defined(RTS_USER_SIGNALS)
375 // mark the signal handlers (signals should be already blocked)
376 markSignalHandlers(mark_root, gct);
377 #endif
378
379 // Mark the weak pointer list, and prepare to detect dead weak pointers.
380 markWeakPtrList();
381 initWeakForGC();
382
383 // Mark the stable pointer table.
384 markStablePtrTable(mark_root, gct);
385
386 /* -------------------------------------------------------------------------
387 * Repeatedly scavenge all the areas we know about until there's no
388 * more scavenging to be done.
389 */
390 for (;;)
391 {
392 scavenge_until_all_done();
393 // The other threads are now stopped. We might recurse back to
394 // here, but from now on this is the only thread.
395
396 // must be last... invariant is that everything is fully
397 // scavenged at this point.
398       if (traverseWeakPtrList()) { // returns rtsTrue if it evacuated something
399 inc_running();
400 continue;
401 }
402
403 // If we get to here, there's really nothing left to do.
404 break;
405 }
406
407 shutdown_gc_threads(gct->thread_index);
408
409 // Now see which stable names are still alive.
410 gcStablePtrTable();
411
412 #ifdef THREADED_RTS
413 if (n_gc_threads == 1) {
414 for (n = 0; n < n_capabilities; n++) {
415 pruneSparkQueue(&capabilities[n]);
416 }
417 } else {
418 for (n = 0; n < n_capabilities; n++) {
419 if (n == cap->no || gc_threads[n]->idle) {
420 pruneSparkQueue(&capabilities[n]);
421 }
422 }
423 }
424 #endif
425
426 #ifdef PROFILING
427 // We call processHeapClosureForDead() on every closure destroyed during
428 // the current garbage collection, so we invoke LdvCensusForDead().
429 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
430 || RtsFlags.ProfFlags.bioSelector != NULL) {
431 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
432 LdvCensusForDead(N);
433 ACQUIRE_SM_LOCK;
434 }
435 #endif
436
437 // NO MORE EVACUATION AFTER THIS POINT!
438
439 // Finally: compact or sweep the oldest generation.
440 if (major_gc && oldest_gen->mark) {
441 if (oldest_gen->compact)
442 compact(gct->scavenged_static_objects);
443 else
444 sweep(oldest_gen);
445 }
446
447 copied = 0;
448 par_max_copied = 0;
449 par_tot_copied = 0;
450 {
451 nat i;
452 for (i=0; i < n_gc_threads; i++) {
453 if (n_gc_threads > 1) {
454 debugTrace(DEBUG_gc,"thread %d:", i);
455 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
456 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
457 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
458 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
459 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
460 }
461 copied += gc_threads[i]->copied;
462 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
463 }
464 par_tot_copied = copied;
465 if (n_gc_threads == 1) {
466 par_max_copied = 0;
467 par_tot_copied = 0;
468 }
469 }
470
471 // Run through all the generations/steps and tidy up.
472 // We're going to:
473 // - count the amount of "live" data (live_words, live_blocks)
474 // - count the amount of "copied" data in this GC (copied)
475 // - free from-space
476 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
477 //
478 live_words = 0;
479 live_blocks = 0;
480
481 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
482
483 if (g == N) {
484 generations[g].collections++; // for stats
485 if (n_gc_threads > 1) generations[g].par_collections++;
486 }
487
488 // Count the mutable list as bytes "copied" for the purposes of
489 // stats. Every mutable list is copied during every GC.
490 if (g > 0) {
491 nat mut_list_size = 0;
492 for (n = 0; n < n_capabilities; n++) {
493 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
494 }
495 copied += mut_list_size;
496
497 debugTrace(DEBUG_gc,
498 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
499 (unsigned long)(mut_list_size * sizeof(W_)),
500 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
501 }
502
503 bdescr *next, *prev;
504 gen = &generations[g];
505
506 // for generations we collected...
507 if (g <= N) {
508
509 /* free old memory and shift to-space into from-space for all
510 * the collected steps (except the allocation area). These
511        * freed blocks will probably be quickly recycled.
512 */
513 if (gen->mark)
514 {
515 // tack the new blocks on the end of the existing blocks
516 if (gen->old_blocks != NULL) {
517
518 prev = NULL;
519 for (bd = gen->old_blocks; bd != NULL; bd = next) {
520
521 next = bd->link;
522
523 if (!(bd->flags & BF_MARKED))
524 {
525 if (prev == NULL) {
526 gen->old_blocks = next;
527 } else {
528 prev->link = next;
529 }
530 freeGroup(bd);
531 gen->n_old_blocks--;
532 }
533 else
534 {
535 gen->n_words += bd->free - bd->start;
536
537 // NB. this step might not be compacted next
538 // time, so reset the BF_MARKED flags.
539 // They are set before GC if we're going to
540 // compact. (search for BF_MARKED above).
541 bd->flags &= ~BF_MARKED;
542
543 // between GCs, all blocks in the heap except
544 // for the nursery have the BF_EVACUATED flag set.
545 bd->flags |= BF_EVACUATED;
546
547 prev = bd;
548 }
549 }
550
551 if (prev != NULL) {
552 prev->link = gen->blocks;
553 gen->blocks = gen->old_blocks;
554 }
555 }
556 // add the new blocks to the block tally
557 gen->n_blocks += gen->n_old_blocks;
558 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
559 ASSERT(countOccupied(gen->blocks) == gen->n_words);
560 }
561       else // not compacted
562 {
563 freeChain(gen->old_blocks);
564 }
565
566 gen->old_blocks = NULL;
567 gen->n_old_blocks = 0;
568
569 /* LARGE OBJECTS. The current live large objects are chained on
570 * scavenged_large, having been moved during garbage
571 * collection from large_objects. Any objects left on the
572 * large_objects list are therefore dead, so we free them here.
573 */
574 freeChain(gen->large_objects);
575 gen->large_objects = gen->scavenged_large_objects;
576 gen->n_large_blocks = gen->n_scavenged_large_blocks;
577 gen->n_new_large_words = 0;
578 }
579 else // for generations > N
580 {
581 /* For older generations, we need to append the
582 * scavenged_large_object list (i.e. large objects that have been
583 * promoted during this GC) to the large_object list for that step.
584 */
585 for (bd = gen->scavenged_large_objects; bd; bd = next) {
586 next = bd->link;
587 dbl_link_onto(bd, &gen->large_objects);
588 }
589
590 // add the new blocks we promoted during this GC
591 gen->n_large_blocks += gen->n_scavenged_large_blocks;
592 }
593
594 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
595
596 gen->scavenged_large_objects = NULL;
597 gen->n_scavenged_large_blocks = 0;
598
599 // Count "live" data
600 live_words += genLiveWords(gen);
601 live_blocks += genLiveBlocks(gen);
602
603 // add in the partial blocks in the gen_workspaces, but ignore gen 0
604 // if this is a local GC (we can't count another capability's part_list)
605 {
606 nat i;
607 for (i = 0; i < n_capabilities; i++) {
608 live_words += gcThreadLiveWords(i, gen->no);
609 live_blocks += gcThreadLiveBlocks(i, gen->no);
610 }
611 }
612 } // for all generations
613
614 // update the max size of older generations after a major GC
615 resize_generations();
616
617 // Free the mark stack.
618 if (mark_stack_top_bd != NULL) {
619 debugTrace(DEBUG_gc, "mark stack: %d blocks",
620 countBlocks(mark_stack_top_bd));
621 freeChain(mark_stack_top_bd);
622 }
623
624 // Free any bitmaps.
625 for (g = 0; g <= N; g++) {
626 gen = &generations[g];
627 if (gen->bitmap != NULL) {
628 freeGroup(gen->bitmap);
629 gen->bitmap = NULL;
630 }
631 }
632
633 // Reset the nursery: make the blocks empty
634 if (n_gc_threads == 1) {
635 for (n = 0; n < n_capabilities; n++) {
636 allocated += clearNursery(&capabilities[n]);
637 }
638 } else {
639 gct->allocated = clearNursery(cap);
640 for (n = 0; n < n_capabilities; n++) {
641 allocated += gc_threads[n]->allocated;
642 }
643 }
644
645 resize_nursery();
646
647 resetNurseries();
648
649 // mark the garbage collected CAFs as dead
650 #if 0 && defined(DEBUG) // doesn't work at the moment
651 if (major_gc) { gcCAFs(); }
652 #endif
653
654 #ifdef PROFILING
655 // resetStaticObjectForRetainerProfiling() must be called before
656 // zeroing below.
657
658 // ToDo: fix the gct->scavenged_static_objects below
659 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
660 #endif
661
662 // zero the scavenged static object list
663 if (major_gc) {
664 nat i;
665 if (n_gc_threads == 1) {
666 zero_static_object_list(gct->scavenged_static_objects);
667 } else {
668 for (i = 0; i < n_gc_threads; i++) {
669 if (!gc_threads[i]->idle) {
670 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
671 }
672 }
673 }
674 }
675
676 // Update the stable pointer hash table.
677 updateStablePtrTable(major_gc);
678
679 // unlock the StablePtr table. Must be before scheduleFinalizers(),
680 // because a finalizer may call hs_free_fun_ptr() or
681 // hs_free_stable_ptr(), both of which access the StablePtr table.
682 stablePtrPostGC();
683
684 // Start any pending finalizers. Must be after
685 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
686 RELEASE_SM_LOCK;
687 scheduleFinalizers(cap, old_weak_ptr_list);
688 ACQUIRE_SM_LOCK;
689
690 // check sanity after GC
691 // before resurrectThreads(), because that might overwrite some
692 // closures, which will cause problems with THREADED where we don't
693 // fill slop.
694 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
695
696 // If a heap census is due, we need to do it before
697 // resurrectThreads(), for the same reason as checkSanity above:
698 // resurrectThreads() will overwrite some closures and leave slop
699 // behind.
700 if (do_heap_census) {
701 debugTrace(DEBUG_sched, "performing heap census");
702 RELEASE_SM_LOCK;
703 heapCensus(gct->gc_start_cpu);
704 ACQUIRE_SM_LOCK;
705 }
706
707 // send exceptions to any threads which were about to die
708 RELEASE_SM_LOCK;
709 resurrectThreads(resurrected_threads);
710 ACQUIRE_SM_LOCK;
711
712 if (major_gc) {
713 nat need, got;
714 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
715 got = mblocks_allocated;
716 /* If the amount of data remains constant, next major GC we'll
717 require (F+1)*need. We leave (F+2)*need in order to reduce
718 repeated deallocation and reallocation. */
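      /* Worked example (illustrative numbers): with the default -F2
         (oldGenFactor == 2) and need == 100 mblocks, we keep
         (2+2)*100 == 400 mblocks and return anything above that to the OS. */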
719 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
720 if (got > need) {
721 returnMemoryToOS(got - need);
722 }
723 }
724
725 // extra GC trace info
726 IF_DEBUG(gc, statDescribeGens());
727
728 #ifdef DEBUG
729 // symbol-table based profiling
730 /* heapCensus(to_blocks); */ /* ToDo */
731 #endif
732
733 // restore enclosing cost centre
734 #ifdef PROFILING
735 for (n = 0; n < n_capabilities; n++) {
736 capabilities[n].r.rCCCS = save_CCS[n];
737 }
738 #endif
739
740 #ifdef DEBUG
741 // check for memory leaks if DEBUG is on
742 memInventory(DEBUG_gc);
743 #endif
744
745 #ifdef RTS_GTK_FRONTPANEL
746 if (RtsFlags.GcFlags.frontpanel) {
747       updateFrontPanelAfterGC( N, live_words );
748 }
749 #endif
750
751 // ok, GC over: tell the stats department what happened.
752 stat_endGC(cap, gct, allocated, live_words, copied,
753 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
754 N, n_gc_threads, par_max_copied, par_tot_copied);
755
756 #if defined(RTS_USER_SIGNALS)
757 if (RtsFlags.MiscFlags.install_signal_handlers) {
758 // unblock signals again
759 unblockUserSignals();
760 }
761 #endif
762
763 RELEASE_SM_LOCK;
764
765 SET_GCT(saved_gct);
766 }
767
768 /* -----------------------------------------------------------------------------
769 Initialise the gc_thread structures.
770 -------------------------------------------------------------------------- */
771
772 #define GC_THREAD_INACTIVE 0
773 #define GC_THREAD_STANDING_BY 1
774 #define GC_THREAD_RUNNING 2
775 #define GC_THREAD_WAITING_TO_CONTINUE 3
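
/* Overview of the handshake implemented by new_gc_thread(), gcWorkerThread(),
 * wakeup_gc_threads() and releaseGCThreads(): each worker owns two spin
 * locks. While a worker is standing by, the leading thread holds its gc_spin,
 * so the worker blocks in ACQUIRE_SPIN_LOCK(&gct->gc_spin);
 * wakeup_gc_threads() takes mut_spin and releases gc_spin to set the worker
 * RUNNING; when the worker is done it releases gc_spin, advertises
 * WAITING_TO_CONTINUE and blocks on mut_spin until releaseGCThreads() swaps
 * the locks back the other way. */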
776
777 static void
778 new_gc_thread (nat n, gc_thread *t)
779 {
780 nat g;
781 gen_workspace *ws;
782
783 t->cap = &capabilities[n];
784
785 #ifdef THREADED_RTS
786 t->id = 0;
787 initSpinLock(&t->gc_spin);
788 initSpinLock(&t->mut_spin);
789 ACQUIRE_SPIN_LOCK(&t->gc_spin);
790     t->wakeup = GC_THREAD_INACTIVE;  // starts INACTIVE; the worker sets this to
791                           // STANDING_BY once it has started up, see
792                           // gcWorkerThread and waitForGcThreads
792 #endif
793
794 t->thread_index = n;
795 t->idle = rtsFalse;
796 t->free_blocks = NULL;
797 t->gc_count = 0;
798
799 init_gc_thread(t);
800
801 #ifdef USE_PAPI
802 t->papi_events = -1;
803 #endif
804
805 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
806 {
807 ws = &t->gens[g];
808 ws->gen = &generations[g];
809 ASSERT(g == ws->gen->no);
810 ws->my_gct = t;
811
812 // We want to call
813 // alloc_todo_block(ws,0);
814 // but can't, because it uses gct which isn't set up at this point.
815 // Hence, allocate a block for todo_bd manually:
816 {
817 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
818 initBdescr(bd, ws->gen, ws->gen->to);
819 bd->flags = BF_EVACUATED;
820 bd->u.scan = bd->free = bd->start;
821
822 ws->todo_bd = bd;
823 ws->todo_free = bd->free;
824 ws->todo_lim = bd->start + BLOCK_SIZE_W;
825 }
826
827 ws->todo_q = newWSDeque(128);
828 ws->todo_overflow = NULL;
829 ws->n_todo_overflow = 0;
830 ws->todo_large_objects = NULL;
831
832 ws->part_list = NULL;
833 ws->n_part_blocks = 0;
834
835 ws->scavd_list = NULL;
836 ws->n_scavd_blocks = 0;
837 }
838 }
839
840
841 void
842 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
843 {
844 #if defined(THREADED_RTS)
845 nat i;
846
847 if (from > 0) {
848 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
849 "initGcThreads");
850 } else {
851 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
852 "initGcThreads");
853 }
854
855 // We have to update the gct->cap pointers to point to the new
856 // Capability array now.
857 for (i = 0; i < from; i++) {
858 gc_threads[i]->cap = &capabilities[gc_threads[i]->cap->no];
859 }
860
861 for (i = from; i < to; i++) {
862 gc_threads[i] =
863 stgMallocBytes(sizeof(gc_thread) +
864 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
865 "alloc_gc_threads");
866
867 new_gc_thread(i, gc_threads[i]);
868 }
869 #else
870 ASSERT(from == 0 && to == 1);
871 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
872 gc_threads[0] = gct;
873 new_gc_thread(0,gc_threads[0]);
874 #endif
875 }
876
877 void
878 freeGcThreads (void)
879 {
880 nat g;
881 if (gc_threads != NULL) {
882 #if defined(THREADED_RTS)
883 nat i;
884 for (i = 0; i < n_capabilities; i++) {
885 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
886 {
887 freeWSDeque(gc_threads[i]->gens[g].todo_q);
888 }
889 stgFree (gc_threads[i]);
890 }
891 stgFree (gc_threads);
892 #else
893 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
894 {
895 freeWSDeque(gc_threads[0]->gens[g].todo_q);
896 }
897 stgFree (gc_threads);
898 #endif
899 gc_threads = NULL;
900 }
901 }
902
903 /* ----------------------------------------------------------------------------
904 Start GC threads
905 ------------------------------------------------------------------------- */
906
907 static volatile StgWord gc_running_threads;
908
909 static StgWord
910 inc_running (void)
911 {
912 StgWord new;
913 new = atomic_inc(&gc_running_threads);
914 ASSERT(new <= n_gc_threads);
915 return new;
916 }
917
918 static StgWord
919 dec_running (void)
920 {
921 ASSERT(gc_running_threads != 0);
922 return atomic_dec(&gc_running_threads);
923 }
924
925 static rtsBool
926 any_work (void)
927 {
928 int g;
929 gen_workspace *ws;
930
931 gct->any_work++;
932
933 write_barrier();
934
935 // scavenge objects in compacted generation
936 if (mark_stack_bd != NULL && !mark_stack_empty()) {
937 return rtsTrue;
938 }
939
940 // Check for global work in any step. We don't need to check for
941 // local work, because we have already exited scavenge_loop(),
942 // which means there is no local work for this thread.
943 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
944 ws = &gct->gens[g];
945 if (ws->todo_large_objects) return rtsTrue;
946 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
947 if (ws->todo_overflow) return rtsTrue;
948 }
949
950 #if defined(THREADED_RTS)
951 if (work_stealing) {
952 nat n;
953 // look for work to steal
954 for (n = 0; n < n_gc_threads; n++) {
955 if (n == gct->thread_index) continue;
956 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
957 ws = &gc_threads[n]->gens[g];
958 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
959 }
960 }
961 }
962 #endif
963
964 gct->no_work++;
965 #if defined(THREADED_RTS)
966 yieldThread();
967 #endif
968
969 return rtsFalse;
970 }
971
972 static void
973 scavenge_until_all_done (void)
974 {
975 DEBUG_ONLY( nat r );
976
977
978 loop:
979 #if defined(THREADED_RTS)
980 if (n_gc_threads > 1) {
981 scavenge_loop();
982 } else {
983 scavenge_loop1();
984 }
985 #else
986 scavenge_loop();
987 #endif
988
989 collect_gct_blocks();
990
991 // scavenge_loop() only exits when there's no work to do
992
993 #ifdef DEBUG
994 r = dec_running();
995 #else
996 dec_running();
997 #endif
998
999 traceEventGcIdle(gct->cap);
1000
1001 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1002
1003 while (gc_running_threads != 0) {
1004 // usleep(1);
1005 if (any_work()) {
1006 inc_running();
1007 traceEventGcWork(gct->cap);
1008 goto loop;
1009 }
1010 // any_work() does not remove the work from the queue, it
1011 // just checks for the presence of work. If we find any,
1012 // then we increment gc_running_threads and go back to
1013 // scavenge_loop() to perform any pending work.
1014 }
1015
1016 traceEventGcDone(gct->cap);
1017 }
1018
1019 #if defined(THREADED_RTS)
1020
1021 void
1022 gcWorkerThread (Capability *cap)
1023 {
1024 gc_thread *saved_gct;
1025
1026 // necessary if we stole a callee-saves register for gct:
1027 saved_gct = gct;
1028
1029 SET_GCT(gc_threads[cap->no]);
1030 gct->id = osThreadId();
1031
1032 stat_gcWorkerThreadStart(gct);
1033
1034 // Wait until we're told to wake up
1035 RELEASE_SPIN_LOCK(&gct->mut_spin);
1036 // yieldThread();
1037 // Strangely, adding a yieldThread() here makes the CPU time
1038 // measurements more accurate on Linux, perhaps because it syncs
1039 // the CPU time across the multiple cores. Without this, CPU time
1040 // is heavily skewed towards GC rather than MUT.
1041 gct->wakeup = GC_THREAD_STANDING_BY;
1042 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1043 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1044
1045 #ifdef USE_PAPI
1046 // start performance counters in this thread...
1047 if (gct->papi_events == -1) {
1048 papi_init_eventset(&gct->papi_events);
1049 }
1050 papi_thread_start_gc1_count(gct->papi_events);
1051 #endif
1052
1053 init_gc_thread(gct);
1054
1055 traceEventGcWork(gct->cap);
1056
1057 // Every thread evacuates some roots.
1058 gct->evac_gen_no = 0;
1059 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1060 scavenge_capability_mut_lists(cap);
1061
1062 scavenge_until_all_done();
1063
1064 gct->allocated = clearNursery(cap);
1065
1066 #ifdef THREADED_RTS
1067 // Now that the whole heap is marked, we discard any sparks that
1068 // were found to be unreachable. The main GC thread is currently
1069 // marking heap reachable via weak pointers, so it is
1070 // non-deterministic whether a spark will be retained if it is
1071 // only reachable via weak pointers. To fix this problem would
1072 // require another GC barrier, which is too high a price.
1073 pruneSparkQueue(cap);
1074 #endif
1075
1076 #ifdef USE_PAPI
1077 // count events in this thread towards the GC totals
1078 papi_thread_stop_gc1_count(gct->papi_events);
1079 #endif
1080
1081 // Wait until we're told to continue
1082 RELEASE_SPIN_LOCK(&gct->gc_spin);
1083 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1084 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1085 gct->thread_index);
1086 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1087 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1088
1089 // record the time spent doing GC in the Task structure
1090 stat_gcWorkerThreadDone(gct);
1091
1092 SET_GCT(saved_gct);
1093 }
1094
1095 #endif
1096
1097 #if defined(THREADED_RTS)
1098
1099 void
1100 waitForGcThreads (Capability *cap USED_IF_THREADS)
1101 {
1102 const nat n_threads = n_capabilities;
1103 const nat me = cap->no;
1104 nat i, j;
1105 rtsBool retry = rtsTrue;
1106
1107 while(retry) {
1108 for (i=0; i < n_threads; i++) {
1109 if (i == me || gc_threads[i]->idle) continue;
1110 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1111 prodCapability(&capabilities[i], cap->running_task);
1112 }
1113 }
1114 for (j=0; j < 10; j++) {
1115 retry = rtsFalse;
1116 for (i=0; i < n_threads; i++) {
1117 if (i == me || gc_threads[i]->idle) continue;
1118 write_barrier();
1119 interruptCapability(&capabilities[i]);
1120 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1121 retry = rtsTrue;
1122 }
1123 }
1124 if (!retry) break;
1125 yieldThread();
1126 }
1127 }
1128 }
1129
1130 #endif // THREADED_RTS
1131
1132 static void
1133 start_gc_threads (void)
1134 {
1135 #if defined(THREADED_RTS)
1136 gc_running_threads = 0;
1137 #endif
1138 }
1139
1140 static void
1141 wakeup_gc_threads (nat me USED_IF_THREADS)
1142 {
1143 #if defined(THREADED_RTS)
1144 nat i;
1145
1146 if (n_gc_threads == 1) return;
1147
1148 for (i=0; i < n_gc_threads; i++) {
1149 if (i == me || gc_threads[i]->idle) continue;
1150 inc_running();
1151 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1152 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1153
1154 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1155 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1156 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1157 }
1158 #endif
1159 }
1160
1161 // After GC is complete, we must wait for all GC threads to reach the
1162 // waiting-to-continue state, otherwise they may still be executing inside
1163 // any_work(), and may even remain awake until the next GC starts.
1164 static void
1165 shutdown_gc_threads (nat me USED_IF_THREADS)
1166 {
1167 #if defined(THREADED_RTS)
1168 nat i;
1169
1170 if (n_gc_threads == 1) return;
1171
1172 for (i=0; i < n_gc_threads; i++) {
1173 if (i == me || gc_threads[i]->idle) continue;
1174 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1175 }
1176 #endif
1177 }
1178
1179 #if defined(THREADED_RTS)
1180 void
1181 releaseGCThreads (Capability *cap USED_IF_THREADS)
1182 {
1183 const nat n_threads = n_capabilities;
1184 const nat me = cap->no;
1185 nat i;
1186 for (i=0; i < n_threads; i++) {
1187 if (i == me || gc_threads[i]->idle) continue;
1188 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1189 barf("releaseGCThreads");
1190
1191 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1192 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1193 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1194 }
1195 }
1196 #endif
1197
1198 /* ----------------------------------------------------------------------------
1199 Initialise a generation that is to be collected
1200 ------------------------------------------------------------------------- */
1201
1202 static void
1203 prepare_collected_gen (generation *gen)
1204 {
1205 nat i, g, n;
1206 gen_workspace *ws;
1207 bdescr *bd, *next;
1208
1209 // Throw away the current mutable list. Invariant: the mutable
1210 // list always has at least one block; this means we can avoid a
1211 // check for NULL in recordMutable().
1212 g = gen->no;
1213 if (g != 0) {
1214 for (i = 0; i < n_capabilities; i++) {
1215 freeChain(capabilities[i].mut_lists[g]);
1216 capabilities[i].mut_lists[g] = allocBlock();
1217 }
1218 }
1219
1220 gen = &generations[g];
1221 ASSERT(gen->no == g);
1222
1223 // we'll construct a new list of threads in this step
1224 // during GC, throw away the current list.
1225 gen->old_threads = gen->threads;
1226 gen->threads = END_TSO_QUEUE;
1227
1228 // deprecate the existing blocks
1229 gen->old_blocks = gen->blocks;
1230 gen->n_old_blocks = gen->n_blocks;
1231 gen->blocks = NULL;
1232 gen->n_blocks = 0;
1233 gen->n_words = 0;
1234 gen->live_estimate = 0;
1235
1236 // initialise the large object queues.
1237 ASSERT(gen->scavenged_large_objects == NULL);
1238 ASSERT(gen->n_scavenged_large_blocks == 0);
1239
1240 // grab all the partial blocks stashed in the gc_thread workspaces and
1241 // move them to the old_blocks list of this gen.
1242 for (n = 0; n < n_capabilities; n++) {
1243 ws = &gc_threads[n]->gens[gen->no];
1244
1245 for (bd = ws->part_list; bd != NULL; bd = next) {
1246 next = bd->link;
1247 bd->link = gen->old_blocks;
1248 gen->old_blocks = bd;
1249 gen->n_old_blocks += bd->blocks;
1250 }
1251 ws->part_list = NULL;
1252 ws->n_part_blocks = 0;
1253
1254 ASSERT(ws->scavd_list == NULL);
1255 ASSERT(ws->n_scavd_blocks == 0);
1256
1257 if (ws->todo_free != ws->todo_bd->start) {
1258 ws->todo_bd->free = ws->todo_free;
1259 ws->todo_bd->link = gen->old_blocks;
1260 gen->old_blocks = ws->todo_bd;
1261 gen->n_old_blocks += ws->todo_bd->blocks;
1262 alloc_todo_block(ws,0); // always has one block.
1263 }
1264 }
1265
1266 // mark the small objects as from-space
1267 for (bd = gen->old_blocks; bd; bd = bd->link) {
1268 bd->flags &= ~BF_EVACUATED;
1269 }
1270
1271 // mark the large objects as from-space
1272 for (bd = gen->large_objects; bd; bd = bd->link) {
1273 bd->flags &= ~BF_EVACUATED;
1274 }
1275
1276 // for a compacted generation, we need to allocate the bitmap
1277 if (gen->mark) {
1278 lnat bitmap_size; // in bytes
1279 bdescr *bitmap_bdescr;
1280 StgWord *bitmap;
1281
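        // One mark bit per heap word: n_old_blocks blocks hold
        // n_old_blocks * BLOCK_SIZE / sizeof(W_) words, and dividing by
        // BITS_PER_BYTE turns that bit count into bytes (for example, with
        // 4KB blocks and 8-byte words each block needs 64 bytes of bitmap).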
1282 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1283
1284 if (bitmap_size > 0) {
1285 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1286 / BLOCK_SIZE);
1287 gen->bitmap = bitmap_bdescr;
1288 bitmap = bitmap_bdescr->start;
1289
1290 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1291 bitmap_size, bitmap);
1292
1293 // don't forget to fill it with zeros!
1294 memset(bitmap, 0, bitmap_size);
1295
1296 // For each block in this step, point to its bitmap from the
1297 // block descriptor.
1298 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1299 bd->u.bitmap = bitmap;
1300 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1301
1302 // Also at this point we set the BF_MARKED flag
1303 // for this block. The invariant is that
1304 // BF_MARKED is always unset, except during GC
1305 // when it is set on those blocks which will be
1306 // compacted.
1307 if (!(bd->flags & BF_FRAGMENTED)) {
1308 bd->flags |= BF_MARKED;
1309 }
1310
1311 // BF_SWEPT should be marked only for blocks that are being
1312 // collected in sweep()
1313 bd->flags &= ~BF_SWEPT;
1314 }
1315 }
1316 }
1317 }
1318
1319
1320 /* ----------------------------------------------------------------------------
1321 Save the mutable lists in saved_mut_lists
1322 ------------------------------------------------------------------------- */
1323
1324 static void
1325 stash_mut_list (Capability *cap, nat gen_no)
1326 {
1327 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1328 cap->mut_lists[gen_no] = allocBlock_sync();
1329 }
1330
1331 /* ----------------------------------------------------------------------------
1332 Initialise a generation that is *not* to be collected
1333 ------------------------------------------------------------------------- */
1334
1335 static void
1336 prepare_uncollected_gen (generation *gen)
1337 {
1338 nat i;
1339
1340
1341 ASSERT(gen->no > 0);
1342
1343 // save the current mutable lists for this generation, and
1344 // allocate a fresh block for each one. We'll traverse these
1345 // mutable lists as roots early on in the GC.
1346 for (i = 0; i < n_capabilities; i++) {
1347 stash_mut_list(&capabilities[i], gen->no);
1348 }
1349
1350 ASSERT(gen->scavenged_large_objects == NULL);
1351 ASSERT(gen->n_scavenged_large_blocks == 0);
1352 }
1353
1354 /* -----------------------------------------------------------------------------
1355 Collect the completed blocks from a GC thread and attach them to
1356 the generation.
1357 -------------------------------------------------------------------------- */
1358
1359 static void
1360 collect_gct_blocks (void)
1361 {
1362 nat g;
1363 gen_workspace *ws;
1364 bdescr *bd, *prev;
1365
1366 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1367 ws = &gct->gens[g];
1368
1369 // there may still be a block attached to ws->todo_bd;
1370 // leave it there to use next time.
1371
1372 if (ws->scavd_list != NULL) {
1373 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1374
1375 ASSERT(gct->scan_bd == NULL);
1376 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1377
1378 prev = NULL;
1379 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1380 ws->gen->n_words += bd->free - bd->start;
1381 prev = bd;
1382 }
1383 if (prev != NULL) {
1384 prev->link = ws->gen->blocks;
1385 ws->gen->blocks = ws->scavd_list;
1386 }
1387 ws->gen->n_blocks += ws->n_scavd_blocks;
1388
1389 ws->scavd_list = NULL;
1390 ws->n_scavd_blocks = 0;
1391
1392 RELEASE_SPIN_LOCK(&ws->gen->sync);
1393 }
1394 }
1395 }
1396
1397 /* -----------------------------------------------------------------------------
1398 During mutation, any blocks that are filled by allocatePinned() are
1399 stashed on the local pinned_object_blocks list, to avoid needing to
1400 take a global lock. Here we collect those blocks from the
1401 cap->pinned_object_blocks lists and put them on the
1402    main g0->large_objects list.
1403
1404 Returns: the number of words allocated this way, for stats
1405 purposes.
1406 -------------------------------------------------------------------------- */
1407
1408 static lnat
1409 collect_pinned_object_blocks (void)
1410 {
1411 nat n;
1412 bdescr *bd, *prev;
1413 lnat allocated = 0;
1414
1415 for (n = 0; n < n_capabilities; n++) {
1416 prev = NULL;
1417 for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
1418 allocated += bd->free - bd->start;
1419 prev = bd;
1420 }
1421 if (prev != NULL) {
1422 prev->link = g0->large_objects;
1423 if (g0->large_objects != NULL) {
1424 g0->large_objects->u.back = prev;
1425 }
1426 g0->large_objects = capabilities[n].pinned_object_blocks;
1427 capabilities[n].pinned_object_blocks = 0;
1428 }
1429 }
1430
1431 return allocated;
1432 }
1433
1434 /* -----------------------------------------------------------------------------
1435 Initialise a gc_thread before GC
1436 -------------------------------------------------------------------------- */
1437
1438 static void
1439 init_gc_thread (gc_thread *t)
1440 {
1441 t->static_objects = END_OF_STATIC_LIST;
1442 t->scavenged_static_objects = END_OF_STATIC_LIST;
1443 t->scan_bd = NULL;
1444 t->mut_lists = t->cap->mut_lists;
1445 t->evac_gen_no = 0;
1446 t->failed_to_evac = rtsFalse;
1447 t->eager_promotion = rtsTrue;
1448 t->thunk_selector_depth = 0;
1449 t->allocated = 0;
1450 t->copied = 0;
1451 t->scanned = 0;
1452 t->any_work = 0;
1453 t->no_work = 0;
1454 t->scav_find_work = 0;
1455 }
1456
1457 /* -----------------------------------------------------------------------------
1458 Function we pass to evacuate roots.
1459 -------------------------------------------------------------------------- */
1460
1461 static void
1462 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1463 {
1464 // we stole a register for gct, but this function is called from
1465 // *outside* the GC where the register variable is not in effect,
1466 // so we need to save and restore it here. NB. only call
1467 // mark_root() from the main GC thread, otherwise gct will be
1468 // incorrect.
1469 #if defined(THREADED_RTS)
1470 gc_thread *saved_gct;
1471 saved_gct = gct;
1472 #endif
1473 SET_GCT(user);
1474
1475 evacuate(root);
1476
1477 SET_GCT(saved_gct);
1478 }
1479
1480 /* -----------------------------------------------------------------------------
1481 Initialising the static object & mutable lists
1482 -------------------------------------------------------------------------- */
1483
1484 static void
1485 zero_static_object_list(StgClosure* first_static)
1486 {
1487 StgClosure* p;
1488 StgClosure* link;
1489 const StgInfoTable *info;
1490
1491 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1492 info = get_itbl(p);
1493 link = *STATIC_LINK(info, p);
1494 *STATIC_LINK(info,p) = NULL;
1495 }
1496 }
1497
1498 /* ----------------------------------------------------------------------------
1499 Reset the sizes of the older generations when we do a major
1500 collection.
1501
1502 CURRENT STRATEGY: make all generations except zero the same size.
1503 We have to stay within the maximum heap size, and leave a certain
1504 percentage of the maximum heap size available to allocate into.
1505 ------------------------------------------------------------------------- */
1506
1507 static void
1508 resize_generations (void)
1509 {
1510 nat g;
1511
1512 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1513 nat live, size, min_alloc, words;
1514 const nat max = RtsFlags.GcFlags.maxHeapSize;
1515 const nat gens = RtsFlags.GcFlags.generations;
1516
1517 	// amount of live data in the oldest generation
1518 if (oldest_gen->live_estimate != 0) {
1519 words = oldest_gen->live_estimate;
1520 } else {
1521 words = oldest_gen->n_words;
1522 }
1523 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1524 oldest_gen->n_large_blocks;
1525
1526 // default max size for all generations except zero
1527 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1528 RtsFlags.GcFlags.minOldGenSize);
1529
1530 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1531 RtsFlags.GcFlags.heapSizeSuggestion = size;
1532 }
1533
1534 // minimum size for generation zero
1535 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1536 RtsFlags.GcFlags.minAllocAreaSize);
1537
1538 // Auto-enable compaction when the residency reaches a
1539 // certain percentage of the maximum heap size (default: 30%).
1540 if (RtsFlags.GcFlags.compact ||
1541 (max > 0 &&
1542 oldest_gen->n_blocks >
1543 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1544 oldest_gen->mark = 1;
1545 oldest_gen->compact = 1;
1546 // debugBelch("compaction: on\n", live);
1547 } else {
1548 oldest_gen->mark = 0;
1549 oldest_gen->compact = 0;
1550 // debugBelch("compaction: off\n", live);
1551 }
1552
1553 if (RtsFlags.GcFlags.sweep) {
1554 oldest_gen->mark = 1;
1555 }
1556
1557 // if we're going to go over the maximum heap size, reduce the
1558 // size of the generations accordingly. The calculation is
1559 // different if compaction is turned on, because we don't need
1560 // to double the space required to collect the old generation.
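	// Roughly: without compaction each of the (gens-1) older generations
	// needs `size` blocks plus the same again as to-space while it is
	// being copied, so size * (gens-1) * 2 + min_alloc must stay within
	// max, giving size = (max - min_alloc) / ((gens-1) * 2) when the cap
	// is hit. With compaction the oldest generation is collected in place
	// and needs no copy space, hence the divisor ((gens-1)*2 - 1) in the
	// first branch below.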
1561 if (max != 0) {
1562
1563 // this test is necessary to ensure that the calculations
1564 // below don't have any negative results - we're working
1565 // with unsigned values here.
1566 if (max < min_alloc) {
1567 heapOverflow();
1568 }
1569
1570 if (oldest_gen->compact) {
1571 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1572 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1573 }
1574 } else {
1575 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1576 size = (max - min_alloc) / ((gens - 1) * 2);
1577 }
1578 }
1579
1580 if (size < live) {
1581 heapOverflow();
1582 }
1583 }
1584
1585 #if 0
1586 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1587 min_alloc, size, max);
1588 #endif
1589
1590 for (g = 0; g < gens; g++) {
1591 generations[g].max_blocks = size;
1592 }
1593 }
1594 }
1595
1596 /* -----------------------------------------------------------------------------
1597 Calculate the new size of the nursery, and resize it.
1598 -------------------------------------------------------------------------- */
1599
1600 static void
1601 resize_nursery (void)
1602 {
1603 const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1604
1605 if (RtsFlags.GcFlags.generations == 1)
1606 { // Two-space collector:
1607 nat blocks;
1608
1609 /* set up a new nursery. Allocate a nursery size based on a
1610 * function of the amount of live data (by default a factor of 2)
1611 * Use the blocks from the old nursery if possible, freeing up any
1612 * left over blocks.
1613 *
1614 * If we get near the maximum heap size, then adjust our nursery
1615 * size accordingly. If the nursery is the same size as the live
1616 * data (L), then we need 3L bytes. We can reduce the size of the
1617 * nursery to bring the required memory down near 2L bytes.
1618 *
1619 * A normal 2-space collector would need 4L bytes to give the same
1620 * performance we get from 3L bytes, reducing to the same
1621 * performance at 2L bytes.
1622 */
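    /* Numeric illustration of the adjustment below: with maxHeapSize ==
     * 10000 blocks and 3000 blocks of live data, adjusted_blocks ==
     * 10000 - 2*3000 == 4000, i.e. pc_free == 40% of the maximum heap is
     * left over for the nursery. */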
1623 blocks = generations[0].n_blocks;
1624
1625 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1626 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1627 RtsFlags.GcFlags.maxHeapSize )
1628 {
1629 long adjusted_blocks; // signed on purpose
1630 int pc_free;
1631
1632 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1633
1634 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1635 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1636
1637 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1638 	if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1639 {
1640 heapOverflow();
1641 }
1642 blocks = adjusted_blocks;
1643 }
1644 else
1645 {
1646 blocks *= RtsFlags.GcFlags.oldGenFactor;
1647 if (blocks < min_nursery)
1648 {
1649 blocks = min_nursery;
1650 }
1651 }
1652 resizeNurseries(blocks);
1653 }
1654 else // Generational collector
1655 {
1656 /*
1657 * If the user has given us a suggested heap size, adjust our
1658 * allocation area to make best use of the memory available.
1659 */
1660 if (RtsFlags.GcFlags.heapSizeSuggestion)
1661 {
1662 long blocks;
1663 lnat needed;
1664
1665 calcNeeded(rtsFalse, &needed); // approx blocks needed at next GC
1666
1667 /* Guess how much will be live in generation 0 step 0 next time.
1668 * A good approximation is obtained by finding the
1669 * percentage of g0 that was live at the last minor GC.
1670 *
1671 * We have an accurate figure for the amount of copied data in
1672 * 'copied', but we must convert this to a number of blocks, with
1673 * a small adjustment for estimated slop at the end of a block
1674 * (- 10 words).
1675 */
1676 if (N == 0)
1677 {
1678 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1679 / countNurseryBlocks();
1680 }
1681
1682 /* Estimate a size for the allocation area based on the
1683 * information available. We might end up going slightly under
1684 * or over the suggested heap size, but we should be pretty
1685 * close on average.
1686 *
1687 * Formula: suggested - needed
1688 * ----------------------------
1689 * 1 + g0_pcnt_kept/100
1690 *
1691 * where 'needed' is the amount of memory needed at the next
1692 * collection for collecting all gens except g0.
1693 */
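            /* For example (illustrative numbers only): heapSizeSuggestion ==
             * 100000 blocks, needed == 20000 blocks and g0_pcnt_kept == 30
             * gives blocks == (100000 - 20000) * 100 / 130 == 61538. */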
1694 blocks =
1695 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1696 (100 + (long)g0_pcnt_kept);
1697
1698 if (blocks < (long)min_nursery) {
1699 blocks = min_nursery;
1700 }
1701
1702 resizeNurseries((nat)blocks);
1703 }
1704 else
1705 {
1706 // we might have added extra large blocks to the nursery, so
1707 // resize back to minAllocAreaSize again.
1708 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1709 }
1710 }
1711 }
1712
1713 /* -----------------------------------------------------------------------------
1714 Sanity code for CAF garbage collection.
1715
1716 With DEBUG turned on, we manage a CAF list in addition to the SRT
1717 mechanism. After GC, we run down the CAF list and blackhole any
1718 CAFs which have been garbage collected. This means we get an error
1719 whenever the program tries to enter a garbage collected CAF.
1720
1721 Any garbage collected CAFs are taken off the CAF list at the same
1722 time.
1723 -------------------------------------------------------------------------- */
1724
1725 #if 0 && defined(DEBUG)
1726
1727 static void
1728 gcCAFs(void)
1729 {
1730 StgClosure* p;
1731 StgClosure** pp;
1732 const StgInfoTable *info;
1733 nat i;
1734
1735 i = 0;
1736 p = caf_list;
1737 pp = &caf_list;
1738
1739 while (p != NULL) {
1740
1741 info = get_itbl(p);
1742
1743 ASSERT(info->type == IND_STATIC);
1744
1745 if (STATIC_LINK(info,p) == NULL) {
1746 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1747 // black hole it
1748 SET_INFO(p,&stg_BLACKHOLE_info);
1749 p = STATIC_LINK2(info,p);
1750 *pp = p;
1751 }
1752 else {
1753 pp = &STATIC_LINK2(info,p);
1754 p = *pp;
1755 i++;
1756 }
1757
1758 }
1759
1760 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1761 }
1762 #endif