[ghc.git] / rts / sm / GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "GCTDecl.h"
44 #include "Compact.h"
45 #include "Evac.h"
46 #include "Scav.h"
47 #include "GCUtils.h"
48 #include "MarkStack.h"
49 #include "MarkWeak.h"
50 #include "Sparks.h"
51 #include "Sweep.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
93 */
94
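/* As an illustration of the list shape described above, a traversal looks
 * like the loop below (a minimal, non-compiled sketch; it mirrors
 * zero_static_object_list() further down in this file, using the same
 * STATIC_LINK macro and the END_OF_STATIC_LIST sentinel, which is the '1'
 * end marker mentioned above):
 *
 *   StgClosure *p, *link;
 *   const StgInfoTable *info;
 *   for (p = list; p != END_OF_STATIC_LIST; p = link) {
 *       info = get_itbl(p);
 *       link = *STATIC_LINK(info, p);   // follow the static link field
 *       // ... process the static object p here ...
 *   }
 */
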
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
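/* For example, with three generations (+RTS -G3 -RTS) they are numbered
 * 0..2: a collection with N == 2 is a major GC, while N == 0 or N == 1 is
 * a minor GC that treats the mutable lists of the uncollected, older
 * generations as roots.
 */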
100 nat N;
101 rtsBool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #ifdef DEBUG
109 nat mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_OTHERS;
113 #endif
114
115 /* Thread-local data for each GC thread
116 */
117 gc_thread **gc_threads = NULL;
118
119 #if !defined(THREADED_RTS)
120 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
121 #endif
122
123 // Number of threads running in *this* GC. Affects how many
124 // step->todos[] lists we have to look in to find work.
125 nat n_gc_threads;
126
127 // For stats:
128 long copied; // *words* copied & scavenged during this GC
129
130 rtsBool work_stealing;
131
132 DECLARE_GCT
133
134 /* -----------------------------------------------------------------------------
135 Static function declarations
136 -------------------------------------------------------------------------- */
137
138 static void mark_root (void *user, StgClosure **root);
139 static void zero_static_object_list (StgClosure* first_static);
140 static void prepare_collected_gen (generation *gen);
141 static void prepare_uncollected_gen (generation *gen);
142 static void init_gc_thread (gc_thread *t);
143 static void resize_generations (void);
144 static void resize_nursery (void);
145 static void start_gc_threads (void);
146 static void scavenge_until_all_done (void);
147 static StgWord inc_running (void);
148 static StgWord dec_running (void);
149 static void wakeup_gc_threads (nat me);
150 static void shutdown_gc_threads (nat me);
151 static void collect_gct_blocks (void);
152 static StgWord collect_pinned_object_blocks (void);
153
154 #if 0 && defined(DEBUG)
155 static void gcCAFs (void);
156 #endif
157
158 /* -----------------------------------------------------------------------------
159 The mark stack.
160 -------------------------------------------------------------------------- */
161
162 bdescr *mark_stack_top_bd; // topmost block in the mark stack
163 bdescr *mark_stack_bd; // current block in the mark stack
164 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
165
166 /* -----------------------------------------------------------------------------
167 GarbageCollect: the main entry point to the garbage collector.
168
169    The collect_gen parameter is determined by calling calcNeeded().
170
171 Locks held: all capabilities are held throughout GarbageCollect().
172 -------------------------------------------------------------------------- */
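/* A note on the gc_type argument (THREADED_RTS only): SYNC_GC_PAR requests a
 * parallel collection using one GC thread per capability (n_gc_threads is set
 * to n_capabilities below); any other value makes this call run the whole
 * collection single-threaded on the current capability.
 */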
173
174 void
175 GarbageCollect (nat collect_gen,
176 rtsBool do_heap_census,
177 nat gc_type USED_IF_THREADS,
178 Capability *cap)
179 {
180 bdescr *bd;
181 generation *gen;
182 StgWord live_blocks, live_words, allocated, par_max_copied, par_tot_copied;
183 #if defined(THREADED_RTS)
184 gc_thread *saved_gct;
185 #endif
186 nat g, n;
187
188 // necessary if we stole a callee-saves register for gct:
189 #if defined(THREADED_RTS)
190 saved_gct = gct;
191 #endif
192
193 #ifdef PROFILING
194 CostCentreStack *save_CCS[n_capabilities];
195 #endif
196
197 ACQUIRE_SM_LOCK;
198
199 #if defined(RTS_USER_SIGNALS)
200 if (RtsFlags.MiscFlags.install_signal_handlers) {
201 // block signals
202 blockUserSignals();
203 }
204 #endif
205
206 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
207 // otherwise adjust the padding in gen_workspace.
208
209 // this is the main thread
210 SET_GCT(gc_threads[cap->no]);
211
212 // tell the stats department that we've started a GC
213 stat_startGC(cap, gct);
214
215 // lock the StablePtr table
216 stablePtrPreGC();
217
218 #ifdef DEBUG
219 mutlist_MUTVARS = 0;
220   mutlist_MUTARRS = 0;
      mutlist_MVARS = 0;   // reset this counter too, so the per-GC mut-list stats below don't accumulate
221   mutlist_OTHERS = 0;
222 #endif
223
224 // attribute any costs to CCS_GC
225 #ifdef PROFILING
226 for (n = 0; n < n_capabilities; n++) {
227 save_CCS[n] = capabilities[n].r.rCCCS;
228 capabilities[n].r.rCCCS = CCS_GC;
229 }
230 #endif
231
232 /* Approximate how much we allocated.
233 * Todo: only when generating stats?
234 */
235 allocated = countLargeAllocated(); /* don't count the nursery yet */
236
237 /* Figure out which generation to collect
238 */
239 N = collect_gen;
240 major_gc = (N == RtsFlags.GcFlags.generations-1);
241
242 #if defined(THREADED_RTS)
243 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
244 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
245 // It's not always a good idea to do load balancing in parallel
246 // GC. In particular, for a parallel program we don't want to
247 // lose locality by moving cached data into another CPU's cache
248 // (this effect can be quite significant).
249 //
250     // We could have a more complex way to determine whether to do
251 // work stealing or not, e.g. it might be a good idea to do it
252 // if the heap is big. For now, we just turn it on or off with
253 // a flag.
254 #endif
255
256 /* Start threads, so they can be spinning up while we finish initialisation.
257 */
258 start_gc_threads();
259
260 #if defined(THREADED_RTS)
261 /* How many threads will be participating in this GC?
262 * We don't try to parallelise minor GCs (unless the user asks for
263 * it with +RTS -gn0), or mark/compact/sweep GC.
264 */
265 if (gc_type == SYNC_GC_PAR) {
266 n_gc_threads = n_capabilities;
267 } else {
268 n_gc_threads = 1;
269 }
270 #else
271 n_gc_threads = 1;
272 #endif
273
274 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
275 N, n_gc_threads);
276
277 #ifdef RTS_GTK_FRONTPANEL
278 if (RtsFlags.GcFlags.frontpanel) {
279 updateFrontPanelBeforeGC(N);
280 }
281 #endif
282
283 #ifdef DEBUG
284 // check for memory leaks if DEBUG is on
285 memInventory(DEBUG_gc);
286 #endif
287
288 // check sanity *before* GC
289 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
290
291 // gather blocks allocated using allocatePinned() from each capability
292 // and put them on the g0->large_object list.
293 collect_pinned_object_blocks();
294
295 // Initialise all the generations/steps that we're collecting.
296 for (g = 0; g <= N; g++) {
297 prepare_collected_gen(&generations[g]);
298 }
299 // Initialise all the generations/steps that we're *not* collecting.
300 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
301 prepare_uncollected_gen(&generations[g]);
302 }
303
304 // Prepare this gc_thread
305 init_gc_thread(gct);
306
307 /* Allocate a mark stack if we're doing a major collection.
308 */
309 if (major_gc && oldest_gen->mark) {
310 mark_stack_bd = allocBlock();
311 mark_stack_top_bd = mark_stack_bd;
312 mark_stack_bd->link = NULL;
313 mark_stack_bd->u.back = NULL;
314 mark_sp = mark_stack_bd->start;
315 } else {
316 mark_stack_bd = NULL;
317 mark_stack_top_bd = NULL;
318 mark_sp = NULL;
319 }
320
321 /* -----------------------------------------------------------------------
322 * follow all the roots that we know about:
323 */
324
325 // the main thread is running: this prevents any other threads from
326 // exiting prematurely, so we can start them now.
327 // NB. do this after the mutable lists have been saved above, otherwise
328 // the other GC threads will be writing into the old mutable lists.
329 inc_running();
330 wakeup_gc_threads(gct->thread_index);
331
332 traceEventGcWork(gct->cap);
333
334   // scavenge the capability-private mutable lists.  This isn't part
335   // of markCapability() because markCapability() can only
336   // call back into the GC via mark_root() (due to the gct register
337   // variable).
338 if (n_gc_threads == 1) {
339 for (n = 0; n < n_capabilities; n++) {
340 #if defined(THREADED_RTS)
341 scavenge_capability_mut_Lists1(&capabilities[n]);
342 #else
343 scavenge_capability_mut_lists(&capabilities[n]);
344 #endif
345 }
346 } else {
347 scavenge_capability_mut_lists(gct->cap);
348 for (n = 0; n < n_capabilities; n++) {
349 if (gc_threads[n]->idle) {
350 markCapability(mark_root, gct, &capabilities[n],
351 rtsTrue/*don't mark sparks*/);
352 scavenge_capability_mut_lists(&capabilities[n]);
353 }
354 }
355 }
356
357 // follow roots from the CAF list (used by GHCi)
358 gct->evac_gen_no = 0;
359 markCAFs(mark_root, gct);
360
361 // follow all the roots that the application knows about.
362 gct->evac_gen_no = 0;
363 if (n_gc_threads == 1) {
364 for (n = 0; n < n_capabilities; n++) {
365 markCapability(mark_root, gct, &capabilities[n],
366 rtsTrue/*don't mark sparks*/);
367 }
368 } else {
369 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
370 }
371
372 markScheduler(mark_root, gct);
373
374 #if defined(RTS_USER_SIGNALS)
375 // mark the signal handlers (signals should be already blocked)
376 markSignalHandlers(mark_root, gct);
377 #endif
378
379 // Mark the weak pointer list, and prepare to detect dead weak pointers.
380 markWeakPtrList();
381 initWeakForGC();
382
383 // Mark the stable pointer table.
384 markStablePtrTable(mark_root, gct);
385
386 /* -------------------------------------------------------------------------
387 * Repeatedly scavenge all the areas we know about until there's no
388 * more scavenging to be done.
389 */
390 for (;;)
391 {
392 scavenge_until_all_done();
393 // The other threads are now stopped. We might recurse back to
394 // here, but from now on this is the only thread.
395
396 // must be last... invariant is that everything is fully
397 // scavenged at this point.
398 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
399 inc_running();
400 continue;
401 }
402
403 // If we get to here, there's really nothing left to do.
404 break;
405 }
406
407 if (n_gc_threads != 1) {
408 gct->allocated = clearNursery(cap);
409 }
410
411 shutdown_gc_threads(gct->thread_index);
412
413 // Now see which stable names are still alive.
414 gcStablePtrTable();
415
416 #ifdef THREADED_RTS
417 if (n_gc_threads == 1) {
418 for (n = 0; n < n_capabilities; n++) {
419 pruneSparkQueue(&capabilities[n]);
420 }
421 } else {
422 for (n = 0; n < n_capabilities; n++) {
423 if (n == cap->no || gc_threads[n]->idle) {
424 pruneSparkQueue(&capabilities[n]);
425 }
426 }
427 }
428 #endif
429
430 #ifdef PROFILING
431 // We call processHeapClosureForDead() on every closure destroyed during
432 // the current garbage collection, so we invoke LdvCensusForDead().
433 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
434 || RtsFlags.ProfFlags.bioSelector != NULL) {
435 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
436 LdvCensusForDead(N);
437 ACQUIRE_SM_LOCK;
438 }
439 #endif
440
441 // NO MORE EVACUATION AFTER THIS POINT!
442
443 // Finally: compact or sweep the oldest generation.
444 if (major_gc && oldest_gen->mark) {
445 if (oldest_gen->compact)
446 compact(gct->scavenged_static_objects);
447 else
448 sweep(oldest_gen);
449 }
450
451 copied = 0;
452 par_max_copied = 0;
453 par_tot_copied = 0;
454 {
455 nat i;
456 for (i=0; i < n_gc_threads; i++) {
457 if (n_gc_threads > 1) {
458 debugTrace(DEBUG_gc,"thread %d:", i);
459 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
460 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
461 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
462 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
463 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
464 }
465 copied += gc_threads[i]->copied;
466 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
467 }
468 par_tot_copied = copied;
469 if (n_gc_threads == 1) {
470 par_max_copied = 0;
471 par_tot_copied = 0;
472 }
473 }
474
475 // Run through all the generations/steps and tidy up.
476 // We're going to:
477 // - count the amount of "live" data (live_words, live_blocks)
478 // - count the amount of "copied" data in this GC (copied)
479 // - free from-space
480 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
481 //
482 live_words = 0;
483 live_blocks = 0;
484
485 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
486
487 if (g == N) {
488 generations[g].collections++; // for stats
489 if (n_gc_threads > 1) generations[g].par_collections++;
490 }
491
492 // Count the mutable list as bytes "copied" for the purposes of
493 // stats. Every mutable list is copied during every GC.
494 if (g > 0) {
495 W_ mut_list_size = 0;
496 for (n = 0; n < n_capabilities; n++) {
497 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
498 }
499 copied += mut_list_size;
500
501 debugTrace(DEBUG_gc,
502 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
503 (unsigned long)(mut_list_size * sizeof(W_)),
504 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
505 }
506
507 bdescr *next, *prev;
508 gen = &generations[g];
509
510 // for generations we collected...
511 if (g <= N) {
512
513 /* free old memory and shift to-space into from-space for all
514 * the collected steps (except the allocation area). These
515        * freed blocks will probably be quickly recycled.
516 */
517 if (gen->mark)
518 {
519 // tack the new blocks on the end of the existing blocks
520 if (gen->old_blocks != NULL) {
521
522 prev = NULL;
523 for (bd = gen->old_blocks; bd != NULL; bd = next) {
524
525 next = bd->link;
526
527 if (!(bd->flags & BF_MARKED))
528 {
529 if (prev == NULL) {
530 gen->old_blocks = next;
531 } else {
532 prev->link = next;
533 }
534 freeGroup(bd);
535 gen->n_old_blocks--;
536 }
537 else
538 {
539 gen->n_words += bd->free - bd->start;
540
541 // NB. this step might not be compacted next
542 // time, so reset the BF_MARKED flags.
543 // They are set before GC if we're going to
544 // compact. (search for BF_MARKED above).
545 bd->flags &= ~BF_MARKED;
546
547 // between GCs, all blocks in the heap except
548 // for the nursery have the BF_EVACUATED flag set.
549 bd->flags |= BF_EVACUATED;
550
551 prev = bd;
552 }
553 }
554
555 if (prev != NULL) {
556 prev->link = gen->blocks;
557 gen->blocks = gen->old_blocks;
558 }
559 }
560 // add the new blocks to the block tally
561 gen->n_blocks += gen->n_old_blocks;
562 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
563 ASSERT(countOccupied(gen->blocks) == gen->n_words);
564 }
565       else // not compacted
566 {
567 freeChain(gen->old_blocks);
568 }
569
570 gen->old_blocks = NULL;
571 gen->n_old_blocks = 0;
572
573 /* LARGE OBJECTS. The current live large objects are chained on
574 * scavenged_large, having been moved during garbage
575 * collection from large_objects. Any objects left on the
576 * large_objects list are therefore dead, so we free them here.
577 */
578 freeChain(gen->large_objects);
579 gen->large_objects = gen->scavenged_large_objects;
580 gen->n_large_blocks = gen->n_scavenged_large_blocks;
581 gen->n_new_large_words = 0;
582 }
583 else // for generations > N
584 {
585 /* For older generations, we need to append the
586 * scavenged_large_object list (i.e. large objects that have been
587 * promoted during this GC) to the large_object list for that step.
588 */
589 for (bd = gen->scavenged_large_objects; bd; bd = next) {
590 next = bd->link;
591 dbl_link_onto(bd, &gen->large_objects);
592 }
593
594 // add the new blocks we promoted during this GC
595 gen->n_large_blocks += gen->n_scavenged_large_blocks;
596 }
597
598 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
599
600 gen->scavenged_large_objects = NULL;
601 gen->n_scavenged_large_blocks = 0;
602
603 // Count "live" data
604 live_words += genLiveWords(gen);
605 live_blocks += genLiveBlocks(gen);
606
607 // add in the partial blocks in the gen_workspaces, but ignore gen 0
608 // if this is a local GC (we can't count another capability's part_list)
609 {
610 nat i;
611 for (i = 0; i < n_capabilities; i++) {
612 live_words += gcThreadLiveWords(i, gen->no);
613 live_blocks += gcThreadLiveBlocks(i, gen->no);
614 }
615 }
616 } // for all generations
617
618 // update the max size of older generations after a major GC
619 resize_generations();
620
621 // Free the mark stack.
622 if (mark_stack_top_bd != NULL) {
623 debugTrace(DEBUG_gc, "mark stack: %d blocks",
624 countBlocks(mark_stack_top_bd));
625 freeChain(mark_stack_top_bd);
626 }
627
628 // Free any bitmaps.
629 for (g = 0; g <= N; g++) {
630 gen = &generations[g];
631 if (gen->bitmap != NULL) {
632 freeGroup(gen->bitmap);
633 gen->bitmap = NULL;
634 }
635 }
636
637 // Reset the nursery: make the blocks empty
638 if (n_gc_threads == 1) {
639 for (n = 0; n < n_capabilities; n++) {
640 allocated += clearNursery(&capabilities[n]);
641 }
642 } else {
643 // When doing parallel GC, clearNursery() is called by the
644 // worker threads, and the value returned is stored in
645 // gct->allocated.
646 for (n = 0; n < n_capabilities; n++) {
647 if (gc_threads[n]->idle) {
648 allocated += clearNursery(&capabilities[n]);
649 } else {
650 allocated += gc_threads[n]->allocated;
651 }
652 }
653 }
654
655 resize_nursery();
656
657 resetNurseries();
658
659 // mark the garbage collected CAFs as dead
660 #if 0 && defined(DEBUG) // doesn't work at the moment
661 if (major_gc) { gcCAFs(); }
662 #endif
663
664 #ifdef PROFILING
665 // resetStaticObjectForRetainerProfiling() must be called before
666 // zeroing below.
667
668 // ToDo: fix the gct->scavenged_static_objects below
669 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
670 #endif
671
672 // zero the scavenged static object list
673 if (major_gc) {
674 nat i;
675 if (n_gc_threads == 1) {
676 zero_static_object_list(gct->scavenged_static_objects);
677 } else {
678 for (i = 0; i < n_gc_threads; i++) {
679 if (!gc_threads[i]->idle) {
680 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
681 }
682 }
683 }
684 }
685
686 // Update the stable pointer hash table.
687 updateStablePtrTable(major_gc);
688
689 // unlock the StablePtr table. Must be before scheduleFinalizers(),
690 // because a finalizer may call hs_free_fun_ptr() or
691 // hs_free_stable_ptr(), both of which access the StablePtr table.
692 stablePtrPostGC();
693
694 // Start any pending finalizers. Must be after
695 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
696 RELEASE_SM_LOCK;
697 scheduleFinalizers(cap, old_weak_ptr_list);
698 ACQUIRE_SM_LOCK;
699
700 // check sanity after GC
701 // before resurrectThreads(), because that might overwrite some
702 // closures, which will cause problems with THREADED where we don't
703 // fill slop.
704 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
705
706 // If a heap census is due, we need to do it before
707 // resurrectThreads(), for the same reason as checkSanity above:
708 // resurrectThreads() will overwrite some closures and leave slop
709 // behind.
710 if (do_heap_census) {
711 debugTrace(DEBUG_sched, "performing heap census");
712 RELEASE_SM_LOCK;
713 heapCensus(gct->gc_start_cpu);
714 ACQUIRE_SM_LOCK;
715 }
716
717 // send exceptions to any threads which were about to die
718 RELEASE_SM_LOCK;
719 resurrectThreads(resurrected_threads);
720 ACQUIRE_SM_LOCK;
721
722 if (major_gc) {
723 W_ need, got;
724 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
725 got = mblocks_allocated;
726 /* If the amount of data remains constant, next major GC we'll
727 require (F+1)*need. We leave (F+2)*need in order to reduce
728 repeated deallocation and reallocation. */
729 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
730 if (got > need) {
731 returnMemoryToOS(got - need);
732 }
733 }
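  // Illustrative numbers: with the default oldGenFactor F = 2, if the heap
  // currently needs 100 megablocks then we keep (F+2)*100 = 400 megablocks
  // mapped and return anything beyond that to the OS.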
734
735 // extra GC trace info
736 IF_DEBUG(gc, statDescribeGens());
737
738 #ifdef DEBUG
739 // symbol-table based profiling
740 /* heapCensus(to_blocks); */ /* ToDo */
741 #endif
742
743 // restore enclosing cost centre
744 #ifdef PROFILING
745 for (n = 0; n < n_capabilities; n++) {
746 capabilities[n].r.rCCCS = save_CCS[n];
747 }
748 #endif
749
750 #ifdef DEBUG
751 // check for memory leaks if DEBUG is on
752 memInventory(DEBUG_gc);
753 #endif
754
755 #ifdef RTS_GTK_FRONTPANEL
756 if (RtsFlags.GcFlags.frontpanel) {
757       updateFrontPanelAfterGC( N, live_words ); // assumption: the front panel wants the live word count computed above
758 }
759 #endif
760
761 // ok, GC over: tell the stats department what happened.
762 stat_endGC(cap, gct, allocated, live_words, copied,
763 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
764 N, n_gc_threads, par_max_copied, par_tot_copied);
765
766 #if defined(RTS_USER_SIGNALS)
767 if (RtsFlags.MiscFlags.install_signal_handlers) {
768 // unblock signals again
769 unblockUserSignals();
770 }
771 #endif
772
773 RELEASE_SM_LOCK;
774
775 SET_GCT(saved_gct);
776 }
777
778 /* -----------------------------------------------------------------------------
779 Initialise the gc_thread structures.
780 -------------------------------------------------------------------------- */
781
782 #define GC_THREAD_INACTIVE 0
783 #define GC_THREAD_STANDING_BY 1
784 #define GC_THREAD_RUNNING 2
785 #define GC_THREAD_WAITING_TO_CONTINUE 3
786
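/* A rough summary of the handshake implemented by gcWorkerThread(),
 * waitForGcThreads(), wakeup_gc_threads(), shutdown_gc_threads() and
 * releaseGCThreads() below:
 *
 *   INACTIVE            -> STANDING_BY          worker announces itself, then
 *                                               blocks on its gc_spin lock
 *   STANDING_BY         -> RUNNING              wakeup_gc_threads() releases
 *                                               gc_spin
 *   RUNNING             -> WAITING_TO_CONTINUE  worker finishes scavenging and
 *                                               blocks on its mut_spin lock
 *   WAITING_TO_CONTINUE -> INACTIVE             releaseGCThreads() releases
 *                                               mut_spin
 */
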
787 static void
788 new_gc_thread (nat n, gc_thread *t)
789 {
790 nat g;
791 gen_workspace *ws;
792
793 t->cap = &capabilities[n];
794
795 #ifdef THREADED_RTS
796 t->id = 0;
797 initSpinLock(&t->gc_spin);
798 initSpinLock(&t->mut_spin);
799 ACQUIRE_SPIN_LOCK(&t->gc_spin);
800     t->wakeup = GC_THREAD_INACTIVE;  // starts inactive, so we can wait for the
801                           // thread to reach GC_THREAD_STANDING_BY; see waitForGcThreads and wakeup_gc_threads
802 #endif
803
804 t->thread_index = n;
805 t->idle = rtsFalse;
806 t->free_blocks = NULL;
807 t->gc_count = 0;
808
809 init_gc_thread(t);
810
811 #ifdef USE_PAPI
812 t->papi_events = -1;
813 #endif
814
815 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
816 {
817 ws = &t->gens[g];
818 ws->gen = &generations[g];
819 ASSERT(g == ws->gen->no);
820 ws->my_gct = t;
821
822 // We want to call
823 // alloc_todo_block(ws,0);
824 // but can't, because it uses gct which isn't set up at this point.
825 // Hence, allocate a block for todo_bd manually:
826 {
827 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
828 initBdescr(bd, ws->gen, ws->gen->to);
829 bd->flags = BF_EVACUATED;
830 bd->u.scan = bd->free = bd->start;
831
832 ws->todo_bd = bd;
833 ws->todo_free = bd->free;
834 ws->todo_lim = bd->start + BLOCK_SIZE_W;
835 }
836
837 ws->todo_q = newWSDeque(128);
838 ws->todo_overflow = NULL;
839 ws->n_todo_overflow = 0;
840 ws->todo_large_objects = NULL;
841
842 ws->part_list = NULL;
843 ws->n_part_blocks = 0;
844
845 ws->scavd_list = NULL;
846 ws->n_scavd_blocks = 0;
847 }
848 }
849
850
851 void
852 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
853 {
854 #if defined(THREADED_RTS)
855 nat i;
856
857 if (from > 0) {
858 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
859 "initGcThreads");
860 } else {
861 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
862 "initGcThreads");
863 }
864
865 // We have to update the gct->cap pointers to point to the new
866 // Capability array now.
867 for (i = 0; i < from; i++) {
868 gc_threads[i]->cap = &capabilities[gc_threads[i]->cap->no];
869 }
870
871 for (i = from; i < to; i++) {
872 gc_threads[i] =
873 stgMallocBytes(sizeof(gc_thread) +
874 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
875 "alloc_gc_threads");
876
877 new_gc_thread(i, gc_threads[i]);
878 }
879 #else
880 ASSERT(from == 0 && to == 1);
881 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
882 gc_threads[0] = gct;
883 new_gc_thread(0,gc_threads[0]);
884 #endif
885 }
886
887 void
888 freeGcThreads (void)
889 {
890 nat g;
891 if (gc_threads != NULL) {
892 #if defined(THREADED_RTS)
893 nat i;
894 for (i = 0; i < n_capabilities; i++) {
895 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
896 {
897 freeWSDeque(gc_threads[i]->gens[g].todo_q);
898 }
899 stgFree (gc_threads[i]);
900 }
901 stgFree (gc_threads);
902 #else
903 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
904 {
905 freeWSDeque(gc_threads[0]->gens[g].todo_q);
906 }
907 stgFree (gc_threads);
908 #endif
909 gc_threads = NULL;
910 }
911 }
912
913 /* ----------------------------------------------------------------------------
914 Start GC threads
915 ------------------------------------------------------------------------- */
916
917 static volatile StgWord gc_running_threads;
918
919 static StgWord
920 inc_running (void)
921 {
922 StgWord new;
923 new = atomic_inc(&gc_running_threads);
924 ASSERT(new <= n_gc_threads);
925 return new;
926 }
927
928 static StgWord
929 dec_running (void)
930 {
931 ASSERT(gc_running_threads != 0);
932 return atomic_dec(&gc_running_threads);
933 }
934
935 static rtsBool
936 any_work (void)
937 {
938 int g;
939 gen_workspace *ws;
940
941 gct->any_work++;
942
943 write_barrier();
944
945 // scavenge objects in compacted generation
946 if (mark_stack_bd != NULL && !mark_stack_empty()) {
947 return rtsTrue;
948 }
949
950 // Check for global work in any step. We don't need to check for
951 // local work, because we have already exited scavenge_loop(),
952 // which means there is no local work for this thread.
953 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
954 ws = &gct->gens[g];
955 if (ws->todo_large_objects) return rtsTrue;
956 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
957 if (ws->todo_overflow) return rtsTrue;
958 }
959
960 #if defined(THREADED_RTS)
961 if (work_stealing) {
962 nat n;
963 // look for work to steal
964 for (n = 0; n < n_gc_threads; n++) {
965 if (n == gct->thread_index) continue;
966 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
967 ws = &gc_threads[n]->gens[g];
968 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
969 }
970 }
971 }
972 #endif
973
974 gct->no_work++;
975 #if defined(THREADED_RTS)
976 yieldThread();
977 #endif
978
979 return rtsFalse;
980 }
981
982 static void
983 scavenge_until_all_done (void)
984 {
985 DEBUG_ONLY( nat r );
986
987
988 loop:
989 #if defined(THREADED_RTS)
990 if (n_gc_threads > 1) {
991 scavenge_loop();
992 } else {
993 scavenge_loop1();
994 }
995 #else
996 scavenge_loop();
997 #endif
998
999 collect_gct_blocks();
1000
1001 // scavenge_loop() only exits when there's no work to do
1002
1003 #ifdef DEBUG
1004 r = dec_running();
1005 #else
1006 dec_running();
1007 #endif
1008
1009 traceEventGcIdle(gct->cap);
1010
1011 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1012
1013 while (gc_running_threads != 0) {
1014 // usleep(1);
1015 if (any_work()) {
1016 inc_running();
1017 traceEventGcWork(gct->cap);
1018 goto loop;
1019 }
1020 // any_work() does not remove the work from the queue, it
1021 // just checks for the presence of work. If we find any,
1022 // then we increment gc_running_threads and go back to
1023 // scavenge_loop() to perform any pending work.
1024 }
1025
1026 traceEventGcDone(gct->cap);
1027 }
1028
1029 #if defined(THREADED_RTS)
1030
1031 void
1032 gcWorkerThread (Capability *cap)
1033 {
1034 gc_thread *saved_gct;
1035
1036 // necessary if we stole a callee-saves register for gct:
1037 saved_gct = gct;
1038
1039 SET_GCT(gc_threads[cap->no]);
1040 gct->id = osThreadId();
1041
1042 stat_gcWorkerThreadStart(gct);
1043
1044 // Wait until we're told to wake up
1045 RELEASE_SPIN_LOCK(&gct->mut_spin);
1046 // yieldThread();
1047 // Strangely, adding a yieldThread() here makes the CPU time
1048 // measurements more accurate on Linux, perhaps because it syncs
1049 // the CPU time across the multiple cores. Without this, CPU time
1050 // is heavily skewed towards GC rather than MUT.
1051 gct->wakeup = GC_THREAD_STANDING_BY;
1052 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1053 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1054
1055 #ifdef USE_PAPI
1056 // start performance counters in this thread...
1057 if (gct->papi_events == -1) {
1058 papi_init_eventset(&gct->papi_events);
1059 }
1060 papi_thread_start_gc1_count(gct->papi_events);
1061 #endif
1062
1063 init_gc_thread(gct);
1064
1065 traceEventGcWork(gct->cap);
1066
1067 // Every thread evacuates some roots.
1068 gct->evac_gen_no = 0;
1069 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1070 scavenge_capability_mut_lists(cap);
1071
1072 scavenge_until_all_done();
1073
1074 gct->allocated = clearNursery(cap);
1075
1076 #ifdef THREADED_RTS
1077 // Now that the whole heap is marked, we discard any sparks that
1078 // were found to be unreachable. The main GC thread is currently
1079 // marking heap reachable via weak pointers, so it is
1080 // non-deterministic whether a spark will be retained if it is
1081 // only reachable via weak pointers. To fix this problem would
1082 // require another GC barrier, which is too high a price.
1083 pruneSparkQueue(cap);
1084 #endif
1085
1086 #ifdef USE_PAPI
1087 // count events in this thread towards the GC totals
1088 papi_thread_stop_gc1_count(gct->papi_events);
1089 #endif
1090
1091 // Wait until we're told to continue
1092 RELEASE_SPIN_LOCK(&gct->gc_spin);
1093 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1094 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1095 gct->thread_index);
1096 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1097 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1098
1099 // record the time spent doing GC in the Task structure
1100 stat_gcWorkerThreadDone(gct);
1101
1102 SET_GCT(saved_gct);
1103 }
1104
1105 #endif
1106
1107 #if defined(THREADED_RTS)
1108
1109 void
1110 waitForGcThreads (Capability *cap USED_IF_THREADS)
1111 {
1112 const nat n_threads = n_capabilities;
1113 const nat me = cap->no;
1114 nat i, j;
1115 rtsBool retry = rtsTrue;
1116
1117 while(retry) {
1118 for (i=0; i < n_threads; i++) {
1119 if (i == me || gc_threads[i]->idle) continue;
1120 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1121 prodCapability(&capabilities[i], cap->running_task);
1122 }
1123 }
1124 for (j=0; j < 10; j++) {
1125 retry = rtsFalse;
1126 for (i=0; i < n_threads; i++) {
1127 if (i == me || gc_threads[i]->idle) continue;
1128 write_barrier();
1129 interruptCapability(&capabilities[i]);
1130 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1131 retry = rtsTrue;
1132 }
1133 }
1134 if (!retry) break;
1135 yieldThread();
1136 }
1137 }
1138 }
1139
1140 #endif // THREADED_RTS
1141
1142 static void
1143 start_gc_threads (void)
1144 {
1145 #if defined(THREADED_RTS)
1146 gc_running_threads = 0;
1147 #endif
1148 }
1149
1150 static void
1151 wakeup_gc_threads (nat me USED_IF_THREADS)
1152 {
1153 #if defined(THREADED_RTS)
1154 nat i;
1155
1156 if (n_gc_threads == 1) return;
1157
1158 for (i=0; i < n_gc_threads; i++) {
1159 if (i == me || gc_threads[i]->idle) continue;
1160 inc_running();
1161 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1162 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1163
1164 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1165 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1166 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1167 }
1168 #endif
1169 }
1170
1171 // After GC is complete, we must wait for all GC threads to reach the
1172 // GC_THREAD_WAITING_TO_CONTINUE state, otherwise they may still be executing
1173 // inside any_work(), and may even remain awake until the next GC starts.
1174 static void
1175 shutdown_gc_threads (nat me USED_IF_THREADS)
1176 {
1177 #if defined(THREADED_RTS)
1178 nat i;
1179
1180 if (n_gc_threads == 1) return;
1181
1182 for (i=0; i < n_gc_threads; i++) {
1183 if (i == me || gc_threads[i]->idle) continue;
1184 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1185 }
1186 #endif
1187 }
1188
1189 #if defined(THREADED_RTS)
1190 void
1191 releaseGCThreads (Capability *cap USED_IF_THREADS)
1192 {
1193 const nat n_threads = n_capabilities;
1194 const nat me = cap->no;
1195 nat i;
1196 for (i=0; i < n_threads; i++) {
1197 if (i == me || gc_threads[i]->idle) continue;
1198 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1199 barf("releaseGCThreads");
1200
1201 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1202 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1203 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1204 }
1205 }
1206 #endif
1207
1208 /* ----------------------------------------------------------------------------
1209 Initialise a generation that is to be collected
1210 ------------------------------------------------------------------------- */
1211
1212 static void
1213 prepare_collected_gen (generation *gen)
1214 {
1215 nat i, g, n;
1216 gen_workspace *ws;
1217 bdescr *bd, *next;
1218
1219 // Throw away the current mutable list. Invariant: the mutable
1220 // list always has at least one block; this means we can avoid a
1221 // check for NULL in recordMutable().
1222 g = gen->no;
1223 if (g != 0) {
1224 for (i = 0; i < n_capabilities; i++) {
1225 freeChain(capabilities[i].mut_lists[g]);
1226 capabilities[i].mut_lists[g] = allocBlock();
1227 }
1228 }
1229
1230 gen = &generations[g];
1231 ASSERT(gen->no == g);
1232
1233     // we'll construct a new list of threads in this generation
1234     // during GC; throw away the current list.
1235 gen->old_threads = gen->threads;
1236 gen->threads = END_TSO_QUEUE;
1237
1238 // deprecate the existing blocks
1239 gen->old_blocks = gen->blocks;
1240 gen->n_old_blocks = gen->n_blocks;
1241 gen->blocks = NULL;
1242 gen->n_blocks = 0;
1243 gen->n_words = 0;
1244 gen->live_estimate = 0;
1245
1246 // initialise the large object queues.
1247 ASSERT(gen->scavenged_large_objects == NULL);
1248 ASSERT(gen->n_scavenged_large_blocks == 0);
1249
1250 // grab all the partial blocks stashed in the gc_thread workspaces and
1251 // move them to the old_blocks list of this gen.
1252 for (n = 0; n < n_capabilities; n++) {
1253 ws = &gc_threads[n]->gens[gen->no];
1254
1255 for (bd = ws->part_list; bd != NULL; bd = next) {
1256 next = bd->link;
1257 bd->link = gen->old_blocks;
1258 gen->old_blocks = bd;
1259 gen->n_old_blocks += bd->blocks;
1260 }
1261 ws->part_list = NULL;
1262 ws->n_part_blocks = 0;
1263
1264 ASSERT(ws->scavd_list == NULL);
1265 ASSERT(ws->n_scavd_blocks == 0);
1266
1267 if (ws->todo_free != ws->todo_bd->start) {
1268 ws->todo_bd->free = ws->todo_free;
1269 ws->todo_bd->link = gen->old_blocks;
1270 gen->old_blocks = ws->todo_bd;
1271 gen->n_old_blocks += ws->todo_bd->blocks;
1272 alloc_todo_block(ws,0); // always has one block.
1273 }
1274 }
1275
1276 // mark the small objects as from-space
1277 for (bd = gen->old_blocks; bd; bd = bd->link) {
1278 bd->flags &= ~BF_EVACUATED;
1279 }
1280
1281 // mark the large objects as from-space
1282 for (bd = gen->large_objects; bd; bd = bd->link) {
1283 bd->flags &= ~BF_EVACUATED;
1284 }
1285
1286 // for a compacted generation, we need to allocate the bitmap
1287 if (gen->mark) {
1288 StgWord bitmap_size; // in bytes
1289 bdescr *bitmap_bdescr;
1290 StgWord *bitmap;
1291
1292 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
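        // i.e. one bit per word of from-space.  For example, with 4096-byte
        // blocks and 8-byte words (a typical 64-bit build) that is
        // 4096/(8*8) = 64 bytes of bitmap per old block.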
1293
1294 if (bitmap_size > 0) {
1295 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1296 / BLOCK_SIZE);
1297 gen->bitmap = bitmap_bdescr;
1298 bitmap = bitmap_bdescr->start;
1299
1300 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1301 bitmap_size, bitmap);
1302
1303 // don't forget to fill it with zeros!
1304 memset(bitmap, 0, bitmap_size);
1305
1306 // For each block in this step, point to its bitmap from the
1307 // block descriptor.
1308 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1309 bd->u.bitmap = bitmap;
1310 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1311
1312 // Also at this point we set the BF_MARKED flag
1313 // for this block. The invariant is that
1314 // BF_MARKED is always unset, except during GC
1315 // when it is set on those blocks which will be
1316 // compacted.
1317 if (!(bd->flags & BF_FRAGMENTED)) {
1318 bd->flags |= BF_MARKED;
1319 }
1320
1321 // BF_SWEPT should be marked only for blocks that are being
1322 // collected in sweep()
1323 bd->flags &= ~BF_SWEPT;
1324 }
1325 }
1326 }
1327 }
1328
1329
1330 /* ----------------------------------------------------------------------------
1331 Save the mutable lists in saved_mut_lists
1332 ------------------------------------------------------------------------- */
1333
1334 static void
1335 stash_mut_list (Capability *cap, nat gen_no)
1336 {
1337 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1338 cap->mut_lists[gen_no] = allocBlock_sync();
1339 }
1340
1341 /* ----------------------------------------------------------------------------
1342 Initialise a generation that is *not* to be collected
1343 ------------------------------------------------------------------------- */
1344
1345 static void
1346 prepare_uncollected_gen (generation *gen)
1347 {
1348 nat i;
1349
1350
1351 ASSERT(gen->no > 0);
1352
1353 // save the current mutable lists for this generation, and
1354 // allocate a fresh block for each one. We'll traverse these
1355 // mutable lists as roots early on in the GC.
1356 for (i = 0; i < n_capabilities; i++) {
1357 stash_mut_list(&capabilities[i], gen->no);
1358 }
1359
1360 ASSERT(gen->scavenged_large_objects == NULL);
1361 ASSERT(gen->n_scavenged_large_blocks == 0);
1362 }
1363
1364 /* -----------------------------------------------------------------------------
1365 Collect the completed blocks from a GC thread and attach them to
1366 the generation.
1367 -------------------------------------------------------------------------- */
1368
1369 static void
1370 collect_gct_blocks (void)
1371 {
1372 nat g;
1373 gen_workspace *ws;
1374 bdescr *bd, *prev;
1375
1376 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1377 ws = &gct->gens[g];
1378
1379 // there may still be a block attached to ws->todo_bd;
1380 // leave it there to use next time.
1381
1382 if (ws->scavd_list != NULL) {
1383 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1384
1385 ASSERT(gct->scan_bd == NULL);
1386 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1387
1388 prev = NULL;
1389 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1390 ws->gen->n_words += bd->free - bd->start;
1391 prev = bd;
1392 }
1393 if (prev != NULL) {
1394 prev->link = ws->gen->blocks;
1395 ws->gen->blocks = ws->scavd_list;
1396 }
1397 ws->gen->n_blocks += ws->n_scavd_blocks;
1398
1399 ws->scavd_list = NULL;
1400 ws->n_scavd_blocks = 0;
1401
1402 RELEASE_SPIN_LOCK(&ws->gen->sync);
1403 }
1404 }
1405 }
1406
1407 /* -----------------------------------------------------------------------------
1408 During mutation, any blocks that are filled by allocatePinned() are
1409 stashed on the local pinned_object_blocks list, to avoid needing to
1410 take a global lock. Here we collect those blocks from the
1411 cap->pinned_object_blocks lists and put them on the
1412 main g0->large_object list.
1413
1414 Returns: the number of words allocated this way, for stats
1415 purposes.
1416 -------------------------------------------------------------------------- */
1417
1418 static StgWord
1419 collect_pinned_object_blocks (void)
1420 {
1421 nat n;
1422 bdescr *bd, *prev;
1423 StgWord allocated = 0;
1424
1425 for (n = 0; n < n_capabilities; n++) {
1426 prev = NULL;
1427 for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
1428 allocated += bd->free - bd->start;
1429 prev = bd;
1430 }
1431 if (prev != NULL) {
1432 prev->link = g0->large_objects;
1433 if (g0->large_objects != NULL) {
1434 g0->large_objects->u.back = prev;
1435 }
1436 g0->large_objects = capabilities[n].pinned_object_blocks;
1437 capabilities[n].pinned_object_blocks = 0;
1438 }
1439 }
1440
1441 return allocated;
1442 }
1443
1444 /* -----------------------------------------------------------------------------
1445 Initialise a gc_thread before GC
1446 -------------------------------------------------------------------------- */
1447
1448 static void
1449 init_gc_thread (gc_thread *t)
1450 {
1451 t->static_objects = END_OF_STATIC_LIST;
1452 t->scavenged_static_objects = END_OF_STATIC_LIST;
1453 t->scan_bd = NULL;
1454 t->mut_lists = t->cap->mut_lists;
1455 t->evac_gen_no = 0;
1456 t->failed_to_evac = rtsFalse;
1457 t->eager_promotion = rtsTrue;
1458 t->thunk_selector_depth = 0;
1459 t->allocated = 0;
1460 t->copied = 0;
1461 t->scanned = 0;
1462 t->any_work = 0;
1463 t->no_work = 0;
1464 t->scav_find_work = 0;
1465 }
1466
1467 /* -----------------------------------------------------------------------------
1468 Function we pass to evacuate roots.
1469 -------------------------------------------------------------------------- */
1470
1471 static void
1472 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1473 {
1474 // we stole a register for gct, but this function is called from
1475 // *outside* the GC where the register variable is not in effect,
1476 // so we need to save and restore it here. NB. only call
1477 // mark_root() from the main GC thread, otherwise gct will be
1478 // incorrect.
1479 #if defined(THREADED_RTS)
1480 gc_thread *saved_gct;
1481 saved_gct = gct;
1482 #endif
1483 SET_GCT(user);
1484
1485 evacuate(root);
1486
1487 SET_GCT(saved_gct);
1488 }
1489
1490 /* -----------------------------------------------------------------------------
1491 Initialising the static object & mutable lists
1492 -------------------------------------------------------------------------- */
1493
1494 static void
1495 zero_static_object_list(StgClosure* first_static)
1496 {
1497 StgClosure* p;
1498 StgClosure* link;
1499 const StgInfoTable *info;
1500
1501 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1502 info = get_itbl(p);
1503 link = *STATIC_LINK(info, p);
1504 *STATIC_LINK(info,p) = NULL;
1505 }
1506 }
1507
1508 /* ----------------------------------------------------------------------------
1509 Reset the sizes of the older generations when we do a major
1510 collection.
1511
1512 CURRENT STRATEGY: make all generations except zero the same size.
1513 We have to stay within the maximum heap size, and leave a certain
1514 percentage of the maximum heap size available to allocate into.
1515 ------------------------------------------------------------------------- */
1516
1517 static void
1518 resize_generations (void)
1519 {
1520 nat g;
1521
1522 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1523 W_ live, size, min_alloc, words;
1524 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1525 const W_ gens = RtsFlags.GcFlags.generations;
1526
1527 // live in the oldest generations
1528 if (oldest_gen->live_estimate != 0) {
1529 words = oldest_gen->live_estimate;
1530 } else {
1531 words = oldest_gen->n_words;
1532 }
1533 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1534 oldest_gen->n_large_blocks;
1535
1536 // default max size for all generations except zero
1537 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1538 RtsFlags.GcFlags.minOldGenSize);
1539
1540 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1541 if (max > 0) {
1542 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1543 } else {
1544 RtsFlags.GcFlags.heapSizeSuggestion = size;
1545 }
1546 }
1547
1548 // minimum size for generation zero
1549 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1550 RtsFlags.GcFlags.minAllocAreaSize);
1551
1552 // Auto-enable compaction when the residency reaches a
1553 // certain percentage of the maximum heap size (default: 30%).
1554 if (RtsFlags.GcFlags.compact ||
1555 (max > 0 &&
1556 oldest_gen->n_blocks >
1557 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1558 oldest_gen->mark = 1;
1559 oldest_gen->compact = 1;
1560 // debugBelch("compaction: on\n", live);
1561 } else {
1562 oldest_gen->mark = 0;
1563 oldest_gen->compact = 0;
1564 // debugBelch("compaction: off\n", live);
1565 }
1566
1567 if (RtsFlags.GcFlags.sweep) {
1568 oldest_gen->mark = 1;
1569 }
1570
1571 // if we're going to go over the maximum heap size, reduce the
1572 // size of the generations accordingly. The calculation is
1573 // different if compaction is turned on, because we don't need
1574 // to double the space required to collect the old generation.
1575 if (max != 0) {
1576
1577 // this test is necessary to ensure that the calculations
1578 // below don't have any negative results - we're working
1579 // with unsigned values here.
1580 if (max < min_alloc) {
1581 heapOverflow();
1582 }
1583
1584 if (oldest_gen->compact) {
1585 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1586 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1587 }
1588 } else {
1589 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1590 size = (max - min_alloc) / ((gens - 1) * 2);
1591 }
1592 }
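            // The factor of 2 above reflects copying collection needing
            // from-space and to-space simultaneously for each older
            // generation; with compaction the oldest generation is collected
            // in place and so is only counted once (hence the "* 2 - 1"
            // divisor in that branch).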
1593
1594 if (size < live) {
1595 heapOverflow();
1596 }
1597 }
1598
1599 #if 0
1600 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1601 min_alloc, size, max);
1602 #endif
1603
1604 for (g = 0; g < gens; g++) {
1605 generations[g].max_blocks = size;
1606 }
1607 }
1608 }
1609
1610 /* -----------------------------------------------------------------------------
1611 Calculate the new size of the nursery, and resize it.
1612 -------------------------------------------------------------------------- */
1613
1614 static void
1615 resize_nursery (void)
1616 {
1617 const StgWord min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1618
1619 if (RtsFlags.GcFlags.generations == 1)
1620 { // Two-space collector:
1621 W_ blocks;
1622
1623 /* set up a new nursery. Allocate a nursery size based on a
1624 * function of the amount of live data (by default a factor of 2)
1625 * Use the blocks from the old nursery if possible, freeing up any
1626 * left over blocks.
1627 *
1628 * If we get near the maximum heap size, then adjust our nursery
1629 * size accordingly. If the nursery is the same size as the live
1630 * data (L), then we need 3L bytes. We can reduce the size of the
1631 * nursery to bring the required memory down near 2L bytes.
1632 *
1633 * A normal 2-space collector would need 4L bytes to give the same
1634 * performance we get from 3L bytes, reducing to the same
1635 * performance at 2L bytes.
1636 */
1637 blocks = generations[0].n_blocks;
1638
1639 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1640 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1641 RtsFlags.GcFlags.maxHeapSize )
1642 {
1643 long adjusted_blocks; // signed on purpose
1644 int pc_free;
1645
1646 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1647
1648 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1649 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1650
1651 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1652           if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1653 {
1654 heapOverflow();
1655 }
1656 blocks = adjusted_blocks;
1657 }
1658 else
1659 {
1660 blocks *= RtsFlags.GcFlags.oldGenFactor;
1661 if (blocks < min_nursery)
1662 {
1663 blocks = min_nursery;
1664 }
1665 }
1666 resizeNurseries(blocks);
1667 }
1668 else // Generational collector
1669 {
1670 /*
1671 * If the user has given us a suggested heap size, adjust our
1672 * allocation area to make best use of the memory available.
1673 */
1674 if (RtsFlags.GcFlags.heapSizeSuggestion)
1675 {
1676 long blocks;
1677 StgWord needed;
1678
1679 calcNeeded(rtsFalse, &needed); // approx blocks needed at next GC
1680
1681 /* Guess how much will be live in generation 0 step 0 next time.
1682 * A good approximation is obtained by finding the
1683 * percentage of g0 that was live at the last minor GC.
1684 *
1685 * We have an accurate figure for the amount of copied data in
1686 * 'copied', but we must convert this to a number of blocks, with
1687 * a small adjustment for estimated slop at the end of a block
1688 * (- 10 words).
1689 */
1690 if (N == 0)
1691 {
1692 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1693 / countNurseryBlocks();
1694 }
1695
1696 /* Estimate a size for the allocation area based on the
1697 * information available. We might end up going slightly under
1698 * or over the suggested heap size, but we should be pretty
1699 * close on average.
1700 *
1701 * Formula: suggested - needed
1702 * ----------------------------
1703 * 1 + g0_pcnt_kept/100
1704 *
1705 * where 'needed' is the amount of memory needed at the next
1706 * collection for collecting all gens except g0.
1707 */
1708 blocks =
1709 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1710 (100 + (long)g0_pcnt_kept);
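            // Illustrative numbers: with heapSizeSuggestion = 100000 blocks,
            // needed = 20000 blocks and g0_pcnt_kept = 25, this yields
            // (100000 - 20000) * 100 / 125 = 64000 blocks for the allocation
            // area.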
1711
1712 if (blocks < (long)min_nursery) {
1713 blocks = min_nursery;
1714 }
1715
1716 resizeNurseries((W_)blocks);
1717 }
1718 else
1719 {
1720 // we might have added extra large blocks to the nursery, so
1721 // resize back to minAllocAreaSize again.
1722 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1723 }
1724 }
1725 }
1726
1727 /* -----------------------------------------------------------------------------
1728 Sanity code for CAF garbage collection.
1729
1730 With DEBUG turned on, we manage a CAF list in addition to the SRT
1731 mechanism. After GC, we run down the CAF list and blackhole any
1732 CAFs which have been garbage collected. This means we get an error
1733 whenever the program tries to enter a garbage collected CAF.
1734
1735 Any garbage collected CAFs are taken off the CAF list at the same
1736 time.
1737 -------------------------------------------------------------------------- */
1738
1739 #if 0 && defined(DEBUG)
1740
1741 static void
1742 gcCAFs(void)
1743 {
1744 StgClosure* p;
1745 StgClosure** pp;
1746 const StgInfoTable *info;
1747 nat i;
1748
1749 i = 0;
1750 p = caf_list;
1751 pp = &caf_list;
1752
1753 while (p != NULL) {
1754
1755 info = get_itbl(p);
1756
1757 ASSERT(info->type == IND_STATIC);
1758
1759 if (STATIC_LINK(info,p) == NULL) {
1760 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1761 // black hole it
1762 SET_INFO(p,&stg_BLACKHOLE_info);
1763 p = STATIC_LINK2(info,p);
1764 *pp = p;
1765 }
1766 else {
1767 pp = &STATIC_LINK2(info,p);
1768 p = *pp;
1769 i++;
1770 }
1771
1772 }
1773
1774 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1775 }
1776 #endif