[ghc.git] / rts / sm / GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "GCTDecl.h"
44 #include "Compact.h"
45 #include "Evac.h"
46 #include "Scav.h"
47 #include "GCUtils.h"
48 #include "MarkStack.h"
49 #include "MarkWeak.h"
50 #include "Sparks.h"
51 #include "Sweep.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
93 */
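/* A minimal illustrative sketch (not part of the collector) of how an
 * object might be consed onto the static list described above. The
 * real code lives in evacuate()/scavenge_static(); the helper name
 * below is hypothetical and threaded-RTS details are omitted.
 */
#if 0
static void sketch_push_static_object (StgClosure *p)
{
    const StgInfoTable *info = get_itbl(p);

    // a non-NULL static link means the object is already on a list,
    // so don't add it twice -- this is how loops are broken
    if (*STATIC_LINK(info, p) != NULL) return;

    // cons onto the front of this thread's static_objects list; the
    // list is terminated by END_OF_STATIC_LIST ('1'), not NULL
    *STATIC_LINK(info, p) = gct->static_objects;
    gct->static_objects = p;
}
#endif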
94
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
100 nat N;
101 rtsBool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #ifdef DEBUG
109 nat mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_OTHERS;
113 #endif
114
115 /* Thread-local data for each GC thread
116 */
117 gc_thread **gc_threads = NULL;
118
119 #if !defined(THREADED_RTS)
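// room for the single gc_thread plus its per-generation workspaces;
// 64 is presumably a generous upper bound on RtsFlags.GcFlags.generations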
120 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
121 #endif
122
123 // Number of threads running in *this* GC. Affects how many
124 // step->todos[] lists we have to look in to find work.
125 nat n_gc_threads;
126
127 // For stats:
128 long copied; // *words* copied & scavenged during this GC
129
130 rtsBool work_stealing;
131
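// DECLARE_GCT introduces 'gct', the current thread's gc_thread; GCTDecl.h
// may place it in a dedicated global register or in thread-local storage,
// depending on the platform.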
132 DECLARE_GCT
133
134 /* -----------------------------------------------------------------------------
135 Static function declarations
136 -------------------------------------------------------------------------- */
137
138 static void mark_root (void *user, StgClosure **root);
139 static void zero_static_object_list (StgClosure* first_static);
140 static nat initialise_N (rtsBool force_major_gc);
141 static void prepare_collected_gen (generation *gen);
142 static void prepare_uncollected_gen (generation *gen);
143 static void init_gc_thread (gc_thread *t);
144 static void resize_generations (void);
145 static void resize_nursery (void);
146 static void start_gc_threads (void);
147 static void scavenge_until_all_done (void);
148 static StgWord inc_running (void);
149 static StgWord dec_running (void);
150 static void wakeup_gc_threads (nat me);
151 static void shutdown_gc_threads (nat me);
152 static void collect_gct_blocks (void);
153 static lnat collect_pinned_object_blocks (void);
154
155 #if 0 && defined(DEBUG)
156 static void gcCAFs (void);
157 #endif
158
159 /* -----------------------------------------------------------------------------
160 The mark stack.
161 -------------------------------------------------------------------------- */
162
163 bdescr *mark_stack_top_bd; // topmost block in the mark stack
164 bdescr *mark_stack_bd; // current block in the mark stack
165 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
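/* A simplified sketch of a push on this block-structured stack (the real
 * operations live in MarkStack.h as push_mark_stack()/pop_mark_stack()
 * and handle more details than shown here); allocation locking is
 * omitted and the helper name is hypothetical.
 */
#if 0
static void sketch_push_mark_stack (StgPtr p)
{
    if (mark_sp == mark_stack_bd->start + BLOCK_SIZE_W) {
        // current block is full: chain a fresh block on as the new top
        bdescr *bd = allocBlock();
        bd->link = mark_stack_bd;   // new top links down to the old top
        bd->u.back = NULL;
        mark_stack_bd->u.back = bd; // old top points up at the new one
        mark_stack_top_bd = bd;
        mark_stack_bd = bd;
        mark_sp = bd->start;
    }
    *mark_sp++ = (StgWord)p;
}
#endif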
166
167 /* -----------------------------------------------------------------------------
168 GarbageCollect: the main entry point to the garbage collector.
169
170 Locks held: all capabilities are held throughout GarbageCollect().
171 -------------------------------------------------------------------------- */
172
173 void
174 GarbageCollect (rtsBool force_major_gc,
175 rtsBool do_heap_census,
176 nat gc_type USED_IF_THREADS,
177 Capability *cap)
178 {
179 bdescr *bd;
180 generation *gen;
181 lnat live_blocks, live_words, allocated, par_max_copied, par_tot_copied;
182 #if defined(THREADED_RTS)
183 gc_thread *saved_gct;
184 #endif
185 nat g, n;
186
187 // necessary if we stole a callee-saves register for gct:
188 #if defined(THREADED_RTS)
189 saved_gct = gct;
190 #endif
191
192 #ifdef PROFILING
193 CostCentreStack *save_CCS[n_capabilities];
194 #endif
195
196 ACQUIRE_SM_LOCK;
197
198 #if defined(RTS_USER_SIGNALS)
199 if (RtsFlags.MiscFlags.install_signal_handlers) {
200 // block signals
201 blockUserSignals();
202 }
203 #endif
204
205 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
206 // otherwise adjust the padding in gen_workspace.
207
208 // this is the main thread
209 SET_GCT(gc_threads[cap->no]);
210
211 // tell the stats department that we've started a GC
212 stat_startGC(cap, gct);
213
214 // lock the StablePtr table
215 stablePtrPreGC();
216
217 #ifdef DEBUG
218 mutlist_MUTVARS = 0;
219 mutlist_MUTARRS = 0;
220 mutlist_OTHERS = 0;
221 #endif
222
223 // attribute any costs to CCS_GC
224 #ifdef PROFILING
225 for (n = 0; n < n_capabilities; n++) {
226 save_CCS[n] = capabilities[n].r.rCCCS;
227 capabilities[n].r.rCCCS = CCS_GC;
228 }
229 #endif
230
231 /* Approximate how much we allocated.
232 * Todo: only when generating stats?
233 */
234 allocated = countLargeAllocated(); /* don't count the nursery yet */
235
236 /* Figure out which generation to collect
237 */
238 n = initialise_N(force_major_gc);
239
240 #if defined(THREADED_RTS)
241 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
242 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
243 // It's not always a good idea to do load balancing in parallel
244 // GC. In particular, for a parallel program we don't want to
245 // lose locality by moving cached data into another CPU's cache
246 // (this effect can be quite significant).
247 //
248 // We could have a more complex way to determine whether to do
249 // work stealing or not, e.g. it might be a good idea to do it
250 // if the heap is big. For now, we just turn it on or off with
251 // a flag.
252 #endif
253
254 /* Start threads, so they can be spinning up while we finish initialisation.
255 */
256 start_gc_threads();
257
258 #if defined(THREADED_RTS)
259 /* How many threads will be participating in this GC?
260 * We don't try to parallelise minor GCs (unless the user asks for
261 * it with +RTS -qg0), or mark/compact/sweep GC.
262 */
263 if (gc_type == SYNC_GC_PAR) {
264 n_gc_threads = n_capabilities;
265 } else {
266 n_gc_threads = 1;
267 }
268 #else
269 n_gc_threads = 1;
270 #endif
271
272 debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
273 N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
274
275 #ifdef RTS_GTK_FRONTPANEL
276 if (RtsFlags.GcFlags.frontpanel) {
277 updateFrontPanelBeforeGC(N);
278 }
279 #endif
280
281 #ifdef DEBUG
282 // check for memory leaks if DEBUG is on
283 memInventory(DEBUG_gc);
284 #endif
285
286 // check sanity *before* GC
287 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
288
289 // gather blocks allocated using allocatePinned() from each capability
290 // and put them on the g0->large_object list.
291 collect_pinned_object_blocks();
292
293 // Initialise all the generations/steps that we're collecting.
294 for (g = 0; g <= N; g++) {
295 prepare_collected_gen(&generations[g]);
296 }
297 // Initialise all the generations/steps that we're *not* collecting.
298 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
299 prepare_uncollected_gen(&generations[g]);
300 }
301
302 // Prepare this gc_thread
303 init_gc_thread(gct);
304
305 /* Allocate a mark stack if we're doing a major collection.
306 */
307 if (major_gc && oldest_gen->mark) {
308 mark_stack_bd = allocBlock();
309 mark_stack_top_bd = mark_stack_bd;
310 mark_stack_bd->link = NULL;
311 mark_stack_bd->u.back = NULL;
312 mark_sp = mark_stack_bd->start;
313 } else {
314 mark_stack_bd = NULL;
315 mark_stack_top_bd = NULL;
316 mark_sp = NULL;
317 }
318
319 /* -----------------------------------------------------------------------
320 * follow all the roots that we know about:
321 */
322
323 // the main thread is running: this prevents any other threads from
324 // exiting prematurely, so we can start them now.
325 // NB. do this after the mutable lists have been saved above, otherwise
326 // the other GC threads will be writing into the old mutable lists.
327 inc_running();
328 wakeup_gc_threads(gct->thread_index);
329
330 traceEventGcWork(gct->cap);
331
332 // scavenge the capability-private mutable lists. This isn't part
333 // of markCapability() because markCapability() can only
334 // call back into the GC via mark_root() (due to the gct register
335 // variable).
336 if (n_gc_threads == 1) {
337 for (n = 0; n < n_capabilities; n++) {
338 #if defined(THREADED_RTS)
339 scavenge_capability_mut_Lists1(&capabilities[n]);
340 #else
341 scavenge_capability_mut_lists(&capabilities[n]);
342 #endif
343 }
344 } else {
345 scavenge_capability_mut_lists(gct->cap);
346 for (n = 0; n < n_capabilities; n++) {
347 if (gc_threads[n]->idle) {
348 markCapability(mark_root, gct, &capabilities[n],
349 rtsTrue/*don't mark sparks*/);
350 scavenge_capability_mut_lists(&capabilities[n]);
351 }
352 }
353 }
354
355 // follow roots from the CAF list (used by GHCi)
356 gct->evac_gen_no = 0;
357 markCAFs(mark_root, gct);
358
359 // follow all the roots that the application knows about.
360 gct->evac_gen_no = 0;
361 if (n_gc_threads == 1) {
362 for (n = 0; n < n_capabilities; n++) {
363 markCapability(mark_root, gct, &capabilities[n],
364 rtsTrue/*don't mark sparks*/);
365 }
366 } else {
367 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
368 }
369
370 markScheduler(mark_root, gct);
371
372 #if defined(RTS_USER_SIGNALS)
373 // mark the signal handlers (signals should be already blocked)
374 markSignalHandlers(mark_root, gct);
375 #endif
376
377 // Mark the weak pointer list, and prepare to detect dead weak pointers.
378 markWeakPtrList();
379 initWeakForGC();
380
381 // Mark the stable pointer table.
382 markStablePtrTable(mark_root, gct);
383
384 /* -------------------------------------------------------------------------
385 * Repeatedly scavenge all the areas we know about until there's no
386 * more scavenging to be done.
387 */
388 for (;;)
389 {
390 scavenge_until_all_done();
391 // The other threads are now stopped. We might recurse back to
392 // here, but from now on this is the only thread.
393
394 // must be last... invariant is that everything is fully
395 // scavenged at this point.
396 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
397 inc_running();
398 continue;
399 }
400
401 // If we get to here, there's really nothing left to do.
402 break;
403 }
404
405 shutdown_gc_threads(gct->thread_index);
406
407 // Now see which stable names are still alive.
408 gcStablePtrTable();
409
410 #ifdef THREADED_RTS
411 if (n_gc_threads == 1) {
412 for (n = 0; n < n_capabilities; n++) {
413 pruneSparkQueue(&capabilities[n]);
414 }
415 } else {
416 for (n = 0; n < n_capabilities; n++) {
417 if (n == cap->no || gc_threads[n]->idle) {
418 pruneSparkQueue(&capabilities[n]);
419 }
420 }
421 }
422 #endif
423
424 #ifdef PROFILING
425 // We call processHeapClosureForDead() on every closure destroyed during
426 // the current garbage collection, so we invoke LdvCensusForDead().
427 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
428 || RtsFlags.ProfFlags.bioSelector != NULL) {
429 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
430 LdvCensusForDead(N);
431 ACQUIRE_SM_LOCK;
432 }
433 #endif
434
435 // NO MORE EVACUATION AFTER THIS POINT!
436
437 // Finally: compact or sweep the oldest generation.
438 if (major_gc && oldest_gen->mark) {
439 if (oldest_gen->compact)
440 compact(gct->scavenged_static_objects);
441 else
442 sweep(oldest_gen);
443 }
444
445 copied = 0;
446 par_max_copied = 0;
447 par_tot_copied = 0;
448 {
449 nat i;
450 for (i=0; i < n_gc_threads; i++) {
451 if (n_gc_threads > 1) {
452 debugTrace(DEBUG_gc,"thread %d:", i);
453 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
454 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
455 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
456 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
457 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
458 }
459 copied += gc_threads[i]->copied;
460 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
461 }
462 par_tot_copied = copied;
463 if (n_gc_threads == 1) {
464 par_max_copied = 0;
465 par_tot_copied = 0;
466 }
467 }
468
469 // Run through all the generations/steps and tidy up.
470 // We're going to:
471 // - count the amount of "live" data (live_words, live_blocks)
472 // - count the amount of "copied" data in this GC (copied)
473 // - free from-space
474 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
475 //
476 live_words = 0;
477 live_blocks = 0;
478
479 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
480
481 if (g == N) {
482 generations[g].collections++; // for stats
483 if (n_gc_threads > 1) generations[g].par_collections++;
484 }
485
486 // Count the mutable list as bytes "copied" for the purposes of
487 // stats. Every mutable list is copied during every GC.
488 if (g > 0) {
489 nat mut_list_size = 0;
490 for (n = 0; n < n_capabilities; n++) {
491 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
492 }
493 copied += mut_list_size;
494
495 debugTrace(DEBUG_gc,
496 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
497 (unsigned long)(mut_list_size * sizeof(W_)),
498 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
499 }
500
501 bdescr *next, *prev;
502 gen = &generations[g];
503
504 // for generations we collected...
505 if (g <= N) {
506
507 /* free old memory and shift to-space into from-space for all
508 * the collected steps (except the allocation area). These
509 * freed blocks will probably be quickly recycled.
510 */
511 if (gen->mark)
512 {
513 // tack the new blocks on the end of the existing blocks
514 if (gen->old_blocks != NULL) {
515
516 prev = NULL;
517 for (bd = gen->old_blocks; bd != NULL; bd = next) {
518
519 next = bd->link;
520
521 if (!(bd->flags & BF_MARKED))
522 {
523 if (prev == NULL) {
524 gen->old_blocks = next;
525 } else {
526 prev->link = next;
527 }
528 freeGroup(bd);
529 gen->n_old_blocks--;
530 }
531 else
532 {
533 gen->n_words += bd->free - bd->start;
534
535 // NB. this step might not be compacted next
536 // time, so reset the BF_MARKED flags.
537 // They are set before GC if we're going to
538 // compact. (search for BF_MARKED above).
539 bd->flags &= ~BF_MARKED;
540
541 // between GCs, all blocks in the heap except
542 // for the nursery have the BF_EVACUATED flag set.
543 bd->flags |= BF_EVACUATED;
544
545 prev = bd;
546 }
547 }
548
549 if (prev != NULL) {
550 prev->link = gen->blocks;
551 gen->blocks = gen->old_blocks;
552 }
553 }
554 // add the new blocks to the block tally
555 gen->n_blocks += gen->n_old_blocks;
556 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
557 ASSERT(countOccupied(gen->blocks) == gen->n_words);
558 }
559 else // not compacted
560 {
561 freeChain(gen->old_blocks);
562 }
563
564 gen->old_blocks = NULL;
565 gen->n_old_blocks = 0;
566
567 /* LARGE OBJECTS. The current live large objects are chained on
568 * scavenged_large, having been moved during garbage
569 * collection from large_objects. Any objects left on the
570 * large_objects list are therefore dead, so we free them here.
571 */
572 freeChain(gen->large_objects);
573 gen->large_objects = gen->scavenged_large_objects;
574 gen->n_large_blocks = gen->n_scavenged_large_blocks;
575 gen->n_new_large_words = 0;
576 }
577 else // for generations > N
578 {
579 /* For older generations, we need to append the
580 * scavenged_large_object list (i.e. large objects that have been
581 * promoted during this GC) to the large_object list for that step.
582 */
583 for (bd = gen->scavenged_large_objects; bd; bd = next) {
584 next = bd->link;
585 dbl_link_onto(bd, &gen->large_objects);
586 }
587
588 // add the new blocks we promoted during this GC
589 gen->n_large_blocks += gen->n_scavenged_large_blocks;
590 }
591
592 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
593
594 gen->scavenged_large_objects = NULL;
595 gen->n_scavenged_large_blocks = 0;
596
597 // Count "live" data
598 live_words += genLiveWords(gen);
599 live_blocks += genLiveBlocks(gen);
600
601 // add in the partial blocks in the gen_workspaces, but ignore gen 0
602 // if this is a local GC (we can't count another capability's part_list)
603 {
604 nat i;
605 for (i = 0; i < n_capabilities; i++) {
606 live_words += gcThreadLiveWords(i, gen->no);
607 live_blocks += gcThreadLiveBlocks(i, gen->no);
608 }
609 }
610 } // for all generations
611
612 // update the max size of older generations after a major GC
613 resize_generations();
614
615 // Free the mark stack.
616 if (mark_stack_top_bd != NULL) {
617 debugTrace(DEBUG_gc, "mark stack: %d blocks",
618 countBlocks(mark_stack_top_bd));
619 freeChain(mark_stack_top_bd);
620 }
621
622 // Free any bitmaps.
623 for (g = 0; g <= N; g++) {
624 gen = &generations[g];
625 if (gen->bitmap != NULL) {
626 freeGroup(gen->bitmap);
627 gen->bitmap = NULL;
628 }
629 }
630
631 // Reset the nursery: make the blocks empty
632 allocated += clearNurseries();
633
634 resize_nursery();
635
636 resetNurseries();
637
638 // mark the garbage collected CAFs as dead
639 #if 0 && defined(DEBUG) // doesn't work at the moment
640 if (major_gc) { gcCAFs(); }
641 #endif
642
643 #ifdef PROFILING
644 // resetStaticObjectForRetainerProfiling() must be called before
645 // zeroing below.
646
647 // ToDo: fix the gct->scavenged_static_objects below
648 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
649 #endif
650
651 // zero the scavenged static object list
652 if (major_gc) {
653 nat i;
654 if (n_gc_threads == 1) {
655 zero_static_object_list(gct->scavenged_static_objects);
656 } else {
657 for (i = 0; i < n_gc_threads; i++) {
658 if (!gc_threads[i]->idle) {
659 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
660 }
661 }
662 }
663 }
664
665 // Update the stable pointer hash table.
666 updateStablePtrTable(major_gc);
667
668 // unlock the StablePtr table. Must be before scheduleFinalizers(),
669 // because a finalizer may call hs_free_fun_ptr() or
670 // hs_free_stable_ptr(), both of which access the StablePtr table.
671 stablePtrPostGC();
672
673 // Start any pending finalizers. Must be after
674 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
675 RELEASE_SM_LOCK;
676 scheduleFinalizers(cap, old_weak_ptr_list);
677 ACQUIRE_SM_LOCK;
678
679 // check sanity after GC
680 // before resurrectThreads(), because that might overwrite some
681 // closures, which will cause problems with THREADED where we don't
682 // fill slop.
683 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
684
685 // If a heap census is due, we need to do it before
686 // resurrectThreads(), for the same reason as checkSanity above:
687 // resurrectThreads() will overwrite some closures and leave slop
688 // behind.
689 if (do_heap_census) {
690 debugTrace(DEBUG_sched, "performing heap census");
691 RELEASE_SM_LOCK;
692 heapCensus(gct->gc_start_cpu);
693 ACQUIRE_SM_LOCK;
694 }
695
696 // send exceptions to any threads which were about to die
697 RELEASE_SM_LOCK;
698 resurrectThreads(resurrected_threads);
699 ACQUIRE_SM_LOCK;
700
701 if (major_gc) {
702 nat need, got;
703 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
704 got = mblocks_allocated;
705 /* If the amount of data remains constant, next major GC we'll
706 require (F+1)*need. We leave (F+2)*need in order to reduce
707 repeated deallocation and reallocation. */
708 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
709 if (got > need) {
710 returnMemoryToOS(got - need);
711 }
712 }
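// A worked instance of the retention rule above, with hypothetical
// numbers and the default oldGenFactor F = 2: if need = 100 megablocks
// are currently in use, we keep (F+2)*need = 4*100 = 400 megablocks;
// with got = 500, the surplus 100 megablocks are returned to the OS.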
713
714 // extra GC trace info
715 IF_DEBUG(gc, statDescribeGens());
716
717 #ifdef DEBUG
718 // symbol-table based profiling
719 /* heapCensus(to_blocks); */ /* ToDo */
720 #endif
721
722 // restore enclosing cost centre
723 #ifdef PROFILING
724 for (n = 0; n < n_capabilities; n++) {
725 capabilities[n].r.rCCCS = save_CCS[n];
726 }
727 #endif
728
729 #ifdef DEBUG
730 // check for memory leaks if DEBUG is on
731 memInventory(DEBUG_gc);
732 #endif
733
734 #ifdef RTS_GTK_FRONTPANEL
735 if (RtsFlags.GcFlags.frontpanel) {
736 updateFrontPanelAfterGC( N, live_words ); // was 'live', which no longer exists
737 }
738 #endif
739
740 // ok, GC over: tell the stats department what happened.
741 stat_endGC(cap, gct, allocated, live_words, copied,
742 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
743 N, n_gc_threads, par_max_copied, par_tot_copied);
744
745 // Guess which generation we'll collect *next* time
746 initialise_N(force_major_gc);
747
748 #if defined(RTS_USER_SIGNALS)
749 if (RtsFlags.MiscFlags.install_signal_handlers) {
750 // unblock signals again
751 unblockUserSignals();
752 }
753 #endif
754
755 RELEASE_SM_LOCK;
756
757 SET_GCT(saved_gct);
758 }
759
760 /* -----------------------------------------------------------------------------
761 Figure out which generation to collect, initialise N and major_gc.
762
763 Also returns the total number of blocks in generations that will be
764 collected.
765 -------------------------------------------------------------------------- */
766
767 static nat
768 initialise_N (rtsBool force_major_gc)
769 {
770 int g;
771 nat blocks, blocks_total;
772
773 blocks = 0;
774 blocks_total = 0;
775
776 if (force_major_gc) {
777 N = RtsFlags.GcFlags.generations - 1;
778 } else {
779 N = 0;
780 }
781
782 for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
783
784 blocks = generations[g].n_words / BLOCK_SIZE_W
785 + generations[g].n_large_blocks;
786
787 if (blocks >= generations[g].max_blocks) {
788 N = stg_max(N,g);
789 }
790 if ((nat)g <= N) {
791 blocks_total += blocks;
792 }
793 }
794
795 blocks_total += countNurseryBlocks();
796
797 major_gc = (N == RtsFlags.GcFlags.generations-1);
798 return blocks_total;
799 }
800
801 /* -----------------------------------------------------------------------------
802 Initialise the gc_thread structures.
803 -------------------------------------------------------------------------- */
804
805 #define GC_THREAD_INACTIVE 0
806 #define GC_THREAD_STANDING_BY 1
807 #define GC_THREAD_RUNNING 2
808 #define GC_THREAD_WAITING_TO_CONTINUE 3
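/* Worker/main-thread handshake (see gcWorkerThread(), wakeup_gc_threads()
 * and releaseGCThreads() below): a worker starts INACTIVE, announces
 * STANDING_BY in gcWorkerThread() and blocks acquiring its gc_spin lock;
 * wakeup_gc_threads() marks it RUNNING and releases gc_spin; when the
 * worker has finished it sets WAITING_TO_CONTINUE and blocks on mut_spin,
 * which releaseGCThreads() releases after resetting the state to INACTIVE.
 */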
809
810 static void
811 new_gc_thread (nat n, gc_thread *t)
812 {
813 nat g;
814 gen_workspace *ws;
815
816 t->cap = &capabilities[n];
817
818 #ifdef THREADED_RTS
819 t->id = 0;
820 initSpinLock(&t->gc_spin);
821 initSpinLock(&t->mut_spin);
822 ACQUIRE_SPIN_LOCK(&t->gc_spin);
823 t->wakeup = GC_THREAD_INACTIVE; // starts true, so we can wait for the
824 // thread to start up, see wakeup_gc_threads
825 #endif
826
827 t->thread_index = n;
828 t->idle = rtsFalse;
829 t->free_blocks = NULL;
830 t->gc_count = 0;
831
832 init_gc_thread(t);
833
834 #ifdef USE_PAPI
835 t->papi_events = -1;
836 #endif
837
838 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
839 {
840 ws = &t->gens[g];
841 ws->gen = &generations[g];
842 ASSERT(g == ws->gen->no);
843 ws->my_gct = t;
844
845 // We want to call
846 // alloc_todo_block(ws,0);
847 // but can't, because it uses gct which isn't set up at this point.
848 // Hence, allocate a block for todo_bd manually:
849 {
850 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
851 initBdescr(bd, ws->gen, ws->gen->to);
852 bd->flags = BF_EVACUATED;
853 bd->u.scan = bd->free = bd->start;
854
855 ws->todo_bd = bd;
856 ws->todo_free = bd->free;
857 ws->todo_lim = bd->start + BLOCK_SIZE_W;
858 }
859
860 ws->todo_q = newWSDeque(128);
861 ws->todo_overflow = NULL;
862 ws->n_todo_overflow = 0;
863 ws->todo_large_objects = NULL;
864
865 ws->part_list = NULL;
866 ws->n_part_blocks = 0;
867
868 ws->scavd_list = NULL;
869 ws->n_scavd_blocks = 0;
870 }
871 }
872
873
874 void
875 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
876 {
877 #if defined(THREADED_RTS)
878 nat i;
879
880 if (from > 0) {
881 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
882 "initGcThreads");
883 } else {
884 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
885 "initGcThreads");
886 }
887
888 // We have to update the gct->cap pointers to point to the new
889 // Capability array now.
890 for (i = 0; i < from; i++) {
891 gc_threads[i]->cap = &capabilities[gc_threads[i]->cap->no];
892 }
893
894 for (i = from; i < to; i++) {
895 gc_threads[i] =
896 stgMallocBytes(sizeof(gc_thread) +
897 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
898 "alloc_gc_threads");
899
900 new_gc_thread(i, gc_threads[i]);
901 }
902 #else
903 ASSERT(from == 0 && to == 1);
904 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
905 gc_threads[0] = gct;
906 new_gc_thread(0,gc_threads[0]);
907 #endif
908 }
909
910 void
911 freeGcThreads (void)
912 {
913 nat g;
914 if (gc_threads != NULL) {
915 #if defined(THREADED_RTS)
916 nat i;
917 for (i = 0; i < n_capabilities; i++) {
918 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
919 {
920 freeWSDeque(gc_threads[i]->gens[g].todo_q);
921 }
922 stgFree (gc_threads[i]);
923 }
924 stgFree (gc_threads);
925 #else
926 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
927 {
928 freeWSDeque(gc_threads[0]->gens[g].todo_q);
929 }
930 stgFree (gc_threads);
931 #endif
932 gc_threads = NULL;
933 }
934 }
935
936 /* ----------------------------------------------------------------------------
937 Start GC threads
938 ------------------------------------------------------------------------- */
939
940 static volatile StgWord gc_running_threads;
941
942 static StgWord
943 inc_running (void)
944 {
945 StgWord new;
946 new = atomic_inc(&gc_running_threads);
947 ASSERT(new <= n_gc_threads);
948 return new;
949 }
950
951 static StgWord
952 dec_running (void)
953 {
954 ASSERT(gc_running_threads != 0);
955 return atomic_dec(&gc_running_threads);
956 }
957
958 static rtsBool
959 any_work (void)
960 {
961 int g;
962 gen_workspace *ws;
963
964 gct->any_work++;
965
966 write_barrier();
967
968 // scavenge objects in compacted generation
969 if (mark_stack_bd != NULL && !mark_stack_empty()) {
970 return rtsTrue;
971 }
972
973 // Check for global work in any step. We don't need to check for
974 // local work, because we have already exited scavenge_loop(),
975 // which means there is no local work for this thread.
976 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
977 ws = &gct->gens[g];
978 if (ws->todo_large_objects) return rtsTrue;
979 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
980 if (ws->todo_overflow) return rtsTrue;
981 }
982
983 #if defined(THREADED_RTS)
984 if (work_stealing) {
985 nat n;
986 // look for work to steal
987 for (n = 0; n < n_gc_threads; n++) {
988 if (n == gct->thread_index) continue;
989 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
990 ws = &gc_threads[n]->gens[g];
991 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
992 }
993 }
994 }
995 #endif
996
997 gct->no_work++;
998 #if defined(THREADED_RTS)
999 yieldThread();
1000 #endif
1001
1002 return rtsFalse;
1003 }
1004
1005 static void
1006 scavenge_until_all_done (void)
1007 {
1008 DEBUG_ONLY( nat r );
1009
1010
1011 loop:
1012 #if defined(THREADED_RTS)
1013 if (n_gc_threads > 1) {
1014 scavenge_loop();
1015 } else {
1016 scavenge_loop1();
1017 }
1018 #else
1019 scavenge_loop();
1020 #endif
1021
1022 collect_gct_blocks();
1023
1024 // scavenge_loop() only exits when there's no work to do
1025
1026 #ifdef DEBUG
1027 r = dec_running();
1028 #else
1029 dec_running();
1030 #endif
1031
1032 traceEventGcIdle(gct->cap);
1033
1034 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1035
1036 while (gc_running_threads != 0) {
1037 // usleep(1);
1038 if (any_work()) {
1039 inc_running();
1040 traceEventGcWork(gct->cap);
1041 goto loop;
1042 }
1043 // any_work() does not remove the work from the queue, it
1044 // just checks for the presence of work. If we find any,
1045 // then we increment gc_running_threads and go back to
1046 // scavenge_loop() to perform any pending work.
1047 }
1048
1049 traceEventGcDone(gct->cap);
1050 }
1051
1052 #if defined(THREADED_RTS)
1053
1054 void
1055 gcWorkerThread (Capability *cap)
1056 {
1057 gc_thread *saved_gct;
1058
1059 // necessary if we stole a callee-saves register for gct:
1060 saved_gct = gct;
1061
1062 SET_GCT(gc_threads[cap->no]);
1063 gct->id = osThreadId();
1064
1065 stat_gcWorkerThreadStart(gct);
1066
1067 // Wait until we're told to wake up
1068 RELEASE_SPIN_LOCK(&gct->mut_spin);
1069 // yieldThread();
1070 // Strangely, adding a yieldThread() here makes the CPU time
1071 // measurements more accurate on Linux, perhaps because it syncs
1072 // the CPU time across the multiple cores. Without this, CPU time
1073 // is heavily skewed towards GC rather than MUT.
1074 gct->wakeup = GC_THREAD_STANDING_BY;
1075 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1076 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1077
1078 #ifdef USE_PAPI
1079 // start performance counters in this thread...
1080 if (gct->papi_events == -1) {
1081 papi_init_eventset(&gct->papi_events);
1082 }
1083 papi_thread_start_gc1_count(gct->papi_events);
1084 #endif
1085
1086 init_gc_thread(gct);
1087
1088 traceEventGcWork(gct->cap);
1089
1090 // Every thread evacuates some roots.
1091 gct->evac_gen_no = 0;
1092 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1093 scavenge_capability_mut_lists(cap);
1094
1095 scavenge_until_all_done();
1096
1097 #ifdef THREADED_RTS
1098 // Now that the whole heap is marked, we discard any sparks that
1099 // were found to be unreachable. The main GC thread is currently
1100 // marking heap reachable via weak pointers, so it is
1101 // non-deterministic whether a spark will be retained if it is
1102 // only reachable via weak pointers. To fix this problem would
1103 // require another GC barrier, which is too high a price.
1104 pruneSparkQueue(cap);
1105 #endif
1106
1107 #ifdef USE_PAPI
1108 // count events in this thread towards the GC totals
1109 papi_thread_stop_gc1_count(gct->papi_events);
1110 #endif
1111
1112 // Wait until we're told to continue
1113 RELEASE_SPIN_LOCK(&gct->gc_spin);
1114 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1115 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1116 gct->thread_index);
1117 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1118 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1119
1120 // record the time spent doing GC in the Task structure
1121 stat_gcWorkerThreadDone(gct);
1122
1123 SET_GCT(saved_gct);
1124 }
1125
1126 #endif
1127
1128 #if defined(THREADED_RTS)
1129
1130 void
1131 waitForGcThreads (Capability *cap USED_IF_THREADS)
1132 {
1133 const nat n_threads = n_capabilities;
1134 const nat me = cap->no;
1135 nat i, j;
1136 rtsBool retry = rtsTrue;
1137
1138 while(retry) {
1139 for (i=0; i < n_threads; i++) {
1140 if (i == me || gc_threads[i]->idle) continue;
1141 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1142 prodCapability(&capabilities[i], cap->running_task);
1143 }
1144 }
1145 for (j=0; j < 10; j++) {
1146 retry = rtsFalse;
1147 for (i=0; i < n_threads; i++) {
1148 if (i == me || gc_threads[i]->idle) continue;
1149 write_barrier();
1150 interruptCapability(&capabilities[i]);
1151 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1152 retry = rtsTrue;
1153 }
1154 }
1155 if (!retry) break;
1156 yieldThread();
1157 }
1158 }
1159 }
1160
1161 #endif // THREADED_RTS
1162
1163 static void
1164 start_gc_threads (void)
1165 {
1166 #if defined(THREADED_RTS)
1167 gc_running_threads = 0;
1168 #endif
1169 }
1170
1171 static void
1172 wakeup_gc_threads (nat me USED_IF_THREADS)
1173 {
1174 #if defined(THREADED_RTS)
1175 nat i;
1176
1177 if (n_gc_threads == 1) return;
1178
1179 for (i=0; i < n_gc_threads; i++) {
1180 if (i == me || gc_threads[i]->idle) continue;
1181 inc_running();
1182 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1183 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1184
1185 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1186 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1187 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1188 }
1189 #endif
1190 }
1191
1192 // After GC is complete, we must wait for all GC threads to enter the
1193 // GC_THREAD_WAITING_TO_CONTINUE state, otherwise they may still be executing inside
1194 // any_work(), and may even remain awake until the next GC starts.
1195 static void
1196 shutdown_gc_threads (nat me USED_IF_THREADS)
1197 {
1198 #if defined(THREADED_RTS)
1199 nat i;
1200
1201 if (n_gc_threads == 1) return;
1202
1203 for (i=0; i < n_gc_threads; i++) {
1204 if (i == me || gc_threads[i]->idle) continue;
1205 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1206 }
1207 #endif
1208 }
1209
1210 #if defined(THREADED_RTS)
1211 void
1212 releaseGCThreads (Capability *cap USED_IF_THREADS)
1213 {
1214 const nat n_threads = n_capabilities;
1215 const nat me = cap->no;
1216 nat i;
1217 for (i=0; i < n_threads; i++) {
1218 if (i == me || gc_threads[i]->idle) continue;
1219 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1220 barf("releaseGCThreads");
1221
1222 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1223 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1224 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1225 }
1226 }
1227 #endif
1228
1229 /* ----------------------------------------------------------------------------
1230 Initialise a generation that is to be collected
1231 ------------------------------------------------------------------------- */
1232
1233 static void
1234 prepare_collected_gen (generation *gen)
1235 {
1236 nat i, g, n;
1237 gen_workspace *ws;
1238 bdescr *bd, *next;
1239
1240 // Throw away the current mutable list. Invariant: the mutable
1241 // list always has at least one block; this means we can avoid a
1242 // check for NULL in recordMutable().
1243 g = gen->no;
1244 if (g != 0) {
1245 for (i = 0; i < n_capabilities; i++) {
1246 freeChain(capabilities[i].mut_lists[g]);
1247 capabilities[i].mut_lists[g] = allocBlock();
1248 }
1249 }
1250
1251 gen = &generations[g];
1252 ASSERT(gen->no == g);
1253
1254 // we'll construct a new list of threads in this step
1255 // during GC, so throw away the current list.
1256 gen->old_threads = gen->threads;
1257 gen->threads = END_TSO_QUEUE;
1258
1259 // deprecate the existing blocks
1260 gen->old_blocks = gen->blocks;
1261 gen->n_old_blocks = gen->n_blocks;
1262 gen->blocks = NULL;
1263 gen->n_blocks = 0;
1264 gen->n_words = 0;
1265 gen->live_estimate = 0;
1266
1267 // initialise the large object queues.
1268 ASSERT(gen->scavenged_large_objects == NULL);
1269 ASSERT(gen->n_scavenged_large_blocks == 0);
1270
1271 // grab all the partial blocks stashed in the gc_thread workspaces and
1272 // move them to the old_blocks list of this gen.
1273 for (n = 0; n < n_capabilities; n++) {
1274 ws = &gc_threads[n]->gens[gen->no];
1275
1276 for (bd = ws->part_list; bd != NULL; bd = next) {
1277 next = bd->link;
1278 bd->link = gen->old_blocks;
1279 gen->old_blocks = bd;
1280 gen->n_old_blocks += bd->blocks;
1281 }
1282 ws->part_list = NULL;
1283 ws->n_part_blocks = 0;
1284
1285 ASSERT(ws->scavd_list == NULL);
1286 ASSERT(ws->n_scavd_blocks == 0);
1287
1288 if (ws->todo_free != ws->todo_bd->start) {
1289 ws->todo_bd->free = ws->todo_free;
1290 ws->todo_bd->link = gen->old_blocks;
1291 gen->old_blocks = ws->todo_bd;
1292 gen->n_old_blocks += ws->todo_bd->blocks;
1293 alloc_todo_block(ws,0); // always has one block.
1294 }
1295 }
1296
1297 // mark the small objects as from-space
1298 for (bd = gen->old_blocks; bd; bd = bd->link) {
1299 bd->flags &= ~BF_EVACUATED;
1300 }
1301
1302 // mark the large objects as from-space
1303 for (bd = gen->large_objects; bd; bd = bd->link) {
1304 bd->flags &= ~BF_EVACUATED;
1305 }
1306
1307 // for a compacted generation, we need to allocate the bitmap
1308 if (gen->mark) {
1309 lnat bitmap_size; // in bytes
1310 bdescr *bitmap_bdescr;
1311 StgWord *bitmap;
1312
1313 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
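// one mark bit per word of from-space: with the usual 4096-byte blocks
// this is 4096 / (8*8) = 64 bytes of bitmap per block on a 64-bit
// machine, or 4096 / (4*8) = 128 bytes on a 32-bit machine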
1314
1315 if (bitmap_size > 0) {
1316 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1317 / BLOCK_SIZE);
1318 gen->bitmap = bitmap_bdescr;
1319 bitmap = bitmap_bdescr->start;
1320
1321 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1322 bitmap_size, bitmap);
1323
1324 // don't forget to fill it with zeros!
1325 memset(bitmap, 0, bitmap_size);
1326
1327 // For each block in this step, point to its bitmap from the
1328 // block descriptor.
1329 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1330 bd->u.bitmap = bitmap;
1331 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1332
1333 // Also at this point we set the BF_MARKED flag
1334 // for this block. The invariant is that
1335 // BF_MARKED is always unset, except during GC
1336 // when it is set on those blocks which will be
1337 // compacted.
1338 if (!(bd->flags & BF_FRAGMENTED)) {
1339 bd->flags |= BF_MARKED;
1340 }
1341
1342 // BF_SWEPT should be marked only for blocks that are being
1343 // collected in sweep()
1344 bd->flags &= ~BF_SWEPT;
1345 }
1346 }
1347 }
1348 }
1349
1350
1351 /* ----------------------------------------------------------------------------
1352 Save the mutable lists in saved_mut_lists
1353 ------------------------------------------------------------------------- */
1354
1355 static void
1356 stash_mut_list (Capability *cap, nat gen_no)
1357 {
1358 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1359 cap->mut_lists[gen_no] = allocBlock_sync();
1360 }
1361
1362 /* ----------------------------------------------------------------------------
1363 Initialise a generation that is *not* to be collected
1364 ------------------------------------------------------------------------- */
1365
1366 static void
1367 prepare_uncollected_gen (generation *gen)
1368 {
1369 nat i;
1370
1371
1372 ASSERT(gen->no > 0);
1373
1374 // save the current mutable lists for this generation, and
1375 // allocate a fresh block for each one. We'll traverse these
1376 // mutable lists as roots early on in the GC.
1377 for (i = 0; i < n_capabilities; i++) {
1378 stash_mut_list(&capabilities[i], gen->no);
1379 }
1380
1381 ASSERT(gen->scavenged_large_objects == NULL);
1382 ASSERT(gen->n_scavenged_large_blocks == 0);
1383 }
1384
1385 /* -----------------------------------------------------------------------------
1386 Collect the completed blocks from a GC thread and attach them to
1387 the generation.
1388 -------------------------------------------------------------------------- */
1389
1390 static void
1391 collect_gct_blocks (void)
1392 {
1393 nat g;
1394 gen_workspace *ws;
1395 bdescr *bd, *prev;
1396
1397 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1398 ws = &gct->gens[g];
1399
1400 // there may still be a block attached to ws->todo_bd;
1401 // leave it there to use next time.
1402
1403 if (ws->scavd_list != NULL) {
1404 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1405
1406 ASSERT(gct->scan_bd == NULL);
1407 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1408
1409 prev = NULL;
1410 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1411 ws->gen->n_words += bd->free - bd->start;
1412 prev = bd;
1413 }
1414 if (prev != NULL) {
1415 prev->link = ws->gen->blocks;
1416 ws->gen->blocks = ws->scavd_list;
1417 }
1418 ws->gen->n_blocks += ws->n_scavd_blocks;
1419
1420 ws->scavd_list = NULL;
1421 ws->n_scavd_blocks = 0;
1422
1423 RELEASE_SPIN_LOCK(&ws->gen->sync);
1424 }
1425 }
1426 }
1427
1428 /* -----------------------------------------------------------------------------
1429 During mutation, any blocks that are filled by allocatePinned() are
1430 stashed on the local pinned_object_blocks list, to avoid needing to
1431 take a global lock. Here we collect those blocks from the
1432 cap->pinned_object_blocks lists and put them on the
1433 main g0->large_object list.
1434
1435 Returns: the number of words allocated this way, for stats
1436 purposes.
1437 -------------------------------------------------------------------------- */
1438
1439 static lnat
1440 collect_pinned_object_blocks (void)
1441 {
1442 nat n;
1443 bdescr *bd, *prev;
1444 lnat allocated = 0;
1445
1446 for (n = 0; n < n_capabilities; n++) {
1447 prev = NULL;
1448 for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
1449 allocated += bd->free - bd->start;
1450 prev = bd;
1451 }
1452 if (prev != NULL) {
1453 prev->link = g0->large_objects;
1454 if (g0->large_objects != NULL) {
1455 g0->large_objects->u.back = prev;
1456 }
1457 g0->large_objects = capabilities[n].pinned_object_blocks;
1458 capabilities[n].pinned_object_blocks = 0;
1459 }
1460 }
1461
1462 return allocated;
1463 }
1464
1465 /* -----------------------------------------------------------------------------
1466 Initialise a gc_thread before GC
1467 -------------------------------------------------------------------------- */
1468
1469 static void
1470 init_gc_thread (gc_thread *t)
1471 {
1472 t->static_objects = END_OF_STATIC_LIST;
1473 t->scavenged_static_objects = END_OF_STATIC_LIST;
1474 t->scan_bd = NULL;
1475 t->mut_lists = t->cap->mut_lists;
1476 t->evac_gen_no = 0;
1477 t->failed_to_evac = rtsFalse;
1478 t->eager_promotion = rtsTrue;
1479 t->thunk_selector_depth = 0;
1480 t->copied = 0;
1481 t->scanned = 0;
1482 t->any_work = 0;
1483 t->no_work = 0;
1484 t->scav_find_work = 0;
1485 }
1486
1487 /* -----------------------------------------------------------------------------
1488 Function we pass to evacuate roots.
1489 -------------------------------------------------------------------------- */
1490
1491 static void
1492 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1493 {
1494 // we stole a register for gct, but this function is called from
1495 // *outside* the GC where the register variable is not in effect,
1496 // so we need to save and restore it here. NB. only call
1497 // mark_root() from the main GC thread, otherwise gct will be
1498 // incorrect.
1499 #if defined(THREADED_RTS)
1500 gc_thread *saved_gct;
1501 saved_gct = gct;
1502 #endif
1503 SET_GCT(user);
1504
1505 evacuate(root);
1506
1507 SET_GCT(saved_gct);
1508 }
1509
1510 /* -----------------------------------------------------------------------------
1511 Initialising the static object & mutable lists
1512 -------------------------------------------------------------------------- */
1513
1514 static void
1515 zero_static_object_list(StgClosure* first_static)
1516 {
1517 StgClosure* p;
1518 StgClosure* link;
1519 const StgInfoTable *info;
1520
1521 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1522 info = get_itbl(p);
1523 link = *STATIC_LINK(info, p);
1524 *STATIC_LINK(info,p) = NULL;
1525 }
1526 }
1527
1528 /* ----------------------------------------------------------------------------
1529 Reset the sizes of the older generations when we do a major
1530 collection.
1531
1532 CURRENT STRATEGY: make all generations except zero the same size.
1533 We have to stay within the maximum heap size, and leave a certain
1534 percentage of the maximum heap size available to allocate into.
1535 ------------------------------------------------------------------------- */
1536
1537 static void
1538 resize_generations (void)
1539 {
1540 nat g;
1541
1542 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1543 nat live, size, min_alloc, words;
1544 const nat max = RtsFlags.GcFlags.maxHeapSize;
1545 const nat gens = RtsFlags.GcFlags.generations;
1546
1547 // live in the oldest generations
1548 if (oldest_gen->live_estimate != 0) {
1549 words = oldest_gen->live_estimate;
1550 } else {
1551 words = oldest_gen->n_words;
1552 }
1553 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1554 oldest_gen->n_large_blocks;
1555
1556 // default max size for all generations except zero
1557 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1558 RtsFlags.GcFlags.minOldGenSize);
1559
1560 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1561 RtsFlags.GcFlags.heapSizeSuggestion = size;
1562 }
1563
1564 // minimum size for generation zero
1565 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1566 RtsFlags.GcFlags.minAllocAreaSize);
1567
1568 // Auto-enable compaction when the residency reaches a
1569 // certain percentage of the maximum heap size (default: 30%).
1570 if (RtsFlags.GcFlags.compact ||
1571 (max > 0 &&
1572 oldest_gen->n_blocks >
1573 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1574 oldest_gen->mark = 1;
1575 oldest_gen->compact = 1;
1576 // debugBelch("compaction: on\n", live);
1577 } else {
1578 oldest_gen->mark = 0;
1579 oldest_gen->compact = 0;
1580 // debugBelch("compaction: off\n", live);
1581 }
1582
1583 if (RtsFlags.GcFlags.sweep) {
1584 oldest_gen->mark = 1;
1585 }
1586
1587 // if we're going to go over the maximum heap size, reduce the
1588 // size of the generations accordingly. The calculation is
1589 // different if compaction is turned on, because we don't need
1590 // to double the space required to collect the old generation.
1591 if (max != 0) {
1592
1593 // this test is necessary to ensure that the calculations
1594 // below don't have any negative results - we're working
1595 // with unsigned values here.
1596 if (max < min_alloc) {
1597 heapOverflow();
1598 }
1599
1600 if (oldest_gen->compact) {
1601 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1602 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1603 }
1604 } else {
1605 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1606 size = (max - min_alloc) / ((gens - 1) * 2);
1607 }
1608 }
1609
1610 if (size < live) {
1611 heapOverflow();
1612 }
1613 }
1614
1615 #if 0
1616 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1617 min_alloc, size, max);
1618 #endif
1619
1620 for (g = 0; g < gens; g++) {
1621 generations[g].max_blocks = size;
1622 }
1623 }
1624 }
1625
1626 /* -----------------------------------------------------------------------------
1627 Calculate the new size of the nursery, and resize it.
1628 -------------------------------------------------------------------------- */
1629
1630 static void
1631 resize_nursery (void)
1632 {
1633 const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1634
1635 if (RtsFlags.GcFlags.generations == 1)
1636 { // Two-space collector:
1637 nat blocks;
1638
1639 /* set up a new nursery. Allocate a nursery size based on a
1640 * function of the amount of live data (by default a factor of 2)
1641 * Use the blocks from the old nursery if possible, freeing up any
1642 * left over blocks.
1643 *
1644 * If we get near the maximum heap size, then adjust our nursery
1645 * size accordingly. If the nursery is the same size as the live
1646 * data (L), then we need 3L bytes. We can reduce the size of the
1647 * nursery to bring the required memory down near 2L bytes.
1648 *
1649 * A normal 2-space collector would need 4L bytes to give the same
1650 * performance we get from 3L bytes, reducing to the same
1651 * performance at 2L bytes.
1652 */
1653 blocks = generations[0].n_blocks;
1654
1655 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1656 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1657 RtsFlags.GcFlags.maxHeapSize )
1658 {
1659 long adjusted_blocks; // signed on purpose
1660 int pc_free;
1661
1662 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1663
1664 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1665 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1666
1667 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1668 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1669 {
1670 heapOverflow();
1671 }
1672 blocks = adjusted_blocks;
1673 }
1674 else
1675 {
1676 blocks *= RtsFlags.GcFlags.oldGenFactor;
1677 if (blocks < min_nursery)
1678 {
1679 blocks = min_nursery;
1680 }
1681 }
1682 resizeNurseries(blocks);
1683 }
1684 else // Generational collector
1685 {
1686 /*
1687 * If the user has given us a suggested heap size, adjust our
1688 * allocation area to make best use of the memory available.
1689 */
1690 if (RtsFlags.GcFlags.heapSizeSuggestion)
1691 {
1692 long blocks;
1693 const nat needed = calcNeeded(); // approx blocks needed at next GC
1694
1695 /* Guess how much will be live in generation 0 step 0 next time.
1696 * A good approximation is obtained by finding the
1697 * percentage of g0 that was live at the last minor GC.
1698 *
1699 * We have an accurate figure for the amount of copied data in
1700 * 'copied', but we must convert this to a number of blocks, with
1701 * a small adjustment for estimated slop at the end of a block
1702 * (- 10 words).
1703 */
1704 if (N == 0)
1705 {
1706 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1707 / countNurseryBlocks();
1708 }
1709
1710 /* Estimate a size for the allocation area based on the
1711 * information available. We might end up going slightly under
1712 * or over the suggested heap size, but we should be pretty
1713 * close on average.
1714 *
1715 * Formula: suggested - needed
1716 * ----------------------------
1717 * 1 + g0_pcnt_kept/100
1718 *
1719 * where 'needed' is the amount of memory needed at the next
1720 * collection for collecting all gens except g0.
1721 */
1722 blocks =
1723 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1724 (100 + (long)g0_pcnt_kept);
1725
1726 if (blocks < (long)min_nursery) {
1727 blocks = min_nursery;
1728 }
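/* A worked instance of the formula above, with hypothetical numbers:
 * heapSizeSuggestion = 25600 blocks (100MB of 4K blocks), needed = 5600
 * and g0_pcnt_kept = 30 give (25600 - 5600) * 100 / 130 = 15384 blocks,
 * so the nursery plus the ~30% of it expected to survive plus 'needed'
 * comes out close to the suggested total.
 */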
1729
1730 resizeNurseries((nat)blocks);
1731 }
1732 else
1733 {
1734 // we might have added extra large blocks to the nursery, so
1735 // resize back to minAllocAreaSize again.
1736 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1737 }
1738 }
1739 }
1740
1741 /* -----------------------------------------------------------------------------
1742 Sanity code for CAF garbage collection.
1743
1744 With DEBUG turned on, we manage a CAF list in addition to the SRT
1745 mechanism. After GC, we run down the CAF list and blackhole any
1746 CAFs which have been garbage collected. This means we get an error
1747 whenever the program tries to enter a garbage collected CAF.
1748
1749 Any garbage collected CAFs are taken off the CAF list at the same
1750 time.
1751 -------------------------------------------------------------------------- */
1752
1753 #if 0 && defined(DEBUG)
1754
1755 static void
1756 gcCAFs(void)
1757 {
1758 StgClosure* p;
1759 StgClosure** pp;
1760 const StgInfoTable *info;
1761 nat i;
1762
1763 i = 0;
1764 p = caf_list;
1765 pp = &caf_list;
1766
1767 while (p != NULL) {
1768
1769 info = get_itbl(p);
1770
1771 ASSERT(info->type == IND_STATIC);
1772
1773 if (STATIC_LINK(info,p) == NULL) {
1774 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1775 // black hole it
1776 SET_INFO(p,&stg_BLACKHOLE_info);
1777 p = STATIC_LINK2(info,p);
1778 *pp = p;
1779 }
1780 else {
1781 pp = &STATIC_LINK2(info,p);
1782 p = *pp;
1783 i++;
1784 }
1785
1786 }
1787
1788 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1789 }
1790 #endif