[ghc.git] / rts / sm / GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "GCTDecl.h"
44 #include "Compact.h"
45 #include "Evac.h"
46 #include "Scav.h"
47 #include "GCUtils.h"
48 #include "MarkStack.h"
49 #include "MarkWeak.h"
50 #include "Sparks.h"
51 #include "Sweep.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
93 */
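/* As a purely illustrative sketch of the consing described above (the
 * real code lives in Evac.c and uses the per-thread gct->static_objects
 * list), pushing an object onto a static list looks roughly like this.
 * It is disabled, in the same spirit as gcCAFs() at the end of this file;
 * push_static_object_sketch is not a real RTS function.
 */
#if 0
static void
push_static_object_sketch (StgClosure *p, StgClosure **static_list)
{
    const StgInfoTable *info = get_itbl(p);

    // A NULL static link field means the object is not yet on any list
    // (a linked object has a non-zero link; the end-of-list marker is
    // '1', as noted above), so cons it onto the front of the list.
    if (*STATIC_LINK(info, p) == NULL) {
        *STATIC_LINK(info, p) = *static_list;
        *static_list = p;
    }
}
#endif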
94
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
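// For example, with the default +RTS -G2 there are two generations: a
// minor collection has N == 0, and a major collection has N == 1 and
// major_gc set.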
100 nat N;
101 rtsBool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #ifdef DEBUG
109 nat mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_OTHERS;
113 #endif
114
115 /* Thread-local data for each GC thread
116 */
117 gc_thread **gc_threads = NULL;
118
119 #if !defined(THREADED_RTS)
120 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
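// (64 appears to be just a generous upper bound on the number of
// generations here: the gc_thread struct is followed by one gen_workspace
// per generation, see new_gc_thread())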
121 #endif
122
123 // Number of threads running in *this* GC. Affects how many
124 // per-generation workspace todo lists we have to look in to find work.
125 nat n_gc_threads;
126
127 // For stats:
128 long copied; // *words* copied & scavenged during this GC
129
130 rtsBool work_stealing;
131
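// DECLARE_GCT (from GCTDecl.h) declares 'gct', the gc_thread of the
// current OS thread; depending on the platform it lives in a global
// register or in thread-local storage, which is why code below saves and
// restores it around GC (see saved_gct).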
132 DECLARE_GCT
133
134 /* -----------------------------------------------------------------------------
135 Static function declarations
136 -------------------------------------------------------------------------- */
137
138 static void mark_root (void *user, StgClosure **root);
139 static void zero_static_object_list (StgClosure* first_static);
140 static nat initialise_N (rtsBool force_major_gc);
141 static void prepare_collected_gen (generation *gen);
142 static void prepare_uncollected_gen (generation *gen);
143 static void init_gc_thread (gc_thread *t);
144 static void resize_generations (void);
145 static void resize_nursery (void);
146 static void start_gc_threads (void);
147 static void scavenge_until_all_done (void);
148 static StgWord inc_running (void);
149 static StgWord dec_running (void);
150 static void wakeup_gc_threads (nat me);
151 static void shutdown_gc_threads (nat me);
152 static void collect_gct_blocks (void);
153 static lnat collect_pinned_object_blocks (void);
154
155 #if 0 && defined(DEBUG)
156 static void gcCAFs (void);
157 #endif
158
159 /* -----------------------------------------------------------------------------
160 The mark stack.
161 -------------------------------------------------------------------------- */
162
163 bdescr *mark_stack_top_bd; // topmost block in the mark stack
164 bdescr *mark_stack_bd; // current block in the mark stack
165 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
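// The mark stack occupies a chain of blocks starting at mark_stack_top_bd;
// mark_stack_bd is the block currently being pushed to and popped from,
// and mark_sp points at the next free word within it. The push/pop
// operations themselves live in MarkStack.h.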
166
167 /* -----------------------------------------------------------------------------
168 GarbageCollect: the main entry point to the garbage collector.
169
170 Locks held: all capabilities are held throughout GarbageCollect().
171 -------------------------------------------------------------------------- */
172
173 void
174 GarbageCollect (rtsBool force_major_gc,
175 rtsBool do_heap_census,
176 nat gc_type USED_IF_THREADS,
177 Capability *cap)
178 {
179 bdescr *bd;
180 generation *gen;
181 lnat live_blocks, live_words, allocated, par_max_copied, par_tot_copied;
182 #if defined(THREADED_RTS)
183 gc_thread *saved_gct;
184 #endif
185 nat g, n;
186
187 // necessary if we stole a callee-saves register for gct:
188 #if defined(THREADED_RTS)
189 saved_gct = gct;
190 #endif
191
192 #ifdef PROFILING
193 CostCentreStack *save_CCS[n_capabilities];
194 #endif
195
196 ACQUIRE_SM_LOCK;
197
198 #if defined(RTS_USER_SIGNALS)
199 if (RtsFlags.MiscFlags.install_signal_handlers) {
200 // block signals
201 blockUserSignals();
202 }
203 #endif
204
205 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
206 // otherwise adjust the padding in gen_workspace.
207
208 // this is the main thread
209 SET_GCT(gc_threads[cap->no]);
210
211 // tell the stats department that we've started a GC
212 stat_startGC(cap, gct);
213
214 // lock the StablePtr table
215 stablePtrPreGC();
216
217 #ifdef DEBUG
218 mutlist_MUTVARS = 0;
219 mutlist_MUTARRS = 0;
220 mutlist_OTHERS = 0;
221 #endif
222
223 // attribute any costs to CCS_GC
224 #ifdef PROFILING
225 for (n = 0; n < n_capabilities; n++) {
226 save_CCS[n] = capabilities[n].r.rCCCS;
227 capabilities[n].r.rCCCS = CCS_GC;
228 }
229 #endif
230
231 /* Approximate how much we allocated.
232 * Todo: only when generating stats?
233 */
234 allocated = countLargeAllocated(); /* don't count the nursery yet */
235
236 /* Figure out which generation to collect
237 */
238 n = initialise_N(force_major_gc);
239
240 #if defined(THREADED_RTS)
241 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
242 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
243 // It's not always a good idea to do load balancing in parallel
244 // GC. In particular, for a parallel program we don't want to
245 // lose locality by moving cached data into another CPU's cache
246 // (this effect can be quite significant).
247 //
248 // We could have a more complex way to determine whether to do
249 // work stealing or not, e.g. it might be a good idea to do it
250 // if the heap is big. For now, we just turn it on or off with
251 // a flag.
252 #endif
253
254 /* Start threads, so they can be spinning up while we finish initialisation.
255 */
256 start_gc_threads();
257
258 #if defined(THREADED_RTS)
259 /* How many threads will be participating in this GC?
260 * We don't try to parallelise minor GCs (unless the user asks for
261 * it with +RTS -qg0), or mark/compact/sweep GC.
262 */
263 if (gc_type == SYNC_GC_PAR) {
264 n_gc_threads = n_capabilities;
265 } else {
266 n_gc_threads = 1;
267 }
268 #else
269 n_gc_threads = 1;
270 #endif
271
272 debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
273 N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
274
275 #ifdef RTS_GTK_FRONTPANEL
276 if (RtsFlags.GcFlags.frontpanel) {
277 updateFrontPanelBeforeGC(N);
278 }
279 #endif
280
281 #ifdef DEBUG
282 // check for memory leaks if DEBUG is on
283 memInventory(DEBUG_gc);
284 #endif
285
286 // check sanity *before* GC
287 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
288
289 // gather blocks allocated using allocatePinned() from each capability
290 // and put them on the g0->large_object list.
291 collect_pinned_object_blocks();
292
293 // Initialise all the generations/steps that we're collecting.
294 for (g = 0; g <= N; g++) {
295 prepare_collected_gen(&generations[g]);
296 }
297 // Initialise all the generations/steps that we're *not* collecting.
298 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
299 prepare_uncollected_gen(&generations[g]);
300 }
301
302 // Prepare this gc_thread
303 init_gc_thread(gct);
304
305 /* Allocate a mark stack if we're doing a major collection.
306 */
307 if (major_gc && oldest_gen->mark) {
308 mark_stack_bd = allocBlock();
309 mark_stack_top_bd = mark_stack_bd;
310 mark_stack_bd->link = NULL;
311 mark_stack_bd->u.back = NULL;
312 mark_sp = mark_stack_bd->start;
313 } else {
314 mark_stack_bd = NULL;
315 mark_stack_top_bd = NULL;
316 mark_sp = NULL;
317 }
318
319 /* -----------------------------------------------------------------------
320 * follow all the roots that we know about:
321 */
322
323 // the main thread is running: this prevents any other threads from
324 // exiting prematurely, so we can start them now.
325 // NB. do this after the mutable lists have been saved above, otherwise
326 // the other GC threads will be writing into the old mutable lists.
327 inc_running();
328 wakeup_gc_threads(gct->thread_index);
329
330 traceEventGcWork(gct->cap);
331
332 // scavenge the capability-private mutable lists. This isn't part
333 // of markCapability() because markCapability() can only
334 // call back into the GC via mark_root() (due to the gct register
335 // variable).
336 if (n_gc_threads == 1) {
337 for (n = 0; n < n_capabilities; n++) {
338 #if defined(THREADED_RTS)
339 scavenge_capability_mut_Lists1(&capabilities[n]);
340 #else
341 scavenge_capability_mut_lists(&capabilities[n]);
342 #endif
343 }
344 } else {
345 scavenge_capability_mut_lists(gct->cap);
346 for (n = 0; n < n_capabilities; n++) {
347 if (gc_threads[n]->idle) {
348 markCapability(mark_root, gct, &capabilities[n],
349 rtsTrue/*don't mark sparks*/);
350 scavenge_capability_mut_lists(&capabilities[n]);
351 }
352 }
353 }
354
355 // follow roots from the CAF list (used by GHCi)
356 gct->evac_gen_no = 0;
357 markCAFs(mark_root, gct);
358
359 // follow all the roots that the application knows about.
360 gct->evac_gen_no = 0;
361 if (n_gc_threads == 1) {
362 for (n = 0; n < n_capabilities; n++) {
363 markCapability(mark_root, gct, &capabilities[n],
364 rtsTrue/*don't mark sparks*/);
365 }
366 } else {
367 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
368 }
369
370 markScheduler(mark_root, gct);
371
372 #if defined(RTS_USER_SIGNALS)
373 // mark the signal handlers (signals should be already blocked)
374 markSignalHandlers(mark_root, gct);
375 #endif
376
377 // Mark the weak pointer list, and prepare to detect dead weak pointers.
378 markWeakPtrList();
379 initWeakForGC();
380
381 // Mark the stable pointer table.
382 markStablePtrTable(mark_root, gct);
383
384 /* -------------------------------------------------------------------------
385 * Repeatedly scavenge all the areas we know about until there's no
386 * more scavenging to be done.
387 */
388 for (;;)
389 {
390 scavenge_until_all_done();
391 // The other threads are now stopped. We might recurse back to
392 // here, but from now on this is the only thread.
393
394 // must be last... invariant is that everything is fully
395 // scavenged at this point.
396 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
397 inc_running();
398 continue;
399 }
400
401 // If we get to here, there's really nothing left to do.
402 break;
403 }
404
405 shutdown_gc_threads(gct->thread_index);
406
407 // Now see which stable names are still alive.
408 gcStablePtrTable();
409
410 #ifdef THREADED_RTS
411 if (n_gc_threads == 1) {
412 for (n = 0; n < n_capabilities; n++) {
413 pruneSparkQueue(&capabilities[n]);
414 }
415 } else {
416 for (n = 0; n < n_capabilities; n++) {
417 if (n == cap->no || gc_threads[n]->idle) {
418 pruneSparkQueue(&capabilities[n]);
419 }
420 }
421 }
422 #endif
423
424 #ifdef PROFILING
425 // We call processHeapClosureForDead() on every closure destroyed during
426 // the current garbage collection, so we invoke LdvCensusForDead().
427 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
428 || RtsFlags.ProfFlags.bioSelector != NULL) {
429 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
430 LdvCensusForDead(N);
431 ACQUIRE_SM_LOCK;
432 }
433 #endif
434
435 // NO MORE EVACUATION AFTER THIS POINT!
436
437 // Finally: compact or sweep the oldest generation.
438 if (major_gc && oldest_gen->mark) {
439 if (oldest_gen->compact)
440 compact(gct->scavenged_static_objects);
441 else
442 sweep(oldest_gen);
443 }
444
445 copied = 0;
446 par_max_copied = 0;
447 par_tot_copied = 0;
448 {
449 nat i;
450 for (i=0; i < n_gc_threads; i++) {
451 if (n_gc_threads > 1) {
452 debugTrace(DEBUG_gc,"thread %d:", i);
453 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
454 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
455 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
456 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
457 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
458 }
459 copied += gc_threads[i]->copied;
460 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
461 }
462 par_tot_copied = copied;
463 if (n_gc_threads == 1) {
464 par_max_copied = 0;
465 par_tot_copied = 0;
466 }
467 }
468
469 // Run through all the generations/steps and tidy up.
470 // We're going to:
471 // - count the amount of "live" data (live_words, live_blocks)
472 // - count the amount of "copied" data in this GC (copied)
473 // - free from-space
474 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
475 //
476 live_words = 0;
477 live_blocks = 0;
478
479 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
480
481 if (g == N) {
482 generations[g].collections++; // for stats
483 if (n_gc_threads > 1) generations[g].par_collections++;
484 }
485
486 // Count the mutable list as bytes "copied" for the purposes of
487 // stats. Every mutable list is copied during every GC.
488 if (g > 0) {
489 nat mut_list_size = 0;
490 for (n = 0; n < n_capabilities; n++) {
491 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
492 }
493 copied += mut_list_size;
494
495 debugTrace(DEBUG_gc,
496 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
497 (unsigned long)(mut_list_size * sizeof(W_)),
498 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
499 }
500
501 bdescr *next, *prev;
502 gen = &generations[g];
503
504 // for generations we collected...
505 if (g <= N) {
506
507 /* free old memory and shift to-space into from-space for all
508 * the collected steps (except the allocation area). These
509 * freed blocks will probably be quickly recycled.
510 */
511 if (gen->mark)
512 {
513 // tack the new blocks on the end of the existing blocks
514 if (gen->old_blocks != NULL) {
515
516 prev = NULL;
517 for (bd = gen->old_blocks; bd != NULL; bd = next) {
518
519 next = bd->link;
520
521 if (!(bd->flags & BF_MARKED))
522 {
523 if (prev == NULL) {
524 gen->old_blocks = next;
525 } else {
526 prev->link = next;
527 }
528 freeGroup(bd);
529 gen->n_old_blocks--;
530 }
531 else
532 {
533 gen->n_words += bd->free - bd->start;
534
535 // NB. this step might not be compacted next
536 // time, so reset the BF_MARKED flags.
537 // They are set before GC if we're going to
538 // compact. (search for BF_MARKED above).
539 bd->flags &= ~BF_MARKED;
540
541 // between GCs, all blocks in the heap except
542 // for the nursery have the BF_EVACUATED flag set.
543 bd->flags |= BF_EVACUATED;
544
545 prev = bd;
546 }
547 }
548
549 if (prev != NULL) {
550 prev->link = gen->blocks;
551 gen->blocks = gen->old_blocks;
552 }
553 }
554 // add the new blocks to the block tally
555 gen->n_blocks += gen->n_old_blocks;
556 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
557 ASSERT(countOccupied(gen->blocks) == gen->n_words);
558 }
559 else // not compacted
560 {
561 freeChain(gen->old_blocks);
562 }
563
564 gen->old_blocks = NULL;
565 gen->n_old_blocks = 0;
566
567 /* LARGE OBJECTS. The current live large objects are chained on
568 * scavenged_large, having been moved during garbage
569 * collection from large_objects. Any objects left on the
570 * large_objects list are therefore dead, so we free them here.
571 */
572 freeChain(gen->large_objects);
573 gen->large_objects = gen->scavenged_large_objects;
574 gen->n_large_blocks = gen->n_scavenged_large_blocks;
575 gen->n_new_large_words = 0;
576 }
577 else // for generations > N
578 {
579 /* For older generations, we need to append the
580 * scavenged_large_object list (i.e. large objects that have been
581 * promoted during this GC) to the large_object list for that step.
582 */
583 for (bd = gen->scavenged_large_objects; bd; bd = next) {
584 next = bd->link;
585 dbl_link_onto(bd, &gen->large_objects);
586 }
587
588 // add the new blocks we promoted during this GC
589 gen->n_large_blocks += gen->n_scavenged_large_blocks;
590 }
591
592 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
593
594 gen->scavenged_large_objects = NULL;
595 gen->n_scavenged_large_blocks = 0;
596
597 // Count "live" data
598 live_words += genLiveWords(gen);
599 live_blocks += genLiveBlocks(gen);
600
601 // add in the partial blocks in the gen_workspaces, but ignore gen 0
602 // if this is a local GC (we can't count another capability's part_list)
603 {
604 nat i;
605 for (i = 0; i < n_capabilities; i++) {
606 live_words += gcThreadLiveWords(i, gen->no);
607 live_blocks += gcThreadLiveBlocks(i, gen->no);
608 }
609 }
610 } // for all generations
611
612 // update the max size of older generations after a major GC
613 resize_generations();
614
615 // Free the mark stack.
616 if (mark_stack_top_bd != NULL) {
617 debugTrace(DEBUG_gc, "mark stack: %d blocks",
618 countBlocks(mark_stack_top_bd));
619 freeChain(mark_stack_top_bd);
620 }
621
622 // Free any bitmaps.
623 for (g = 0; g <= N; g++) {
624 gen = &generations[g];
625 if (gen->bitmap != NULL) {
626 freeGroup(gen->bitmap);
627 gen->bitmap = NULL;
628 }
629 }
630
631 // Reset the nursery: make the blocks empty
632 if (n_gc_threads == 1) {
633 for (n = 0; n < n_capabilities; n++) {
634 allocated += clearNursery(&capabilities[n]);
635 }
636 } else {
637 gct->allocated = clearNursery(cap);
638 for (n = 0; n < n_capabilities; n++) {
639 allocated += gc_threads[n]->allocated;
640 }
641 }
642
643 resize_nursery();
644
645 resetNurseries();
646
647 // mark the garbage collected CAFs as dead
648 #if 0 && defined(DEBUG) // doesn't work at the moment
649 if (major_gc) { gcCAFs(); }
650 #endif
651
652 #ifdef PROFILING
653 // resetStaticObjectForRetainerProfiling() must be called before
654 // zeroing below.
655
656 // ToDo: fix the gct->scavenged_static_objects below
657 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
658 #endif
659
660 // zero the scavenged static object list
661 if (major_gc) {
662 nat i;
663 if (n_gc_threads == 1) {
664 zero_static_object_list(gct->scavenged_static_objects);
665 } else {
666 for (i = 0; i < n_gc_threads; i++) {
667 if (!gc_threads[i]->idle) {
668 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
669 }
670 }
671 }
672 }
673
674 // Update the stable pointer hash table.
675 updateStablePtrTable(major_gc);
676
677 // unlock the StablePtr table. Must be before scheduleFinalizers(),
678 // because a finalizer may call hs_free_fun_ptr() or
679 // hs_free_stable_ptr(), both of which access the StablePtr table.
680 stablePtrPostGC();
681
682 // Start any pending finalizers. Must be after
683 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
684 RELEASE_SM_LOCK;
685 scheduleFinalizers(cap, old_weak_ptr_list);
686 ACQUIRE_SM_LOCK;
687
688 // check sanity after GC
689 // before resurrectThreads(), because that might overwrite some
690 // closures, which will cause problems with THREADED where we don't
691 // fill slop.
692 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
693
694 // If a heap census is due, we need to do it before
695 // resurrectThreads(), for the same reason as checkSanity above:
696 // resurrectThreads() will overwrite some closures and leave slop
697 // behind.
698 if (do_heap_census) {
699 debugTrace(DEBUG_sched, "performing heap census");
700 RELEASE_SM_LOCK;
701 heapCensus(gct->gc_start_cpu);
702 ACQUIRE_SM_LOCK;
703 }
704
705 // send exceptions to any threads which were about to die
706 RELEASE_SM_LOCK;
707 resurrectThreads(resurrected_threads);
708 ACQUIRE_SM_LOCK;
709
710 if (major_gc) {
711 nat need, got;
712 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
713 got = mblocks_allocated;
714 /* If the amount of data remains constant, next major GC we'll
715 require (F+1)*need. We leave (F+2)*need in order to reduce
716 repeated deallocation and reallocation. */
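/* For example, with the default -F2 (oldGenFactor == 2) and
need == 100 mblocks, we keep up to (2+2)*100 = 400 mblocks and
return anything above that to the OS. */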
717 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
718 if (got > need) {
719 returnMemoryToOS(got - need);
720 }
721 }
722
723 // extra GC trace info
724 IF_DEBUG(gc, statDescribeGens());
725
726 #ifdef DEBUG
727 // symbol-table based profiling
728 /* heapCensus(to_blocks); */ /* ToDo */
729 #endif
730
731 // restore enclosing cost centre
732 #ifdef PROFILING
733 for (n = 0; n < n_capabilities; n++) {
734 capabilities[n].r.rCCCS = save_CCS[n];
735 }
736 #endif
737
738 #ifdef DEBUG
739 // check for memory leaks if DEBUG is on
740 memInventory(DEBUG_gc);
741 #endif
742
743 #ifdef RTS_GTK_FRONTPANEL
744 if (RtsFlags.GcFlags.frontpanel) {
745 updateFrontPanelAfterGC( N, live_words );
746 }
747 #endif
748
749 // ok, GC over: tell the stats department what happened.
750 stat_endGC(cap, gct, allocated, live_words, copied,
751 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
752 N, n_gc_threads, par_max_copied, par_tot_copied);
753
754 // Guess which generation we'll collect *next* time
755 initialise_N(force_major_gc);
756
757 #if defined(RTS_USER_SIGNALS)
758 if (RtsFlags.MiscFlags.install_signal_handlers) {
759 // unblock signals again
760 unblockUserSignals();
761 }
762 #endif
763
764 RELEASE_SM_LOCK;
765
766 SET_GCT(saved_gct);
767 }
768
769 /* -----------------------------------------------------------------------------
770 Figure out which generation to collect, initialise N and major_gc.
771
772 Also returns the total number of blocks in generations that will be
773 collected.
774 -------------------------------------------------------------------------- */
775
776 static nat
777 initialise_N (rtsBool force_major_gc)
778 {
779 int g;
780 nat blocks, blocks_total;
781
782 blocks = 0;
783 blocks_total = 0;
784
785 if (force_major_gc) {
786 N = RtsFlags.GcFlags.generations - 1;
787 } else {
788 N = 0;
789 }
790
791 for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
792
793 blocks = generations[g].n_words / BLOCK_SIZE_W
794 + generations[g].n_large_blocks;
795
796 if (blocks >= generations[g].max_blocks) {
797 N = stg_max(N,g);
798 }
799 if ((nat)g <= N) {
800 blocks_total += blocks;
801 }
802 }
803
804 blocks_total += countNurseryBlocks();
805
806 major_gc = (N == RtsFlags.GcFlags.generations-1);
807 return blocks_total;
808 }
809
810 /* -----------------------------------------------------------------------------
811 Initialise the gc_thread structures.
812 -------------------------------------------------------------------------- */
813
814 #define GC_THREAD_INACTIVE 0
815 #define GC_THREAD_STANDING_BY 1
816 #define GC_THREAD_RUNNING 2
817 #define GC_THREAD_WAITING_TO_CONTINUE 3
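// The main GC thread and the workers hand off control via each worker's
// gc_spin and mut_spin locks (see new_gc_thread(), gcWorkerThread(),
// wakeup_gc_threads() and releaseGCThreads()). Roughly: the main thread
// holds a worker's gc_spin while that worker is idle (INACTIVE or
// STANDING_BY) and holds its mut_spin while the worker is doing GC work,
// so a worker parks itself by acquiring whichever lock the main thread
// currently holds:
//
//   INACTIVE --> STANDING_BY --> RUNNING --> WAITING_TO_CONTINUE --> INACTIVE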
818
819 static void
820 new_gc_thread (nat n, gc_thread *t)
821 {
822 nat g;
823 gen_workspace *ws;
824
825 t->cap = &capabilities[n];
826
827 #ifdef THREADED_RTS
828 t->id = 0;
829 initSpinLock(&t->gc_spin);
830 initSpinLock(&t->mut_spin);
831 ACQUIRE_SPIN_LOCK(&t->gc_spin);
832 t->wakeup = GC_THREAD_INACTIVE; // starts INACTIVE, so we can wait for the
833 // thread to start up; see wakeup_gc_threads
834 #endif
835
836 t->thread_index = n;
837 t->idle = rtsFalse;
838 t->free_blocks = NULL;
839 t->gc_count = 0;
840
841 init_gc_thread(t);
842
843 #ifdef USE_PAPI
844 t->papi_events = -1;
845 #endif
846
847 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
848 {
849 ws = &t->gens[g];
850 ws->gen = &generations[g];
851 ASSERT(g == ws->gen->no);
852 ws->my_gct = t;
853
854 // We want to call
855 // alloc_todo_block(ws,0);
856 // but can't, because it uses gct which isn't set up at this point.
857 // Hence, allocate a block for todo_bd manually:
858 {
859 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
860 initBdescr(bd, ws->gen, ws->gen->to);
861 bd->flags = BF_EVACUATED;
862 bd->u.scan = bd->free = bd->start;
863
864 ws->todo_bd = bd;
865 ws->todo_free = bd->free;
866 ws->todo_lim = bd->start + BLOCK_SIZE_W;
867 }
868
869 ws->todo_q = newWSDeque(128);
870 ws->todo_overflow = NULL;
871 ws->n_todo_overflow = 0;
872 ws->todo_large_objects = NULL;
873
874 ws->part_list = NULL;
875 ws->n_part_blocks = 0;
876
877 ws->scavd_list = NULL;
878 ws->n_scavd_blocks = 0;
879 }
880 }
881
882
883 void
884 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
885 {
886 #if defined(THREADED_RTS)
887 nat i;
888
889 if (from > 0) {
890 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
891 "initGcThreads");
892 } else {
893 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
894 "initGcThreads");
895 }
896
897 // We have to update the gct->cap pointers to point to the new
898 // Capability array now.
899 for (i = 0; i < from; i++) {
900 gc_threads[i]->cap = &capabilities[gc_threads[i]->cap->no];
901 }
902
903 for (i = from; i < to; i++) {
904 gc_threads[i] =
905 stgMallocBytes(sizeof(gc_thread) +
906 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
907 "alloc_gc_threads");
908
909 new_gc_thread(i, gc_threads[i]);
910 }
911 #else
912 ASSERT(from == 0 && to == 1);
913 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
914 gc_threads[0] = gct;
915 new_gc_thread(0,gc_threads[0]);
916 #endif
917 }
918
919 void
920 freeGcThreads (void)
921 {
922 nat g;
923 if (gc_threads != NULL) {
924 #if defined(THREADED_RTS)
925 nat i;
926 for (i = 0; i < n_capabilities; i++) {
927 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
928 {
929 freeWSDeque(gc_threads[i]->gens[g].todo_q);
930 }
931 stgFree (gc_threads[i]);
932 }
933 stgFree (gc_threads);
934 #else
935 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
936 {
937 freeWSDeque(gc_threads[0]->gens[g].todo_q);
938 }
939 stgFree (gc_threads);
940 #endif
941 gc_threads = NULL;
942 }
943 }
944
945 /* ----------------------------------------------------------------------------
946 Start GC threads
947 ------------------------------------------------------------------------- */
948
949 static volatile StgWord gc_running_threads;
950
951 static StgWord
952 inc_running (void)
953 {
954 StgWord new;
955 new = atomic_inc(&gc_running_threads);
956 ASSERT(new <= n_gc_threads);
957 return new;
958 }
959
960 static StgWord
961 dec_running (void)
962 {
963 ASSERT(gc_running_threads != 0);
964 return atomic_dec(&gc_running_threads);
965 }
966
967 static rtsBool
968 any_work (void)
969 {
970 int g;
971 gen_workspace *ws;
972
973 gct->any_work++;
974
975 write_barrier();
976
977 // scavenge objects in compacted generation
978 if (mark_stack_bd != NULL && !mark_stack_empty()) {
979 return rtsTrue;
980 }
981
982 // Check for global work in any step. We don't need to check for
983 // local work, because we have already exited scavenge_loop(),
984 // which means there is no local work for this thread.
985 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
986 ws = &gct->gens[g];
987 if (ws->todo_large_objects) return rtsTrue;
988 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
989 if (ws->todo_overflow) return rtsTrue;
990 }
991
992 #if defined(THREADED_RTS)
993 if (work_stealing) {
994 nat n;
995 // look for work to steal
996 for (n = 0; n < n_gc_threads; n++) {
997 if (n == gct->thread_index) continue;
998 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
999 ws = &gc_threads[n]->gens[g];
1000 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1001 }
1002 }
1003 }
1004 #endif
1005
1006 gct->no_work++;
1007 #if defined(THREADED_RTS)
1008 yieldThread();
1009 #endif
1010
1011 return rtsFalse;
1012 }
1013
1014 static void
1015 scavenge_until_all_done (void)
1016 {
1017 DEBUG_ONLY( nat r );
1018
1019
1020 loop:
1021 #if defined(THREADED_RTS)
1022 if (n_gc_threads > 1) {
1023 scavenge_loop();
1024 } else {
1025 scavenge_loop1();
1026 }
1027 #else
1028 scavenge_loop();
1029 #endif
1030
1031 collect_gct_blocks();
1032
1033 // scavenge_loop() only exits when there's no work to do
1034
1035 #ifdef DEBUG
1036 r = dec_running();
1037 #else
1038 dec_running();
1039 #endif
1040
1041 traceEventGcIdle(gct->cap);
1042
1043 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1044
1045 while (gc_running_threads != 0) {
1046 // usleep(1);
1047 if (any_work()) {
1048 inc_running();
1049 traceEventGcWork(gct->cap);
1050 goto loop;
1051 }
1052 // any_work() does not remove the work from the queue, it
1053 // just checks for the presence of work. If we find any,
1054 // then we increment gc_running_threads and go back to
1055 // scavenge_loop() to perform any pending work.
1056 }
1057
1058 traceEventGcDone(gct->cap);
1059 }
1060
1061 #if defined(THREADED_RTS)
1062
1063 void
1064 gcWorkerThread (Capability *cap)
1065 {
1066 gc_thread *saved_gct;
1067
1068 // necessary if we stole a callee-saves register for gct:
1069 saved_gct = gct;
1070
1071 SET_GCT(gc_threads[cap->no]);
1072 gct->id = osThreadId();
1073
1074 stat_gcWorkerThreadStart(gct);
1075
1076 // Wait until we're told to wake up
1077 RELEASE_SPIN_LOCK(&gct->mut_spin);
1078 // yieldThread();
1079 // Strangely, adding a yieldThread() here makes the CPU time
1080 // measurements more accurate on Linux, perhaps because it syncs
1081 // the CPU time across the multiple cores. Without this, CPU time
1082 // is heavily skewed towards GC rather than MUT.
1083 gct->wakeup = GC_THREAD_STANDING_BY;
1084 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1085 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1086
1087 #ifdef USE_PAPI
1088 // start performance counters in this thread...
1089 if (gct->papi_events == -1) {
1090 papi_init_eventset(&gct->papi_events);
1091 }
1092 papi_thread_start_gc1_count(gct->papi_events);
1093 #endif
1094
1095 init_gc_thread(gct);
1096
1097 traceEventGcWork(gct->cap);
1098
1099 // Every thread evacuates some roots.
1100 gct->evac_gen_no = 0;
1101 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1102 scavenge_capability_mut_lists(cap);
1103
1104 scavenge_until_all_done();
1105
1106 gct->allocated = clearNursery(cap);
1107
1108 #ifdef THREADED_RTS
1109 // Now that the whole heap is marked, we discard any sparks that
1110 // were found to be unreachable. The main GC thread is currently
1111 // marking heap reachable via weak pointers, so it is
1112 // non-deterministic whether a spark will be retained if it is
1113 // only reachable via weak pointers. To fix this problem would
1114 // require another GC barrier, which is too high a price.
1115 pruneSparkQueue(cap);
1116 #endif
1117
1118 #ifdef USE_PAPI
1119 // count events in this thread towards the GC totals
1120 papi_thread_stop_gc1_count(gct->papi_events);
1121 #endif
1122
1123 // Wait until we're told to continue
1124 RELEASE_SPIN_LOCK(&gct->gc_spin);
1125 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1126 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1127 gct->thread_index);
1128 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1129 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1130
1131 // record the time spent doing GC in the Task structure
1132 stat_gcWorkerThreadDone(gct);
1133
1134 SET_GCT(saved_gct);
1135 }
1136
1137 #endif
1138
1139 #if defined(THREADED_RTS)
1140
1141 void
1142 waitForGcThreads (Capability *cap USED_IF_THREADS)
1143 {
1144 const nat n_threads = n_capabilities;
1145 const nat me = cap->no;
1146 nat i, j;
1147 rtsBool retry = rtsTrue;
1148
1149 while(retry) {
1150 for (i=0; i < n_threads; i++) {
1151 if (i == me || gc_threads[i]->idle) continue;
1152 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1153 prodCapability(&capabilities[i], cap->running_task);
1154 }
1155 }
1156 for (j=0; j < 10; j++) {
1157 retry = rtsFalse;
1158 for (i=0; i < n_threads; i++) {
1159 if (i == me || gc_threads[i]->idle) continue;
1160 write_barrier();
1161 interruptCapability(&capabilities[i]);
1162 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1163 retry = rtsTrue;
1164 }
1165 }
1166 if (!retry) break;
1167 yieldThread();
1168 }
1169 }
1170 }
1171
1172 #endif // THREADED_RTS
1173
1174 static void
1175 start_gc_threads (void)
1176 {
1177 #if defined(THREADED_RTS)
1178 gc_running_threads = 0;
1179 #endif
1180 }
1181
1182 static void
1183 wakeup_gc_threads (nat me USED_IF_THREADS)
1184 {
1185 #if defined(THREADED_RTS)
1186 nat i;
1187
1188 if (n_gc_threads == 1) return;
1189
1190 for (i=0; i < n_gc_threads; i++) {
1191 if (i == me || gc_threads[i]->idle) continue;
1192 inc_running();
1193 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1194 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1195
1196 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1197 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1198 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1199 }
1200 #endif
1201 }
1202
1203 // After GC is complete, we must wait for all GC threads to reach the
1204 // GC_THREAD_WAITING_TO_CONTINUE state, otherwise they may still be
1205 // executing inside any_work(), and may even remain awake until the next GC starts.
1206 static void
1207 shutdown_gc_threads (nat me USED_IF_THREADS)
1208 {
1209 #if defined(THREADED_RTS)
1210 nat i;
1211
1212 if (n_gc_threads == 1) return;
1213
1214 for (i=0; i < n_gc_threads; i++) {
1215 if (i == me || gc_threads[i]->idle) continue;
1216 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1217 }
1218 #endif
1219 }
1220
1221 #if defined(THREADED_RTS)
1222 void
1223 releaseGCThreads (Capability *cap USED_IF_THREADS)
1224 {
1225 const nat n_threads = n_capabilities;
1226 const nat me = cap->no;
1227 nat i;
1228 for (i=0; i < n_threads; i++) {
1229 if (i == me || gc_threads[i]->idle) continue;
1230 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1231 barf("releaseGCThreads");
1232
1233 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1234 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1235 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1236 }
1237 }
1238 #endif
1239
1240 /* ----------------------------------------------------------------------------
1241 Initialise a generation that is to be collected
1242 ------------------------------------------------------------------------- */
1243
1244 static void
1245 prepare_collected_gen (generation *gen)
1246 {
1247 nat i, g, n;
1248 gen_workspace *ws;
1249 bdescr *bd, *next;
1250
1251 // Throw away the current mutable list. Invariant: the mutable
1252 // list always has at least one block; this means we can avoid a
1253 // check for NULL in recordMutable().
1254 g = gen->no;
1255 if (g != 0) {
1256 for (i = 0; i < n_capabilities; i++) {
1257 freeChain(capabilities[i].mut_lists[g]);
1258 capabilities[i].mut_lists[g] = allocBlock();
1259 }
1260 }
1261
1262 gen = &generations[g];
1263 ASSERT(gen->no == g);
1264
1265 // we'll construct a new list of threads in this step
1266 // during GC, throw away the current list.
1267 gen->old_threads = gen->threads;
1268 gen->threads = END_TSO_QUEUE;
1269
1270 // deprecate the existing blocks
1271 gen->old_blocks = gen->blocks;
1272 gen->n_old_blocks = gen->n_blocks;
1273 gen->blocks = NULL;
1274 gen->n_blocks = 0;
1275 gen->n_words = 0;
1276 gen->live_estimate = 0;
1277
1278 // initialise the large object queues.
1279 ASSERT(gen->scavenged_large_objects == NULL);
1280 ASSERT(gen->n_scavenged_large_blocks == 0);
1281
1282 // grab all the partial blocks stashed in the gc_thread workspaces and
1283 // move them to the old_blocks list of this gen.
1284 for (n = 0; n < n_capabilities; n++) {
1285 ws = &gc_threads[n]->gens[gen->no];
1286
1287 for (bd = ws->part_list; bd != NULL; bd = next) {
1288 next = bd->link;
1289 bd->link = gen->old_blocks;
1290 gen->old_blocks = bd;
1291 gen->n_old_blocks += bd->blocks;
1292 }
1293 ws->part_list = NULL;
1294 ws->n_part_blocks = 0;
1295
1296 ASSERT(ws->scavd_list == NULL);
1297 ASSERT(ws->n_scavd_blocks == 0);
1298
1299 if (ws->todo_free != ws->todo_bd->start) {
1300 ws->todo_bd->free = ws->todo_free;
1301 ws->todo_bd->link = gen->old_blocks;
1302 gen->old_blocks = ws->todo_bd;
1303 gen->n_old_blocks += ws->todo_bd->blocks;
1304 alloc_todo_block(ws,0); // always has one block.
1305 }
1306 }
1307
1308 // mark the small objects as from-space
1309 for (bd = gen->old_blocks; bd; bd = bd->link) {
1310 bd->flags &= ~BF_EVACUATED;
1311 }
1312
1313 // mark the large objects as from-space
1314 for (bd = gen->large_objects; bd; bd = bd->link) {
1315 bd->flags &= ~BF_EVACUATED;
1316 }
1317
1318 // for a compacted generation, we need to allocate the bitmap
1319 if (gen->mark) {
1320 lnat bitmap_size; // in bytes
1321 bdescr *bitmap_bdescr;
1322 StgWord *bitmap;
1323
1324 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
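// (i.e. one mark bit per word of from-space: each block contributes
// BLOCK_SIZE / sizeof(W_) words, and BITS_PER_BYTE mark bits are packed
// into each byte of the bitmap)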
1325
1326 if (bitmap_size > 0) {
1327 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1328 / BLOCK_SIZE);
1329 gen->bitmap = bitmap_bdescr;
1330 bitmap = bitmap_bdescr->start;
1331
1332 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1333 bitmap_size, bitmap);
1334
1335 // don't forget to fill it with zeros!
1336 memset(bitmap, 0, bitmap_size);
1337
1338 // For each block in this step, point to its bitmap from the
1339 // block descriptor.
1340 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1341 bd->u.bitmap = bitmap;
1342 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1343
1344 // Also at this point we set the BF_MARKED flag
1345 // for this block. The invariant is that
1346 // BF_MARKED is always unset, except during GC
1347 // when it is set on those blocks which will be
1348 // compacted.
1349 if (!(bd->flags & BF_FRAGMENTED)) {
1350 bd->flags |= BF_MARKED;
1351 }
1352
1353 // BF_SWEPT should be marked only for blocks that are being
1354 // collected in sweep()
1355 bd->flags &= ~BF_SWEPT;
1356 }
1357 }
1358 }
1359 }
1360
1361
1362 /* ----------------------------------------------------------------------------
1363 Save the mutable lists in saved_mut_lists
1364 ------------------------------------------------------------------------- */
1365
1366 static void
1367 stash_mut_list (Capability *cap, nat gen_no)
1368 {
1369 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1370 cap->mut_lists[gen_no] = allocBlock_sync();
1371 }
1372
1373 /* ----------------------------------------------------------------------------
1374 Initialise a generation that is *not* to be collected
1375 ------------------------------------------------------------------------- */
1376
1377 static void
1378 prepare_uncollected_gen (generation *gen)
1379 {
1380 nat i;
1381
1382
1383 ASSERT(gen->no > 0);
1384
1385 // save the current mutable lists for this generation, and
1386 // allocate a fresh block for each one. We'll traverse these
1387 // mutable lists as roots early on in the GC.
1388 for (i = 0; i < n_capabilities; i++) {
1389 stash_mut_list(&capabilities[i], gen->no);
1390 }
1391
1392 ASSERT(gen->scavenged_large_objects == NULL);
1393 ASSERT(gen->n_scavenged_large_blocks == 0);
1394 }
1395
1396 /* -----------------------------------------------------------------------------
1397 Collect the completed blocks from a GC thread and attach them to
1398 the generation.
1399 -------------------------------------------------------------------------- */
1400
1401 static void
1402 collect_gct_blocks (void)
1403 {
1404 nat g;
1405 gen_workspace *ws;
1406 bdescr *bd, *prev;
1407
1408 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1409 ws = &gct->gens[g];
1410
1411 // there may still be a block attached to ws->todo_bd;
1412 // leave it there to use next time.
1413
1414 if (ws->scavd_list != NULL) {
1415 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1416
1417 ASSERT(gct->scan_bd == NULL);
1418 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1419
1420 prev = NULL;
1421 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1422 ws->gen->n_words += bd->free - bd->start;
1423 prev = bd;
1424 }
1425 if (prev != NULL) {
1426 prev->link = ws->gen->blocks;
1427 ws->gen->blocks = ws->scavd_list;
1428 }
1429 ws->gen->n_blocks += ws->n_scavd_blocks;
1430
1431 ws->scavd_list = NULL;
1432 ws->n_scavd_blocks = 0;
1433
1434 RELEASE_SPIN_LOCK(&ws->gen->sync);
1435 }
1436 }
1437 }
1438
1439 /* -----------------------------------------------------------------------------
1440 During mutation, any blocks that are filled by allocatePinned() are
1441 stashed on the local pinned_object_blocks list, to avoid needing to
1442 take a global lock. Here we collect those blocks from the
1443 cap->pinned_object_blocks lists and put them on the
1444 main g0->large_object list.
1445
1446 Returns: the number of words allocated this way, for stats
1447 purposes.
1448 -------------------------------------------------------------------------- */
1449
1450 static lnat
1451 collect_pinned_object_blocks (void)
1452 {
1453 nat n;
1454 bdescr *bd, *prev;
1455 lnat allocated = 0;
1456
1457 for (n = 0; n < n_capabilities; n++) {
1458 prev = NULL;
1459 for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
1460 allocated += bd->free - bd->start;
1461 prev = bd;
1462 }
1463 if (prev != NULL) {
1464 prev->link = g0->large_objects;
1465 if (g0->large_objects != NULL) {
1466 g0->large_objects->u.back = prev;
1467 }
1468 g0->large_objects = capabilities[n].pinned_object_blocks;
1469 capabilities[n].pinned_object_blocks = 0;
1470 }
1471 }
1472
1473 return allocated;
1474 }
1475
1476 /* -----------------------------------------------------------------------------
1477 Initialise a gc_thread before GC
1478 -------------------------------------------------------------------------- */
1479
1480 static void
1481 init_gc_thread (gc_thread *t)
1482 {
1483 t->static_objects = END_OF_STATIC_LIST;
1484 t->scavenged_static_objects = END_OF_STATIC_LIST;
1485 t->scan_bd = NULL;
1486 t->mut_lists = t->cap->mut_lists;
1487 t->evac_gen_no = 0;
1488 t->failed_to_evac = rtsFalse;
1489 t->eager_promotion = rtsTrue;
1490 t->thunk_selector_depth = 0;
1491 t->allocated = 0;
1492 t->copied = 0;
1493 t->scanned = 0;
1494 t->any_work = 0;
1495 t->no_work = 0;
1496 t->scav_find_work = 0;
1497 }
1498
1499 /* -----------------------------------------------------------------------------
1500 Function we pass to evacuate roots.
1501 -------------------------------------------------------------------------- */
1502
1503 static void
1504 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1505 {
1506 // we stole a register for gct, but this function is called from
1507 // *outside* the GC where the register variable is not in effect,
1508 // so we need to save and restore it here. NB. only call
1509 // mark_root() from the main GC thread, otherwise gct will be
1510 // incorrect.
1511 #if defined(THREADED_RTS)
1512 gc_thread *saved_gct;
1513 saved_gct = gct;
1514 #endif
1515 SET_GCT(user);
1516
1517 evacuate(root);
1518
1519 SET_GCT(saved_gct);
1520 }
1521
1522 /* -----------------------------------------------------------------------------
1523 Initialising the static object & mutable lists
1524 -------------------------------------------------------------------------- */
1525
1526 static void
1527 zero_static_object_list(StgClosure* first_static)
1528 {
1529 StgClosure* p;
1530 StgClosure* link;
1531 const StgInfoTable *info;
1532
1533 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1534 info = get_itbl(p);
1535 link = *STATIC_LINK(info, p);
1536 *STATIC_LINK(info,p) = NULL;
1537 }
1538 }
1539
1540 /* ----------------------------------------------------------------------------
1541 Reset the sizes of the older generations when we do a major
1542 collection.
1543
1544 CURRENT STRATEGY: make all generations except zero the same size.
1545 We have to stay within the maximum heap size, and leave a certain
1546 percentage of the maximum heap size available to allocate into.
1547 ------------------------------------------------------------------------- */
1548
1549 static void
1550 resize_generations (void)
1551 {
1552 nat g;
1553
1554 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1555 nat live, size, min_alloc, words;
1556 const nat max = RtsFlags.GcFlags.maxHeapSize;
1557 const nat gens = RtsFlags.GcFlags.generations;
1558
1559 // live data in the oldest generation
1560 if (oldest_gen->live_estimate != 0) {
1561 words = oldest_gen->live_estimate;
1562 } else {
1563 words = oldest_gen->n_words;
1564 }
1565 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1566 oldest_gen->n_large_blocks;
1567
1568 // default max size for all generations except zero
1569 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1570 RtsFlags.GcFlags.minOldGenSize);
1571
1572 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1573 RtsFlags.GcFlags.heapSizeSuggestion = size;
1574 }
1575
1576 // minimum size for generation zero
1577 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1578 RtsFlags.GcFlags.minAllocAreaSize);
1579
1580 // Auto-enable compaction when the residency reaches a
1581 // certain percentage of the maximum heap size (default: 30%).
1582 if (RtsFlags.GcFlags.compact ||
1583 (max > 0 &&
1584 oldest_gen->n_blocks >
1585 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1586 oldest_gen->mark = 1;
1587 oldest_gen->compact = 1;
1588 // debugBelch("compaction: on\n", live);
1589 } else {
1590 oldest_gen->mark = 0;
1591 oldest_gen->compact = 0;
1592 // debugBelch("compaction: off\n", live);
1593 }
1594
1595 if (RtsFlags.GcFlags.sweep) {
1596 oldest_gen->mark = 1;
1597 }
1598
1599 // if we're going to go over the maximum heap size, reduce the
1600 // size of the generations accordingly. The calculation is
1601 // different if compaction is turned on, because we don't need
1602 // to double the space required to collect the old generation.
1603 if (max != 0) {
1604
1605 // this test is necessary to ensure that the calculations
1606 // below don't have any negative results - we're working
1607 // with unsigned values here.
1608 if (max < min_alloc) {
1609 heapOverflow();
1610 }
1611
1612 if (oldest_gen->compact) {
1613 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1614 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1615 }
1616 } else {
1617 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1618 size = (max - min_alloc) / ((gens - 1) * 2);
1619 }
1620 }
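/* e.g. (illustrative numbers) with gens == 2, max == 1000 blocks and
min_alloc == 100 blocks, the non-compacting case requires
2*size + 100 <= max, capping size at 450 blocks. */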
1621
1622 if (size < live) {
1623 heapOverflow();
1624 }
1625 }
1626
1627 #if 0
1628 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1629 min_alloc, size, max);
1630 #endif
1631
1632 for (g = 0; g < gens; g++) {
1633 generations[g].max_blocks = size;
1634 }
1635 }
1636 }
1637
1638 /* -----------------------------------------------------------------------------
1639 Calculate the new size of the nursery, and resize it.
1640 -------------------------------------------------------------------------- */
1641
1642 static void
1643 resize_nursery (void)
1644 {
1645 const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1646
1647 if (RtsFlags.GcFlags.generations == 1)
1648 { // Two-space collector:
1649 nat blocks;
1650
1651 /* set up a new nursery. Allocate a nursery size based on a
1652 * function of the amount of live data (by default a factor of 2)
1653 * Use the blocks from the old nursery if possible, freeing up any
1654 * left over blocks.
1655 *
1656 * If we get near the maximum heap size, then adjust our nursery
1657 * size accordingly. If the nursery is the same size as the live
1658 * data (L), then we need 3L bytes. We can reduce the size of the
1659 * nursery to bring the required memory down near 2L bytes.
1660 *
1661 * A normal 2-space collector would need 4L bytes to give the same
1662 * performance we get from 3L bytes, reducing to the same
1663 * performance at 2L bytes.
1664 */
1665 blocks = generations[0].n_blocks;
1666
1667 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1668 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1669 RtsFlags.GcFlags.maxHeapSize )
1670 {
1671 long adjusted_blocks; // signed on purpose
1672 int pc_free;
1673
1674 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1675
1676 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1677 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1678
1679 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1680 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1681 {
1682 heapOverflow();
1683 }
1684 blocks = adjusted_blocks;
1685 }
1686 else
1687 {
1688 blocks *= RtsFlags.GcFlags.oldGenFactor;
1689 if (blocks < min_nursery)
1690 {
1691 blocks = min_nursery;
1692 }
1693 }
1694 resizeNurseries(blocks);
1695 }
1696 else // Generational collector
1697 {
1698 /*
1699 * If the user has given us a suggested heap size, adjust our
1700 * allocation area to make best use of the memory available.
1701 */
1702 if (RtsFlags.GcFlags.heapSizeSuggestion)
1703 {
1704 long blocks;
1705 const nat needed = calcNeeded(); // approx blocks needed at next GC
1706
1707 /* Guess how much will be live in generation 0 step 0 next time.
1708 * A good approximation is obtained by finding the
1709 * percentage of g0 that was live at the last minor GC.
1710 *
1711 * We have an accurate figure for the amount of copied data in
1712 * 'copied', but we must convert this to a number of blocks, with
1713 * a small adjustment for estimated slop at the end of a block
1714 * (- 10 words).
1715 */
1716 if (N == 0)
1717 {
1718 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1719 / countNurseryBlocks();
1720 }
1721
1722 /* Estimate a size for the allocation area based on the
1723 * information available. We might end up going slightly under
1724 * or over the suggested heap size, but we should be pretty
1725 * close on average.
1726 *
1727 * Formula: suggested - needed
1728 * ----------------------------
1729 * 1 + g0_pcnt_kept/100
1730 *
1731 * where 'needed' is the amount of memory needed at the next
1732 * collection for collecting all gens except g0.
1733 */
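/* For example (illustrative numbers): a suggested heap size of 4096
* blocks, needed == 1024 blocks and g0_pcnt_kept == 30 gives
* (4096 - 1024) * 100 / 130 = 2363 blocks.
*/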
1734 blocks =
1735 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1736 (100 + (long)g0_pcnt_kept);
1737
1738 if (blocks < (long)min_nursery) {
1739 blocks = min_nursery;
1740 }
1741
1742 resizeNurseries((nat)blocks);
1743 }
1744 else
1745 {
1746 // we might have added extra large blocks to the nursery, so
1747 // resize back to minAllocAreaSize again.
1748 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1749 }
1750 }
1751 }
1752
1753 /* -----------------------------------------------------------------------------
1754 Sanity code for CAF garbage collection.
1755
1756 With DEBUG turned on, we manage a CAF list in addition to the SRT
1757 mechanism. After GC, we run down the CAF list and blackhole any
1758 CAFs which have been garbage collected. This means we get an error
1759 whenever the program tries to enter a garbage collected CAF.
1760
1761 Any garbage collected CAFs are taken off the CAF list at the same
1762 time.
1763 -------------------------------------------------------------------------- */
1764
1765 #if 0 && defined(DEBUG)
1766
1767 static void
1768 gcCAFs(void)
1769 {
1770 StgClosure* p;
1771 StgClosure** pp;
1772 const StgInfoTable *info;
1773 nat i;
1774
1775 i = 0;
1776 p = caf_list;
1777 pp = &caf_list;
1778
1779 while (p != NULL) {
1780
1781 info = get_itbl(p);
1782
1783 ASSERT(info->type == IND_STATIC);
1784
1785 if (STATIC_LINK(info,p) == NULL) {
1786 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1787 // black hole it
1788 SET_INFO(p,&stg_BLACKHOLE_info);
1789 p = STATIC_LINK2(info,p);
1790 *pp = p;
1791 }
1792 else {
1793 pp = &STATIC_LINK2(info,p);
1794 p = *pp;
1795 i++;
1796 }
1797
1798 }
1799
1800 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1801 }
1802 #endif