1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Storage.h"
32 #include "RtsUtils.h"
33 #include "Apply.h"
34 #include "Updates.h"
35 #include "Stats.h"
36 #include "Schedule.h"
37 #include "Sanity.h"
38 #include "BlockAlloc.h"
39 #include "ProfHeap.h"
40 #include "Weak.h"
41 #include "Prelude.h"
42 #include "RtsSignals.h"
43 #include "STM.h"
44 #if defined(RTS_GTK_FRONTPANEL)
45 #include "FrontPanel.h"
46 #endif
47 #include "Trace.h"
48 #include "RetainerProfile.h"
49 #include "LdvProfile.h"
50 #include "RaiseAsync.h"
51 #include "Papi.h"
52 #include "Stable.h"
53
54 #include <string.h> // for memset()
55 #include <unistd.h>
56
57 /* -----------------------------------------------------------------------------
58 Global variables
59 -------------------------------------------------------------------------- */
60
61 /* STATIC OBJECT LIST.
62 *
63 * During GC:
64 * We maintain a linked list of static objects that are still live.
65 * The requirements for this list are:
66 *
67 * - we need to scan the list while adding to it, in order to
68 * scavenge all the static objects (in the same way that
69 * breadth-first scavenging works for dynamic objects).
70 *
71 * - we need to be able to tell whether an object is already on
72 * the list, to break loops.
73 *
74 * Each static object has a "static link field", which we use for
75 * linking objects on to the list. We use a stack-type list, consing
76 * objects on the front as they are added (this means that the
77 * scavenge phase is depth-first, not breadth-first, but that
78 * shouldn't matter).
79 *
80 * A separate list is kept for objects that have been scavenged
81 * already - this is so that we can zero all the marks afterwards.
82 *
83 * An object is on the list if its static link field is non-zero; this
84 * means that we have to mark the end of the list with '1', not NULL.
85 *
86 * Extra notes for generational GC:
87 *
88 * Each generation has a static object list associated with it. When
89 * collecting generations up to N, we treat the static object lists
90 * from generations > N as roots.
91 *
92 * We build up a static object list while collecting generations 0..N,
93 * which is then appended to the static object list of generation N+1.
94 */
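/* Annotation (illustrative sketch, not part of the original source):
 * putting a static closure on the pending list is a cons through its
 * static link field, with the non-NULL sentinel mentioned above
 * (END_OF_STATIC_LIST) terminating the list:
 *
 *     if (*STATIC_LINK(info, p) == NULL) {        // not on any list yet
 *         *STATIC_LINK(info, p) = static_objects; // link to old head
 *         static_objects = p;                     // push on the front
 *     }
 *
 * Code of this shape lives in the evacuation path (Evac.c); the names
 * above follow the conventions used later in this file.
 */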
95
96 /* N is the oldest generation being collected, where the generations
97 * are numbered starting at 0. A major GC (indicated by the major_gc
98 * flag) is when we're collecting all generations. We only attempt to
99 * deal with static objects and GC CAFs when doing a major GC.
100 */
101 nat N;
102 rtsBool major_gc;
103
104 /* Data used for allocation area sizing.
105 */
106 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
107
108 /* Mut-list stats */
109 #ifdef DEBUG
110 nat mutlist_MUTVARS,
111 mutlist_MUTARRS,
112 mutlist_MVARS,
113 mutlist_TVAR,
114 mutlist_TVAR_WATCH_QUEUE,
115 mutlist_TREC_CHUNK,
116 mutlist_TREC_HEADER,
117 mutlist_ATOMIC_INVARIANT,
118 mutlist_INVARIANT_CHECK_QUEUE,
119 mutlist_OTHERS;
120 #endif
121
122 /* Thread-local data for each GC thread
123 */
124 gc_thread **gc_threads = NULL;
125
126 #if !defined(THREADED_RTS)
127 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
128 #endif
129
130 // Number of threads running in *this* GC. Affects how many
131 // step->todos[] lists we have to look in to find work.
132 nat n_gc_threads;
133
134 // For stats:
135 long copied; // *words* copied & scavenged during this GC
136
137 rtsBool work_stealing;
138
139 DECLARE_GCT
140
141 /* -----------------------------------------------------------------------------
142 Static function declarations
143 -------------------------------------------------------------------------- */
144
145 static void mark_root (void *user, StgClosure **root);
146 static void zero_static_object_list (StgClosure* first_static);
147 static void prepare_collected_gen (generation *gen);
148 static void prepare_uncollected_gen (generation *gen);
149 static void init_gc_thread (gc_thread *t);
150 static void resize_generations (void);
151 static void resize_nursery (void);
152 static void start_gc_threads (void);
153 static void scavenge_until_all_done (void);
154 static StgWord inc_running (void);
155 static StgWord dec_running (void);
156 static void wakeup_gc_threads (nat me);
157 static void shutdown_gc_threads (nat me);
158 static void collect_gct_blocks (void);
159 static StgWord collect_pinned_object_blocks (void);
160
161 #if 0 && defined(DEBUG)
162 static void gcCAFs (void);
163 #endif
164
165 /* -----------------------------------------------------------------------------
166 The mark stack.
167 -------------------------------------------------------------------------- */
168
169 bdescr *mark_stack_top_bd; // topmost block in the mark stack
170 bdescr *mark_stack_bd; // current block in the mark stack
171 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
172
173 /* -----------------------------------------------------------------------------
174 GarbageCollect: the main entry point to the garbage collector.
175
176 The collect_gen parameter is obtained by calling calcNeeded().
177
178 Locks held: all capabilities are held throughout GarbageCollect().
179 -------------------------------------------------------------------------- */
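/* Annotation: the rough shape of GarbageCollect() below is
 *   1. block signals, attribute costs to CCS_GC, pick this thread's
 *      gc_thread, lock the StablePtr table;
 *   2. prepare the collected and uncollected generations, allocate the
 *      mark stack for a mark/compact or mark/sweep major GC, wake the
 *      other GC threads;
 *   3. evacuate the roots (capability mutable lists, CAFs, capabilities,
 *      scheduler, signal handlers, weak pointers, stable pointers) and
 *      scavenge to a fixed point, re-running traverseWeakPtrList() until
 *      nothing more is evacuated;
 *   4. compact or sweep the oldest generation on a major GC;
 *   5. tidy every generation, resize the old generations and nursery,
 *      update the stable table, run finalizers, take a heap census if
 *      due, resurrect threads, possibly return memory to the OS, and
 *      report the statistics.
 */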
180
181 void
182 GarbageCollect (nat collect_gen,
183 rtsBool do_heap_census,
184 nat gc_type USED_IF_THREADS,
185 Capability *cap)
186 {
187 bdescr *bd;
188 generation *gen;
189 StgWord live_blocks, live_words, allocated, par_max_copied, par_tot_copied;
190 #if defined(THREADED_RTS)
191 gc_thread *saved_gct;
192 #endif
193 nat g, n;
194
195 // necessary if we stole a callee-saves register for gct:
196 #if defined(THREADED_RTS)
197 saved_gct = gct;
198 #endif
199
200 #ifdef PROFILING
201 CostCentreStack *save_CCS[n_capabilities];
202 #endif
203
204 ACQUIRE_SM_LOCK;
205
206 #if defined(RTS_USER_SIGNALS)
207 if (RtsFlags.MiscFlags.install_signal_handlers) {
208 // block signals
209 blockUserSignals();
210 }
211 #endif
212
213 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
214 // otherwise adjust the padding in gen_workspace.
215
216 // this is the main thread
217 SET_GCT(gc_threads[cap->no]);
218
219 // tell the stats department that we've started a GC
220 stat_startGC(cap, gct);
221
222 // lock the StablePtr table
223 stablePtrPreGC();
224
225 #ifdef DEBUG
226 mutlist_MUTVARS = 0;
227 mutlist_MUTARRS = 0;
228 mutlist_MVARS = 0;
229 mutlist_TVAR = 0;
230 mutlist_TVAR_WATCH_QUEUE = 0;
231 mutlist_TREC_CHUNK = 0;
232 mutlist_TREC_HEADER = 0;
233 mutlist_ATOMIC_INVARIANT = 0;
234 mutlist_INVARIANT_CHECK_QUEUE = 0;
235 mutlist_OTHERS = 0;
236 #endif
237
238 // attribute any costs to CCS_GC
239 #ifdef PROFILING
240 for (n = 0; n < n_capabilities; n++) {
241 save_CCS[n] = capabilities[n].r.rCCCS;
242 capabilities[n].r.rCCCS = CCS_GC;
243 }
244 #endif
245
246 /* Approximate how much we allocated.
247 * Todo: only when generating stats?
248 */
249 allocated = countLargeAllocated(); /* don't count the nursery yet */
250
251 /* Figure out which generation to collect
252 */
253 N = collect_gen;
254 major_gc = (N == RtsFlags.GcFlags.generations-1);
255
256 #if defined(THREADED_RTS)
257 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
258 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
259 // It's not always a good idea to do load balancing in parallel
260 // GC. In particular, for a parallel program we don't want to
261 // lose locality by moving cached data into another CPU's cache
262 // (this effect can be quite significant).
263 //
264 // We could have a more complex way to determine whether to do
265 // work stealing or not, e.g. it might be a good idea to do it
266 // if the heap is big. For now, we just turn it on or off with
267 // a flag.
268 #endif
269
270 /* Start threads, so they can be spinning up while we finish initialisation.
271 */
272 start_gc_threads();
273
274 #if defined(THREADED_RTS)
275 /* How many threads will be participating in this GC?
276 * We don't try to parallelise minor GCs (unless the user asks for
277 * it with +RTS -gn0), or mark/compact/sweep GC.
278 */
279 if (gc_type == SYNC_GC_PAR) {
280 n_gc_threads = n_capabilities;
281 } else {
282 n_gc_threads = 1;
283 }
284 #else
285 n_gc_threads = 1;
286 #endif
287
288 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
289 N, n_gc_threads);
290
291 #ifdef RTS_GTK_FRONTPANEL
292 if (RtsFlags.GcFlags.frontpanel) {
293 updateFrontPanelBeforeGC(N);
294 }
295 #endif
296
297 #ifdef DEBUG
298 // check for memory leaks if DEBUG is on
299 memInventory(DEBUG_gc);
300 #endif
301
302 // check sanity *before* GC
303 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
304
305 // gather blocks allocated using allocatePinned() from each capability
306 // and put them on the g0->large_object list.
307 allocated += collect_pinned_object_blocks();
308
309 // Initialise all the generations/steps that we're collecting.
310 for (g = 0; g <= N; g++) {
311 prepare_collected_gen(&generations[g]);
312 }
313 // Initialise all the generations/steps that we're *not* collecting.
314 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
315 prepare_uncollected_gen(&generations[g]);
316 }
317
318 // Prepare this gc_thread
319 init_gc_thread(gct);
320
321 /* Allocate a mark stack if we're doing a major collection.
322 */
323 if (major_gc && oldest_gen->mark) {
324 mark_stack_bd = allocBlock();
325 mark_stack_top_bd = mark_stack_bd;
326 mark_stack_bd->link = NULL;
327 mark_stack_bd->u.back = NULL;
328 mark_sp = mark_stack_bd->start;
329 } else {
330 mark_stack_bd = NULL;
331 mark_stack_top_bd = NULL;
332 mark_sp = NULL;
333 }
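/* Annotation (conceptual only): the mark stack is a chain of blocks
 * that grows one block at a time.  A push is roughly
 *
 *     *mark_sp++ = (StgWord)p;
 *     // if mark_sp has hit the end of mark_stack_bd, chain on a fresh
 *     // block and point mark_sp at its start
 *
 * with the real push/pop helpers living in MarkStack.h.  Only a
 * mark/compact or mark/sweep major GC (the oldest_gen->mark case
 * above) uses it.
 */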
334
335 /* -----------------------------------------------------------------------
336 * follow all the roots that we know about:
337 */
338
339 // the main thread is running: this prevents any other threads from
340 // exiting prematurely, so we can start them now.
341 // NB. do this after the mutable lists have been saved above, otherwise
342 // the other GC threads will be writing into the old mutable lists.
343 inc_running();
344 wakeup_gc_threads(gct->thread_index);
345
346 traceEventGcWork(gct->cap);
347
348 // scavenge the capability-private mutable lists. This isn't part
349 // of markSomeCapabilities() because markSomeCapabilities() can only
350 // call back into the GC via mark_root() (due to the gct register
351 // variable).
352 if (n_gc_threads == 1) {
353 for (n = 0; n < n_capabilities; n++) {
354 #if defined(THREADED_RTS)
355 scavenge_capability_mut_Lists1(&capabilities[n]);
356 #else
357 scavenge_capability_mut_lists(&capabilities[n]);
358 #endif
359 }
360 } else {
361 scavenge_capability_mut_lists(gct->cap);
362 for (n = 0; n < n_capabilities; n++) {
363 if (gc_threads[n]->idle) {
364 markCapability(mark_root, gct, &capabilities[n],
365 rtsTrue/*don't mark sparks*/);
366 scavenge_capability_mut_lists(&capabilities[n]);
367 }
368 }
369 }
370
371 // follow roots from the CAF list (used by GHCi)
372 gct->evac_gen_no = 0;
373 markCAFs(mark_root, gct);
374
375 // follow all the roots that the application knows about.
376 gct->evac_gen_no = 0;
377 if (n_gc_threads == 1) {
378 for (n = 0; n < n_capabilities; n++) {
379 markCapability(mark_root, gct, &capabilities[n],
380 rtsTrue/*don't mark sparks*/);
381 }
382 } else {
383 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
384 }
385
386 markScheduler(mark_root, gct);
387
388 #if defined(RTS_USER_SIGNALS)
389 // mark the signal handlers (signals should be already blocked)
390 markSignalHandlers(mark_root, gct);
391 #endif
392
393 // Mark the weak pointer list, and prepare to detect dead weak pointers.
394 markWeakPtrList();
395 initWeakForGC();
396
397 // Mark the stable pointer table.
398 markStablePtrTable(mark_root, gct);
399
400 /* -------------------------------------------------------------------------
401 * Repeatedly scavenge all the areas we know about until there's no
402 * more scavenging to be done.
403 */
404 for (;;)
405 {
406 scavenge_until_all_done();
407 // The other threads are now stopped. We might recurse back to
408 // here, but from now on this is the only thread.
409
410 // must be last... invariant is that everything is fully
411 // scavenged at this point.
412 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
413 inc_running();
414 continue;
415 }
416
417 // If we get to here, there's really nothing left to do.
418 break;
419 }
420
421 if (!DEBUG_IS_ON && n_gc_threads != 1) {
422 gct->allocated = clearNursery(cap);
423 }
424
425 shutdown_gc_threads(gct->thread_index);
426
427 // Now see which stable names are still alive.
428 gcStablePtrTable();
429
430 #ifdef THREADED_RTS
431 if (n_gc_threads == 1) {
432 for (n = 0; n < n_capabilities; n++) {
433 pruneSparkQueue(&capabilities[n]);
434 }
435 } else {
436 for (n = 0; n < n_capabilities; n++) {
437 if (n == cap->no || gc_threads[n]->idle) {
438 pruneSparkQueue(&capabilities[n]);
439 }
440 }
441 }
442 #endif
443
444 #ifdef PROFILING
445 // We call processHeapClosureForDead() on every closure destroyed during
446 // the current garbage collection, so we invoke LdvCensusForDead().
447 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
448 || RtsFlags.ProfFlags.bioSelector != NULL) {
449 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
450 LdvCensusForDead(N);
451 ACQUIRE_SM_LOCK;
452 }
453 #endif
454
455 // NO MORE EVACUATION AFTER THIS POINT!
456
457 // Finally: compact or sweep the oldest generation.
458 if (major_gc && oldest_gen->mark) {
459 if (oldest_gen->compact)
460 compact(gct->scavenged_static_objects);
461 else
462 sweep(oldest_gen);
463 }
464
465 copied = 0;
466 par_max_copied = 0;
467 par_tot_copied = 0;
468 {
469 nat i;
470 for (i=0; i < n_gc_threads; i++) {
471 if (n_gc_threads > 1) {
472 debugTrace(DEBUG_gc,"thread %d:", i);
473 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
474 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
475 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
476 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
477 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
478 }
479 copied += gc_threads[i]->copied;
480 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
481 }
482 par_tot_copied = copied;
483 if (n_gc_threads == 1) {
484 par_max_copied = 0;
485 par_tot_copied = 0;
486 }
487 }
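/* Annotation: par_max_copied and par_tot_copied are passed to
 * stat_endGC() at the end of this function and (via Stats.c) feed the
 * "Parallel GC work balance" figure in the +RTS -s output; zeroing them
 * for a single-threaded GC keeps that figure meaningful only for
 * genuinely parallel collections.
 */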
488
489 // Run through all the generations/steps and tidy up.
490 // We're going to:
491 // - count the amount of "live" data (live_words, live_blocks)
492 // - count the amount of "copied" data in this GC (copied)
493 // - free from-space
494 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
495 //
496 live_words = 0;
497 live_blocks = 0;
498
499 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
500
501 if (g == N) {
502 generations[g].collections++; // for stats
503 if (n_gc_threads > 1) generations[g].par_collections++;
504 }
505
506 // Count the mutable list as bytes "copied" for the purposes of
507 // stats. Every mutable list is copied during every GC.
508 if (g > 0) {
509 W_ mut_list_size = 0;
510 for (n = 0; n < n_capabilities; n++) {
511 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
512 }
513 copied += mut_list_size;
514
515 debugTrace(DEBUG_gc,
516 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
517 (unsigned long)(mut_list_size * sizeof(W_)),
518 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
519 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
520 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
521 mutlist_ATOMIC_INVARIANT,
522 mutlist_INVARIANT_CHECK_QUEUE,
523 mutlist_OTHERS);
524 }
525
526 bdescr *next, *prev;
527 gen = &generations[g];
528
529 // for generations we collected...
530 if (g <= N) {
531
532 /* free old memory and shift to-space into from-space for all
533 * the collected steps (except the allocation area). These
534 * freed blocks will probably be quickly recycled.
535 */
536 if (gen->mark)
537 {
538 // tack the new blocks on the end of the existing blocks
539 if (gen->old_blocks != NULL) {
540
541 prev = NULL;
542 for (bd = gen->old_blocks; bd != NULL; bd = next) {
543
544 next = bd->link;
545
546 if (!(bd->flags & BF_MARKED))
547 {
548 if (prev == NULL) {
549 gen->old_blocks = next;
550 } else {
551 prev->link = next;
552 }
553 freeGroup(bd);
554 gen->n_old_blocks--;
555 }
556 else
557 {
558 gen->n_words += bd->free - bd->start;
559
560 // NB. this step might not be compacted next
561 // time, so reset the BF_MARKED flags.
562 // They are set before GC if we're going to
563 // compact. (search for BF_MARKED above).
564 bd->flags &= ~BF_MARKED;
565
566 // between GCs, all blocks in the heap except
567 // for the nursery have the BF_EVACUATED flag set.
568 bd->flags |= BF_EVACUATED;
569
570 prev = bd;
571 }
572 }
573
574 if (prev != NULL) {
575 prev->link = gen->blocks;
576 gen->blocks = gen->old_blocks;
577 }
578 }
579 // add the new blocks to the block tally
580 gen->n_blocks += gen->n_old_blocks;
581 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
582 ASSERT(countOccupied(gen->blocks) == gen->n_words);
583 }
584 else // not compacted
585 {
586 freeChain(gen->old_blocks);
587 }
588
589 gen->old_blocks = NULL;
590 gen->n_old_blocks = 0;
591
592 /* LARGE OBJECTS. The current live large objects are chained on
593 * scavenged_large, having been moved during garbage
594 * collection from large_objects. Any objects left on the
595 * large_objects list are therefore dead, so we free them here.
596 */
597 freeChain(gen->large_objects);
598 gen->large_objects = gen->scavenged_large_objects;
599 gen->n_large_blocks = gen->n_scavenged_large_blocks;
600 gen->n_large_words = countOccupied(gen->large_objects);
601 gen->n_new_large_words = 0;
602 }
603 else // for generations > N
604 {
605 /* For older generations, we need to append the
606 * scavenged_large_object list (i.e. large objects that have been
607 * promoted during this GC) to the large_object list for that step.
608 */
609 for (bd = gen->scavenged_large_objects; bd; bd = next) {
610 next = bd->link;
611 dbl_link_onto(bd, &gen->large_objects);
612 gen->n_large_words += bd->free - bd->start;
613 }
614
615 // add the new blocks we promoted during this GC
616 gen->n_large_blocks += gen->n_scavenged_large_blocks;
617 }
618
619 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
620 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
621
622 gen->scavenged_large_objects = NULL;
623 gen->n_scavenged_large_blocks = 0;
624
625 // Count "live" data
626 live_words += genLiveWords(gen);
627 live_blocks += genLiveBlocks(gen);
628
629 // add in the partial blocks in the gen_workspaces, but ignore gen 0
630 // if this is a local GC (we can't count another capability's part_list)
631 {
632 nat i;
633 for (i = 0; i < n_capabilities; i++) {
634 live_words += gcThreadLiveWords(i, gen->no);
635 live_blocks += gcThreadLiveBlocks(i, gen->no);
636 }
637 }
638 } // for all generations
639
640 // update the max size of older generations after a major GC
641 resize_generations();
642
643 // Free the mark stack.
644 if (mark_stack_top_bd != NULL) {
645 debugTrace(DEBUG_gc, "mark stack: %d blocks",
646 countBlocks(mark_stack_top_bd));
647 freeChain(mark_stack_top_bd);
648 }
649
650 // Free any bitmaps.
651 for (g = 0; g <= N; g++) {
652 gen = &generations[g];
653 if (gen->bitmap != NULL) {
654 freeGroup(gen->bitmap);
655 gen->bitmap = NULL;
656 }
657 }
658
659 // Reset the nursery: make the blocks empty
660 if (DEBUG_IS_ON || n_gc_threads == 1) {
661 for (n = 0; n < n_capabilities; n++) {
662 allocated += clearNursery(&capabilities[n]);
663 }
664 } else {
665 // When doing parallel GC, clearNursery() is called by the
666 // worker threads, and the value returned is stored in
667 // gct->allocated.
668 for (n = 0; n < n_capabilities; n++) {
669 if (gc_threads[n]->idle) {
670 allocated += clearNursery(&capabilities[n]);
671 } else {
672 allocated += gc_threads[n]->allocated;
673 }
674 }
675 }
676
677 resize_nursery();
678
679 resetNurseries();
680
681 // mark the garbage collected CAFs as dead
682 #if 0 && defined(DEBUG) // doesn't work at the moment
683 if (major_gc) { gcCAFs(); }
684 #endif
685
686 #ifdef PROFILING
687 // resetStaticObjectForRetainerProfiling() must be called before
688 // zeroing below.
689
690 // ToDo: fix the gct->scavenged_static_objects below
691 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
692 #endif
693
694 // zero the scavenged static object list
695 if (major_gc) {
696 nat i;
697 if (n_gc_threads == 1) {
698 zero_static_object_list(gct->scavenged_static_objects);
699 } else {
700 for (i = 0; i < n_gc_threads; i++) {
701 if (!gc_threads[i]->idle) {
702 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
703 }
704 }
705 }
706 }
707
708 // Update the stable pointer hash table.
709 updateStablePtrTable(major_gc);
710
711 // unlock the StablePtr table. Must be before scheduleFinalizers(),
712 // because a finalizer may call hs_free_fun_ptr() or
713 // hs_free_stable_ptr(), both of which access the StablePtr table.
714 stablePtrPostGC();
715
716 // Start any pending finalizers. Must be after
717 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
718 RELEASE_SM_LOCK;
719 scheduleFinalizers(cap, old_weak_ptr_list);
720 ACQUIRE_SM_LOCK;
721
722 // check sanity after GC
723 // before resurrectThreads(), because that might overwrite some
724 // closures, which will cause problems with THREADED where we don't
725 // fill slop.
726 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
727
728 // If a heap census is due, we need to do it before
729 // resurrectThreads(), for the same reason as checkSanity above:
730 // resurrectThreads() will overwrite some closures and leave slop
731 // behind.
732 if (do_heap_census) {
733 debugTrace(DEBUG_sched, "performing heap census");
734 RELEASE_SM_LOCK;
735 heapCensus(gct->gc_start_cpu);
736 ACQUIRE_SM_LOCK;
737 }
738
739 // send exceptions to any threads which were about to die
740 RELEASE_SM_LOCK;
741 resurrectThreads(resurrected_threads);
742 ACQUIRE_SM_LOCK;
743
744 if (major_gc) {
745 W_ need, got;
746 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
747 got = mblocks_allocated;
748 /* If the amount of data remains constant, next major GC we'll
749 require (F+1)*need. We leave (F+2)*need in order to reduce
750 repeated deallocation and reallocation. */
751 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
752 if (got > need) {
753 returnMemoryToOS(got - need);
754 }
755 }
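/* Worked example (illustrative numbers, assuming the default -F2, i.e.
 * oldGenFactor == 2): if the heap needs need = 100 megablocks after
 * this major GC and got = 600 megablocks are currently allocated, we
 * retain (2 + 2) * 100 = 400 and return the remaining 200 to the OS.
 */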
756
757 // extra GC trace info
758 IF_DEBUG(gc, statDescribeGens());
759
760 #ifdef DEBUG
761 // symbol-table based profiling
762 /* heapCensus(to_blocks); */ /* ToDo */
763 #endif
764
765 // restore enclosing cost centre
766 #ifdef PROFILING
767 for (n = 0; n < n_capabilities; n++) {
768 capabilities[n].r.rCCCS = save_CCS[n];
769 }
770 #endif
771
772 #ifdef DEBUG
773 // check for memory leaks if DEBUG is on
774 memInventory(DEBUG_gc);
775 #endif
776
777 #ifdef RTS_GTK_FRONTPANEL
778 if (RtsFlags.GcFlags.frontpanel) {
779 updateFrontPanelAfterGC( N, live );
780 }
781 #endif
782
783 // ok, GC over: tell the stats department what happened.
784 stat_endGC(cap, gct, allocated, live_words, copied,
785 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
786 N, n_gc_threads, par_max_copied, par_tot_copied);
787
788 #if defined(RTS_USER_SIGNALS)
789 if (RtsFlags.MiscFlags.install_signal_handlers) {
790 // unblock signals again
791 unblockUserSignals();
792 }
793 #endif
794
795 RELEASE_SM_LOCK;
796
797 SET_GCT(saved_gct);
798 }
799
800 /* -----------------------------------------------------------------------------
801 Initialise the gc_thread structures.
802 -------------------------------------------------------------------------- */
803
804 #define GC_THREAD_INACTIVE 0
805 #define GC_THREAD_STANDING_BY 1
806 #define GC_THREAD_RUNNING 2
807 #define GC_THREAD_WAITING_TO_CONTINUE 3
808
809 static void
810 new_gc_thread (nat n, gc_thread *t)
811 {
812 nat g;
813 gen_workspace *ws;
814
815 t->cap = &capabilities[n];
816
817 #ifdef THREADED_RTS
818 t->id = 0;
819 initSpinLock(&t->gc_spin);
820 initSpinLock(&t->mut_spin);
821 ACQUIRE_SPIN_LOCK(&t->gc_spin);
822 t->wakeup = GC_THREAD_INACTIVE; // starts inactive, so we can wait for
823 // the thread to start up and reach STANDING_BY; see wakeup_gc_threads
824 #endif
825
826 t->thread_index = n;
827 t->idle = rtsFalse;
828 t->free_blocks = NULL;
829 t->gc_count = 0;
830
831 init_gc_thread(t);
832
833 #ifdef USE_PAPI
834 t->papi_events = -1;
835 #endif
836
837 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
838 {
839 ws = &t->gens[g];
840 ws->gen = &generations[g];
841 ASSERT(g == ws->gen->no);
842 ws->my_gct = t;
843
844 // We want to call
845 // alloc_todo_block(ws,0);
846 // but can't, because it uses gct which isn't set up at this point.
847 // Hence, allocate a block for todo_bd manually:
848 {
849 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
850 initBdescr(bd, ws->gen, ws->gen->to);
851 bd->flags = BF_EVACUATED;
852 bd->u.scan = bd->free = bd->start;
853
854 ws->todo_bd = bd;
855 ws->todo_free = bd->free;
856 ws->todo_lim = bd->start + BLOCK_SIZE_W;
857 }
858
859 ws->todo_q = newWSDeque(128);
860 ws->todo_overflow = NULL;
861 ws->n_todo_overflow = 0;
862 ws->todo_large_objects = NULL;
863
864 ws->part_list = NULL;
865 ws->n_part_blocks = 0;
866
867 ws->scavd_list = NULL;
868 ws->n_scavd_blocks = 0;
869 }
870 }
871
872
873 void
874 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
875 {
876 #if defined(THREADED_RTS)
877 nat i;
878
879 if (from > 0) {
880 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
881 "initGcThreads");
882 } else {
883 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
884 "initGcThreads");
885 }
886
887 // We have to update the gct->cap pointers to point to the new
888 // Capability array now.
889 for (i = 0; i < from; i++) {
890 gc_threads[i]->cap = &capabilities[gc_threads[i]->cap->no];
891 }
892
893 for (i = from; i < to; i++) {
894 gc_threads[i] =
895 stgMallocBytes(sizeof(gc_thread) +
896 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
897 "alloc_gc_threads");
898
899 new_gc_thread(i, gc_threads[i]);
900 }
901 #else
902 ASSERT(from == 0 && to == 1);
903 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
904 gc_threads[0] = gct;
905 new_gc_thread(0,gc_threads[0]);
906 #endif
907 }
908
909 void
910 freeGcThreads (void)
911 {
912 nat g;
913 if (gc_threads != NULL) {
914 #if defined(THREADED_RTS)
915 nat i;
916 for (i = 0; i < n_capabilities; i++) {
917 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
918 {
919 freeWSDeque(gc_threads[i]->gens[g].todo_q);
920 }
921 stgFree (gc_threads[i]);
922 }
923 stgFree (gc_threads);
924 #else
925 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
926 {
927 freeWSDeque(gc_threads[0]->gens[g].todo_q);
928 }
929 stgFree (gc_threads);
930 #endif
931 gc_threads = NULL;
932 }
933 }
934
935 /* ----------------------------------------------------------------------------
936 Start GC threads
937 ------------------------------------------------------------------------- */
938
939 static volatile StgWord gc_running_threads;
940
941 static StgWord
942 inc_running (void)
943 {
944 StgWord new;
945 new = atomic_inc(&gc_running_threads);
946 ASSERT(new <= n_gc_threads);
947 return new;
948 }
949
950 static StgWord
951 dec_running (void)
952 {
953 ASSERT(gc_running_threads != 0);
954 return atomic_dec(&gc_running_threads);
955 }
956
957 static rtsBool
958 any_work (void)
959 {
960 int g;
961 gen_workspace *ws;
962
963 gct->any_work++;
964
965 write_barrier();
966
967 // scavenge objects in compacted generation
968 if (mark_stack_bd != NULL && !mark_stack_empty()) {
969 return rtsTrue;
970 }
971
972 // Check for global work in any step. We don't need to check for
973 // local work, because we have already exited scavenge_loop(),
974 // which means there is no local work for this thread.
975 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
976 ws = &gct->gens[g];
977 if (ws->todo_large_objects) return rtsTrue;
978 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
979 if (ws->todo_overflow) return rtsTrue;
980 }
981
982 #if defined(THREADED_RTS)
983 if (work_stealing) {
984 nat n;
985 // look for work to steal
986 for (n = 0; n < n_gc_threads; n++) {
987 if (n == gct->thread_index) continue;
988 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
989 ws = &gc_threads[n]->gens[g];
990 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
991 }
992 }
993 }
994 #endif
995
996 gct->no_work++;
997 #if defined(THREADED_RTS)
998 yieldThread();
999 #endif
1000
1001 return rtsFalse;
1002 }
1003
1004 static void
1005 scavenge_until_all_done (void)
1006 {
1007 DEBUG_ONLY( nat r );
1008
1009
1010 loop:
1011 #if defined(THREADED_RTS)
1012 if (n_gc_threads > 1) {
1013 scavenge_loop();
1014 } else {
1015 scavenge_loop1();
1016 }
1017 #else
1018 scavenge_loop();
1019 #endif
1020
1021 collect_gct_blocks();
1022
1023 // scavenge_loop() only exits when there's no work to do
1024
1025 #ifdef DEBUG
1026 r = dec_running();
1027 #else
1028 dec_running();
1029 #endif
1030
1031 traceEventGcIdle(gct->cap);
1032
1033 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1034
1035 while (gc_running_threads != 0) {
1036 // usleep(1);
1037 if (any_work()) {
1038 inc_running();
1039 traceEventGcWork(gct->cap);
1040 goto loop;
1041 }
1042 // any_work() does not remove the work from the queue, it
1043 // just checks for the presence of work. If we find any,
1044 // then we increment gc_running_threads and go back to
1045 // scavenge_loop() to perform any pending work.
1046 }
1047
1048 traceEventGcDone(gct->cap);
1049 }
1050
1051 #if defined(THREADED_RTS)
1052
1053 void
1054 gcWorkerThread (Capability *cap)
1055 {
1056 gc_thread *saved_gct;
1057
1058 // necessary if we stole a callee-saves register for gct:
1059 saved_gct = gct;
1060
1061 SET_GCT(gc_threads[cap->no]);
1062 gct->id = osThreadId();
1063
1064 stat_gcWorkerThreadStart(gct);
1065
1066 // Wait until we're told to wake up
1067 RELEASE_SPIN_LOCK(&gct->mut_spin);
1068 // yieldThread();
1069 // Strangely, adding a yieldThread() here makes the CPU time
1070 // measurements more accurate on Linux, perhaps because it syncs
1071 // the CPU time across the multiple cores. Without this, CPU time
1072 // is heavily skewed towards GC rather than MUT.
1073 gct->wakeup = GC_THREAD_STANDING_BY;
1074 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1075 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1076
1077 #ifdef USE_PAPI
1078 // start performance counters in this thread...
1079 if (gct->papi_events == -1) {
1080 papi_init_eventset(&gct->papi_events);
1081 }
1082 papi_thread_start_gc1_count(gct->papi_events);
1083 #endif
1084
1085 init_gc_thread(gct);
1086
1087 traceEventGcWork(gct->cap);
1088
1089 // Every thread evacuates some roots.
1090 gct->evac_gen_no = 0;
1091 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1092 scavenge_capability_mut_lists(cap);
1093
1094 scavenge_until_all_done();
1095
1096 if (!DEBUG_IS_ON) {
1097 gct->allocated = clearNursery(cap);
1098 }
1099
1100 #ifdef THREADED_RTS
1101 // Now that the whole heap is marked, we discard any sparks that
1102 // were found to be unreachable. The main GC thread is currently
1103 // marking heap reachable via weak pointers, so it is
1104 // non-deterministic whether a spark will be retained if it is
1105 // only reachable via weak pointers. To fix this problem would
1106 // require another GC barrier, which is too high a price.
1107 pruneSparkQueue(cap);
1108 #endif
1109
1110 #ifdef USE_PAPI
1111 // count events in this thread towards the GC totals
1112 papi_thread_stop_gc1_count(gct->papi_events);
1113 #endif
1114
1115 // Wait until we're told to continue
1116 RELEASE_SPIN_LOCK(&gct->gc_spin);
1117 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1118 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1119 gct->thread_index);
1120 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1121 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1122
1123 // record the time spent doing GC in the Task structure
1124 stat_gcWorkerThreadDone(gct);
1125
1126 SET_GCT(saved_gct);
1127 }
1128
1129 #endif
1130
1131 #if defined(THREADED_RTS)
1132
1133 void
1134 waitForGcThreads (Capability *cap USED_IF_THREADS)
1135 {
1136 const nat n_threads = n_capabilities;
1137 const nat me = cap->no;
1138 nat i, j;
1139 rtsBool retry = rtsTrue;
1140
1141 while(retry) {
1142 for (i=0; i < n_threads; i++) {
1143 if (i == me || gc_threads[i]->idle) continue;
1144 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1145 prodCapability(&capabilities[i], cap->running_task);
1146 }
1147 }
1148 for (j=0; j < 10; j++) {
1149 retry = rtsFalse;
1150 for (i=0; i < n_threads; i++) {
1151 if (i == me || gc_threads[i]->idle) continue;
1152 write_barrier();
1153 interruptCapability(&capabilities[i]);
1154 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1155 retry = rtsTrue;
1156 }
1157 }
1158 if (!retry) break;
1159 yieldThread();
1160 }
1161 }
1162 }
1163
1164 #endif // THREADED_RTS
1165
1166 static void
1167 start_gc_threads (void)
1168 {
1169 #if defined(THREADED_RTS)
1170 gc_running_threads = 0;
1171 #endif
1172 }
1173
1174 static void
1175 wakeup_gc_threads (nat me USED_IF_THREADS)
1176 {
1177 #if defined(THREADED_RTS)
1178 nat i;
1179
1180 if (n_gc_threads == 1) return;
1181
1182 for (i=0; i < n_gc_threads; i++) {
1183 if (i == me || gc_threads[i]->idle) continue;
1184 inc_running();
1185 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1186 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1187
1188 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1189 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1190 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1191 }
1192 #endif
1193 }
1194
1195 // After GC is complete, we must wait for all GC threads to enter the
1196 // standby state, otherwise they may still be executing inside
1197 // any_work(), and may even remain awake until the next GC starts.
1198 static void
1199 shutdown_gc_threads (nat me USED_IF_THREADS)
1200 {
1201 #if defined(THREADED_RTS)
1202 nat i;
1203
1204 if (n_gc_threads == 1) return;
1205
1206 for (i=0; i < n_gc_threads; i++) {
1207 if (i == me || gc_threads[i]->idle) continue;
1208 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1209 }
1210 #endif
1211 }
1212
1213 #if defined(THREADED_RTS)
1214 void
1215 releaseGCThreads (Capability *cap USED_IF_THREADS)
1216 {
1217 const nat n_threads = n_capabilities;
1218 const nat me = cap->no;
1219 nat i;
1220 for (i=0; i < n_threads; i++) {
1221 if (i == me || gc_threads[i]->idle) continue;
1222 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1223 barf("releaseGCThreads");
1224
1225 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1226 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1227 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1228 }
1229 }
1230 #endif
1231
1232 /* ----------------------------------------------------------------------------
1233 Initialise a generation that is to be collected
1234 ------------------------------------------------------------------------- */
1235
1236 static void
1237 prepare_collected_gen (generation *gen)
1238 {
1239 nat i, g, n;
1240 gen_workspace *ws;
1241 bdescr *bd, *next;
1242
1243 // Throw away the current mutable list. Invariant: the mutable
1244 // list always has at least one block; this means we can avoid a
1245 // check for NULL in recordMutable().
1246 g = gen->no;
1247 if (g != 0) {
1248 for (i = 0; i < n_capabilities; i++) {
1249 freeChain(capabilities[i].mut_lists[g]);
1250 capabilities[i].mut_lists[g] = allocBlock();
1251 }
1252 }
1253
1254 gen = &generations[g];
1255 ASSERT(gen->no == g);
1256
1257 // we'll construct a new list of threads in this step
1258 // during GC, throw away the current list.
1259 gen->old_threads = gen->threads;
1260 gen->threads = END_TSO_QUEUE;
1261
1262 // deprecate the existing blocks
1263 gen->old_blocks = gen->blocks;
1264 gen->n_old_blocks = gen->n_blocks;
1265 gen->blocks = NULL;
1266 gen->n_blocks = 0;
1267 gen->n_words = 0;
1268 gen->live_estimate = 0;
1269
1270 // initialise the large object queues.
1271 ASSERT(gen->scavenged_large_objects == NULL);
1272 ASSERT(gen->n_scavenged_large_blocks == 0);
1273
1274 // grab all the partial blocks stashed in the gc_thread workspaces and
1275 // move them to the old_blocks list of this gen.
1276 for (n = 0; n < n_capabilities; n++) {
1277 ws = &gc_threads[n]->gens[gen->no];
1278
1279 for (bd = ws->part_list; bd != NULL; bd = next) {
1280 next = bd->link;
1281 bd->link = gen->old_blocks;
1282 gen->old_blocks = bd;
1283 gen->n_old_blocks += bd->blocks;
1284 }
1285 ws->part_list = NULL;
1286 ws->n_part_blocks = 0;
1287
1288 ASSERT(ws->scavd_list == NULL);
1289 ASSERT(ws->n_scavd_blocks == 0);
1290
1291 if (ws->todo_free != ws->todo_bd->start) {
1292 ws->todo_bd->free = ws->todo_free;
1293 ws->todo_bd->link = gen->old_blocks;
1294 gen->old_blocks = ws->todo_bd;
1295 gen->n_old_blocks += ws->todo_bd->blocks;
1296 alloc_todo_block(ws,0); // always has one block.
1297 }
1298 }
1299
1300 // mark the small objects as from-space
1301 for (bd = gen->old_blocks; bd; bd = bd->link) {
1302 bd->flags &= ~BF_EVACUATED;
1303 }
1304
1305 // mark the large objects as from-space
1306 for (bd = gen->large_objects; bd; bd = bd->link) {
1307 bd->flags &= ~BF_EVACUATED;
1308 }
1309
1310 // for a compacted generation, we need to allocate the bitmap
1311 if (gen->mark) {
1312 StgWord bitmap_size; // in bytes
1313 bdescr *bitmap_bdescr;
1314 StgWord *bitmap;
1315
1316 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
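// i.e. one mark bit for every word in the blocks being compacted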
1317
1318 if (bitmap_size > 0) {
1319 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1320 / BLOCK_SIZE);
1321 gen->bitmap = bitmap_bdescr;
1322 bitmap = bitmap_bdescr->start;
1323
1324 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1325 bitmap_size, bitmap);
1326
1327 // don't forget to fill it with zeros!
1328 memset(bitmap, 0, bitmap_size);
1329
1330 // For each block in this step, point to its bitmap from the
1331 // block descriptor.
1332 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1333 bd->u.bitmap = bitmap;
1334 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1335
1336 // Also at this point we set the BF_MARKED flag
1337 // for this block. The invariant is that
1338 // BF_MARKED is always unset, except during GC
1339 // when it is set on those blocks which will be
1340 // compacted.
1341 if (!(bd->flags & BF_FRAGMENTED)) {
1342 bd->flags |= BF_MARKED;
1343 }
1344
1345 // BF_SWEPT should be marked only for blocks that are being
1346 // collected in sweep()
1347 bd->flags &= ~BF_SWEPT;
1348 }
1349 }
1350 }
1351 }
1352
1353
1354 /* ----------------------------------------------------------------------------
1355 Save the mutable lists in saved_mut_lists
1356 ------------------------------------------------------------------------- */
1357
1358 static void
1359 stash_mut_list (Capability *cap, nat gen_no)
1360 {
1361 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1362 cap->mut_lists[gen_no] = allocBlock_sync();
1363 }
1364
1365 /* ----------------------------------------------------------------------------
1366 Initialise a generation that is *not* to be collected
1367 ------------------------------------------------------------------------- */
1368
1369 static void
1370 prepare_uncollected_gen (generation *gen)
1371 {
1372 nat i;
1373
1374
1375 ASSERT(gen->no > 0);
1376
1377 // save the current mutable lists for this generation, and
1378 // allocate a fresh block for each one. We'll traverse these
1379 // mutable lists as roots early on in the GC.
1380 for (i = 0; i < n_capabilities; i++) {
1381 stash_mut_list(&capabilities[i], gen->no);
1382 }
1383
1384 ASSERT(gen->scavenged_large_objects == NULL);
1385 ASSERT(gen->n_scavenged_large_blocks == 0);
1386 }
1387
1388 /* -----------------------------------------------------------------------------
1389 Collect the completed blocks from a GC thread and attach them to
1390 the generation.
1391 -------------------------------------------------------------------------- */
1392
1393 static void
1394 collect_gct_blocks (void)
1395 {
1396 nat g;
1397 gen_workspace *ws;
1398 bdescr *bd, *prev;
1399
1400 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1401 ws = &gct->gens[g];
1402
1403 // there may still be a block attached to ws->todo_bd;
1404 // leave it there to use next time.
1405
1406 if (ws->scavd_list != NULL) {
1407 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1408
1409 ASSERT(gct->scan_bd == NULL);
1410 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1411
1412 prev = NULL;
1413 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1414 ws->gen->n_words += bd->free - bd->start;
1415 prev = bd;
1416 }
1417 if (prev != NULL) {
1418 prev->link = ws->gen->blocks;
1419 ws->gen->blocks = ws->scavd_list;
1420 }
1421 ws->gen->n_blocks += ws->n_scavd_blocks;
1422
1423 ws->scavd_list = NULL;
1424 ws->n_scavd_blocks = 0;
1425
1426 RELEASE_SPIN_LOCK(&ws->gen->sync);
1427 }
1428 }
1429 }
1430
1431 /* -----------------------------------------------------------------------------
1432 During mutation, any blocks that are filled by allocatePinned() are
1433 stashed on the local pinned_object_blocks list, to avoid needing to
1434 take a global lock. Here we collect those blocks from the
1435 cap->pinned_object_blocks lists and put them on the
1436 main g0->large_object list.
1437
1438 Returns: the number of words allocated this way, for stats
1439 purposes.
1440 -------------------------------------------------------------------------- */
1441
1442 static StgWord
1443 collect_pinned_object_blocks (void)
1444 {
1445 nat n;
1446 bdescr *bd, *prev;
1447 StgWord allocated = 0;
1448
1449 for (n = 0; n < n_capabilities; n++) {
1450 prev = NULL;
1451 for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
1452 allocated += bd->free - bd->start;
1453 prev = bd;
1454 }
1455 if (prev != NULL) {
1456 prev->link = g0->large_objects;
1457 if (g0->large_objects != NULL) {
1458 g0->large_objects->u.back = prev;
1459 }
1460 g0->large_objects = capabilities[n].pinned_object_blocks;
1461 capabilities[n].pinned_object_blocks = 0;
1462 }
1463 }
1464
1465 return allocated;
1466 }
1467
1468 /* -----------------------------------------------------------------------------
1469 Initialise a gc_thread before GC
1470 -------------------------------------------------------------------------- */
1471
1472 static void
1473 init_gc_thread (gc_thread *t)
1474 {
1475 t->static_objects = END_OF_STATIC_LIST;
1476 t->scavenged_static_objects = END_OF_STATIC_LIST;
1477 t->scan_bd = NULL;
1478 t->mut_lists = t->cap->mut_lists;
1479 t->evac_gen_no = 0;
1480 t->failed_to_evac = rtsFalse;
1481 t->eager_promotion = rtsTrue;
1482 t->thunk_selector_depth = 0;
1483 t->allocated = 0;
1484 t->copied = 0;
1485 t->scanned = 0;
1486 t->any_work = 0;
1487 t->no_work = 0;
1488 t->scav_find_work = 0;
1489 }
1490
1491 /* -----------------------------------------------------------------------------
1492 Function we pass to evacuate roots.
1493 -------------------------------------------------------------------------- */
1494
1495 static void
1496 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1497 {
1498 // we stole a register for gct, but this function is called from
1499 // *outside* the GC where the register variable is not in effect,
1500 // so we need to save and restore it here. NB. only call
1501 // mark_root() from the main GC thread, otherwise gct will be
1502 // incorrect.
1503 #if defined(THREADED_RTS)
1504 gc_thread *saved_gct;
1505 saved_gct = gct;
1506 #endif
1507 SET_GCT(user);
1508
1509 evacuate(root);
1510
1511 SET_GCT(saved_gct);
1512 }
1513
1514 /* -----------------------------------------------------------------------------
1515 Initialising the static object & mutable lists
1516 -------------------------------------------------------------------------- */
1517
1518 static void
1519 zero_static_object_list(StgClosure* first_static)
1520 {
1521 StgClosure* p;
1522 StgClosure* link;
1523 const StgInfoTable *info;
1524
1525 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1526 info = get_itbl(p);
1527 link = *STATIC_LINK(info, p);
1528 *STATIC_LINK(info,p) = NULL;
1529 }
1530 }
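/* Annotation: NULLing the static link fields above restores the
 * invariant described in the STATIC OBJECT LIST comment at the top of
 * this file -- a NULL link once again means "not on any list" -- ready
 * for the next major collection.
 */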
1531
1532 /* ----------------------------------------------------------------------------
1533 Reset the sizes of the older generations when we do a major
1534 collection.
1535
1536 CURRENT STRATEGY: make all generations except zero the same size.
1537 We have to stay within the maximum heap size, and leave a certain
1538 percentage of the maximum heap size available to allocate into.
1539 ------------------------------------------------------------------------- */
1540
1541 static void
1542 resize_generations (void)
1543 {
1544 nat g;
1545
1546 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1547 W_ live, size, min_alloc, words;
1548 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1549 const W_ gens = RtsFlags.GcFlags.generations;
1550
1551 // live in the oldest generations
1552 if (oldest_gen->live_estimate != 0) {
1553 words = oldest_gen->live_estimate;
1554 } else {
1555 words = oldest_gen->n_words;
1556 }
1557 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1558 oldest_gen->n_large_blocks;
1559
1560 // default max size for all generations except zero
1561 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1562 RtsFlags.GcFlags.minOldGenSize);
1563
1564 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1565 if (max > 0) {
1566 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1567 } else {
1568 RtsFlags.GcFlags.heapSizeSuggestion = size;
1569 }
1570 }
1571
1572 // minimum size for generation zero
1573 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1574 RtsFlags.GcFlags.minAllocAreaSize);
1575
1576 // Auto-enable compaction when the residency reaches a
1577 // certain percentage of the maximum heap size (default: 30%).
1578 if (RtsFlags.GcFlags.compact ||
1579 (max > 0 &&
1580 oldest_gen->n_blocks >
1581 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1582 oldest_gen->mark = 1;
1583 oldest_gen->compact = 1;
1584 // debugBelch("compaction: on\n", live);
1585 } else {
1586 oldest_gen->mark = 0;
1587 oldest_gen->compact = 0;
1588 // debugBelch("compaction: off\n", live);
1589 }
1590
1591 if (RtsFlags.GcFlags.sweep) {
1592 oldest_gen->mark = 1;
1593 }
1594
1595 // if we're going to go over the maximum heap size, reduce the
1596 // size of the generations accordingly. The calculation is
1597 // different if compaction is turned on, because we don't need
1598 // to double the space required to collect the old generation.
1599 if (max != 0) {
1600
1601 // this test is necessary to ensure that the calculations
1602 // below don't have any negative results - we're working
1603 // with unsigned values here.
1604 if (max < min_alloc) {
1605 heapOverflow();
1606 }
1607
1608 if (oldest_gen->compact) {
1609 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1610 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1611 }
1612 } else {
1613 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1614 size = (max - min_alloc) / ((gens - 1) * 2);
1615 }
1616 }
1617
1618 if (size < live) {
1619 heapOverflow();
1620 }
1621 }
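/* Annotation: the 2x factors above are the copying-collection reserve --
 * a generation of `size` blocks may need roughly another `size` blocks
 * of to-space while it is being collected (the compacting oldest
 * generation being the exception).  E.g. with gens = 3, max = 1000 and
 * min_alloc = 100, the non-compacting branch caps size at
 * (1000 - 100) / ((3 - 1) * 2) = 225 blocks.
 */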
1622
1623 #if 0
1624 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1625 min_alloc, size, max);
1626 #endif
1627
1628 for (g = 0; g < gens; g++) {
1629 generations[g].max_blocks = size;
1630 }
1631 }
1632 }
1633
1634 /* -----------------------------------------------------------------------------
1635 Calculate the new size of the nursery, and resize it.
1636 -------------------------------------------------------------------------- */
1637
1638 static void
1639 resize_nursery (void)
1640 {
1641 const StgWord min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1642
1643 if (RtsFlags.GcFlags.generations == 1)
1644 { // Two-space collector:
1645 W_ blocks;
1646
1647 /* set up a new nursery. Allocate a nursery size based on a
1648 * function of the amount of live data (by default a factor of 2)
1649 * Use the blocks from the old nursery if possible, freeing up any
1650 * left over blocks.
1651 *
1652 * If we get near the maximum heap size, then adjust our nursery
1653 * size accordingly. If the nursery is the same size as the live
1654 * data (L), then we need 3L bytes. We can reduce the size of the
1655 * nursery to bring the required memory down near 2L bytes.
1656 *
1657 * A normal 2-space collector would need 4L bytes to give the same
1658 * performance we get from 3L bytes, reducing to the same
1659 * performance at 2L bytes.
1660 */
1661 blocks = generations[0].n_blocks;
1662
1663 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1664 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1665 RtsFlags.GcFlags.maxHeapSize )
1666 {
1667 long adjusted_blocks; // signed on purpose
1668 int pc_free;
1669
1670 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1671
1672 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1673 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1674
1675 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1676 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1677 {
1678 heapOverflow();
1679 }
1680 blocks = adjusted_blocks;
1681 }
1682 else
1683 {
1684 blocks *= RtsFlags.GcFlags.oldGenFactor;
1685 if (blocks < min_nursery)
1686 {
1687 blocks = min_nursery;
1688 }
1689 }
1690 resizeNurseries(blocks);
1691 }
1692 else // Generational collector
1693 {
1694 /*
1695 * If the user has given us a suggested heap size, adjust our
1696 * allocation area to make best use of the memory available.
1697 */
1698 if (RtsFlags.GcFlags.heapSizeSuggestion)
1699 {
1700 long blocks;
1701 StgWord needed;
1702
1703 calcNeeded(rtsFalse, &needed); // approx blocks needed at next GC
1704
1705 /* Guess how much will be live in generation 0 step 0 next time.
1706 * A good approximation is obtained by finding the
1707 * percentage of g0 that was live at the last minor GC.
1708 *
1709 * We have an accurate figure for the amount of copied data in
1710 * 'copied', but we must convert this to a number of blocks, with
1711 * a small adjustment for estimated slop at the end of a block
1712 * (- 10 words).
1713 */
1714 if (N == 0)
1715 {
1716 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1717 / countNurseryBlocks();
1718 }
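/* Annotation: for example, if the data copied out of g0 amounted to
 * roughly 30 blocks' worth (using the ~10-word-per-block slop
 * adjustment above) and the nursery is 100 blocks, g0_pcnt_kept comes
 * out near 30.
 */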
1719
1720 /* Estimate a size for the allocation area based on the
1721 * information available. We might end up going slightly under
1722 * or over the suggested heap size, but we should be pretty
1723 * close on average.
1724 *
1725 * Formula: suggested - needed
1726 * ----------------------------
1727 * 1 + g0_pcnt_kept/100
1728 *
1729 * where 'needed' is the amount of memory needed at the next
1730 * collection for collecting all gens except g0.
1731 */
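/* Worked example (illustrative numbers only): with a suggested heap of
 * 100000 blocks, needed = 20000 and g0_pcnt_kept = 25, the formula
 * gives blocks = (100000 - 20000) * 100 / 125 = 64000; the retained
 * quarter of that (16000) plus `needed` lands back near 100000.
 */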
1732 blocks =
1733 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1734 (100 + (long)g0_pcnt_kept);
1735
1736 if (blocks < (long)min_nursery) {
1737 blocks = min_nursery;
1738 }
1739
1740 resizeNurseries((W_)blocks);
1741 }
1742 else
1743 {
1744 // we might have added extra large blocks to the nursery, so
1745 // resize back to minAllocAreaSize again.
1746 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1747 }
1748 }
1749 }
1750
1751 /* -----------------------------------------------------------------------------
1752 Sanity code for CAF garbage collection.
1753
1754 With DEBUG turned on, we manage a CAF list in addition to the SRT
1755 mechanism. After GC, we run down the CAF list and blackhole any
1756 CAFs which have been garbage collected. This means we get an error
1757 whenever the program tries to enter a garbage collected CAF.
1758
1759 Any garbage collected CAFs are taken off the CAF list at the same
1760 time.
1761 -------------------------------------------------------------------------- */
1762
1763 #if 0 && defined(DEBUG)
1764
1765 static void
1766 gcCAFs(void)
1767 {
1768 StgClosure* p;
1769 StgClosure** pp;
1770 const StgInfoTable *info;
1771 nat i;
1772
1773 i = 0;
1774 p = caf_list;
1775 pp = &caf_list;
1776
1777 while (p != NULL) {
1778
1779 info = get_itbl(p);
1780
1781 ASSERT(info->type == IND_STATIC);
1782
1783 if (STATIC_LINK(info,p) == NULL) {
1784 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1785 // black hole it
1786 SET_INFO(p,&stg_BLACKHOLE_info);
1787 p = STATIC_LINK2(info,p);
1788 *pp = p;
1789 }
1790 else {
1791 pp = &STATIC_LINK2(info,p);
1792 p = *pp;
1793 i++;
1794 }
1795
1796 }
1797
1798 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1799 }
1800 #endif