rts/sm/GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "GCTDecl.h"
44 #include "Compact.h"
45 #include "Evac.h"
46 #include "Scav.h"
47 #include "GCUtils.h"
48 #include "MarkStack.h"
49 #include "MarkWeak.h"
50 #include "Sparks.h"
51 #include "Sweep.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
93 */
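/* Illustrative sketch (not compiled into the RTS): how an object is consed
 * onto a static list of the kind described above.  The helper name here is
 * hypothetical; the real logic is spread across Evac.c and Scav.c.  A NULL
 * static link field means "not on any list", so we only add the object in
 * that case; note that the head of an empty list is END_OF_STATIC_LIST
 * (the value 1), never NULL.
 */
#if 0
static void push_static_object_sketch (StgClosure *p, StgClosure **list_head)
{
    const StgInfoTable *info = get_itbl(p);
    if (*STATIC_LINK(info, p) == NULL) {     // not yet on any static list
        *STATIC_LINK(info, p) = *list_head;  // link to the old head (or 1)
        *list_head = p;                      // p becomes the new head
    }
}
#endif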
94
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
100 nat N;
101 rtsBool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #ifdef DEBUG
109 nat mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_TVAR,
113 mutlist_TVAR_WATCH_QUEUE,
114 mutlist_TREC_CHUNK,
115 mutlist_TREC_HEADER,
116 mutlist_ATOMIC_INVARIANT,
117 mutlist_INVARIANT_CHECK_QUEUE,
118 mutlist_OTHERS;
119 #endif
120
121 /* Thread-local data for each GC thread
122 */
123 gc_thread **gc_threads = NULL;
124
125 #if !defined(THREADED_RTS)
126 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
127 #endif
128
129 // Number of threads running in *this* GC. Affects how many
130 // step->todos[] lists we have to look in to find work.
131 nat n_gc_threads;
132
133 // For stats:
134 long copied; // *words* copied & scavenged during this GC
135
136 rtsBool work_stealing;
137
138 DECLARE_GCT
139
140 /* -----------------------------------------------------------------------------
141 Static function declarations
142 -------------------------------------------------------------------------- */
143
144 static void mark_root (void *user, StgClosure **root);
145 static void zero_static_object_list (StgClosure* first_static);
146 static void prepare_collected_gen (generation *gen);
147 static void prepare_uncollected_gen (generation *gen);
148 static void init_gc_thread (gc_thread *t);
149 static void resize_generations (void);
150 static void resize_nursery (void);
151 static void start_gc_threads (void);
152 static void scavenge_until_all_done (void);
153 static StgWord inc_running (void);
154 static StgWord dec_running (void);
155 static void wakeup_gc_threads (nat me);
156 static void shutdown_gc_threads (nat me);
157 static void collect_gct_blocks (void);
158 static StgWord collect_pinned_object_blocks (void);
159
160 #if 0 && defined(DEBUG)
161 static void gcCAFs (void);
162 #endif
163
164 /* -----------------------------------------------------------------------------
165 The mark stack.
166 -------------------------------------------------------------------------- */
167
168 bdescr *mark_stack_top_bd; // topmost block in the mark stack
169 bdescr *mark_stack_bd; // current block in the mark stack
170 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
171
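/* Illustrative sketch (not compiled into the RTS): one way to push onto a
 * block-structured mark stack like the one declared above.  The real
 * implementation lives in MarkStack.h; this simplified, hypothetical helper
 * just mirrors the initialisation done in GarbageCollect() below, chaining a
 * fresh block on when the current one fills up.
 */
#if 0
static void push_mark_stack_sketch (StgPtr p)
{
    if (mark_sp == mark_stack_bd->start + BLOCK_SIZE_W) {
        // current block is full: chain a new block on and make it current
        bdescr *bd = allocBlock();
        bd->link = NULL;
        bd->u.back = mark_stack_bd;
        mark_stack_bd->link = bd;
        mark_stack_bd = bd;
        mark_stack_top_bd = bd;
        mark_sp = bd->start;
    }
    *mark_sp++ = (StgWord)p;
}
#endif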
172 /* -----------------------------------------------------------------------------
173 GarbageCollect: the main entry point to the garbage collector.
174
175 The collect_gen parameter is obtained by calling calcNeeded().
176
177 Locks held: all capabilities are held throughout GarbageCollect().
178 -------------------------------------------------------------------------- */
179
180 void
181 GarbageCollect (nat collect_gen,
182 rtsBool do_heap_census,
183 nat gc_type USED_IF_THREADS,
184 Capability *cap)
185 {
186 bdescr *bd;
187 generation *gen;
188 StgWord live_blocks, live_words, allocated, par_max_copied, par_tot_copied;
189 #if defined(THREADED_RTS)
190 gc_thread *saved_gct;
191 #endif
192 nat g, n;
193
194 // necessary if we stole a callee-saves register for gct:
195 #if defined(THREADED_RTS)
196 saved_gct = gct;
197 #endif
198
199 #ifdef PROFILING
200 CostCentreStack *save_CCS[n_capabilities];
201 #endif
202
203 ACQUIRE_SM_LOCK;
204
205 #if defined(RTS_USER_SIGNALS)
206 if (RtsFlags.MiscFlags.install_signal_handlers) {
207 // block signals
208 blockUserSignals();
209 }
210 #endif
211
212 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
213 // otherwise adjust the padding in gen_workspace.
214
215 // this is the main thread
216 SET_GCT(gc_threads[cap->no]);
217
218 // tell the stats department that we've started a GC
219 stat_startGC(cap, gct);
220
221 // lock the StablePtr table
222 stablePtrPreGC();
223
224 #ifdef DEBUG
225 mutlist_MUTVARS = 0;
226 mutlist_MUTARRS = 0;
227 mutlist_MVARS = 0;
228 mutlist_TVAR = 0;
229 mutlist_TVAR_WATCH_QUEUE = 0;
230 mutlist_TREC_CHUNK = 0;
231 mutlist_TREC_HEADER = 0;
232 mutlist_ATOMIC_INVARIANT = 0;
233 mutlist_INVARIANT_CHECK_QUEUE = 0;
234 mutlist_OTHERS = 0;
235 #endif
236
237 // attribute any costs to CCS_GC
238 #ifdef PROFILING
239 for (n = 0; n < n_capabilities; n++) {
240 save_CCS[n] = capabilities[n].r.rCCCS;
241 capabilities[n].r.rCCCS = CCS_GC;
242 }
243 #endif
244
245 /* Approximate how much we allocated.
246 * Todo: only when generating stats?
247 */
248 allocated = countLargeAllocated(); /* don't count the nursery yet */
249
250 /* Figure out which generation to collect
251 */
252 N = collect_gen;
253 major_gc = (N == RtsFlags.GcFlags.generations-1);
254
255 #if defined(THREADED_RTS)
256 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
257 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
258 // It's not always a good idea to do load balancing in parallel
259 // GC. In particular, for a parallel program we don't want to
260 // lose locality by moving cached data into another CPU's cache
261 // (this effect can be quite significant).
262 //
263 // We could have a more complex way to determine whether to do
264 // work stealing or not, e.g. it might be a good idea to do it
265 // if the heap is big. For now, we just turn it on or off with
266 // a flag.
267 #endif
268
269 /* Start threads, so they can be spinning up while we finish initialisation.
270 */
271 start_gc_threads();
272
273 #if defined(THREADED_RTS)
274 /* How many threads will be participating in this GC?
275 * We don't try to parallelise minor GCs (unless the user asks for
276 * it with +RTS -qg0), or mark/compact/sweep GC.
277 */
278 if (gc_type == SYNC_GC_PAR) {
279 n_gc_threads = n_capabilities;
280 } else {
281 n_gc_threads = 1;
282 }
283 #else
284 n_gc_threads = 1;
285 #endif
286
287 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
288 N, n_gc_threads);
289
290 #ifdef RTS_GTK_FRONTPANEL
291 if (RtsFlags.GcFlags.frontpanel) {
292 updateFrontPanelBeforeGC(N);
293 }
294 #endif
295
296 #ifdef DEBUG
297 // check for memory leaks if DEBUG is on
298 memInventory(DEBUG_gc);
299 #endif
300
301 // check sanity *before* GC
302 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
303
304 // gather blocks allocated using allocatePinned() from each capability
305 // and put them on the g0->large_object list.
306 allocated += collect_pinned_object_blocks();
307
308 // Initialise all the generations/steps that we're collecting.
309 for (g = 0; g <= N; g++) {
310 prepare_collected_gen(&generations[g]);
311 }
312 // Initialise all the generations/steps that we're *not* collecting.
313 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
314 prepare_uncollected_gen(&generations[g]);
315 }
316
317 // Prepare this gc_thread
318 init_gc_thread(gct);
319
320 /* Allocate a mark stack if we're doing a major collection.
321 */
322 if (major_gc && oldest_gen->mark) {
323 mark_stack_bd = allocBlock();
324 mark_stack_top_bd = mark_stack_bd;
325 mark_stack_bd->link = NULL;
326 mark_stack_bd->u.back = NULL;
327 mark_sp = mark_stack_bd->start;
328 } else {
329 mark_stack_bd = NULL;
330 mark_stack_top_bd = NULL;
331 mark_sp = NULL;
332 }
333
334 /* -----------------------------------------------------------------------
335 * follow all the roots that we know about:
336 */
337
338 // the main thread is running: this prevents any other threads from
339 // exiting prematurely, so we can start them now.
340 // NB. do this after the mutable lists have been saved above, otherwise
341 // the other GC threads will be writing into the old mutable lists.
342 inc_running();
343 wakeup_gc_threads(gct->thread_index);
344
345 traceEventGcWork(gct->cap);
346
347 // scavenge the capability-private mutable lists. This isn't part
348 // of markCapability() because markCapability() can only
349 // call back into the GC via mark_root() (due to the gct register
350 // variable).
351 if (n_gc_threads == 1) {
352 for (n = 0; n < n_capabilities; n++) {
353 #if defined(THREADED_RTS)
354 scavenge_capability_mut_Lists1(&capabilities[n]);
355 #else
356 scavenge_capability_mut_lists(&capabilities[n]);
357 #endif
358 }
359 } else {
360 scavenge_capability_mut_lists(gct->cap);
361 for (n = 0; n < n_capabilities; n++) {
362 if (gc_threads[n]->idle) {
363 markCapability(mark_root, gct, &capabilities[n],
364 rtsTrue/*don't mark sparks*/);
365 scavenge_capability_mut_lists(&capabilities[n]);
366 }
367 }
368 }
369
370 // follow roots from the CAF list (used by GHCi)
371 gct->evac_gen_no = 0;
372 markCAFs(mark_root, gct);
373
374 // follow all the roots that the application knows about.
375 gct->evac_gen_no = 0;
376 if (n_gc_threads == 1) {
377 for (n = 0; n < n_capabilities; n++) {
378 markCapability(mark_root, gct, &capabilities[n],
379 rtsTrue/*don't mark sparks*/);
380 }
381 } else {
382 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
383 }
384
385 markScheduler(mark_root, gct);
386
387 #if defined(RTS_USER_SIGNALS)
388 // mark the signal handlers (signals should be already blocked)
389 markSignalHandlers(mark_root, gct);
390 #endif
391
392 // Mark the weak pointer list, and prepare to detect dead weak pointers.
393 markWeakPtrList();
394 initWeakForGC();
395
396 // Mark the stable pointer table.
397 markStablePtrTable(mark_root, gct);
398
399 /* -------------------------------------------------------------------------
400 * Repeatedly scavenge all the areas we know about until there's no
401 * more scavenging to be done.
402 */
403 for (;;)
404 {
405 scavenge_until_all_done();
406 // The other threads are now stopped. We might recurse back to
407 // here, but from now on this is the only thread.
408
409 // must be last... invariant is that everything is fully
410 // scavenged at this point.
411 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
412 inc_running();
413 continue;
414 }
415
416 // If we get to here, there's really nothing left to do.
417 break;
418 }
419
420 if (!DEBUG_IS_ON && n_gc_threads != 1) {
421 gct->allocated = clearNursery(cap);
422 }
423
424 shutdown_gc_threads(gct->thread_index);
425
426 // Now see which stable names are still alive.
427 gcStablePtrTable();
428
429 #ifdef THREADED_RTS
430 if (n_gc_threads == 1) {
431 for (n = 0; n < n_capabilities; n++) {
432 pruneSparkQueue(&capabilities[n]);
433 }
434 } else {
435 for (n = 0; n < n_capabilities; n++) {
436 if (n == cap->no || gc_threads[n]->idle) {
437 pruneSparkQueue(&capabilities[n]);
438 }
439 }
440 }
441 #endif
442
443 #ifdef PROFILING
444 // We call processHeapClosureForDead() on every closure destroyed during
445 // the current garbage collection, so we invoke LdvCensusForDead().
446 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
447 || RtsFlags.ProfFlags.bioSelector != NULL) {
448 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
449 LdvCensusForDead(N);
450 ACQUIRE_SM_LOCK;
451 }
452 #endif
453
454 // NO MORE EVACUATION AFTER THIS POINT!
455
456 // Finally: compact or sweep the oldest generation.
457 if (major_gc && oldest_gen->mark) {
458 if (oldest_gen->compact)
459 compact(gct->scavenged_static_objects);
460 else
461 sweep(oldest_gen);
462 }
463
464 copied = 0;
465 par_max_copied = 0;
466 par_tot_copied = 0;
467 {
468 nat i;
469 for (i=0; i < n_gc_threads; i++) {
470 if (n_gc_threads > 1) {
471 debugTrace(DEBUG_gc,"thread %d:", i);
472 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
473 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
474 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
475 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
476 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
477 }
478 copied += gc_threads[i]->copied;
479 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
480 }
481 par_tot_copied = copied;
482 if (n_gc_threads == 1) {
483 par_max_copied = 0;
484 par_tot_copied = 0;
485 }
486 }
487
488 // Run through all the generations/steps and tidy up.
489 // We're going to:
490 // - count the amount of "live" data (live_words, live_blocks)
491 // - count the amount of "copied" data in this GC (copied)
492 // - free from-space
493 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
494 //
495 live_words = 0;
496 live_blocks = 0;
497
498 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
499
500 if (g == N) {
501 generations[g].collections++; // for stats
502 if (n_gc_threads > 1) generations[g].par_collections++;
503 }
504
505 // Count the mutable list as bytes "copied" for the purposes of
506 // stats. Every mutable list is copied during every GC.
507 if (g > 0) {
508 W_ mut_list_size = 0;
509 for (n = 0; n < n_capabilities; n++) {
510 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
511 }
512 copied += mut_list_size;
513
514 debugTrace(DEBUG_gc,
515 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
516 (unsigned long)(mut_list_size * sizeof(W_)),
517 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
518 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
519 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
520 mutlist_ATOMIC_INVARIANT,
521 mutlist_INVARIANT_CHECK_QUEUE,
522 mutlist_OTHERS);
523 }
524
525 bdescr *next, *prev;
526 gen = &generations[g];
527
528 // for generations we collected...
529 if (g <= N) {
530
531 /* free old memory and shift to-space into from-space for all
532 * the collected steps (except the allocation area). These
533 * freed blocks will probably be quickly recycled.
534 */
535 if (gen->mark)
536 {
537 // tack the new blocks on the end of the existing blocks
538 if (gen->old_blocks != NULL) {
539
540 prev = NULL;
541 for (bd = gen->old_blocks; bd != NULL; bd = next) {
542
543 next = bd->link;
544
545 if (!(bd->flags & BF_MARKED))
546 {
547 if (prev == NULL) {
548 gen->old_blocks = next;
549 } else {
550 prev->link = next;
551 }
552 freeGroup(bd);
553 gen->n_old_blocks--;
554 }
555 else
556 {
557 gen->n_words += bd->free - bd->start;
558
559 // NB. this step might not be compacted next
560 // time, so reset the BF_MARKED flags.
561 // They are set before GC if we're going to
562 // compact. (search for BF_MARKED above).
563 bd->flags &= ~BF_MARKED;
564
565 // between GCs, all blocks in the heap except
566 // for the nursery have the BF_EVACUATED flag set.
567 bd->flags |= BF_EVACUATED;
568
569 prev = bd;
570 }
571 }
572
573 if (prev != NULL) {
574 prev->link = gen->blocks;
575 gen->blocks = gen->old_blocks;
576 }
577 }
578 // add the new blocks to the block tally
579 gen->n_blocks += gen->n_old_blocks;
580 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
581 ASSERT(countOccupied(gen->blocks) == gen->n_words);
582 }
583 else // not compacted
584 {
585 freeChain(gen->old_blocks);
586 }
587
588 gen->old_blocks = NULL;
589 gen->n_old_blocks = 0;
590
591 /* LARGE OBJECTS. The current live large objects are chained on
592 * scavenged_large_objects, having been moved during garbage
593 * collection from large_objects. Any objects left on the
594 * large_objects list are therefore dead, so we free them here.
595 */
596 freeChain(gen->large_objects);
597 gen->large_objects = gen->scavenged_large_objects;
598 gen->n_large_blocks = gen->n_scavenged_large_blocks;
599 gen->n_large_words = countOccupied(gen->large_objects);
600 gen->n_new_large_words = 0;
601 }
602 else // for generations > N
603 {
604 /* For older generations, we need to append the
605 * scavenged_large_object list (i.e. large objects that have been
606 * promoted during this GC) to the large_object list for that step.
607 */
608 for (bd = gen->scavenged_large_objects; bd; bd = next) {
609 next = bd->link;
610 dbl_link_onto(bd, &gen->large_objects);
611 gen->n_large_words += bd->free - bd->start;
612 }
613
614 // add the new blocks we promoted during this GC
615 gen->n_large_blocks += gen->n_scavenged_large_blocks;
616 }
617
618 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
619 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
620
621 gen->scavenged_large_objects = NULL;
622 gen->n_scavenged_large_blocks = 0;
623
624 // Count "live" data
625 live_words += genLiveWords(gen);
626 live_blocks += genLiveBlocks(gen);
627
628 // add in the partial blocks in the gen_workspaces, but ignore gen 0
629 // if this is a local GC (we can't count another capability's part_list)
630 {
631 nat i;
632 for (i = 0; i < n_capabilities; i++) {
633 live_words += gcThreadLiveWords(i, gen->no);
634 live_blocks += gcThreadLiveBlocks(i, gen->no);
635 }
636 }
637 } // for all generations
638
639 // update the max size of older generations after a major GC
640 resize_generations();
641
642 // Free the mark stack.
643 if (mark_stack_top_bd != NULL) {
644 debugTrace(DEBUG_gc, "mark stack: %d blocks",
645 countBlocks(mark_stack_top_bd));
646 freeChain(mark_stack_top_bd);
647 }
648
649 // Free any bitmaps.
650 for (g = 0; g <= N; g++) {
651 gen = &generations[g];
652 if (gen->bitmap != NULL) {
653 freeGroup(gen->bitmap);
654 gen->bitmap = NULL;
655 }
656 }
657
658 // Reset the nursery: make the blocks empty
659 if (DEBUG_IS_ON || n_gc_threads == 1) {
660 for (n = 0; n < n_capabilities; n++) {
661 allocated += clearNursery(&capabilities[n]);
662 }
663 } else {
664 // When doing parallel GC, clearNursery() is called by the
665 // worker threads, and the value returned is stored in
666 // gct->allocated.
667 for (n = 0; n < n_capabilities; n++) {
668 if (gc_threads[n]->idle) {
669 allocated += clearNursery(&capabilities[n]);
670 } else {
671 allocated += gc_threads[n]->allocated;
672 }
673 }
674 }
675
676 resize_nursery();
677
678 resetNurseries();
679
680 // mark the garbage collected CAFs as dead
681 #if 0 && defined(DEBUG) // doesn't work at the moment
682 if (major_gc) { gcCAFs(); }
683 #endif
684
685 #ifdef PROFILING
686 // resetStaticObjectForRetainerProfiling() must be called before
687 // zeroing below.
688
689 // ToDo: fix the gct->scavenged_static_objects below
690 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
691 #endif
692
693 // zero the scavenged static object list
694 if (major_gc) {
695 nat i;
696 if (n_gc_threads == 1) {
697 zero_static_object_list(gct->scavenged_static_objects);
698 } else {
699 for (i = 0; i < n_gc_threads; i++) {
700 if (!gc_threads[i]->idle) {
701 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
702 }
703 }
704 }
705 }
706
707 // Update the stable pointer hash table.
708 updateStablePtrTable(major_gc);
709
710 // unlock the StablePtr table. Must be before scheduleFinalizers(),
711 // because a finalizer may call hs_free_fun_ptr() or
712 // hs_free_stable_ptr(), both of which access the StablePtr table.
713 stablePtrPostGC();
714
715 // Start any pending finalizers. Must be after
716 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
717 RELEASE_SM_LOCK;
718 scheduleFinalizers(cap, old_weak_ptr_list);
719 ACQUIRE_SM_LOCK;
720
721 // check sanity after GC
722 // before resurrectThreads(), because that might overwrite some
723 // closures, which will cause problems with THREADED where we don't
724 // fill slop.
725 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
726
727 // If a heap census is due, we need to do it before
728 // resurrectThreads(), for the same reason as checkSanity above:
729 // resurrectThreads() will overwrite some closures and leave slop
730 // behind.
731 if (do_heap_census) {
732 debugTrace(DEBUG_sched, "performing heap census");
733 RELEASE_SM_LOCK;
734 heapCensus(gct->gc_start_cpu);
735 ACQUIRE_SM_LOCK;
736 }
737
738 // send exceptions to any threads which were about to die
739 RELEASE_SM_LOCK;
740 resurrectThreads(resurrected_threads);
741 ACQUIRE_SM_LOCK;
742
743 if (major_gc) {
744 W_ need, got;
745 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
746 got = mblocks_allocated;
747 /* If the amount of data remains constant, next major GC we'll
748 require (F+1)*need. We leave (F+2)*need in order to reduce
749 repeated deallocation and reallocation. */
750 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
751 if (got > need) {
752 returnMemoryToOS(got - need);
753 }
754 }
755
756 // extra GC trace info
757 IF_DEBUG(gc, statDescribeGens());
758
759 #ifdef DEBUG
760 // symbol-table based profiling
761 /* heapCensus(to_blocks); */ /* ToDo */
762 #endif
763
764 // restore enclosing cost centre
765 #ifdef PROFILING
766 for (n = 0; n < n_capabilities; n++) {
767 capabilities[n].r.rCCCS = save_CCS[n];
768 }
769 #endif
770
771 #ifdef DEBUG
772 // check for memory leaks if DEBUG is on
773 memInventory(DEBUG_gc);
774 #endif
775
776 #ifdef RTS_GTK_FRONTPANEL
777 if (RtsFlags.GcFlags.frontpanel) {
778 updateFrontPanelAfterGC( N, live_words );
779 }
780 #endif
781
782 // ok, GC over: tell the stats department what happened.
783 stat_endGC(cap, gct, allocated, live_words, copied,
784 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
785 N, n_gc_threads, par_max_copied, par_tot_copied);
786
787 #if defined(RTS_USER_SIGNALS)
788 if (RtsFlags.MiscFlags.install_signal_handlers) {
789 // unblock signals again
790 unblockUserSignals();
791 }
792 #endif
793
794 RELEASE_SM_LOCK;
795
796 SET_GCT(saved_gct);
797 }
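
/* Illustrative sketch (not compiled into the RTS): roughly how a caller such
 * as the scheduler might drive a collection.  The helper name and the exact
 * calcNeeded() signature are assumptions for the sake of the example; the
 * real call site lives in Schedule.c.  The shape follows the header comment
 * above GarbageCollect(): calcNeeded() picks the oldest generation that
 * needs collecting, all capabilities must be held, and the collection runs
 * on the requesting capability.
 */
#if 0
static void example_gc_request (Capability *cap, rtsBool force_major)
{
    nat collect_gen;
    StgWord blocks_needed;

    collect_gen = calcNeeded(force_major, &blocks_needed);

    // ... acquire all capabilities here (omitted) ...

    GarbageCollect(collect_gen,
                   rtsFalse /* no heap census this time */,
                   SYNC_GC_PAR /* parallel GC; a sequential mode also exists */,
                   cap);
}
#endif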
798
799 /* -----------------------------------------------------------------------------
800 Initialise the gc_thread structures.
801 -------------------------------------------------------------------------- */
802
803 #define GC_THREAD_INACTIVE 0
804 #define GC_THREAD_STANDING_BY 1
805 #define GC_THREAD_RUNNING 2
806 #define GC_THREAD_WAITING_TO_CONTINUE 3
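
/* A worker's life cycle (see gcWorkerThread(), wakeup_gc_threads() and
 * releaseGCThreads() below): it starts INACTIVE, releases its mut_spin lock,
 * announces STANDING_BY and blocks on its gc_spin lock; the main GC thread
 * moves it to RUNNING by grabbing mut_spin and releasing gc_spin; when the
 * worker has no more work it releases gc_spin again, announces
 * WAITING_TO_CONTINUE and blocks on mut_spin, until releaseGCThreads() swaps
 * the two locks back and resets it to INACTIVE.  The two spin locks act as a
 * pair of turnstiles: gc_spin gates entry into GC work, mut_spin gates the
 * return to the mutator.
 */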
807
808 static void
809 new_gc_thread (nat n, gc_thread *t)
810 {
811 nat g;
812 gen_workspace *ws;
813
814 t->cap = &capabilities[n];
815
816 #ifdef THREADED_RTS
817 t->id = 0;
818 initSpinLock(&t->gc_spin);
819 initSpinLock(&t->mut_spin);
820 ACQUIRE_SPIN_LOCK(&t->gc_spin);
821 t->wakeup = GC_THREAD_INACTIVE; // starts INACTIVE; the worker sets it to
822 // STANDING_BY once it is ready, see wakeup_gc_threads
823 #endif
824
825 t->thread_index = n;
826 t->idle = rtsFalse;
827 t->free_blocks = NULL;
828 t->gc_count = 0;
829
830 init_gc_thread(t);
831
832 #ifdef USE_PAPI
833 t->papi_events = -1;
834 #endif
835
836 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
837 {
838 ws = &t->gens[g];
839 ws->gen = &generations[g];
840 ASSERT(g == ws->gen->no);
841 ws->my_gct = t;
842
843 // We want to call
844 // alloc_todo_block(ws,0);
845 // but can't, because it uses gct which isn't set up at this point.
846 // Hence, allocate a block for todo_bd manually:
847 {
848 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
849 initBdescr(bd, ws->gen, ws->gen->to);
850 bd->flags = BF_EVACUATED;
851 bd->u.scan = bd->free = bd->start;
852
853 ws->todo_bd = bd;
854 ws->todo_free = bd->free;
855 ws->todo_lim = bd->start + BLOCK_SIZE_W;
856 }
857
858 ws->todo_q = newWSDeque(128);
859 ws->todo_overflow = NULL;
860 ws->n_todo_overflow = 0;
861 ws->todo_large_objects = NULL;
862
863 ws->part_list = NULL;
864 ws->n_part_blocks = 0;
865
866 ws->scavd_list = NULL;
867 ws->n_scavd_blocks = 0;
868 }
869 }
870
871
872 void
873 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
874 {
875 #if defined(THREADED_RTS)
876 nat i;
877
878 if (from > 0) {
879 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
880 "initGcThreads");
881 } else {
882 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
883 "initGcThreads");
884 }
885
886 // We have to update the gct->cap pointers to point to the new
887 // Capability array now.
888 for (i = 0; i < from; i++) {
889 gc_threads[i]->cap = &capabilities[gc_threads[i]->cap->no];
890 }
891
892 for (i = from; i < to; i++) {
893 gc_threads[i] =
894 stgMallocBytes(sizeof(gc_thread) +
895 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
896 "alloc_gc_threads");
897
898 new_gc_thread(i, gc_threads[i]);
899 }
900 #else
901 ASSERT(from == 0 && to == 1);
902 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
903 gc_threads[0] = gct;
904 new_gc_thread(0,gc_threads[0]);
905 #endif
906 }
907
908 void
909 freeGcThreads (void)
910 {
911 nat g;
912 if (gc_threads != NULL) {
913 #if defined(THREADED_RTS)
914 nat i;
915 for (i = 0; i < n_capabilities; i++) {
916 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
917 {
918 freeWSDeque(gc_threads[i]->gens[g].todo_q);
919 }
920 stgFree (gc_threads[i]);
921 }
922 stgFree (gc_threads);
923 #else
924 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
925 {
926 freeWSDeque(gc_threads[0]->gens[g].todo_q);
927 }
928 stgFree (gc_threads);
929 #endif
930 gc_threads = NULL;
931 }
932 }
933
934 /* ----------------------------------------------------------------------------
935 Start GC threads
936 ------------------------------------------------------------------------- */
937
938 static volatile StgWord gc_running_threads;
939
940 static StgWord
941 inc_running (void)
942 {
943 StgWord new;
944 new = atomic_inc(&gc_running_threads);
945 ASSERT(new <= n_gc_threads);
946 return new;
947 }
948
949 static StgWord
950 dec_running (void)
951 {
952 ASSERT(gc_running_threads != 0);
953 return atomic_dec(&gc_running_threads);
954 }
955
956 static rtsBool
957 any_work (void)
958 {
959 int g;
960 gen_workspace *ws;
961
962 gct->any_work++;
963
964 write_barrier();
965
966 // scavenge objects in compacted generation
967 if (mark_stack_bd != NULL && !mark_stack_empty()) {
968 return rtsTrue;
969 }
970
971 // Check for global work in any step. We don't need to check for
972 // local work, because we have already exited scavenge_loop(),
973 // which means there is no local work for this thread.
974 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
975 ws = &gct->gens[g];
976 if (ws->todo_large_objects) return rtsTrue;
977 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
978 if (ws->todo_overflow) return rtsTrue;
979 }
980
981 #if defined(THREADED_RTS)
982 if (work_stealing) {
983 nat n;
984 // look for work to steal
985 for (n = 0; n < n_gc_threads; n++) {
986 if (n == gct->thread_index) continue;
987 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
988 ws = &gc_threads[n]->gens[g];
989 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
990 }
991 }
992 }
993 #endif
994
995 gct->no_work++;
996 #if defined(THREADED_RTS)
997 yieldThread();
998 #endif
999
1000 return rtsFalse;
1001 }
1002
1003 static void
1004 scavenge_until_all_done (void)
1005 {
1006 DEBUG_ONLY( nat r );
1007
1008
1009 loop:
1010 #if defined(THREADED_RTS)
1011 if (n_gc_threads > 1) {
1012 scavenge_loop();
1013 } else {
1014 scavenge_loop1();
1015 }
1016 #else
1017 scavenge_loop();
1018 #endif
1019
1020 collect_gct_blocks();
1021
1022 // scavenge_loop() only exits when there's no work to do
1023
1024 #ifdef DEBUG
1025 r = dec_running();
1026 #else
1027 dec_running();
1028 #endif
1029
1030 traceEventGcIdle(gct->cap);
1031
1032 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1033
1034 while (gc_running_threads != 0) {
1035 // usleep(1);
1036 if (any_work()) {
1037 inc_running();
1038 traceEventGcWork(gct->cap);
1039 goto loop;
1040 }
1041 // any_work() does not remove the work from the queue, it
1042 // just checks for the presence of work. If we find any,
1043 // then we increment gc_running_threads and go back to
1044 // scavenge_loop() to perform any pending work.
1045 }
1046
1047 traceEventGcDone(gct->cap);
1048 }
1049
1050 #if defined(THREADED_RTS)
1051
1052 void
1053 gcWorkerThread (Capability *cap)
1054 {
1055 gc_thread *saved_gct;
1056
1057 // necessary if we stole a callee-saves register for gct:
1058 saved_gct = gct;
1059
1060 SET_GCT(gc_threads[cap->no]);
1061 gct->id = osThreadId();
1062
1063 stat_gcWorkerThreadStart(gct);
1064
1065 // Wait until we're told to wake up
1066 RELEASE_SPIN_LOCK(&gct->mut_spin);
1067 // yieldThread();
1068 // Strangely, adding a yieldThread() here makes the CPU time
1069 // measurements more accurate on Linux, perhaps because it syncs
1070 // the CPU time across the multiple cores. Without this, CPU time
1071 // is heavily skewed towards GC rather than MUT.
1072 gct->wakeup = GC_THREAD_STANDING_BY;
1073 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1074 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1075
1076 #ifdef USE_PAPI
1077 // start performance counters in this thread...
1078 if (gct->papi_events == -1) {
1079 papi_init_eventset(&gct->papi_events);
1080 }
1081 papi_thread_start_gc1_count(gct->papi_events);
1082 #endif
1083
1084 init_gc_thread(gct);
1085
1086 traceEventGcWork(gct->cap);
1087
1088 // Every thread evacuates some roots.
1089 gct->evac_gen_no = 0;
1090 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1091 scavenge_capability_mut_lists(cap);
1092
1093 scavenge_until_all_done();
1094
1095 if (!DEBUG_IS_ON) {
1096 gct->allocated = clearNursery(cap);
1097 }
1098
1099 #ifdef THREADED_RTS
1100 // Now that the whole heap is marked, we discard any sparks that
1101 // were found to be unreachable. The main GC thread is currently
1102 // marking heap reachable via weak pointers, so it is
1103 // non-deterministic whether a spark will be retained if it is
1104 // only reachable via weak pointers. To fix this problem would
1105 // require another GC barrier, which is too high a price.
1106 pruneSparkQueue(cap);
1107 #endif
1108
1109 #ifdef USE_PAPI
1110 // count events in this thread towards the GC totals
1111 papi_thread_stop_gc1_count(gct->papi_events);
1112 #endif
1113
1114 // Wait until we're told to continue
1115 RELEASE_SPIN_LOCK(&gct->gc_spin);
1116 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1117 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1118 gct->thread_index);
1119 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1120 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1121
1122 // record the time spent doing GC in the Task structure
1123 stat_gcWorkerThreadDone(gct);
1124
1125 SET_GCT(saved_gct);
1126 }
1127
1128 #endif
1129
1130 #if defined(THREADED_RTS)
1131
1132 void
1133 waitForGcThreads (Capability *cap USED_IF_THREADS)
1134 {
1135 const nat n_threads = n_capabilities;
1136 const nat me = cap->no;
1137 nat i, j;
1138 rtsBool retry = rtsTrue;
1139
1140 while(retry) {
1141 for (i=0; i < n_threads; i++) {
1142 if (i == me || gc_threads[i]->idle) continue;
1143 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1144 prodCapability(&capabilities[i], cap->running_task);
1145 }
1146 }
1147 for (j=0; j < 10; j++) {
1148 retry = rtsFalse;
1149 for (i=0; i < n_threads; i++) {
1150 if (i == me || gc_threads[i]->idle) continue;
1151 write_barrier();
1152 interruptCapability(&capabilities[i]);
1153 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1154 retry = rtsTrue;
1155 }
1156 }
1157 if (!retry) break;
1158 yieldThread();
1159 }
1160 }
1161 }
1162
1163 #endif // THREADED_RTS
1164
1165 static void
1166 start_gc_threads (void)
1167 {
1168 #if defined(THREADED_RTS)
1169 gc_running_threads = 0;
1170 #endif
1171 }
1172
1173 static void
1174 wakeup_gc_threads (nat me USED_IF_THREADS)
1175 {
1176 #if defined(THREADED_RTS)
1177 nat i;
1178
1179 if (n_gc_threads == 1) return;
1180
1181 for (i=0; i < n_gc_threads; i++) {
1182 if (i == me || gc_threads[i]->idle) continue;
1183 inc_running();
1184 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1185 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1186
1187 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1188 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1189 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1190 }
1191 #endif
1192 }
1193
1194 // After GC is complete, we must wait for all GC threads to reach the
1195 // GC_THREAD_WAITING_TO_CONTINUE state, otherwise they may still be
1196 // executing inside any_work(), and may even remain awake until the next GC starts.
1197 static void
1198 shutdown_gc_threads (nat me USED_IF_THREADS)
1199 {
1200 #if defined(THREADED_RTS)
1201 nat i;
1202
1203 if (n_gc_threads == 1) return;
1204
1205 for (i=0; i < n_gc_threads; i++) {
1206 if (i == me || gc_threads[i]->idle) continue;
1207 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1208 }
1209 #endif
1210 }
1211
1212 #if defined(THREADED_RTS)
1213 void
1214 releaseGCThreads (Capability *cap USED_IF_THREADS)
1215 {
1216 const nat n_threads = n_capabilities;
1217 const nat me = cap->no;
1218 nat i;
1219 for (i=0; i < n_threads; i++) {
1220 if (i == me || gc_threads[i]->idle) continue;
1221 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1222 barf("releaseGCThreads");
1223
1224 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1225 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1226 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1227 }
1228 }
1229 #endif
1230
1231 /* ----------------------------------------------------------------------------
1232 Initialise a generation that is to be collected
1233 ------------------------------------------------------------------------- */
1234
1235 static void
1236 prepare_collected_gen (generation *gen)
1237 {
1238 nat i, g, n;
1239 gen_workspace *ws;
1240 bdescr *bd, *next;
1241
1242 // Throw away the current mutable list. Invariant: the mutable
1243 // list always has at least one block; this means we can avoid a
1244 // check for NULL in recordMutable().
1245 g = gen->no;
1246 if (g != 0) {
1247 for (i = 0; i < n_capabilities; i++) {
1248 freeChain(capabilities[i].mut_lists[g]);
1249 capabilities[i].mut_lists[g] = allocBlock();
1250 }
1251 }
1252
1253 gen = &generations[g];
1254 ASSERT(gen->no == g);
1255
1256 // we'll construct a new list of threads in this generation
1257 // during GC; throw away the current list.
1258 gen->old_threads = gen->threads;
1259 gen->threads = END_TSO_QUEUE;
1260
1261 // deprecate the existing blocks
1262 gen->old_blocks = gen->blocks;
1263 gen->n_old_blocks = gen->n_blocks;
1264 gen->blocks = NULL;
1265 gen->n_blocks = 0;
1266 gen->n_words = 0;
1267 gen->live_estimate = 0;
1268
1269 // initialise the large object queues.
1270 ASSERT(gen->scavenged_large_objects == NULL);
1271 ASSERT(gen->n_scavenged_large_blocks == 0);
1272
1273 // grab all the partial blocks stashed in the gc_thread workspaces and
1274 // move them to the old_blocks list of this gen.
1275 for (n = 0; n < n_capabilities; n++) {
1276 ws = &gc_threads[n]->gens[gen->no];
1277
1278 for (bd = ws->part_list; bd != NULL; bd = next) {
1279 next = bd->link;
1280 bd->link = gen->old_blocks;
1281 gen->old_blocks = bd;
1282 gen->n_old_blocks += bd->blocks;
1283 }
1284 ws->part_list = NULL;
1285 ws->n_part_blocks = 0;
1286
1287 ASSERT(ws->scavd_list == NULL);
1288 ASSERT(ws->n_scavd_blocks == 0);
1289
1290 if (ws->todo_free != ws->todo_bd->start) {
1291 ws->todo_bd->free = ws->todo_free;
1292 ws->todo_bd->link = gen->old_blocks;
1293 gen->old_blocks = ws->todo_bd;
1294 gen->n_old_blocks += ws->todo_bd->blocks;
1295 alloc_todo_block(ws,0); // always has one block.
1296 }
1297 }
1298
1299 // mark the small objects as from-space
1300 for (bd = gen->old_blocks; bd; bd = bd->link) {
1301 bd->flags &= ~BF_EVACUATED;
1302 }
1303
1304 // mark the large objects as from-space
1305 for (bd = gen->large_objects; bd; bd = bd->link) {
1306 bd->flags &= ~BF_EVACUATED;
1307 }
1308
1309 // for a compacted generation, we need to allocate the bitmap
1310 if (gen->mark) {
1311 StgWord bitmap_size; // in bytes
1312 bdescr *bitmap_bdescr;
1313 StgWord *bitmap;
1314
1315 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
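// (i.e. one mark bit for every word in the blocks being compacted)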
1316
1317 if (bitmap_size > 0) {
1318 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1319 / BLOCK_SIZE);
1320 gen->bitmap = bitmap_bdescr;
1321 bitmap = bitmap_bdescr->start;
1322
1323 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1324 bitmap_size, bitmap);
1325
1326 // don't forget to fill it with zeros!
1327 memset(bitmap, 0, bitmap_size);
1328
1329 // For each block in this step, point to its bitmap from the
1330 // block descriptor.
1331 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1332 bd->u.bitmap = bitmap;
1333 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1334
1335 // Also at this point we set the BF_MARKED flag
1336 // for this block. The invariant is that
1337 // BF_MARKED is always unset, except during GC
1338 // when it is set on those blocks which will be
1339 // compacted.
1340 if (!(bd->flags & BF_FRAGMENTED)) {
1341 bd->flags |= BF_MARKED;
1342 }
1343
1344 // BF_SWEPT should be marked only for blocks that are being
1345 // collected in sweep()
1346 bd->flags &= ~BF_SWEPT;
1347 }
1348 }
1349 }
1350 }
1351
1352
1353 /* ----------------------------------------------------------------------------
1354 Save the mutable lists in saved_mut_lists
1355 ------------------------------------------------------------------------- */
1356
1357 static void
1358 stash_mut_list (Capability *cap, nat gen_no)
1359 {
1360 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1361 cap->mut_lists[gen_no] = allocBlock_sync();
1362 }
1363
1364 /* ----------------------------------------------------------------------------
1365 Initialise a generation that is *not* to be collected
1366 ------------------------------------------------------------------------- */
1367
1368 static void
1369 prepare_uncollected_gen (generation *gen)
1370 {
1371 nat i;
1372
1373
1374 ASSERT(gen->no > 0);
1375
1376 // save the current mutable lists for this generation, and
1377 // allocate a fresh block for each one. We'll traverse these
1378 // mutable lists as roots early on in the GC.
1379 for (i = 0; i < n_capabilities; i++) {
1380 stash_mut_list(&capabilities[i], gen->no);
1381 }
1382
1383 ASSERT(gen->scavenged_large_objects == NULL);
1384 ASSERT(gen->n_scavenged_large_blocks == 0);
1385 }
1386
1387 /* -----------------------------------------------------------------------------
1388 Collect the completed blocks from a GC thread and attach them to
1389 the generation.
1390 -------------------------------------------------------------------------- */
1391
1392 static void
1393 collect_gct_blocks (void)
1394 {
1395 nat g;
1396 gen_workspace *ws;
1397 bdescr *bd, *prev;
1398
1399 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1400 ws = &gct->gens[g];
1401
1402 // there may still be a block attached to ws->todo_bd;
1403 // leave it there to use next time.
1404
1405 if (ws->scavd_list != NULL) {
1406 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1407
1408 ASSERT(gct->scan_bd == NULL);
1409 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1410
1411 prev = NULL;
1412 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1413 ws->gen->n_words += bd->free - bd->start;
1414 prev = bd;
1415 }
1416 if (prev != NULL) {
1417 prev->link = ws->gen->blocks;
1418 ws->gen->blocks = ws->scavd_list;
1419 }
1420 ws->gen->n_blocks += ws->n_scavd_blocks;
1421
1422 ws->scavd_list = NULL;
1423 ws->n_scavd_blocks = 0;
1424
1425 RELEASE_SPIN_LOCK(&ws->gen->sync);
1426 }
1427 }
1428 }
1429
1430 /* -----------------------------------------------------------------------------
1431 During mutation, any blocks that are filled by allocatePinned() are
1432 stashed on the local pinned_object_blocks list, to avoid needing to
1433 take a global lock. Here we collect those blocks from the
1434 cap->pinned_object_blocks lists and put them on the
1435 main g0->large_object list.
1436
1437 Returns: the number of words allocated this way, for stats
1438 purposes.
1439 -------------------------------------------------------------------------- */
1440
1441 static StgWord
1442 collect_pinned_object_blocks (void)
1443 {
1444 nat n;
1445 bdescr *bd, *prev;
1446 StgWord allocated = 0;
1447
1448 for (n = 0; n < n_capabilities; n++) {
1449 prev = NULL;
1450 for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
1451 allocated += bd->free - bd->start;
1452 prev = bd;
1453 }
1454 if (prev != NULL) {
1455 prev->link = g0->large_objects;
1456 if (g0->large_objects != NULL) {
1457 g0->large_objects->u.back = prev;
1458 }
1459 g0->large_objects = capabilities[n].pinned_object_blocks;
1460 capabilities[n].pinned_object_blocks = 0;
1461 }
1462 }
1463
1464 return allocated;
1465 }
1466
1467 /* -----------------------------------------------------------------------------
1468 Initialise a gc_thread before GC
1469 -------------------------------------------------------------------------- */
1470
1471 static void
1472 init_gc_thread (gc_thread *t)
1473 {
1474 t->static_objects = END_OF_STATIC_LIST;
1475 t->scavenged_static_objects = END_OF_STATIC_LIST;
1476 t->scan_bd = NULL;
1477 t->mut_lists = t->cap->mut_lists;
1478 t->evac_gen_no = 0;
1479 t->failed_to_evac = rtsFalse;
1480 t->eager_promotion = rtsTrue;
1481 t->thunk_selector_depth = 0;
1482 t->allocated = 0;
1483 t->copied = 0;
1484 t->scanned = 0;
1485 t->any_work = 0;
1486 t->no_work = 0;
1487 t->scav_find_work = 0;
1488 }
1489
1490 /* -----------------------------------------------------------------------------
1491 Function we pass to evacuate roots.
1492 -------------------------------------------------------------------------- */
1493
1494 static void
1495 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1496 {
1497 // we stole a register for gct, but this function is called from
1498 // *outside* the GC where the register variable is not in effect,
1499 // so we need to save and restore it here. NB. only call
1500 // mark_root() from the main GC thread, otherwise gct will be
1501 // incorrect.
1502 #if defined(THREADED_RTS)
1503 gc_thread *saved_gct;
1504 saved_gct = gct;
1505 #endif
1506 SET_GCT(user);
1507
1508 evacuate(root);
1509
1510 SET_GCT(saved_gct);
1511 }
1512
1513 /* -----------------------------------------------------------------------------
1514 Initialising the static object & mutable lists
1515 -------------------------------------------------------------------------- */
1516
1517 static void
1518 zero_static_object_list(StgClosure* first_static)
1519 {
1520 StgClosure* p;
1521 StgClosure* link;
1522 const StgInfoTable *info;
1523
1524 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1525 info = get_itbl(p);
1526 link = *STATIC_LINK(info, p);
1527 *STATIC_LINK(info,p) = NULL;
1528 }
1529 }
1530
1531 /* ----------------------------------------------------------------------------
1532 Reset the sizes of the older generations when we do a major
1533 collection.
1534
1535 CURRENT STRATEGY: make all generations except zero the same size.
1536 We have to stay within the maximum heap size, and leave a certain
1537 percentage of the maximum heap size available to allocate into.
1538 ------------------------------------------------------------------------- */
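/* For example, with the default two generations (+RTS -G2) and compaction
 * off, the check in resize_generations() below amounts to requiring
 * size * (gens-1) * 2 + min_alloc <= max, i.e. 2*size + min_alloc <= max,
 * so size is clamped to (max - min_alloc) / 2 when a maximum heap size is
 * set.  With compaction on, the requirement relaxes to roughly
 * size + min_alloc <= max, because we no longer need a second copy of the
 * oldest generation in order to collect it.
 */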
1539
1540 static void
1541 resize_generations (void)
1542 {
1543 nat g;
1544
1545 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1546 W_ live, size, min_alloc, words;
1547 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1548 const W_ gens = RtsFlags.GcFlags.generations;
1549
1550 // live in the oldest generations
1551 if (oldest_gen->live_estimate != 0) {
1552 words = oldest_gen->live_estimate;
1553 } else {
1554 words = oldest_gen->n_words;
1555 }
1556 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1557 oldest_gen->n_large_blocks;
1558
1559 // default max size for all generations except zero
1560 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1561 RtsFlags.GcFlags.minOldGenSize);
1562
1563 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1564 if (max > 0) {
1565 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1566 } else {
1567 RtsFlags.GcFlags.heapSizeSuggestion = size;
1568 }
1569 }
1570
1571 // minimum size for generation zero
1572 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1573 RtsFlags.GcFlags.minAllocAreaSize);
1574
1575 // Auto-enable compaction when the residency reaches a
1576 // certain percentage of the maximum heap size (default: 30%).
1577 if (RtsFlags.GcFlags.compact ||
1578 (max > 0 &&
1579 oldest_gen->n_blocks >
1580 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1581 oldest_gen->mark = 1;
1582 oldest_gen->compact = 1;
1583 // debugBelch("compaction: on\n", live);
1584 } else {
1585 oldest_gen->mark = 0;
1586 oldest_gen->compact = 0;
1587 // debugBelch("compaction: off\n", live);
1588 }
1589
1590 if (RtsFlags.GcFlags.sweep) {
1591 oldest_gen->mark = 1;
1592 }
1593
1594 // if we're going to go over the maximum heap size, reduce the
1595 // size of the generations accordingly. The calculation is
1596 // different if compaction is turned on, because we don't need
1597 // to double the space required to collect the old generation.
1598 if (max != 0) {
1599
1600 // this test is necessary to ensure that the calculations
1601 // below don't have any negative results - we're working
1602 // with unsigned values here.
1603 if (max < min_alloc) {
1604 heapOverflow();
1605 }
1606
1607 if (oldest_gen->compact) {
1608 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1609 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1610 }
1611 } else {
1612 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1613 size = (max - min_alloc) / ((gens - 1) * 2);
1614 }
1615 }
1616
1617 if (size < live) {
1618 heapOverflow();
1619 }
1620 }
1621
1622 #if 0
1623 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1624 min_alloc, size, max);
1625 #endif
1626
1627 for (g = 0; g < gens; g++) {
1628 generations[g].max_blocks = size;
1629 }
1630 }
1631 }
1632
1633 /* -----------------------------------------------------------------------------
1634 Calculate the new size of the nursery, and resize it.
1635 -------------------------------------------------------------------------- */
1636
1637 static void
1638 resize_nursery (void)
1639 {
1640 const StgWord min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1641
1642 if (RtsFlags.GcFlags.generations == 1)
1643 { // Two-space collector:
1644 W_ blocks;
1645
1646 /* set up a new nursery. Allocate a nursery size based on a
1647 * function of the amount of live data (by default a factor of 2)
1648 * Use the blocks from the old nursery if possible, freeing up any
1649 * left over blocks.
1650 *
1651 * If we get near the maximum heap size, then adjust our nursery
1652 * size accordingly. If the nursery is the same size as the live
1653 * data (L), then we need 3L bytes. We can reduce the size of the
1654 * nursery to bring the required memory down near 2L bytes.
1655 *
1656 * A normal 2-space collector would need 4L bytes to give the same
1657 * performance we get from 3L bytes, reducing to the same
1658 * performance at 2L bytes.
1659 */
1660 blocks = generations[0].n_blocks;
1661
1662 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1663 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1664 RtsFlags.GcFlags.maxHeapSize )
1665 {
1666 long adjusted_blocks; // signed on purpose
1667 int pc_free;
1668
1669 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1670
1671 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1672 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1673
1674 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1675 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1676 {
1677 heapOverflow();
1678 }
1679 blocks = adjusted_blocks;
1680 }
1681 else
1682 {
1683 blocks *= RtsFlags.GcFlags.oldGenFactor;
1684 if (blocks < min_nursery)
1685 {
1686 blocks = min_nursery;
1687 }
1688 }
1689 resizeNurseries(blocks);
1690 }
1691 else // Generational collector
1692 {
1693 /*
1694 * If the user has given us a suggested heap size, adjust our
1695 * allocation area to make best use of the memory available.
1696 */
1697 if (RtsFlags.GcFlags.heapSizeSuggestion)
1698 {
1699 long blocks;
1700 StgWord needed;
1701
1702 calcNeeded(rtsFalse, &needed); // approx blocks needed at next GC
1703
1704 /* Guess how much will be live in generation 0 step 0 next time.
1705 * A good approximation is obtained by finding the
1706 * percentage of g0 that was live at the last minor GC.
1707 *
1708 * We have an accurate figure for the amount of copied data in
1709 * 'copied', but we must convert this to a number of blocks, with
1710 * a small adjustment for estimated slop at the end of a block
1711 * (- 10 words).
1712 */
1713 if (N == 0)
1714 {
1715 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1716 / countNurseryBlocks();
1717 }
1718
1719 /* Estimate a size for the allocation area based on the
1720 * information available. We might end up going slightly under
1721 * or over the suggested heap size, but we should be pretty
1722 * close on average.
1723 *
1724 * Formula: suggested - needed
1725 * ----------------------------
1726 * 1 + g0_pcnt_kept/100
1727 *
1728 * where 'needed' is the amount of memory needed at the next
1729 * collection for collecting all gens except g0.
1730 */
1731 blocks =
1732 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1733 (100 + (long)g0_pcnt_kept);
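// For example (illustrative numbers only): with a heap-size suggestion of
// 1,000,000 blocks, needed = 200,000 blocks and g0_pcnt_kept = 30, this
// gives (800,000 * 100) / 130 = 615,384 blocks for the allocation area.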
1734
1735 if (blocks < (long)min_nursery) {
1736 blocks = min_nursery;
1737 }
1738
1739 resizeNurseries((W_)blocks);
1740 }
1741 else
1742 {
1743 // we might have added extra large blocks to the nursery, so
1744 // resize back to minAllocAreaSize again.
1745 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1746 }
1747 }
1748 }
1749
1750 /* -----------------------------------------------------------------------------
1751 Sanity code for CAF garbage collection.
1752
1753 With DEBUG turned on, we manage a CAF list in addition to the SRT
1754 mechanism. After GC, we run down the CAF list and blackhole any
1755 CAFs which have been garbage collected. This means we get an error
1756 whenever the program tries to enter a garbage collected CAF.
1757
1758 Any garbage collected CAFs are taken off the CAF list at the same
1759 time.
1760 -------------------------------------------------------------------------- */
1761
1762 #if 0 && defined(DEBUG)
1763
1764 static void
1765 gcCAFs(void)
1766 {
1767 StgClosure* p;
1768 StgClosure** pp;
1769 const StgInfoTable *info;
1770 nat i;
1771
1772 i = 0;
1773 p = caf_list;
1774 pp = &caf_list;
1775
1776 while (p != NULL) {
1777
1778 info = get_itbl(p);
1779
1780 ASSERT(info->type == IND_STATIC);
1781
1782 if (STATIC_LINK(info,p) == NULL) {
1783 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1784 // black hole it
1785 SET_INFO(p,&stg_BLACKHOLE_info);
1786 p = STATIC_LINK2(info,p);
1787 *pp = p;
1788 }
1789 else {
1790 pp = &STATIC_LINK2(info,p);
1791 p = *pp;
1792 i++;
1793 }
1794
1795 }
1796
1797 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1798 }
1799 #endif