Simplify the allocation stats accounting
[ghc.git] / rts / sm / GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "GC.h"
19 #include "GCThread.h"
20 #include "GCTDecl.h" // NB. before RtsSignals.h which
21 // clobbers REG_R1 on arm/Linux
22 #include "Compact.h"
23 #include "Evac.h"
24 #include "Scav.h"
25 #include "GCUtils.h"
26 #include "MarkStack.h"
27 #include "MarkWeak.h"
28 #include "Sparks.h"
29 #include "Sweep.h"
30
31 #include "Storage.h"
32 #include "RtsUtils.h"
33 #include "Apply.h"
34 #include "Updates.h"
35 #include "Stats.h"
36 #include "Schedule.h"
37 #include "Sanity.h"
38 #include "BlockAlloc.h"
39 #include "ProfHeap.h"
40 #include "Weak.h"
41 #include "Prelude.h"
42 #include "RtsSignals.h"
43 #include "STM.h"
44 #if defined(RTS_GTK_FRONTPANEL)
45 #include "FrontPanel.h"
46 #endif
47 #include "Trace.h"
48 #include "RetainerProfile.h"
49 #include "LdvProfile.h"
50 #include "RaiseAsync.h"
51 #include "Papi.h"
52 #include "Stable.h"
53
54 #include <string.h> // for memset()
55 #include <unistd.h>
56
57 /* -----------------------------------------------------------------------------
58 Global variables
59 -------------------------------------------------------------------------- */
60
61 /* STATIC OBJECT LIST.
62 *
63 * During GC:
64 * We maintain a linked list of static objects that are still live.
65 * The requirements for this list are:
66 *
67 * - we need to scan the list while adding to it, in order to
68 * scavenge all the static objects (in the same way that
69 * breadth-first scavenging works for dynamic objects).
70 *
71 * - we need to be able to tell whether an object is already on
72 * the list, to break loops.
73 *
74 * Each static object has a "static link field", which we use for
75 * linking objects on to the list. We use a stack-type list, consing
76 * objects on the front as they are added (this means that the
77 * scavenge phase is depth-first, not breadth-first, but that
78 * shouldn't matter).
79 *
80 * A separate list is kept for objects that have been scavenged
81 * already - this is so that we can zero all the marks afterwards.
82 *
83 * An object is on the list if its static link field is non-zero; this
84 * means that we have to mark the end of the list with '1', not NULL.
85 *
86 * Extra notes for generational GC:
87 *
88 * Each generation has a static object list associated with it. When
89 * collecting generations up to N, we treat the static object lists
90 * from generations > N as roots.
91 *
92 * We build up a static object list while collecting generations 0..N,
93 * which is then appended to the static object list of generation N+1.
94 */
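/* Illustrative sketch only (the actual consing is done in Evac.c when a
 * static object is evacuated): an object whose static link field is NULL
 * is not yet on any list, so it can be pushed onto the front of the
 * current list roughly as follows:
 *
 *     if (*STATIC_LINK(info, p) == NULL) {
 *         *STATIC_LINK(info, p) = gct->static_objects;
 *         gct->static_objects = (StgClosure *)p;
 *     }
 *
 * This is also why the end of the list is marked with '1' rather than
 * NULL: a NULL link must continue to mean "not on any list".
 */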
95
96 /* N is the oldest generation being collected, where the generations
97 * are numbered starting at 0. A major GC (indicated by the major_gc
98 * flag) is when we're collecting all generations. We only attempt to
99 * deal with static objects and GC CAFs when doing a major GC.
100 */
101 nat N;
102 rtsBool major_gc;
103
104 /* Data used for allocation area sizing.
105 */
106 static W_ g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
107
108 /* Mut-list stats */
109 #ifdef DEBUG
110 nat mutlist_MUTVARS,
111 mutlist_MUTARRS,
112 mutlist_MVARS,
113 mutlist_TVAR,
114 mutlist_TVAR_WATCH_QUEUE,
115 mutlist_TREC_CHUNK,
116 mutlist_TREC_HEADER,
117 mutlist_ATOMIC_INVARIANT,
118 mutlist_INVARIANT_CHECK_QUEUE,
119 mutlist_OTHERS;
120 #endif
121
122 /* Thread-local data for each GC thread
123 */
124 gc_thread **gc_threads = NULL;
125
126 #if !defined(THREADED_RTS)
127 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
128 #endif
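// The per-generation workspaces live directly at the end of the gc_thread
// structure (see GCThread.h), so in the non-threaded RTS the declaration
// above statically allocates the single gc_thread together with room for
// up to 64 generations' worth of workspaces.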
129
130 // Number of threads running in *this* GC. Affects how many
131 // step->todos[] lists we have to look in to find work.
132 nat n_gc_threads;
133
134 // For stats:
135 long copied; // *words* copied & scavenged during this GC
136
137 rtsBool work_stealing;
138
139 DECLARE_GCT
140
141 /* -----------------------------------------------------------------------------
142 Static function declarations
143 -------------------------------------------------------------------------- */
144
145 static void mark_root (void *user, StgClosure **root);
146 static void zero_static_object_list (StgClosure* first_static);
147 static void prepare_collected_gen (generation *gen);
148 static void prepare_uncollected_gen (generation *gen);
149 static void init_gc_thread (gc_thread *t);
150 static void resize_generations (void);
151 static void resize_nursery (void);
152 static void start_gc_threads (void);
153 static void scavenge_until_all_done (void);
154 static StgWord inc_running (void);
155 static StgWord dec_running (void);
156 static void wakeup_gc_threads (nat me);
157 static void shutdown_gc_threads (nat me);
158 static void collect_gct_blocks (void);
159 static void collect_pinned_object_blocks (void);
160
161 #if 0 && defined(DEBUG)
162 static void gcCAFs (void);
163 #endif
164
165 /* -----------------------------------------------------------------------------
166 The mark stack.
167 -------------------------------------------------------------------------- */
168
169 bdescr *mark_stack_top_bd; // topmost block in the mark stack
170 bdescr *mark_stack_bd; // current block in the mark stack
171 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
172
173 /* -----------------------------------------------------------------------------
174 GarbageCollect: the main entry point to the garbage collector.
175
176 The collect_gen parameter is obtained by calling calcNeeded().
177
178 Locks held: all capabilities are held throughout GarbageCollect().
179 -------------------------------------------------------------------------- */
180
181 void
182 GarbageCollect (nat collect_gen,
183 rtsBool do_heap_census,
184 nat gc_type USED_IF_THREADS,
185 Capability *cap)
186 {
187 bdescr *bd;
188 generation *gen;
189 StgWord live_blocks, live_words, par_max_copied, par_tot_copied;
190 #if defined(THREADED_RTS)
191 gc_thread *saved_gct;
192 #endif
193 nat g, n;
194
195 // necessary if we stole a callee-saves register for gct:
196 #if defined(THREADED_RTS)
197 saved_gct = gct;
198 #endif
199
200 #ifdef PROFILING
201 CostCentreStack *save_CCS[n_capabilities];
202 #endif
203
204 ACQUIRE_SM_LOCK;
205
206 #if defined(RTS_USER_SIGNALS)
207 if (RtsFlags.MiscFlags.install_signal_handlers) {
208 // block signals
209 blockUserSignals();
210 }
211 #endif
212
213 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
214 // otherwise adjust the padding in gen_workspace.
215
216 // this is the main thread
217 SET_GCT(gc_threads[cap->no]);
218
219 // tell the stats department that we've started a GC
220 stat_startGC(cap, gct);
221
222 // lock the StablePtr table
223 stablePtrPreGC();
224
225 #ifdef DEBUG
226 mutlist_MUTVARS = 0;
227 mutlist_MUTARRS = 0;
228 mutlist_MVARS = 0;
229 mutlist_TVAR = 0;
230 mutlist_TVAR_WATCH_QUEUE = 0;
231 mutlist_TREC_CHUNK = 0;
232 mutlist_TREC_HEADER = 0;
233 mutlist_ATOMIC_INVARIANT = 0;
234 mutlist_INVARIANT_CHECK_QUEUE = 0;
235 mutlist_OTHERS = 0;
236 #endif
237
238 // attribute any costs to CCS_GC
239 #ifdef PROFILING
240 for (n = 0; n < n_capabilities; n++) {
241 save_CCS[n] = capabilities[n].r.rCCCS;
242 capabilities[n].r.rCCCS = CCS_GC;
243 }
244 #endif
245
246 /* Figure out which generation to collect
247 */
248 N = collect_gen;
249 major_gc = (N == RtsFlags.GcFlags.generations-1);
250
251 #if defined(THREADED_RTS)
252 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
253 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
254 // It's not always a good idea to do load balancing in parallel
255 // GC. In particular, for a parallel program we don't want to
256 // lose locality by moving cached data into another CPU's cache
257 // (this effect can be quite significant).
258 //
259 // We could have a more complex way to determine whether to do
260 // work stealing or not, e.g. it might be a good idea to do it
261 // if the heap is big. For now, we just turn it on or off with
262 // a flag.
263 #endif
264
265 /* Start threads, so they can be spinning up while we finish initialisation.
266 */
267 start_gc_threads();
268
269 #if defined(THREADED_RTS)
270 /* How many threads will be participating in this GC?
271 * We don't try to parallelise minor GCs (unless the user asks for
272 * it with +RTS -gn0), or mark/compact/sweep GC.
273 */
274 if (gc_type == SYNC_GC_PAR) {
275 n_gc_threads = n_capabilities;
276 } else {
277 n_gc_threads = 1;
278 }
279 #else
280 n_gc_threads = 1;
281 #endif
282
283 debugTrace(DEBUG_gc, "GC (gen %d, using %d thread(s))",
284 N, n_gc_threads);
285
286 #ifdef RTS_GTK_FRONTPANEL
287 if (RtsFlags.GcFlags.frontpanel) {
288 updateFrontPanelBeforeGC(N);
289 }
290 #endif
291
292 #ifdef DEBUG
293 // check for memory leaks if DEBUG is on
294 memInventory(DEBUG_gc);
295 #endif
296
297 // check sanity *before* GC
298 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
299
300 // gather blocks allocated using allocatePinned() from each capability
301 // and put them on the g0->large_object list.
302 collect_pinned_object_blocks();
303
304 // Initialise all the generations/steps that we're collecting.
305 for (g = 0; g <= N; g++) {
306 prepare_collected_gen(&generations[g]);
307 }
308 // Initialise all the generations/steps that we're *not* collecting.
309 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
310 prepare_uncollected_gen(&generations[g]);
311 }
312
313 // Prepare this gc_thread
314 init_gc_thread(gct);
315
316 /* Allocate a mark stack if we're doing a major collection.
317 */
318 if (major_gc && oldest_gen->mark) {
319 mark_stack_bd = allocBlock();
320 mark_stack_top_bd = mark_stack_bd;
321 mark_stack_bd->link = NULL;
322 mark_stack_bd->u.back = NULL;
323 mark_sp = mark_stack_bd->start;
324 } else {
325 mark_stack_bd = NULL;
326 mark_stack_top_bd = NULL;
327 mark_sp = NULL;
328 }
329
330 /* -----------------------------------------------------------------------
331 * follow all the roots that we know about:
332 */
333
334 // the main thread is running: this prevents any other threads from
335 // exiting prematurely, so we can start them now.
336 // NB. do this after the mutable lists have been saved above, otherwise
337 // the other GC threads will be writing into the old mutable lists.
338 inc_running();
339 wakeup_gc_threads(gct->thread_index);
340
341 traceEventGcWork(gct->cap);
342
343 // scavenge the capability-private mutable lists. This isn't part
344 // of markCapability() because markCapability() can only
345 // call back into the GC via mark_root() (due to the gct register
346 // variable).
347 if (n_gc_threads == 1) {
348 for (n = 0; n < n_capabilities; n++) {
349 #if defined(THREADED_RTS)
350 scavenge_capability_mut_Lists1(&capabilities[n]);
351 #else
352 scavenge_capability_mut_lists(&capabilities[n]);
353 #endif
354 }
355 } else {
356 scavenge_capability_mut_lists(gct->cap);
357 for (n = 0; n < n_capabilities; n++) {
358 if (gc_threads[n]->idle) {
359 markCapability(mark_root, gct, &capabilities[n],
360 rtsTrue/*don't mark sparks*/);
361 scavenge_capability_mut_lists(&capabilities[n]);
362 }
363 }
364 }
365
366 // follow roots from the CAF list (used by GHCi)
367 gct->evac_gen_no = 0;
368 markCAFs(mark_root, gct);
369
370 // follow all the roots that the application knows about.
371 gct->evac_gen_no = 0;
372 if (n_gc_threads == 1) {
373 for (n = 0; n < n_capabilities; n++) {
374 markCapability(mark_root, gct, &capabilities[n],
375 rtsTrue/*don't mark sparks*/);
376 }
377 } else {
378 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
379 }
380
381 markScheduler(mark_root, gct);
382
383 #if defined(RTS_USER_SIGNALS)
384 // mark the signal handlers (signals should be already blocked)
385 markSignalHandlers(mark_root, gct);
386 #endif
387
388 // Mark the weak pointer list, and prepare to detect dead weak pointers.
389 markWeakPtrList();
390 initWeakForGC();
391
392 // Mark the stable pointer table.
393 markStablePtrTable(mark_root, gct);
394
395 /* -------------------------------------------------------------------------
396 * Repeatedly scavenge all the areas we know about until there's no
397 * more scavenging to be done.
398 */
399 for (;;)
400 {
401 scavenge_until_all_done();
402 // The other threads are now stopped. We might recurse back to
403 // here, but from now on this is the only thread.
404
405 // must be last... invariant is that everything is fully
406 // scavenged at this point.
407 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
408 inc_running();
409 continue;
410 }
411
412 // If we get to here, there's really nothing left to do.
413 break;
414 }
415
416 if (!DEBUG_IS_ON && n_gc_threads != 1) {
417 clearNursery(cap);
418 }
419
420 shutdown_gc_threads(gct->thread_index);
421
422 // Now see which stable names are still alive.
423 gcStablePtrTable();
424
425 #ifdef THREADED_RTS
426 if (n_gc_threads == 1) {
427 for (n = 0; n < n_capabilities; n++) {
428 pruneSparkQueue(&capabilities[n]);
429 }
430 } else {
431 for (n = 0; n < n_capabilities; n++) {
432 if (n == cap->no || gc_threads[n]->idle) {
433 pruneSparkQueue(&capabilities[n]);
434 }
435 }
436 }
437 #endif
438
439 #ifdef PROFILING
440 // We call processHeapClosureForDead() on every closure destroyed during
441 // the current garbage collection, so we invoke LdvCensusForDead().
442 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
443 || RtsFlags.ProfFlags.bioSelector != NULL) {
444 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
445 LdvCensusForDead(N);
446 ACQUIRE_SM_LOCK;
447 }
448 #endif
449
450 // NO MORE EVACUATION AFTER THIS POINT!
451
452 // Finally: compact or sweep the oldest generation.
453 if (major_gc && oldest_gen->mark) {
454 if (oldest_gen->compact)
455 compact(gct->scavenged_static_objects);
456 else
457 sweep(oldest_gen);
458 }
459
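// Gather the copy statistics from each GC thread: 'copied' accumulates
// the total number of words copied, and par_max_copied records the
// largest per-thread contribution so that stat_endGC() can report how
// well the parallel copying work was balanced.  When only one thread
// took part, the parallel figures are zeroed below because they carry
// no extra information.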
460 copied = 0;
461 par_max_copied = 0;
462 par_tot_copied = 0;
463 {
464 nat i;
465 for (i=0; i < n_gc_threads; i++) {
466 if (n_gc_threads > 1) {
467 debugTrace(DEBUG_gc,"thread %d:", i);
468 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
469 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
470 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
471 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
472 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
473 }
474 copied += gc_threads[i]->copied;
475 par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
476 }
477 par_tot_copied = copied;
478 if (n_gc_threads == 1) {
479 par_max_copied = 0;
480 par_tot_copied = 0;
481 }
482 }
483
484 // Run through all the generations/steps and tidy up.
485 // We're going to:
486 // - count the amount of "live" data (live_words, live_blocks)
487 // - count the amount of "copied" data in this GC (copied)
488 // - free from-space
489 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
490 //
491 live_words = 0;
492 live_blocks = 0;
493
494 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
495
496 if (g == N) {
497 generations[g].collections++; // for stats
498 if (n_gc_threads > 1) generations[g].par_collections++;
499 }
500
501 // Count the mutable list as words "copied" for the purposes of
502 // stats. Every mutable list is copied during every GC.
503 if (g > 0) {
504 W_ mut_list_size = 0;
505 for (n = 0; n < n_capabilities; n++) {
506 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
507 }
508 copied += mut_list_size;
509
510 debugTrace(DEBUG_gc,
511 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d TVARs, %d TVAR_WATCH_QUEUEs, %d TREC_CHUNKs, %d TREC_HEADERs, %d ATOMIC_INVARIANTs, %d INVARIANT_CHECK_QUEUEs, %d others)",
512 (unsigned long)(mut_list_size * sizeof(W_)),
513 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS,
514 mutlist_TVAR, mutlist_TVAR_WATCH_QUEUE,
515 mutlist_TREC_CHUNK, mutlist_TREC_HEADER,
516 mutlist_ATOMIC_INVARIANT,
517 mutlist_INVARIANT_CHECK_QUEUE,
518 mutlist_OTHERS);
519 }
520
521 bdescr *next, *prev;
522 gen = &generations[g];
523
524 // for generations we collected...
525 if (g <= N) {
526
527 /* free old memory and shift to-space into from-space for all
528 * the collected steps (except the allocation area). These
529 * freed blocks will probably be quickly recycled.
530 */
531 if (gen->mark)
532 {
533 // tack the new blocks on the end of the existing blocks
534 if (gen->old_blocks != NULL) {
535
536 prev = NULL;
537 for (bd = gen->old_blocks; bd != NULL; bd = next) {
538
539 next = bd->link;
540
541 if (!(bd->flags & BF_MARKED))
542 {
543 if (prev == NULL) {
544 gen->old_blocks = next;
545 } else {
546 prev->link = next;
547 }
548 freeGroup(bd);
549 gen->n_old_blocks--;
550 }
551 else
552 {
553 gen->n_words += bd->free - bd->start;
554
555 // NB. this step might not be compacted next
556 // time, so reset the BF_MARKED flags.
557 // They are set before GC if we're going to
558 // compact (see prepare_collected_gen()).
559 bd->flags &= ~BF_MARKED;
560
561 // between GCs, all blocks in the heap except
562 // for the nursery have the BF_EVACUATED flag set.
563 bd->flags |= BF_EVACUATED;
564
565 prev = bd;
566 }
567 }
568
569 if (prev != NULL) {
570 prev->link = gen->blocks;
571 gen->blocks = gen->old_blocks;
572 }
573 }
574 // add the new blocks to the block tally
575 gen->n_blocks += gen->n_old_blocks;
576 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
577 ASSERT(countOccupied(gen->blocks) == gen->n_words);
578 }
579 else // not compacted
580 {
581 freeChain(gen->old_blocks);
582 }
583
584 gen->old_blocks = NULL;
585 gen->n_old_blocks = 0;
586
587 /* LARGE OBJECTS. The current live large objects are chained on
588 * scavenged_large, having been moved during garbage
589 * collection from large_objects. Any objects left on the
590 * large_objects list are therefore dead, so we free them here.
591 */
592 freeChain(gen->large_objects);
593 gen->large_objects = gen->scavenged_large_objects;
594 gen->n_large_blocks = gen->n_scavenged_large_blocks;
595 gen->n_large_words = countOccupied(gen->large_objects);
596 gen->n_new_large_words = 0;
597 }
598 else // for generations > N
599 {
600 /* For older generations, we need to append the
601 * scavenged_large_object list (i.e. large objects that have been
602 * promoted during this GC) to the large_object list for that step.
603 */
604 for (bd = gen->scavenged_large_objects; bd; bd = next) {
605 next = bd->link;
606 dbl_link_onto(bd, &gen->large_objects);
607 gen->n_large_words += bd->free - bd->start;
608 }
609
610 // add the new blocks we promoted during this GC
611 gen->n_large_blocks += gen->n_scavenged_large_blocks;
612 }
613
614 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
615 ASSERT(countOccupied(gen->large_objects) == gen->n_large_words);
616
617 gen->scavenged_large_objects = NULL;
618 gen->n_scavenged_large_blocks = 0;
619
620 // Count "live" data
621 live_words += genLiveWords(gen);
622 live_blocks += genLiveBlocks(gen);
623
624 // add in the partial blocks in the gen_workspaces, but ignore gen 0
625 // if this is a local GC (we can't count another capability's part_list)
626 {
627 nat i;
628 for (i = 0; i < n_capabilities; i++) {
629 live_words += gcThreadLiveWords(i, gen->no);
630 live_blocks += gcThreadLiveBlocks(i, gen->no);
631 }
632 }
633 } // for all generations
634
635 // update the max size of older generations after a major GC
636 resize_generations();
637
638 // Free the mark stack.
639 if (mark_stack_top_bd != NULL) {
640 debugTrace(DEBUG_gc, "mark stack: %d blocks",
641 countBlocks(mark_stack_top_bd));
642 freeChain(mark_stack_top_bd);
643 }
644
645 // Free any bitmaps.
646 for (g = 0; g <= N; g++) {
647 gen = &generations[g];
648 if (gen->bitmap != NULL) {
649 freeGroup(gen->bitmap);
650 gen->bitmap = NULL;
651 }
652 }
653
654 // Reset the nursery: make the blocks empty
655 if (DEBUG_IS_ON || n_gc_threads == 1) {
656 for (n = 0; n < n_capabilities; n++) {
657 clearNursery(&capabilities[n]);
658 }
659 } else {
660 // When doing parallel GC, clearNursery() is called by the
661 // worker threads
662 for (n = 0; n < n_capabilities; n++) {
663 if (gc_threads[n]->idle) {
664 clearNursery(&capabilities[n]);
665 }
666 }
667 }
668
669 resize_nursery();
670
671 resetNurseries();
672
673 // mark the garbage collected CAFs as dead
674 #if 0 && defined(DEBUG) // doesn't work at the moment
675 if (major_gc) { gcCAFs(); }
676 #endif
677
678 #ifdef PROFILING
679 // resetStaticObjectForRetainerProfiling() must be called before
680 // zeroing below.
681
682 // ToDo: fix the gct->scavenged_static_objects below
683 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
684 #endif
685
686 // zero the scavenged static object list
687 if (major_gc) {
688 nat i;
689 if (n_gc_threads == 1) {
690 zero_static_object_list(gct->scavenged_static_objects);
691 } else {
692 for (i = 0; i < n_gc_threads; i++) {
693 if (!gc_threads[i]->idle) {
694 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
695 }
696 }
697 }
698 }
699
700 // Update the stable pointer hash table.
701 updateStablePtrTable(major_gc);
702
703 // unlock the StablePtr table. Must be before scheduleFinalizers(),
704 // because a finalizer may call hs_free_fun_ptr() or
705 // hs_free_stable_ptr(), both of which access the StablePtr table.
706 stablePtrPostGC();
707
708 // Start any pending finalizers. Must be after
709 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
710 RELEASE_SM_LOCK;
711 scheduleFinalizers(cap, old_weak_ptr_list);
712 ACQUIRE_SM_LOCK;
713
714 // check sanity after GC
715 // before resurrectThreads(), because that might overwrite some
716 // closures, which will cause problems with THREADED where we don't
717 // fill slop.
718 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
719
720 // If a heap census is due, we need to do it before
721 // resurrectThreads(), for the same reason as checkSanity above:
722 // resurrectThreads() will overwrite some closures and leave slop
723 // behind.
724 if (do_heap_census) {
725 debugTrace(DEBUG_sched, "performing heap census");
726 RELEASE_SM_LOCK;
727 heapCensus(gct->gc_start_cpu);
728 ACQUIRE_SM_LOCK;
729 }
730
731 // send exceptions to any threads which were about to die
732 RELEASE_SM_LOCK;
733 resurrectThreads(resurrected_threads);
734 ACQUIRE_SM_LOCK;
735
736 if (major_gc) {
737 W_ need, got;
738 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
739 got = mblocks_allocated;
740 /* If the amount of data remains constant, next major GC we'll
741 require (F+1)*need. We leave (F+2)*need in order to reduce
742 repeated deallocation and reallocation. */
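/* For example, with the default -F2 (oldGenFactor == 2) and a heap
 * footprint of 100 megablocks after this major GC, we keep up to
 * (2+2)*100 = 400 megablocks mapped and return anything above that
 * to the OS. */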
743 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
744 if (got > need) {
745 returnMemoryToOS(got - need);
746 }
747 }
748
749 // extra GC trace info
750 IF_DEBUG(gc, statDescribeGens());
751
752 #ifdef DEBUG
753 // symbol-table based profiling
754 /* heapCensus(to_blocks); */ /* ToDo */
755 #endif
756
757 // restore enclosing cost centre
758 #ifdef PROFILING
759 for (n = 0; n < n_capabilities; n++) {
760 capabilities[n].r.rCCCS = save_CCS[n];
761 }
762 #endif
763
764 #ifdef DEBUG
765 // check for memory leaks if DEBUG is on
766 memInventory(DEBUG_gc);
767 #endif
768
769 #ifdef RTS_GTK_FRONTPANEL
770 if (RtsFlags.GcFlags.frontpanel) {
771 updateFrontPanelAfterGC( N, live_words );
772 }
773 #endif
774
775 // ok, GC over: tell the stats department what happened.
776 stat_endGC(cap, gct, live_words, copied,
777 live_blocks * BLOCK_SIZE_W - live_words /* slop */,
778 N, n_gc_threads, par_max_copied, par_tot_copied);
779
780 #if defined(RTS_USER_SIGNALS)
781 if (RtsFlags.MiscFlags.install_signal_handlers) {
782 // unblock signals again
783 unblockUserSignals();
784 }
785 #endif
786
787 RELEASE_SM_LOCK;
788
789 SET_GCT(saved_gct);
790 }
791
792 /* -----------------------------------------------------------------------------
793 Initialise the gc_thread structures.
794 -------------------------------------------------------------------------- */
795
796 #define GC_THREAD_INACTIVE 0
797 #define GC_THREAD_STANDING_BY 1
798 #define GC_THREAD_RUNNING 2
799 #define GC_THREAD_WAITING_TO_CONTINUE 3
800
801 static void
802 new_gc_thread (nat n, gc_thread *t)
803 {
804 nat g;
805 gen_workspace *ws;
806
807 t->cap = &capabilities[n];
808
809 #ifdef THREADED_RTS
810 t->id = 0;
811 initSpinLock(&t->gc_spin);
812 initSpinLock(&t->mut_spin);
813 ACQUIRE_SPIN_LOCK(&t->gc_spin);
814 t->wakeup = GC_THREAD_INACTIVE; // starts INACTIVE, so we can wait for the
815 // thread to start up and reach STANDING_BY, see wakeup_gc_threads
816 #endif
817
818 t->thread_index = n;
819 t->idle = rtsFalse;
820 t->free_blocks = NULL;
821 t->gc_count = 0;
822
823 init_gc_thread(t);
824
825 #ifdef USE_PAPI
826 t->papi_events = -1;
827 #endif
828
829 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
830 {
831 ws = &t->gens[g];
832 ws->gen = &generations[g];
833 ASSERT(g == ws->gen->no);
834 ws->my_gct = t;
835
836 // We want to call
837 // alloc_todo_block(ws,0);
838 // but can't, because it uses gct which isn't set up at this point.
839 // Hence, allocate a block for todo_bd manually:
840 {
841 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
842 initBdescr(bd, ws->gen, ws->gen->to);
843 bd->flags = BF_EVACUATED;
844 bd->u.scan = bd->free = bd->start;
845
846 ws->todo_bd = bd;
847 ws->todo_free = bd->free;
848 ws->todo_lim = bd->start + BLOCK_SIZE_W;
849 }
850
851 ws->todo_q = newWSDeque(128);
852 ws->todo_overflow = NULL;
853 ws->n_todo_overflow = 0;
854 ws->todo_large_objects = NULL;
855
856 ws->part_list = NULL;
857 ws->n_part_blocks = 0;
858
859 ws->scavd_list = NULL;
860 ws->n_scavd_blocks = 0;
861 }
862 }
863
864
865 void
866 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
867 {
868 #if defined(THREADED_RTS)
869 nat i;
870
871 if (from > 0) {
872 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
873 "initGcThreads");
874 } else {
875 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
876 "initGcThreads");
877 }
878
879 // We have to update the gct->cap pointers to point to the new
880 // Capability array now.
881 for (i = 0; i < from; i++) {
882 gc_threads[i]->cap = &capabilities[gc_threads[i]->cap->no];
883 }
884
885 for (i = from; i < to; i++) {
886 gc_threads[i] =
887 stgMallocBytes(sizeof(gc_thread) +
888 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
889 "alloc_gc_threads");
890
891 new_gc_thread(i, gc_threads[i]);
892 }
893 #else
894 ASSERT(from == 0 && to == 1);
895 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
896 gc_threads[0] = gct;
897 new_gc_thread(0,gc_threads[0]);
898 #endif
899 }
900
901 void
902 freeGcThreads (void)
903 {
904 nat g;
905 if (gc_threads != NULL) {
906 #if defined(THREADED_RTS)
907 nat i;
908 for (i = 0; i < n_capabilities; i++) {
909 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
910 {
911 freeWSDeque(gc_threads[i]->gens[g].todo_q);
912 }
913 stgFree (gc_threads[i]);
914 }
915 stgFree (gc_threads);
916 #else
917 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
918 {
919 freeWSDeque(gc_threads[0]->gens[g].todo_q);
920 }
921 stgFree (gc_threads);
922 #endif
923 gc_threads = NULL;
924 }
925 }
926
927 /* ----------------------------------------------------------------------------
928 Start GC threads
929 ------------------------------------------------------------------------- */
930
931 static volatile StgWord gc_running_threads;
932
933 static StgWord
934 inc_running (void)
935 {
936 StgWord new;
937 new = atomic_inc(&gc_running_threads);
938 ASSERT(new <= n_gc_threads);
939 return new;
940 }
941
942 static StgWord
943 dec_running (void)
944 {
945 ASSERT(gc_running_threads != 0);
946 return atomic_dec(&gc_running_threads);
947 }
948
949 static rtsBool
950 any_work (void)
951 {
952 int g;
953 gen_workspace *ws;
954
955 gct->any_work++;
956
957 write_barrier();
958
959 // scavenge objects in compacted generation
960 if (mark_stack_bd != NULL && !mark_stack_empty()) {
961 return rtsTrue;
962 }
963
964 // Check for global work in any step. We don't need to check for
965 // local work, because we have already exited scavenge_loop(),
966 // which means there is no local work for this thread.
967 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
968 ws = &gct->gens[g];
969 if (ws->todo_large_objects) return rtsTrue;
970 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
971 if (ws->todo_overflow) return rtsTrue;
972 }
973
974 #if defined(THREADED_RTS)
975 if (work_stealing) {
976 nat n;
977 // look for work to steal
978 for (n = 0; n < n_gc_threads; n++) {
979 if (n == gct->thread_index) continue;
980 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
981 ws = &gc_threads[n]->gens[g];
982 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
983 }
984 }
985 }
986 #endif
987
988 gct->no_work++;
989 #if defined(THREADED_RTS)
990 yieldThread();
991 #endif
992
993 return rtsFalse;
994 }
995
996 static void
997 scavenge_until_all_done (void)
998 {
999 DEBUG_ONLY( nat r );
1000
1001
1002 loop:
1003 #if defined(THREADED_RTS)
1004 if (n_gc_threads > 1) {
1005 scavenge_loop();
1006 } else {
1007 scavenge_loop1();
1008 }
1009 #else
1010 scavenge_loop();
1011 #endif
1012
1013 collect_gct_blocks();
1014
1015 // scavenge_loop() only exits when there's no work to do
1016
1017 #ifdef DEBUG
1018 r = dec_running();
1019 #else
1020 dec_running();
1021 #endif
1022
1023 traceEventGcIdle(gct->cap);
1024
1025 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1026
1027 while (gc_running_threads != 0) {
1028 // usleep(1);
1029 if (any_work()) {
1030 inc_running();
1031 traceEventGcWork(gct->cap);
1032 goto loop;
1033 }
1034 // any_work() does not remove the work from the queue, it
1035 // just checks for the presence of work. If we find any,
1036 // then we increment gc_running_threads and go back to
1037 // scavenge_loop() to perform any pending work.
1038 }
1039
1040 traceEventGcDone(gct->cap);
1041 }
1042
1043 #if defined(THREADED_RTS)
1044
1045 void
1046 gcWorkerThread (Capability *cap)
1047 {
1048 gc_thread *saved_gct;
1049
1050 // necessary if we stole a callee-saves register for gct:
1051 saved_gct = gct;
1052
1053 SET_GCT(gc_threads[cap->no]);
1054 gct->id = osThreadId();
1055
1056 stat_gcWorkerThreadStart(gct);
1057
1058 // Wait until we're told to wake up
1059 RELEASE_SPIN_LOCK(&gct->mut_spin);
1060 // yieldThread();
1061 // Strangely, adding a yieldThread() here makes the CPU time
1062 // measurements more accurate on Linux, perhaps because it syncs
1063 // the CPU time across the multiple cores. Without this, CPU time
1064 // is heavily skewed towards GC rather than MUT.
1065 gct->wakeup = GC_THREAD_STANDING_BY;
1066 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1067 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1068
1069 #ifdef USE_PAPI
1070 // start performance counters in this thread...
1071 if (gct->papi_events == -1) {
1072 papi_init_eventset(&gct->papi_events);
1073 }
1074 papi_thread_start_gc1_count(gct->papi_events);
1075 #endif
1076
1077 init_gc_thread(gct);
1078
1079 traceEventGcWork(gct->cap);
1080
1081 // Every thread evacuates some roots.
1082 gct->evac_gen_no = 0;
1083 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1084 scavenge_capability_mut_lists(cap);
1085
1086 scavenge_until_all_done();
1087
1088 if (!DEBUG_IS_ON) {
1089 clearNursery(cap);
1090 }
1091
1092 #ifdef THREADED_RTS
1093 // Now that the whole heap is marked, we discard any sparks that
1094 // were found to be unreachable. The main GC thread is currently
1095 // marking heap reachable via weak pointers, so it is
1096 // non-deterministic whether a spark will be retained if it is
1097 // only reachable via weak pointers. To fix this problem would
1098 // require another GC barrier, which is too high a price.
1099 pruneSparkQueue(cap);
1100 #endif
1101
1102 #ifdef USE_PAPI
1103 // count events in this thread towards the GC totals
1104 papi_thread_stop_gc1_count(gct->papi_events);
1105 #endif
1106
1107 // Wait until we're told to continue
1108 RELEASE_SPIN_LOCK(&gct->gc_spin);
1109 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1110 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1111 gct->thread_index);
1112 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1113 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1114
1115 // record the time spent doing GC in the Task structure
1116 stat_gcWorkerThreadDone(gct);
1117
1118 SET_GCT(saved_gct);
1119 }
1120
1121 #endif
1122
1123 #if defined(THREADED_RTS)
1124
1125 void
1126 waitForGcThreads (Capability *cap USED_IF_THREADS)
1127 {
1128 const nat n_threads = n_capabilities;
1129 const nat me = cap->no;
1130 nat i, j;
1131 rtsBool retry = rtsTrue;
1132
1133 while(retry) {
1134 for (i=0; i < n_threads; i++) {
1135 if (i == me || gc_threads[i]->idle) continue;
1136 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1137 prodCapability(&capabilities[i], cap->running_task);
1138 }
1139 }
1140 for (j=0; j < 10; j++) {
1141 retry = rtsFalse;
1142 for (i=0; i < n_threads; i++) {
1143 if (i == me || gc_threads[i]->idle) continue;
1144 write_barrier();
1145 interruptCapability(&capabilities[i]);
1146 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1147 retry = rtsTrue;
1148 }
1149 }
1150 if (!retry) break;
1151 yieldThread();
1152 }
1153 }
1154 }
1155
1156 #endif // THREADED_RTS
1157
1158 static void
1159 start_gc_threads (void)
1160 {
1161 #if defined(THREADED_RTS)
1162 gc_running_threads = 0;
1163 #endif
1164 }
1165
1166 static void
1167 wakeup_gc_threads (nat me USED_IF_THREADS)
1168 {
1169 #if defined(THREADED_RTS)
1170 nat i;
1171
1172 if (n_gc_threads == 1) return;
1173
1174 for (i=0; i < n_gc_threads; i++) {
1175 if (i == me || gc_threads[i]->idle) continue;
1176 inc_running();
1177 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1178 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1179
1180 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1181 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1182 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1183 }
1184 #endif
1185 }
1186
1187 // After GC is complete, we must wait for all GC threads to enter the
1188 // standby state, otherwise they may still be executing inside
1189 // any_work(), and may even remain awake until the next GC starts.
1190 static void
1191 shutdown_gc_threads (nat me USED_IF_THREADS)
1192 {
1193 #if defined(THREADED_RTS)
1194 nat i;
1195
1196 if (n_gc_threads == 1) return;
1197
1198 for (i=0; i < n_gc_threads; i++) {
1199 if (i == me || gc_threads[i]->idle) continue;
1200 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1201 }
1202 #endif
1203 }
1204
1205 #if defined(THREADED_RTS)
1206 void
1207 releaseGCThreads (Capability *cap USED_IF_THREADS)
1208 {
1209 const nat n_threads = n_capabilities;
1210 const nat me = cap->no;
1211 nat i;
1212 for (i=0; i < n_threads; i++) {
1213 if (i == me || gc_threads[i]->idle) continue;
1214 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1215 barf("releaseGCThreads");
1216
1217 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1218 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1219 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1220 }
1221 }
1222 #endif
1223
1224 /* ----------------------------------------------------------------------------
1225 Initialise a generation that is to be collected
1226 ------------------------------------------------------------------------- */
1227
1228 static void
1229 prepare_collected_gen (generation *gen)
1230 {
1231 nat i, g, n;
1232 gen_workspace *ws;
1233 bdescr *bd, *next;
1234
1235 // Throw away the current mutable list. Invariant: the mutable
1236 // list always has at least one block; this means we can avoid a
1237 // check for NULL in recordMutable().
1238 g = gen->no;
1239 if (g != 0) {
1240 for (i = 0; i < n_capabilities; i++) {
1241 freeChain(capabilities[i].mut_lists[g]);
1242 capabilities[i].mut_lists[g] = allocBlock();
1243 }
1244 }
1245
1246 gen = &generations[g];
1247 ASSERT(gen->no == g);
1248
1249 // we'll construct a new list of threads in this step
1250 // during GC, throw away the current list.
1251 gen->old_threads = gen->threads;
1252 gen->threads = END_TSO_QUEUE;
1253
1254 // deprecate the existing blocks
1255 gen->old_blocks = gen->blocks;
1256 gen->n_old_blocks = gen->n_blocks;
1257 gen->blocks = NULL;
1258 gen->n_blocks = 0;
1259 gen->n_words = 0;
1260 gen->live_estimate = 0;
1261
1262 // initialise the large object queues.
1263 ASSERT(gen->scavenged_large_objects == NULL);
1264 ASSERT(gen->n_scavenged_large_blocks == 0);
1265
1266 // grab all the partial blocks stashed in the gc_thread workspaces and
1267 // move them to the old_blocks list of this gen.
1268 for (n = 0; n < n_capabilities; n++) {
1269 ws = &gc_threads[n]->gens[gen->no];
1270
1271 for (bd = ws->part_list; bd != NULL; bd = next) {
1272 next = bd->link;
1273 bd->link = gen->old_blocks;
1274 gen->old_blocks = bd;
1275 gen->n_old_blocks += bd->blocks;
1276 }
1277 ws->part_list = NULL;
1278 ws->n_part_blocks = 0;
1279
1280 ASSERT(ws->scavd_list == NULL);
1281 ASSERT(ws->n_scavd_blocks == 0);
1282
1283 if (ws->todo_free != ws->todo_bd->start) {
1284 ws->todo_bd->free = ws->todo_free;
1285 ws->todo_bd->link = gen->old_blocks;
1286 gen->old_blocks = ws->todo_bd;
1287 gen->n_old_blocks += ws->todo_bd->blocks;
1288 alloc_todo_block(ws,0); // always has one block.
1289 }
1290 }
1291
1292 // mark the small objects as from-space
1293 for (bd = gen->old_blocks; bd; bd = bd->link) {
1294 bd->flags &= ~BF_EVACUATED;
1295 }
1296
1297 // mark the large objects as from-space
1298 for (bd = gen->large_objects; bd; bd = bd->link) {
1299 bd->flags &= ~BF_EVACUATED;
1300 }
1301
1302 // for a compacted generation, we need to allocate the bitmap
1303 if (gen->mark) {
1304 StgWord bitmap_size; // in bytes
1305 bdescr *bitmap_bdescr;
1306 StgWord *bitmap;
1307
1308 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
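// i.e. one mark bit per word of the old blocks:
//   words = n_old_blocks * BLOCK_SIZE / sizeof(W_)
//   bytes = words / BITS_PER_BYTE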
1309
1310 if (bitmap_size > 0) {
1311 bitmap_bdescr = allocGroup((StgWord)BLOCK_ROUND_UP(bitmap_size)
1312 / BLOCK_SIZE);
1313 gen->bitmap = bitmap_bdescr;
1314 bitmap = bitmap_bdescr->start;
1315
1316 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1317 bitmap_size, bitmap);
1318
1319 // don't forget to fill it with zeros!
1320 memset(bitmap, 0, bitmap_size);
1321
1322 // For each block in this step, point to its bitmap from the
1323 // block descriptor.
1324 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1325 bd->u.bitmap = bitmap;
1326 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1327
1328 // Also at this point we set the BF_MARKED flag
1329 // for this block. The invariant is that
1330 // BF_MARKED is always unset, except during GC
1331 // when it is set on those blocks which will be
1332 // compacted.
1333 if (!(bd->flags & BF_FRAGMENTED)) {
1334 bd->flags |= BF_MARKED;
1335 }
1336
1337 // BF_SWEPT should be marked only for blocks that are being
1338 // collected in sweep()
1339 bd->flags &= ~BF_SWEPT;
1340 }
1341 }
1342 }
1343 }
1344
1345
1346 /* ----------------------------------------------------------------------------
1347 Save the mutable lists in saved_mut_lists
1348 ------------------------------------------------------------------------- */
1349
1350 static void
1351 stash_mut_list (Capability *cap, nat gen_no)
1352 {
1353 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1354 cap->mut_lists[gen_no] = allocBlock_sync();
1355 }
1356
1357 /* ----------------------------------------------------------------------------
1358 Initialise a generation that is *not* to be collected
1359 ------------------------------------------------------------------------- */
1360
1361 static void
1362 prepare_uncollected_gen (generation *gen)
1363 {
1364 nat i;
1365
1366
1367 ASSERT(gen->no > 0);
1368
1369 // save the current mutable lists for this generation, and
1370 // allocate a fresh block for each one. We'll traverse these
1371 // mutable lists as roots early on in the GC.
1372 for (i = 0; i < n_capabilities; i++) {
1373 stash_mut_list(&capabilities[i], gen->no);
1374 }
1375
1376 ASSERT(gen->scavenged_large_objects == NULL);
1377 ASSERT(gen->n_scavenged_large_blocks == 0);
1378 }
1379
1380 /* -----------------------------------------------------------------------------
1381 Collect the completed blocks from a GC thread and attach them to
1382 the generation.
1383 -------------------------------------------------------------------------- */
1384
1385 static void
1386 collect_gct_blocks (void)
1387 {
1388 nat g;
1389 gen_workspace *ws;
1390 bdescr *bd, *prev;
1391
1392 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1393 ws = &gct->gens[g];
1394
1395 // there may still be a block attached to ws->todo_bd;
1396 // leave it there to use next time.
1397
1398 if (ws->scavd_list != NULL) {
1399 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1400
1401 ASSERT(gct->scan_bd == NULL);
1402 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1403
1404 prev = NULL;
1405 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1406 ws->gen->n_words += bd->free - bd->start;
1407 prev = bd;
1408 }
1409 if (prev != NULL) {
1410 prev->link = ws->gen->blocks;
1411 ws->gen->blocks = ws->scavd_list;
1412 }
1413 ws->gen->n_blocks += ws->n_scavd_blocks;
1414
1415 ws->scavd_list = NULL;
1416 ws->n_scavd_blocks = 0;
1417
1418 RELEASE_SPIN_LOCK(&ws->gen->sync);
1419 }
1420 }
1421 }
1422
1423 /* -----------------------------------------------------------------------------
1424 During mutation, any blocks that are filled by allocatePinned() are
1425 stashed on the local pinned_object_blocks list, to avoid needing to
1426 take a global lock. Here we collect those blocks from the
1427 cap->pinned_object_blocks lists and put them on the
1428 main g0->large_object list.
1432 -------------------------------------------------------------------------- */
1433
1434 static void
1435 collect_pinned_object_blocks (void)
1436 {
1437 nat n;
1438 bdescr *bd, *prev;
1439
1440 for (n = 0; n < n_capabilities; n++) {
1441 prev = NULL;
1442 for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
1443 prev = bd;
1444 }
1445 if (prev != NULL) {
1446 prev->link = g0->large_objects;
1447 if (g0->large_objects != NULL) {
1448 g0->large_objects->u.back = prev;
1449 }
1450 g0->large_objects = capabilities[n].pinned_object_blocks;
1451 capabilities[n].pinned_object_blocks = 0;
1452 }
1453 }
1454 }
1455
1456 /* -----------------------------------------------------------------------------
1457 Initialise a gc_thread before GC
1458 -------------------------------------------------------------------------- */
1459
1460 static void
1461 init_gc_thread (gc_thread *t)
1462 {
1463 t->static_objects = END_OF_STATIC_LIST;
1464 t->scavenged_static_objects = END_OF_STATIC_LIST;
1465 t->scan_bd = NULL;
1466 t->mut_lists = t->cap->mut_lists;
1467 t->evac_gen_no = 0;
1468 t->failed_to_evac = rtsFalse;
1469 t->eager_promotion = rtsTrue;
1470 t->thunk_selector_depth = 0;
1471 t->copied = 0;
1472 t->scanned = 0;
1473 t->any_work = 0;
1474 t->no_work = 0;
1475 t->scav_find_work = 0;
1476 }
1477
1478 /* -----------------------------------------------------------------------------
1479 Function we pass to evacuate roots.
1480 -------------------------------------------------------------------------- */
1481
1482 static void
1483 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1484 {
1485 // we stole a register for gct, but this function is called from
1486 // *outside* the GC where the register variable is not in effect,
1487 // so we need to save and restore it here. NB. only call
1488 // mark_root() from the main GC thread, otherwise gct will be
1489 // incorrect.
1490 #if defined(THREADED_RTS)
1491 gc_thread *saved_gct;
1492 saved_gct = gct;
1493 #endif
1494 SET_GCT(user);
1495
1496 evacuate(root);
1497
1498 SET_GCT(saved_gct);
1499 }
1500
1501 /* -----------------------------------------------------------------------------
1502 Initialising the static object & mutable lists
1503 -------------------------------------------------------------------------- */
1504
1505 static void
1506 zero_static_object_list(StgClosure* first_static)
1507 {
1508 StgClosure* p;
1509 StgClosure* link;
1510 const StgInfoTable *info;
1511
1512 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1513 info = get_itbl(p);
1514 link = *STATIC_LINK(info, p);
1515 *STATIC_LINK(info,p) = NULL;
1516 }
1517 }
1518
1519 /* ----------------------------------------------------------------------------
1520 Reset the sizes of the older generations when we do a major
1521 collection.
1522
1523 CURRENT STRATEGY: make all generations except zero the same size.
1524 We have to stay within the maximum heap size, and leave a certain
1525 percentage of the maximum heap size available to allocate into.
1526 ------------------------------------------------------------------------- */
1527
1528 static void
1529 resize_generations (void)
1530 {
1531 nat g;
1532
1533 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1534 W_ live, size, min_alloc, words;
1535 const W_ max = RtsFlags.GcFlags.maxHeapSize;
1536 const W_ gens = RtsFlags.GcFlags.generations;
1537
1538 // live in the oldest generation
1539 if (oldest_gen->live_estimate != 0) {
1540 words = oldest_gen->live_estimate;
1541 } else {
1542 words = oldest_gen->n_words;
1543 }
1544 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1545 oldest_gen->n_large_blocks;
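// i.e. the live words rounded up to whole blocks, plus the blocks
// already occupied by the oldest generation's large objects.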
1546
1547 // default max size for all generations except zero
1548 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1549 RtsFlags.GcFlags.minOldGenSize);
1550
1551 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1552 if (max > 0) {
1553 RtsFlags.GcFlags.heapSizeSuggestion = stg_min(max, size);
1554 } else {
1555 RtsFlags.GcFlags.heapSizeSuggestion = size;
1556 }
1557 }
1558
1559 // minimum size for generation zero
1560 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1561 RtsFlags.GcFlags.minAllocAreaSize);
1562
1563 // Auto-enable compaction when the residency reaches a
1564 // certain percentage of the maximum heap size (default: 30%).
1565 if (RtsFlags.GcFlags.compact ||
1566 (max > 0 &&
1567 oldest_gen->n_blocks >
1568 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1569 oldest_gen->mark = 1;
1570 oldest_gen->compact = 1;
1571 // debugBelch("compaction: on\n", live);
1572 } else {
1573 oldest_gen->mark = 0;
1574 oldest_gen->compact = 0;
1575 // debugBelch("compaction: off\n", live);
1576 }
1577
1578 if (RtsFlags.GcFlags.sweep) {
1579 oldest_gen->mark = 1;
1580 }
1581
1582 // if we're going to go over the maximum heap size, reduce the
1583 // size of the generations accordingly. The calculation is
1584 // different if compaction is turned on, because we don't need
1585 // to double the space required to collect the old generation.
1586 if (max != 0) {
1587
1588 // this test is necessary to ensure that the calculations
1589 // below don't have any negative results - we're working
1590 // with unsigned values here.
1591 if (max < min_alloc) {
1592 heapOverflow();
1593 }
1594
1595 if (oldest_gen->compact) {
1596 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1597 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1598 }
1599 } else {
1600 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1601 size = (max - min_alloc) / ((gens - 1) * 2);
1602 }
1603 }
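// With plain copying collection each of the (gens-1) older generations
// may need up to twice 'size' blocks while it is being collected, hence
// the budget of size*(gens-1)*2 + min_alloc.  When the oldest generation
// is compacted it is collected in place and its term is counted only
// once; the remaining gens-2 generations still carry the factor of 2.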
1604
1605 if (size < live) {
1606 heapOverflow();
1607 }
1608 }
1609
1610 #if 0
1611 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1612 min_alloc, size, max);
1613 #endif
1614
1615 for (g = 0; g < gens; g++) {
1616 generations[g].max_blocks = size;
1617 }
1618 }
1619 }
1620
1621 /* -----------------------------------------------------------------------------
1622 Calculate the new size of the nursery, and resize it.
1623 -------------------------------------------------------------------------- */
1624
1625 static void
1626 resize_nursery (void)
1627 {
1628 const StgWord min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1629
1630 if (RtsFlags.GcFlags.generations == 1)
1631 { // Two-space collector:
1632 W_ blocks;
1633
1634 /* set up a new nursery. Allocate a nursery size based on a
1635 * function of the amount of live data (by default a factor of 2)
1636 * Use the blocks from the old nursery if possible, freeing up any
1637 * left over blocks.
1638 *
1639 * If we get near the maximum heap size, then adjust our nursery
1640 * size accordingly. If the nursery is the same size as the live
1641 * data (L), then we need 3L bytes. We can reduce the size of the
1642 * nursery to bring the required memory down near 2L bytes.
1643 *
1644 * A normal 2-space collector would need 4L bytes to give the same
1645 * performance we get from 3L bytes, reducing to the same
1646 * performance at 2L bytes.
1647 */
1648 blocks = generations[0].n_blocks;
1649
1650 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1651 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1652 RtsFlags.GcFlags.maxHeapSize )
1653 {
1654 long adjusted_blocks; // signed on purpose
1655 int pc_free;
1656
1657 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1658
1659 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1660 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1661
1662 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1663 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1664 {
1665 heapOverflow();
1666 }
1667 blocks = adjusted_blocks;
1668 }
1669 else
1670 {
1671 blocks *= RtsFlags.GcFlags.oldGenFactor;
1672 if (blocks < min_nursery)
1673 {
1674 blocks = min_nursery;
1675 }
1676 }
1677 resizeNurseries(blocks);
1678 }
1679 else // Generational collector
1680 {
1681 /*
1682 * If the user has given us a suggested heap size, adjust our
1683 * allocation area to make best use of the memory available.
1684 */
1685 if (RtsFlags.GcFlags.heapSizeSuggestion)
1686 {
1687 long blocks;
1688 StgWord needed;
1689
1690 calcNeeded(rtsFalse, &needed); // approx blocks needed at next GC
1691
1692 /* Guess how much will be live in generation 0 step 0 next time.
1693 * A good approximation is obtained by finding the
1694 * percentage of g0 that was live at the last minor GC.
1695 *
1696 * We have an accurate figure for the amount of copied data in
1697 * 'copied', but we must convert this to a number of blocks, with
1698 * a small adjustment for estimated slop at the end of a block
1699 * (- 10 words).
1700 */
1701 if (N == 0)
1702 {
1703 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1704 / countNurseryBlocks();
1705 }
1706
1707 /* Estimate a size for the allocation area based on the
1708 * information available. We might end up going slightly under
1709 * or over the suggested heap size, but we should be pretty
1710 * close on average.
1711 *
1712 * Formula: suggested - needed
1713 * ----------------------------
1714 * 1 + g0_pcnt_kept/100
1715 *
1716 * where 'needed' is the amount of memory needed at the next
1717 * collection for collecting all gens except g0.
1718 */
1719 blocks =
1720 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1721 (100 + (long)g0_pcnt_kept);
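/* Worked example (illustrative numbers only): with a heap size
 * suggestion of 1,000,000 blocks, needed == 200,000 and g0_pcnt_kept
 * == 30, this gives blocks = (800,000 * 100) / 130 = 615,384; the
 * nursery (615,384) plus the ~30% of it we expect to keep (184,615)
 * plus 'needed' (200,000) comes to roughly the suggested 1,000,000
 * blocks.
 */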
1722
1723 if (blocks < (long)min_nursery) {
1724 blocks = min_nursery;
1725 }
1726
1727 resizeNurseries((W_)blocks);
1728 }
1729 else
1730 {
1731 // we might have added extra large blocks to the nursery, so
1732 // resize back to minAllocAreaSize again.
1733 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1734 }
1735 }
1736 }
1737
1738 /* -----------------------------------------------------------------------------
1739 Sanity code for CAF garbage collection.
1740
1741 With DEBUG turned on, we manage a CAF list in addition to the SRT
1742 mechanism. After GC, we run down the CAF list and blackhole any
1743 CAFs which have been garbage collected. This means we get an error
1744 whenever the program tries to enter a garbage collected CAF.
1745
1746 Any garbage collected CAFs are taken off the CAF list at the same
1747 time.
1748 -------------------------------------------------------------------------- */
1749
1750 #if 0 && defined(DEBUG)
1751
1752 static void
1753 gcCAFs(void)
1754 {
1755 StgClosure* p;
1756 StgClosure** pp;
1757 const StgInfoTable *info;
1758 nat i;
1759
1760 i = 0;
1761 p = caf_list;
1762 pp = &caf_list;
1763
1764 while (p != NULL) {
1765
1766 info = get_itbl(p);
1767
1768 ASSERT(info->type == IND_STATIC);
1769
1770 if (STATIC_LINK(info,p) == NULL) {
1771 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1772 // black hole it
1773 SET_INFO(p,&stg_BLACKHOLE_info);
1774 p = STATIC_LINK2(info,p);
1775 *pp = p;
1776 }
1777 else {
1778 pp = &STATIC_LINK2(info,p);
1779 p = *pp;
1780 i++;
1781 }
1782
1783 }
1784
1785 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1786 }
1787 #endif