Allocate pinned object blocks from the nursery, not the global
[ghc.git] / rts / sm / GC.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16 #include "HsFFI.h"
17
18 #include "Storage.h"
19 #include "RtsUtils.h"
20 #include "Apply.h"
21 #include "Updates.h"
22 #include "Stats.h"
23 #include "Schedule.h"
24 #include "Sanity.h"
25 #include "BlockAlloc.h"
26 #include "ProfHeap.h"
27 #include "Weak.h"
28 #include "Prelude.h"
29 #include "RtsSignals.h"
30 #include "STM.h"
31 #if defined(RTS_GTK_FRONTPANEL)
32 #include "FrontPanel.h"
33 #endif
34 #include "Trace.h"
35 #include "RetainerProfile.h"
36 #include "LdvProfile.h"
37 #include "RaiseAsync.h"
38 #include "Papi.h"
39 #include "Stable.h"
40
41 #include "GC.h"
42 #include "GCThread.h"
43 #include "GCTDecl.h"
44 #include "Compact.h"
45 #include "Evac.h"
46 #include "Scav.h"
47 #include "GCUtils.h"
48 #include "MarkStack.h"
49 #include "MarkWeak.h"
50 #include "Sparks.h"
51 #include "Sweep.h"
52
53 #include <string.h> // for memset()
54 #include <unistd.h>
55
56 /* -----------------------------------------------------------------------------
57 Global variables
58 -------------------------------------------------------------------------- */
59
60 /* STATIC OBJECT LIST.
61 *
62 * During GC:
63 * We maintain a linked list of static objects that are still live.
64 * The requirements for this list are:
65 *
66 * - we need to scan the list while adding to it, in order to
67 * scavenge all the static objects (in the same way that
68 * breadth-first scavenging works for dynamic objects).
69 *
70 * - we need to be able to tell whether an object is already on
71 * the list, to break loops.
72 *
73 * Each static object has a "static link field", which we use for
74 * linking objects on to the list. We use a stack-type list, consing
75 * objects on the front as they are added (this means that the
76 * scavenge phase is depth-first, not breadth-first, but that
77 * shouldn't matter).
78 *
79 * A separate list is kept for objects that have been scavenged
80 * already - this is so that we can zero all the marks afterwards.
81 *
82 * An object is on the list if its static link field is non-zero; this
83 * means that we have to mark the end of the list with '1', not NULL.
84 *
85 * Extra notes for generational GC:
86 *
87 * Each generation has a static object list associated with it. When
88 * collecting generations up to N, we treat the static object lists
89 * from generations > N as roots.
90 *
91 * We build up a static object list while collecting generations 0..N,
92 * which is then appended to the static object list of generation N+1.
93 */
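/* A minimal sketch (hypothetical helper, not from this file; the real code
 * lives in Evac.c and differs in detail) of how an object is consed onto the
 * static object list described above, using its static link field:
 */
#if 0
static void link_static_object (StgClosure *q)
{
    const StgInfoTable *info = get_itbl(q);

    // A NULL static link field means "not on any list yet"; the end of the
    // list is marked with END_OF_STATIC_LIST (the value 1), never NULL.
    if (*STATIC_LINK(info, q) == NULL) {
        *STATIC_LINK(info, q) = gct->static_objects;   // cons onto the front
        gct->static_objects = q;
    }
}
#endif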
94
95 /* N is the oldest generation being collected, where the generations
96 * are numbered starting at 0. A major GC (indicated by the major_gc
97 * flag) is when we're collecting all generations. We only attempt to
98 * deal with static objects and GC CAFs when doing a major GC.
99 */
100 nat N;
101 rtsBool major_gc;
102
103 /* Data used for allocation area sizing.
104 */
105 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
106
107 /* Mut-list stats */
108 #ifdef DEBUG
109 nat mutlist_MUTVARS,
110 mutlist_MUTARRS,
111 mutlist_MVARS,
112 mutlist_OTHERS;
113 #endif
114
115 /* Thread-local data for each GC thread
116 */
117 gc_thread **gc_threads = NULL;
118
119 #if !defined(THREADED_RTS)
120 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
121 #endif
122
123 // Number of threads running in *this* GC. Affects how many
124 // step->todos[] lists we have to look in to find work.
125 nat n_gc_threads;
126
127 // For stats:
128 long copied; // *words* copied & scavenged during this GC
129
130 rtsBool work_stealing;
131
132 DECLARE_GCT
133
134 /* -----------------------------------------------------------------------------
135 Static function declarations
136 -------------------------------------------------------------------------- */
137
138 static void mark_root (void *user, StgClosure **root);
139 static void zero_static_object_list (StgClosure* first_static);
140 static nat initialise_N (rtsBool force_major_gc);
141 static void prepare_collected_gen (generation *gen);
142 static void prepare_uncollected_gen (generation *gen);
143 static void init_gc_thread (gc_thread *t);
144 static void resize_generations (void);
145 static void resize_nursery (void);
146 static void start_gc_threads (void);
147 static void scavenge_until_all_done (void);
148 static StgWord inc_running (void);
149 static StgWord dec_running (void);
150 static void wakeup_gc_threads (nat me);
151 static void shutdown_gc_threads (nat me);
152 static void collect_gct_blocks (void);
153 static lnat collect_pinned_object_blocks (void);
154
155 #if 0 && defined(DEBUG)
156 static void gcCAFs (void);
157 #endif
158
159 /* -----------------------------------------------------------------------------
160 The mark stack.
161 -------------------------------------------------------------------------- */
162
163 bdescr *mark_stack_top_bd; // topmost block in the mark stack
164 bdescr *mark_stack_bd; // current block in the mark stack
165 StgPtr mark_sp; // pointer to the next unallocated mark stack entry
166
167 /* -----------------------------------------------------------------------------
168 GarbageCollect: the main entry point to the garbage collector.
169
170 Locks held: all capabilities are held throughout GarbageCollect().
171 -------------------------------------------------------------------------- */
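/* Overview of the phases below, in the order they happen:
 *
 *   - decide which generations to collect (initialise_N)
 *   - gather each capability's pinned blocks onto g0->large_objects
 *     (collect_pinned_object_blocks)
 *   - prepare the collected and uncollected generations
 *   - evacuate the roots: the capabilities' mutable lists, CAFs, each
 *     capability (markCapability), the scheduler, signal handlers, the
 *     weak pointer list and the stable pointer table
 *   - scavenge until no work remains, repeating after each pass of
 *     traverseWeakPtrList()
 *   - compact or sweep the oldest generation (major GC with mark only)
 *   - tidy up each generation: free from-space, promote large objects,
 *     and gather statistics
 *   - resize the generations and the nursery, run finalizers, resurrect
 *     threads, and report the results to the stats module
 */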
172
173 void
174 GarbageCollect (rtsBool force_major_gc,
175 rtsBool do_heap_census,
176 nat gc_type USED_IF_THREADS,
177 Capability *cap)
178 {
179 bdescr *bd;
180 generation *gen;
181 lnat live_blocks, live_words, allocated, max_copied, avg_copied;
182 #if defined(THREADED_RTS)
183 gc_thread *saved_gct;
184 #endif
185 nat g, n;
186
187 // necessary if we stole a callee-saves register for gct:
188 #if defined(THREADED_RTS)
189 saved_gct = gct;
190 #endif
191
192 #ifdef PROFILING
193 CostCentreStack *save_CCS[n_capabilities];
194 #endif
195
196 ACQUIRE_SM_LOCK;
197
198 #if defined(RTS_USER_SIGNALS)
199 if (RtsFlags.MiscFlags.install_signal_handlers) {
200 // block signals
201 blockUserSignals();
202 }
203 #endif
204
205 ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
206 // otherwise adjust the padding in gen_workspace.
207
208 // this is the main thread
209 SET_GCT(gc_threads[cap->no]);
210
211 // tell the stats department that we've started a GC
212 stat_startGC(gct);
213
214 // lock the StablePtr table
215 stablePtrPreGC();
216
217 #ifdef DEBUG
218 mutlist_MUTVARS = 0;
219 mutlist_MUTARRS = 0;
220 mutlist_OTHERS = 0;
221 #endif
222
223 // attribute any costs to CCS_GC
224 #ifdef PROFILING
225 for (n = 0; n < n_capabilities; n++) {
226 save_CCS[n] = capabilities[n].r.rCCCS;
227 capabilities[n].r.rCCCS = CCS_GC;
228 }
229 #endif
230
231 /* Approximate how much we allocated.
232 * Todo: only when generating stats?
233 */
234 allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
235
236 /* Figure out which generation to collect
237 */
238 n = initialise_N(force_major_gc);
239
240 #if defined(THREADED_RTS)
241 work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
242 N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
243 // It's not always a good idea to do load balancing in parallel
244 // GC. In particular, for a parallel program we don't want to
245 // lose locality by moving cached data into another CPU's cache
246 // (this effect can be quite significant).
247 //
248 // We could have a more complex way to determine whether to do
249 // work stealing or not, e.g. it might be a good idea to do it
250 // if the heap is big. For now, we just turn it on or off with
251 // a flag.
252 #endif
253
254 /* Start threads, so they can be spinning up while we finish initialisation.
255 */
256 start_gc_threads();
257
258 #if defined(THREADED_RTS)
259 /* How many threads will be participating in this GC?
260 * We don't try to parallelise minor GCs (unless the user asks for
261 * it with +RTS -gn0), or mark/compact/sweep GC.
262 */
263 if (gc_type == SYNC_GC_PAR) {
264 n_gc_threads = n_capabilities;
265 } else {
266 n_gc_threads = 1;
267 }
268 #else
269 n_gc_threads = 1;
270 #endif
271
272 debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
273 N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
274
275 #ifdef RTS_GTK_FRONTPANEL
276 if (RtsFlags.GcFlags.frontpanel) {
277 updateFrontPanelBeforeGC(N);
278 }
279 #endif
280
281 #ifdef DEBUG
282 // check for memory leaks if DEBUG is on
283 memInventory(DEBUG_gc);
284 #endif
285
286 // check sanity *before* GC
287 IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
288
289 // gather blocks allocated using allocatePinned() from each capability
290 // and put them on the g0->large_object list.
291 collect_pinned_object_blocks();
292
293 // Initialise all the generations/steps that we're collecting.
294 for (g = 0; g <= N; g++) {
295 prepare_collected_gen(&generations[g]);
296 }
297 // Initialise all the generations/steps that we're *not* collecting.
298 for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
299 prepare_uncollected_gen(&generations[g]);
300 }
301
302 // Prepare this gc_thread
303 init_gc_thread(gct);
304
305 /* Allocate a mark stack if we're doing a major collection.
306 */
307 if (major_gc && oldest_gen->mark) {
308 mark_stack_bd = allocBlock();
309 mark_stack_top_bd = mark_stack_bd;
310 mark_stack_bd->link = NULL;
311 mark_stack_bd->u.back = NULL;
312 mark_sp = mark_stack_bd->start;
313 } else {
314 mark_stack_bd = NULL;
315 mark_stack_top_bd = NULL;
316 mark_sp = NULL;
317 }
318
319 /* -----------------------------------------------------------------------
320 * follow all the roots that we know about:
321 */
322
323 // the main thread is running: this prevents any other threads from
324 // exiting prematurely, so we can start them now.
325 // NB. do this after the mutable lists have been saved above, otherwise
326 // the other GC threads will be writing into the old mutable lists.
327 inc_running();
328 wakeup_gc_threads(gct->thread_index);
329
330 traceEventGcWork(gct->cap);
331
332 // scavenge the capability-private mutable lists. This isn't part
333 // of markSomeCapabilities() because markSomeCapabilities() can only
334 // call back into the GC via mark_root() (due to the gct register
335 // variable).
336 if (n_gc_threads == 1) {
337 for (n = 0; n < n_capabilities; n++) {
338 #if defined(THREADED_RTS)
339 scavenge_capability_mut_Lists1(&capabilities[n]);
340 #else
341 scavenge_capability_mut_lists(&capabilities[n]);
342 #endif
343 }
344 } else {
345 scavenge_capability_mut_lists(gct->cap);
346 for (n = 0; n < n_capabilities; n++) {
347 if (gc_threads[n]->idle) {
348 markCapability(mark_root, gct, &capabilities[n],
349 rtsTrue/*don't mark sparks*/);
350 scavenge_capability_mut_lists(&capabilities[n]);
351 }
352 }
353 }
354
355 // follow roots from the CAF list (used by GHCi)
356 gct->evac_gen_no = 0;
357 markCAFs(mark_root, gct);
358
359 // follow all the roots that the application knows about.
360 gct->evac_gen_no = 0;
361 if (n_gc_threads == 1) {
362 for (n = 0; n < n_capabilities; n++) {
363 markCapability(mark_root, gct, &capabilities[n],
364 rtsTrue/*don't mark sparks*/);
365 }
366 } else {
367 markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
368 }
369
370 markScheduler(mark_root, gct);
371
372 #if defined(RTS_USER_SIGNALS)
373 // mark the signal handlers (signals should be already blocked)
374 markSignalHandlers(mark_root, gct);
375 #endif
376
377 // Mark the weak pointer list, and prepare to detect dead weak pointers.
378 markWeakPtrList();
379 initWeakForGC();
380
381 // Mark the stable pointer table.
382 markStablePtrTable(mark_root, gct);
383
384 /* -------------------------------------------------------------------------
385 * Repeatedly scavenge all the areas we know about until there's no
386 * more scavenging to be done.
387 */
388 for (;;)
389 {
390 scavenge_until_all_done();
391 // The other threads are now stopped. We might recurse back to
392 // here, but from now on this is the only thread.
393
394 // must be last... invariant is that everything is fully
395 // scavenged at this point.
396 if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
397 inc_running();
398 continue;
399 }
400
401 // If we get to here, there's really nothing left to do.
402 break;
403 }
404
405 shutdown_gc_threads(gct->thread_index);
406
407 // Now see which stable names are still alive.
408 gcStablePtrTable();
409
410 #ifdef THREADED_RTS
411 if (n_gc_threads == 1) {
412 for (n = 0; n < n_capabilities; n++) {
413 pruneSparkQueue(&capabilities[n]);
414 }
415 } else {
416 for (n = 0; n < n_capabilities; n++) {
417 if (n == cap->no || gc_threads[n]->idle) {
418 pruneSparkQueue(&capabilities[n]);
419 }
420 }
421 }
422 #endif
423
424 #ifdef PROFILING
425 // We call processHeapClosureForDead() on every closure destroyed during
426 // the current garbage collection, so we invoke LdvCensusForDead().
427 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
428 || RtsFlags.ProfFlags.bioSelector != NULL) {
429 RELEASE_SM_LOCK; // LdvCensusForDead may need to take the lock
430 LdvCensusForDead(N);
431 ACQUIRE_SM_LOCK;
432 }
433 #endif
434
435 // NO MORE EVACUATION AFTER THIS POINT!
436
437 // Finally: compact or sweep the oldest generation.
438 if (major_gc && oldest_gen->mark) {
439 if (oldest_gen->compact)
440 compact(gct->scavenged_static_objects);
441 else
442 sweep(oldest_gen);
443 }
444
445 copied = 0;
446 max_copied = 0;
447 avg_copied = 0;
448 {
449 nat i;
450 for (i=0; i < n_gc_threads; i++) {
451 if (n_gc_threads > 1) {
452 debugTrace(DEBUG_gc,"thread %d:", i);
453 debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
454 debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
455 debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
456 debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
457 debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
458 }
459 copied += gc_threads[i]->copied;
460 max_copied = stg_max(gc_threads[i]->copied, max_copied);
461 }
462 if (n_gc_threads == 1) {
463 max_copied = 0;
464 avg_copied = 0;
465 } else {
466 avg_copied = copied;
467 }
468 }
469
470 // Run through all the generations/steps and tidy up.
471 // We're going to:
472 // - count the amount of "live" data (live_words, live_blocks)
473 // - count the amount of "copied" data in this GC (copied)
474 // - free from-space
475 // - make to-space the new from-space (set BF_EVACUATED on all blocks)
476 //
477 live_words = 0;
478 live_blocks = 0;
479
480 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
481
482 if (g == N) {
483 generations[g].collections++; // for stats
484 if (n_gc_threads > 1) generations[g].par_collections++;
485 }
486
487 // Count the mutable list as bytes "copied" for the purposes of
488 // stats. Every mutable list is copied during every GC.
489 if (g > 0) {
490 nat mut_list_size = 0;
491 for (n = 0; n < n_capabilities; n++) {
492 mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
493 }
494 copied += mut_list_size;
495
496 debugTrace(DEBUG_gc,
497 "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
498 (unsigned long)(mut_list_size * sizeof(W_)),
499 mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
500 }
501
502 bdescr *next, *prev;
503 gen = &generations[g];
504
505 // for generations we collected...
506 if (g <= N) {
507
508 /* free old memory and shift to-space into from-space for all
509 * the collected steps (except the allocation area). These
510 * freed blocks will probably be quickly recycled.
511 */
512 if (gen->mark)
513 {
514 // tack the new blocks on the end of the existing blocks
515 if (gen->old_blocks != NULL) {
516
517 prev = NULL;
518 for (bd = gen->old_blocks; bd != NULL; bd = next) {
519
520 next = bd->link;
521
522 if (!(bd->flags & BF_MARKED))
523 {
524 if (prev == NULL) {
525 gen->old_blocks = next;
526 } else {
527 prev->link = next;
528 }
529 freeGroup(bd);
530 gen->n_old_blocks--;
531 }
532 else
533 {
534 gen->n_words += bd->free - bd->start;
535
536 // NB. this step might not be compacted next
537 // time, so reset the BF_MARKED flags.
538 // They are set before GC if we're going to
539 // compact. (BF_MARKED is set in prepare_collected_gen(), below.)
540 bd->flags &= ~BF_MARKED;
541
542 // between GCs, all blocks in the heap except
543 // for the nursery have the BF_EVACUATED flag set.
544 bd->flags |= BF_EVACUATED;
545
546 prev = bd;
547 }
548 }
549
550 if (prev != NULL) {
551 prev->link = gen->blocks;
552 gen->blocks = gen->old_blocks;
553 }
554 }
555 // add the new blocks to the block tally
556 gen->n_blocks += gen->n_old_blocks;
557 ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
558 ASSERT(countOccupied(gen->blocks) == gen->n_words);
559 }
560 else // not compacted
561 {
562 freeChain(gen->old_blocks);
563 }
564
565 gen->old_blocks = NULL;
566 gen->n_old_blocks = 0;
567
568 /* LARGE OBJECTS. The current live large objects are chained on
569 * scavenged_large_objects, having been moved during garbage
570 * collection from large_objects. Any objects left on the
571 * large_objects list are therefore dead, so we free them here.
572 */
573 freeChain(gen->large_objects);
574 gen->large_objects = gen->scavenged_large_objects;
575 gen->n_large_blocks = gen->n_scavenged_large_blocks;
576 gen->n_new_large_words = 0;
577 }
578 else // for generations > N
579 {
580 /* For older generations, we need to append the
581 * scavenged_large_object list (i.e. large objects that have been
582 * promoted during this GC) to the large_object list for that step.
583 */
584 for (bd = gen->scavenged_large_objects; bd; bd = next) {
585 next = bd->link;
586 dbl_link_onto(bd, &gen->large_objects);
587 }
588
589 // add the new blocks we promoted during this GC
590 gen->n_large_blocks += gen->n_scavenged_large_blocks;
591 }
592
593 ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
594
595 gen->scavenged_large_objects = NULL;
596 gen->n_scavenged_large_blocks = 0;
597
598 // Count "live" data
599 live_words += genLiveWords(gen);
600 live_blocks += genLiveBlocks(gen);
601
602 // add in the partial blocks in the gen_workspaces, but ignore gen 0
603 // if this is a local GC (we can't count another capability's part_list)
604 {
605 nat i;
606 for (i = 0; i < n_capabilities; i++) {
607 live_words += gcThreadLiveWords(i, gen->no);
608 live_blocks += gcThreadLiveBlocks(i, gen->no);
609 }
610 }
611 } // for all generations
612
613 // update the max size of older generations after a major GC
614 resize_generations();
615
616 // Free the mark stack.
617 if (mark_stack_top_bd != NULL) {
618 debugTrace(DEBUG_gc, "mark stack: %d blocks",
619 countBlocks(mark_stack_top_bd));
620 freeChain(mark_stack_top_bd);
621 }
622
623 // Free any bitmaps.
624 for (g = 0; g <= N; g++) {
625 gen = &generations[g];
626 if (gen->bitmap != NULL) {
627 freeGroup(gen->bitmap);
628 gen->bitmap = NULL;
629 }
630 }
631
632 // Reset the nursery: make the blocks empty
633 allocated += clearNurseries();
634
635 resize_nursery();
636
637 resetNurseries();
638
639 // mark the garbage collected CAFs as dead
640 #if 0 && defined(DEBUG) // doesn't work at the moment
641 if (major_gc) { gcCAFs(); }
642 #endif
643
644 #ifdef PROFILING
645 // resetStaticObjectForRetainerProfiling() must be called before
646 // zeroing below.
647
648 // ToDo: fix the gct->scavenged_static_objects below
649 resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
650 #endif
651
652 // zero the scavenged static object list
653 if (major_gc) {
654 nat i;
655 if (n_gc_threads == 1) {
656 zero_static_object_list(gct->scavenged_static_objects);
657 } else {
658 for (i = 0; i < n_gc_threads; i++) {
659 if (!gc_threads[i]->idle) {
660 zero_static_object_list(gc_threads[i]->scavenged_static_objects);
661 }
662 }
663 }
664 }
665
666 // Update the stable pointer hash table.
667 updateStablePtrTable(major_gc);
668
669 // unlock the StablePtr table. Must be before scheduleFinalizers(),
670 // because a finalizer may call hs_free_fun_ptr() or
671 // hs_free_stable_ptr(), both of which access the StablePtr table.
672 stablePtrPostGC();
673
674 // Start any pending finalizers. Must be after
675 // updateStablePtrTable() and stablePtrPostGC() (see #4221).
676 RELEASE_SM_LOCK;
677 scheduleFinalizers(cap, old_weak_ptr_list);
678 ACQUIRE_SM_LOCK;
679
680 // check sanity after GC
681 // before resurrectThreads(), because that might overwrite some
682 // closures, which will cause problems with THREADED where we don't
683 // fill slop.
684 IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
685
686 // If a heap census is due, we need to do it before
687 // resurrectThreads(), for the same reason as checkSanity above:
688 // resurrectThreads() will overwrite some closures and leave slop
689 // behind.
690 if (do_heap_census) {
691 debugTrace(DEBUG_sched, "performing heap census");
692 RELEASE_SM_LOCK;
693 heapCensus(gct->gc_start_cpu);
694 ACQUIRE_SM_LOCK;
695 }
696
697 // send exceptions to any threads which were about to die
698 RELEASE_SM_LOCK;
699 resurrectThreads(resurrected_threads);
700 ACQUIRE_SM_LOCK;
701
702 if (major_gc) {
703 nat need, got;
704 need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
705 got = mblocks_allocated;
706 /* If the amount of data remains constant, next major GC we'll
707 require (F+1)*need. We leave (F+2)*need in order to reduce
708 repeated deallocation and reallocation. */
709 need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
710 if (got > need) {
711 returnMemoryToOS(got - need);
712 }
713 }
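    // A worked example, assuming the default oldGenFactor F = 2: if the live
    // data needs 100 megablocks and 500 megablocks are currently allocated,
    // we keep (F+2)*100 = 400 megablocks and return the other 100 to the OS.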
714
715 // extra GC trace info
716 IF_DEBUG(gc, statDescribeGens());
717
718 #ifdef DEBUG
719 // symbol-table based profiling
720 /* heapCensus(to_blocks); */ /* ToDo */
721 #endif
722
723 // restore enclosing cost centre
724 #ifdef PROFILING
725 for (n = 0; n < n_capabilities; n++) {
726 capabilities[n].r.rCCCS = save_CCS[n];
727 }
728 #endif
729
730 #ifdef DEBUG
731 // check for memory leaks if DEBUG is on
732 memInventory(DEBUG_gc);
733 #endif
734
735 #ifdef RTS_GTK_FRONTPANEL
736 if (RtsFlags.GcFlags.frontpanel) {
737 updateFrontPanelAfterGC( N, live_words );
738 }
739 #endif
740
741 // ok, GC over: tell the stats department what happened.
742 stat_endGC(gct, allocated, live_words,
743 copied, N, max_copied, avg_copied,
744 live_blocks * BLOCK_SIZE_W - live_words /* slop */);
745
746 // Guess which generation we'll collect *next* time
747 initialise_N(force_major_gc);
748
749 #if defined(RTS_USER_SIGNALS)
750 if (RtsFlags.MiscFlags.install_signal_handlers) {
751 // unblock signals again
752 unblockUserSignals();
753 }
754 #endif
755
756 RELEASE_SM_LOCK;
757
758 SET_GCT(saved_gct);
759 }
760
761 /* -----------------------------------------------------------------------------
762 Figure out which generation to collect, initialise N and major_gc.
763
764 Also returns the total number of blocks in generations that will be
765 collected.
766 -------------------------------------------------------------------------- */
767
768 static nat
769 initialise_N (rtsBool force_major_gc)
770 {
771 int g;
772 nat blocks, blocks_total;
773
774 blocks = 0;
775 blocks_total = 0;
776
777 if (force_major_gc) {
778 N = RtsFlags.GcFlags.generations - 1;
779 } else {
780 N = 0;
781 }
782
783 for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
784
785 blocks = generations[g].n_words / BLOCK_SIZE_W
786 + generations[g].n_large_blocks;
787
788 if (blocks >= generations[g].max_blocks) {
789 N = stg_max(N,g);
790 }
791 if ((nat)g <= N) {
792 blocks_total += blocks;
793 }
794 }
795
796 blocks_total += countNurseryBlocks();
797
798 major_gc = (N == RtsFlags.GcFlags.generations-1);
799 return blocks_total;
800 }
801
802 /* -----------------------------------------------------------------------------
803 Initialise the gc_thread structures.
804 -------------------------------------------------------------------------- */
805
806 #define GC_THREAD_INACTIVE 0
807 #define GC_THREAD_STANDING_BY 1
808 #define GC_THREAD_RUNNING 2
809 #define GC_THREAD_WAITING_TO_CONTINUE 3
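/* How a worker moves between these states (see gcWorkerThread(),
 * wakeup_gc_threads(), shutdown_gc_threads() and releaseGCThreads() below):
 *
 *   GC_THREAD_INACTIVE             idle between GCs
 *     -> GC_THREAD_STANDING_BY     worker announces itself and blocks on
 *                                  its gc_spin lock
 *     -> GC_THREAD_RUNNING         wakeup_gc_threads() releases gc_spin
 *     -> GC_THREAD_WAITING_TO_CONTINUE
 *                                  worker has finished scavenging and
 *                                  blocks on its mut_spin lock
 *     -> GC_THREAD_INACTIVE        releaseGCThreads() releases mut_spin
 */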
810
811 static void
812 new_gc_thread (nat n, gc_thread *t)
813 {
814 nat g;
815 gen_workspace *ws;
816
817 t->cap = &capabilities[n];
818
819 #ifdef THREADED_RTS
820 t->id = 0;
821 initSpinLock(&t->gc_spin);
822 initSpinLock(&t->mut_spin);
823 ACQUIRE_SPIN_LOCK(&t->gc_spin);
824 t->wakeup = GC_THREAD_INACTIVE; // starts true, so we can wait for the
825 // thread to start up, see wakeup_gc_threads
826 #endif
827
828 t->thread_index = n;
829 t->idle = rtsFalse;
830 t->free_blocks = NULL;
831 t->gc_count = 0;
832
833 init_gc_thread(t);
834
835 #ifdef USE_PAPI
836 t->papi_events = -1;
837 #endif
838
839 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
840 {
841 ws = &t->gens[g];
842 ws->gen = &generations[g];
843 ASSERT(g == ws->gen->no);
844 ws->my_gct = t;
845
846 // We want to call
847 // alloc_todo_block(ws,0);
848 // but can't, because it uses gct which isn't set up at this point.
849 // Hence, allocate a block for todo_bd manually:
850 {
851 bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
852 initBdescr(bd, ws->gen, ws->gen->to);
853 bd->flags = BF_EVACUATED;
854 bd->u.scan = bd->free = bd->start;
855
856 ws->todo_bd = bd;
857 ws->todo_free = bd->free;
858 ws->todo_lim = bd->start + BLOCK_SIZE_W;
859 }
860
861 ws->todo_q = newWSDeque(128);
862 ws->todo_overflow = NULL;
863 ws->n_todo_overflow = 0;
864 ws->todo_large_objects = NULL;
865
866 ws->part_list = NULL;
867 ws->n_part_blocks = 0;
868
869 ws->scavd_list = NULL;
870 ws->n_scavd_blocks = 0;
871 }
872 }
873
874
875 void
876 initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
877 {
878 #if defined(THREADED_RTS)
879 nat i;
880
881 if (from > 0) {
882 gc_threads = stgReallocBytes (gc_threads, to * sizeof(gc_thread*),
883 "initGcThreads");
884 } else {
885 gc_threads = stgMallocBytes (to * sizeof(gc_thread*),
886 "initGcThreads");
887 }
888
889 // We have to update the gct->cap pointers to point to the new
890 // Capability array now.
891 for (i = 0; i < from; i++) {
892 gc_threads[i]->cap = &capabilities[gc_threads[i]->cap->no];
893 }
894
895 for (i = from; i < to; i++) {
896 gc_threads[i] =
897 stgMallocBytes(sizeof(gc_thread) +
898 RtsFlags.GcFlags.generations * sizeof(gen_workspace),
899 "alloc_gc_threads");
900
901 new_gc_thread(i, gc_threads[i]);
902 }
903 #else
904 ASSERT(from == 0 && to == 1);
905 gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
906 gc_threads[0] = gct;
907 new_gc_thread(0,gc_threads[0]);
908 #endif
909 }
910
911 void
912 freeGcThreads (void)
913 {
914 nat g;
915 if (gc_threads != NULL) {
916 #if defined(THREADED_RTS)
917 nat i;
918 for (i = 0; i < n_capabilities; i++) {
919 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
920 {
921 freeWSDeque(gc_threads[i]->gens[g].todo_q);
922 }
923 stgFree (gc_threads[i]);
924 }
925 stgFree (gc_threads);
926 #else
927 for (g = 0; g < RtsFlags.GcFlags.generations; g++)
928 {
929 freeWSDeque(gc_threads[0]->gens[g].todo_q);
930 }
931 stgFree (gc_threads);
932 #endif
933 gc_threads = NULL;
934 }
935 }
936
937 /* ----------------------------------------------------------------------------
938 Start GC threads
939 ------------------------------------------------------------------------- */
940
941 static volatile StgWord gc_running_threads;
942
943 static StgWord
944 inc_running (void)
945 {
946 StgWord new;
947 new = atomic_inc(&gc_running_threads);
948 ASSERT(new <= n_gc_threads);
949 return new;
950 }
951
952 static StgWord
953 dec_running (void)
954 {
955 ASSERT(gc_running_threads != 0);
956 return atomic_dec(&gc_running_threads);
957 }
958
959 static rtsBool
960 any_work (void)
961 {
962 int g;
963 gen_workspace *ws;
964
965 gct->any_work++;
966
967 write_barrier();
968
969 // scavenge objects in compacted generation
970 if (mark_stack_bd != NULL && !mark_stack_empty()) {
971 return rtsTrue;
972 }
973
974 // Check for global work in any step. We don't need to check for
975 // local work, because we have already exited scavenge_loop(),
976 // which means there is no local work for this thread.
977 for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
978 ws = &gct->gens[g];
979 if (ws->todo_large_objects) return rtsTrue;
980 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
981 if (ws->todo_overflow) return rtsTrue;
982 }
983
984 #if defined(THREADED_RTS)
985 if (work_stealing) {
986 nat n;
987 // look for work to steal
988 for (n = 0; n < n_gc_threads; n++) {
989 if (n == gct->thread_index) continue;
990 for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
991 ws = &gc_threads[n]->gens[g];
992 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
993 }
994 }
995 }
996 #endif
997
998 gct->no_work++;
999 #if defined(THREADED_RTS)
1000 yieldThread();
1001 #endif
1002
1003 return rtsFalse;
1004 }
1005
1006 static void
1007 scavenge_until_all_done (void)
1008 {
1009 DEBUG_ONLY( nat r );
1010
1011
1012 loop:
1013 #if defined(THREADED_RTS)
1014 if (n_gc_threads > 1) {
1015 scavenge_loop();
1016 } else {
1017 scavenge_loop1();
1018 }
1019 #else
1020 scavenge_loop();
1021 #endif
1022
1023 collect_gct_blocks();
1024
1025 // scavenge_loop() only exits when there's no work to do
1026
1027 #ifdef DEBUG
1028 r = dec_running();
1029 #else
1030 dec_running();
1031 #endif
1032
1033 traceEventGcIdle(gct->cap);
1034
1035 debugTrace(DEBUG_gc, "%d GC threads still running", r);
1036
1037 while (gc_running_threads != 0) {
1038 // usleep(1);
1039 if (any_work()) {
1040 inc_running();
1041 traceEventGcWork(gct->cap);
1042 goto loop;
1043 }
1044 // any_work() does not remove the work from the queue, it
1045 // just checks for the presence of work. If we find any,
1046 // then we increment gc_running_threads and go back to
1047 // scavenge_loop() to perform any pending work.
1048 }
1049
1050 traceEventGcDone(gct->cap);
1051 }
1052
1053 #if defined(THREADED_RTS)
1054
1055 void
1056 gcWorkerThread (Capability *cap)
1057 {
1058 gc_thread *saved_gct;
1059
1060 // necessary if we stole a callee-saves register for gct:
1061 saved_gct = gct;
1062
1063 SET_GCT(gc_threads[cap->no]);
1064 gct->id = osThreadId();
1065
1066 stat_gcWorkerThreadStart(gct);
1067
1068 // Wait until we're told to wake up
1069 RELEASE_SPIN_LOCK(&gct->mut_spin);
1070 // yieldThread();
1071 // Strangely, adding a yieldThread() here makes the CPU time
1072 // measurements more accurate on Linux, perhaps because it syncs
1073 // the CPU time across the multiple cores. Without this, CPU time
1074 // is heavily skewed towards GC rather than MUT.
1075 gct->wakeup = GC_THREAD_STANDING_BY;
1076 debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1077 ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1078
1079 #ifdef USE_PAPI
1080 // start performance counters in this thread...
1081 if (gct->papi_events == -1) {
1082 papi_init_eventset(&gct->papi_events);
1083 }
1084 papi_thread_start_gc1_count(gct->papi_events);
1085 #endif
1086
1087 init_gc_thread(gct);
1088
1089 traceEventGcWork(gct->cap);
1090
1091 // Every thread evacuates some roots.
1092 gct->evac_gen_no = 0;
1093 markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1094 scavenge_capability_mut_lists(cap);
1095
1096 scavenge_until_all_done();
1097
1098 #ifdef THREADED_RTS
1099 // Now that the whole heap is marked, we discard any sparks that
1100 // were found to be unreachable. The main GC thread is currently
1101 // marking heap reachable via weak pointers, so it is
1102 // non-deterministic whether a spark will be retained if it is
1103 // only reachable via weak pointers. To fix this problem would
1104 // require another GC barrier, which is too high a price.
1105 pruneSparkQueue(cap);
1106 #endif
1107
1108 #ifdef USE_PAPI
1109 // count events in this thread towards the GC totals
1110 papi_thread_stop_gc1_count(gct->papi_events);
1111 #endif
1112
1113 // Wait until we're told to continue
1114 RELEASE_SPIN_LOCK(&gct->gc_spin);
1115 gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1116 debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1117 gct->thread_index);
1118 ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1119 debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1120
1121 // record the time spent doing GC in the Task structure
1122 stat_gcWorkerThreadDone(gct);
1123
1124 SET_GCT(saved_gct);
1125 }
1126
1127 #endif
1128
1129 #if defined(THREADED_RTS)
1130
1131 void
1132 waitForGcThreads (Capability *cap USED_IF_THREADS)
1133 {
1134 const nat n_threads = n_capabilities;
1135 const nat me = cap->no;
1136 nat i, j;
1137 rtsBool retry = rtsTrue;
1138
1139 while(retry) {
1140 for (i=0; i < n_threads; i++) {
1141 if (i == me || gc_threads[i]->idle) continue;
1142 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1143 prodCapability(&capabilities[i], cap->running_task);
1144 }
1145 }
1146 for (j=0; j < 10; j++) {
1147 retry = rtsFalse;
1148 for (i=0; i < n_threads; i++) {
1149 if (i == me || gc_threads[i]->idle) continue;
1150 write_barrier();
1151 interruptCapability(&capabilities[i]);
1152 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1153 retry = rtsTrue;
1154 }
1155 }
1156 if (!retry) break;
1157 yieldThread();
1158 }
1159 }
1160 }
1161
1162 #endif // THREADED_RTS
1163
1164 static void
1165 start_gc_threads (void)
1166 {
1167 #if defined(THREADED_RTS)
1168 gc_running_threads = 0;
1169 #endif
1170 }
1171
1172 static void
1173 wakeup_gc_threads (nat me USED_IF_THREADS)
1174 {
1175 #if defined(THREADED_RTS)
1176 nat i;
1177
1178 if (n_gc_threads == 1) return;
1179
1180 for (i=0; i < n_gc_threads; i++) {
1181 if (i == me || gc_threads[i]->idle) continue;
1182 inc_running();
1183 debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1184 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1185
1186 gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1187 ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1188 RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1189 }
1190 #endif
1191 }
1192
1193 // After GC is complete, we must wait for all GC threads to enter the
1194 // waiting-to-continue state, otherwise they may still be executing inside
1195 // any_work(), and may even remain awake until the next GC starts.
1196 static void
1197 shutdown_gc_threads (nat me USED_IF_THREADS)
1198 {
1199 #if defined(THREADED_RTS)
1200 nat i;
1201
1202 if (n_gc_threads == 1) return;
1203
1204 for (i=0; i < n_gc_threads; i++) {
1205 if (i == me || gc_threads[i]->idle) continue;
1206 while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1207 }
1208 #endif
1209 }
1210
1211 #if defined(THREADED_RTS)
1212 void
1213 releaseGCThreads (Capability *cap USED_IF_THREADS)
1214 {
1215 const nat n_threads = n_capabilities;
1216 const nat me = cap->no;
1217 nat i;
1218 for (i=0; i < n_threads; i++) {
1219 if (i == me || gc_threads[i]->idle) continue;
1220 if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1221 barf("releaseGCThreads");
1222
1223 gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1224 ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1225 RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1226 }
1227 }
1228 #endif
1229
1230 /* ----------------------------------------------------------------------------
1231 Initialise a generation that is to be collected
1232 ------------------------------------------------------------------------- */
1233
1234 static void
1235 prepare_collected_gen (generation *gen)
1236 {
1237 nat i, g, n;
1238 gen_workspace *ws;
1239 bdescr *bd, *next;
1240
1241 // Throw away the current mutable list. Invariant: the mutable
1242 // list always has at least one block; this means we can avoid a
1243 // check for NULL in recordMutable().
1244 g = gen->no;
1245 if (g != 0) {
1246 for (i = 0; i < n_capabilities; i++) {
1247 freeChain(capabilities[i].mut_lists[g]);
1248 capabilities[i].mut_lists[g] = allocBlock();
1249 }
1250 }
1251
1252 gen = &generations[g];
1253 ASSERT(gen->no == g);
1254
1255 // we'll construct a new list of threads in this step
1256 // during GC; throw away the current list.
1257 gen->old_threads = gen->threads;
1258 gen->threads = END_TSO_QUEUE;
1259
1260 // deprecate the existing blocks
1261 gen->old_blocks = gen->blocks;
1262 gen->n_old_blocks = gen->n_blocks;
1263 gen->blocks = NULL;
1264 gen->n_blocks = 0;
1265 gen->n_words = 0;
1266 gen->live_estimate = 0;
1267
1268 // initialise the large object queues.
1269 ASSERT(gen->scavenged_large_objects == NULL);
1270 ASSERT(gen->n_scavenged_large_blocks == 0);
1271
1272 // grab all the partial blocks stashed in the gc_thread workspaces and
1273 // move them to the old_blocks list of this gen.
1274 for (n = 0; n < n_capabilities; n++) {
1275 ws = &gc_threads[n]->gens[gen->no];
1276
1277 for (bd = ws->part_list; bd != NULL; bd = next) {
1278 next = bd->link;
1279 bd->link = gen->old_blocks;
1280 gen->old_blocks = bd;
1281 gen->n_old_blocks += bd->blocks;
1282 }
1283 ws->part_list = NULL;
1284 ws->n_part_blocks = 0;
1285
1286 ASSERT(ws->scavd_list == NULL);
1287 ASSERT(ws->n_scavd_blocks == 0);
1288
1289 if (ws->todo_free != ws->todo_bd->start) {
1290 ws->todo_bd->free = ws->todo_free;
1291 ws->todo_bd->link = gen->old_blocks;
1292 gen->old_blocks = ws->todo_bd;
1293 gen->n_old_blocks += ws->todo_bd->blocks;
1294 alloc_todo_block(ws,0); // always has one block.
1295 }
1296 }
1297
1298 // mark the small objects as from-space
1299 for (bd = gen->old_blocks; bd; bd = bd->link) {
1300 bd->flags &= ~BF_EVACUATED;
1301 }
1302
1303 // mark the large objects as from-space
1304 for (bd = gen->large_objects; bd; bd = bd->link) {
1305 bd->flags &= ~BF_EVACUATED;
1306 }
1307
1308 // for a compacted generation, we need to allocate the bitmap
1309 if (gen->mark) {
1310 lnat bitmap_size; // in bytes
1311 bdescr *bitmap_bdescr;
1312 StgWord *bitmap;
1313
1314 bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
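// (i.e. one mark bit for every word in the blocks being compacted)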
1315
1316 if (bitmap_size > 0) {
1317 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1318 / BLOCK_SIZE);
1319 gen->bitmap = bitmap_bdescr;
1320 bitmap = bitmap_bdescr->start;
1321
1322 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1323 bitmap_size, bitmap);
1324
1325 // don't forget to fill it with zeros!
1326 memset(bitmap, 0, bitmap_size);
1327
1328 // For each block in this step, point to its bitmap from the
1329 // block descriptor.
1330 for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1331 bd->u.bitmap = bitmap;
1332 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1333
1334 // Also at this point we set the BF_MARKED flag
1335 // for this block. The invariant is that
1336 // BF_MARKED is always unset, except during GC
1337 // when it is set on those blocks which will be
1338 // compacted.
1339 if (!(bd->flags & BF_FRAGMENTED)) {
1340 bd->flags |= BF_MARKED;
1341 }
1342
1343 // BF_SWEPT should be marked only for blocks that are being
1344 // collected in sweep()
1345 bd->flags &= ~BF_SWEPT;
1346 }
1347 }
1348 }
1349 }
1350
1351
1352 /* ----------------------------------------------------------------------------
1353 Save the mutable lists in saved_mut_lists
1354 ------------------------------------------------------------------------- */
1355
1356 static void
1357 stash_mut_list (Capability *cap, nat gen_no)
1358 {
1359 cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1360 cap->mut_lists[gen_no] = allocBlock_sync();
1361 }
1362
1363 /* ----------------------------------------------------------------------------
1364 Initialise a generation that is *not* to be collected
1365 ------------------------------------------------------------------------- */
1366
1367 static void
1368 prepare_uncollected_gen (generation *gen)
1369 {
1370 nat i;
1371
1372
1373 ASSERT(gen->no > 0);
1374
1375 // save the current mutable lists for this generation, and
1376 // allocate a fresh block for each one. We'll traverse these
1377 // mutable lists as roots early on in the GC.
1378 for (i = 0; i < n_capabilities; i++) {
1379 stash_mut_list(&capabilities[i], gen->no);
1380 }
1381
1382 ASSERT(gen->scavenged_large_objects == NULL);
1383 ASSERT(gen->n_scavenged_large_blocks == 0);
1384 }
1385
1386 /* -----------------------------------------------------------------------------
1387 Collect the completed blocks from a GC thread and attach them to
1388 the generation.
1389 -------------------------------------------------------------------------- */
1390
1391 static void
1392 collect_gct_blocks (void)
1393 {
1394 nat g;
1395 gen_workspace *ws;
1396 bdescr *bd, *prev;
1397
1398 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1399 ws = &gct->gens[g];
1400
1401 // there may still be a block attached to ws->todo_bd;
1402 // leave it there to use next time.
1403
1404 if (ws->scavd_list != NULL) {
1405 ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1406
1407 ASSERT(gct->scan_bd == NULL);
1408 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1409
1410 prev = NULL;
1411 for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1412 ws->gen->n_words += bd->free - bd->start;
1413 prev = bd;
1414 }
1415 if (prev != NULL) {
1416 prev->link = ws->gen->blocks;
1417 ws->gen->blocks = ws->scavd_list;
1418 }
1419 ws->gen->n_blocks += ws->n_scavd_blocks;
1420
1421 ws->scavd_list = NULL;
1422 ws->n_scavd_blocks = 0;
1423
1424 RELEASE_SPIN_LOCK(&ws->gen->sync);
1425 }
1426 }
1427 }
1428
1429 /* -----------------------------------------------------------------------------
1430 During mutation, any blocks that are filled by allocatePinned() are
1431 stashed on the local pinned_object_blocks list, to avoid needing to
1432 take a global lock. Here we collect those blocks from the
1433 cap->pinned_object_blocks lists and put them on the
1434 main g0->large_object list.
1435
1436 Returns: the number of words allocated this way, for stats
1437 purposes.
1438 -------------------------------------------------------------------------- */
1439
1440 static lnat
1441 collect_pinned_object_blocks (void)
1442 {
1443 nat n;
1444 bdescr *bd, *prev;
1445 lnat allocated = 0;
1446
1447 for (n = 0; n < n_capabilities; n++) {
1448 prev = NULL;
1449 for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
1450 allocated += bd->free - bd->start;
1451 prev = bd;
1452 }
1453 if (prev != NULL) {
1454 prev->link = g0->large_objects;
1455 if (g0->large_objects != NULL) {
1456 g0->large_objects->u.back = prev;
1457 }
1458 g0->large_objects = capabilities[n].pinned_object_blocks;
1459 capabilities[n].pinned_object_blocks = 0;
1460 }
1461 }
1462
1463 return allocated;
1464 }
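/* For reference, a rough sketch of the mutator side that fills
 * cap->pinned_object_blocks (the list drained above).  This is not the real
 * allocatePinned() -- see rts/sm/Storage.c for that; the helper name, flag
 * choice and locking are assumptions here.
 */
#if 0
static StgPtr
allocatePinned_sketch (Capability *cap, lnat n /* words */)
{
    StgPtr p;
    bdescr *bd = cap->pinned_object_block;

    // Start a fresh pinned block if there is none, or if the current one
    // doesn't have room for n more words.
    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
        if (bd != NULL) {
            // Retire the full block; the next GC picks it up in
            // collect_pinned_object_blocks().
            bd->link = cap->pinned_object_blocks;
            cap->pinned_object_blocks = bd;
        }
        cap->pinned_object_block = bd = allocBlock();
        initBdescr(bd, g0, g0);
        bd->flags = BF_PINNED | BF_LARGE;   // treated as a large object by GC
        bd->free  = bd->start;
    }

    p = bd->free;          // bump-allocate within the current pinned block
    bd->free += n;
    return p;
}
#endif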
1465
1466 /* -----------------------------------------------------------------------------
1467 Initialise a gc_thread before GC
1468 -------------------------------------------------------------------------- */
1469
1470 static void
1471 init_gc_thread (gc_thread *t)
1472 {
1473 t->static_objects = END_OF_STATIC_LIST;
1474 t->scavenged_static_objects = END_OF_STATIC_LIST;
1475 t->scan_bd = NULL;
1476 t->mut_lists = t->cap->mut_lists;
1477 t->evac_gen_no = 0;
1478 t->failed_to_evac = rtsFalse;
1479 t->eager_promotion = rtsTrue;
1480 t->thunk_selector_depth = 0;
1481 t->copied = 0;
1482 t->scanned = 0;
1483 t->any_work = 0;
1484 t->no_work = 0;
1485 t->scav_find_work = 0;
1486 }
1487
1488 /* -----------------------------------------------------------------------------
1489 Function we pass to evacuate roots.
1490 -------------------------------------------------------------------------- */
1491
1492 static void
1493 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1494 {
1495 // we stole a register for gct, but this function is called from
1496 // *outside* the GC where the register variable is not in effect,
1497 // so we need to save and restore it here. NB. only call
1498 // mark_root() from the main GC thread, otherwise gct will be
1499 // incorrect.
1500 #if defined(THREADED_RTS)
1501 gc_thread *saved_gct;
1502 saved_gct = gct;
1503 #endif
1504 SET_GCT(user);
1505
1506 evacuate(root);
1507
1508 SET_GCT(saved_gct);
1509 }
1510
1511 /* -----------------------------------------------------------------------------
1512 Initialising the static object & mutable lists
1513 -------------------------------------------------------------------------- */
1514
1515 static void
1516 zero_static_object_list(StgClosure* first_static)
1517 {
1518 StgClosure* p;
1519 StgClosure* link;
1520 const StgInfoTable *info;
1521
1522 for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1523 info = get_itbl(p);
1524 link = *STATIC_LINK(info, p);
1525 *STATIC_LINK(info,p) = NULL;
1526 }
1527 }
1528
1529 /* ----------------------------------------------------------------------------
1530 Reset the sizes of the older generations when we do a major
1531 collection.
1532
1533 CURRENT STRATEGY: make all generations except zero the same size.
1534 We have to stay within the maximum heap size, and leave a certain
1535 percentage of the maximum heap size available to allocate into.
1536 ------------------------------------------------------------------------- */
1537
1538 static void
1539 resize_generations (void)
1540 {
1541 nat g;
1542
1543 if (major_gc && RtsFlags.GcFlags.generations > 1) {
1544 nat live, size, min_alloc, words;
1545 const nat max = RtsFlags.GcFlags.maxHeapSize;
1546 const nat gens = RtsFlags.GcFlags.generations;
1547
1548 // live in the oldest generations
1549 if (oldest_gen->live_estimate != 0) {
1550 words = oldest_gen->live_estimate;
1551 } else {
1552 words = oldest_gen->n_words;
1553 }
1554 live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1555 oldest_gen->n_large_blocks;
1556
1557 // default max size for all generations except zero
1558 size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1559 RtsFlags.GcFlags.minOldGenSize);
1560
1561 if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1562 RtsFlags.GcFlags.heapSizeSuggestion = size;
1563 }
1564
1565 // minimum size for generation zero
1566 min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1567 RtsFlags.GcFlags.minAllocAreaSize);
1568
1569 // Auto-enable compaction when the residency reaches a
1570 // certain percentage of the maximum heap size (default: 30%).
1571 if (RtsFlags.GcFlags.compact ||
1572 (max > 0 &&
1573 oldest_gen->n_blocks >
1574 (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1575 oldest_gen->mark = 1;
1576 oldest_gen->compact = 1;
1577 // debugBelch("compaction: on\n", live);
1578 } else {
1579 oldest_gen->mark = 0;
1580 oldest_gen->compact = 0;
1581 // debugBelch("compaction: off\n", live);
1582 }
1583
1584 if (RtsFlags.GcFlags.sweep) {
1585 oldest_gen->mark = 1;
1586 }
1587
1588 // if we're going to go over the maximum heap size, reduce the
1589 // size of the generations accordingly. The calculation is
1590 // different if compaction is turned on, because we don't need
1591 // to double the space required to collect the old generation.
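// (A copied generation needs from-space plus to-space while it is being
// collected, hence the factor of 2 below; a compacted oldest generation is
// collected in place and needs only about its own size.)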
1592 if (max != 0) {
1593
1594 // this test is necessary to ensure that the calculations
1595 // below don't have any negative results - we're working
1596 // with unsigned values here.
1597 if (max < min_alloc) {
1598 heapOverflow();
1599 }
1600
1601 if (oldest_gen->compact) {
1602 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1603 size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1604 }
1605 } else {
1606 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1607 size = (max - min_alloc) / ((gens - 1) * 2);
1608 }
1609 }
1610
1611 if (size < live) {
1612 heapOverflow();
1613 }
1614 }
1615
1616 #if 0
1617 debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1618 min_alloc, size, max);
1619 #endif
1620
1621 for (g = 0; g < gens; g++) {
1622 generations[g].max_blocks = size;
1623 }
1624 }
1625 }
1626
1627 /* -----------------------------------------------------------------------------
1628 Calculate the new size of the nursery, and resize it.
1629 -------------------------------------------------------------------------- */
1630
1631 static void
1632 resize_nursery (void)
1633 {
1634 const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1635
1636 if (RtsFlags.GcFlags.generations == 1)
1637 { // Two-space collector:
1638 nat blocks;
1639
1640 /* set up a new nursery. Allocate a nursery size based on a
1641 * function of the amount of live data (by default a factor of 2)
1642 * Use the blocks from the old nursery if possible, freeing up any
1643 * left over blocks.
1644 *
1645 * If we get near the maximum heap size, then adjust our nursery
1646 * size accordingly. If the nursery is the same size as the live
1647 * data (L), then we need 3L bytes. We can reduce the size of the
1648 * nursery to bring the required memory down near 2L bytes.
1649 *
1650 * A normal 2-space collector would need 4L bytes to give the same
1651 * performance we get from 3L bytes, reducing to the same
1652 * performance at 2L bytes.
1653 */
1654 blocks = generations[0].n_blocks;
1655
1656 if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1657 blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1658 RtsFlags.GcFlags.maxHeapSize )
1659 {
1660 long adjusted_blocks; // signed on purpose
1661 int pc_free;
1662
1663 adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1664
1665 debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1666 RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1667
1668 pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1669 if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */
1670 {
1671 heapOverflow();
1672 }
1673 blocks = adjusted_blocks;
1674 }
1675 else
1676 {
1677 blocks *= RtsFlags.GcFlags.oldGenFactor;
1678 if (blocks < min_nursery)
1679 {
1680 blocks = min_nursery;
1681 }
1682 }
1683 resizeNurseries(blocks);
1684 }
1685 else // Generational collector
1686 {
1687 /*
1688 * If the user has given us a suggested heap size, adjust our
1689 * allocation area to make best use of the memory available.
1690 */
1691 if (RtsFlags.GcFlags.heapSizeSuggestion)
1692 {
1693 long blocks;
1694 const nat needed = calcNeeded(); // approx blocks needed at next GC
1695
1696 /* Guess how much will be live in generation 0 step 0 next time.
1697 * A good approximation is obtained by finding the
1698 * percentage of g0 that was live at the last minor GC.
1699 *
1700 * We have an accurate figure for the amount of copied data in
1701 * 'copied', but we must convert this to a number of blocks, with
1702 * a small adjustment for estimated slop at the end of a block
1703 * (- 10 words).
1704 */
1705 if (N == 0)
1706 {
1707 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1708 / countNurseryBlocks();
1709 }
1710
1711 /* Estimate a size for the allocation area based on the
1712 * information available. We might end up going slightly under
1713 * or over the suggested heap size, but we should be pretty
1714 * close on average.
1715 *
1716 * Formula: suggested - needed
1717 * ----------------------------
1718 * 1 + g0_pcnt_kept/100
1719 *
1720 * where 'needed' is the amount of memory needed at the next
1721 * collection for collecting all gens except g0.
1722 */
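/* A worked example with made-up numbers: heapSizeSuggestion = 100000 blocks,
 * needed = 22000 blocks and g0_pcnt_kept = 30 give
 * (100000 - 22000) * 100 / 130 = 60000 blocks for the allocation area.
 */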
1723 blocks =
1724 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1725 (100 + (long)g0_pcnt_kept);
1726
1727 if (blocks < (long)min_nursery) {
1728 blocks = min_nursery;
1729 }
1730
1731 resizeNurseries((nat)blocks);
1732 }
1733 else
1734 {
1735 // we might have added extra large blocks to the nursery, so
1736 // resize back to minAllocAreaSize again.
1737 resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1738 }
1739 }
1740 }
1741
1742 /* -----------------------------------------------------------------------------
1743 Sanity code for CAF garbage collection.
1744
1745 With DEBUG turned on, we manage a CAF list in addition to the SRT
1746 mechanism. After GC, we run down the CAF list and blackhole any
1747 CAFs which have been garbage collected. This means we get an error
1748 whenever the program tries to enter a garbage collected CAF.
1749
1750 Any garbage collected CAFs are taken off the CAF list at the same
1751 time.
1752 -------------------------------------------------------------------------- */
1753
1754 #if 0 && defined(DEBUG)
1755
1756 static void
1757 gcCAFs(void)
1758 {
1759 StgClosure* p;
1760 StgClosure** pp;
1761 const StgInfoTable *info;
1762 nat i;
1763
1764 i = 0;
1765 p = caf_list;
1766 pp = &caf_list;
1767
1768 while (p != NULL) {
1769
1770 info = get_itbl(p);
1771
1772 ASSERT(info->type == IND_STATIC);
1773
1774 if (STATIC_LINK(info,p) == NULL) {
1775 debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1776 // black hole it
1777 SET_INFO(p,&stg_BLACKHOLE_info);
1778 p = STATIC_LINK2(info,p);
1779 *pp = p;
1780 }
1781 else {
1782 pp = &STATIC_LINK2(info,p);
1783 p = *pp;
1784 i++;
1785 }
1786
1787 }
1788
1789 debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1790 }
1791 #endif