1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2008
4 *
5 * Storage manager front end
6 *
7 * Documentation on the architecture of the Storage Manager can be
8 * found in the online commentary:
9 *
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16
17 #include "Storage.h"
18 #include "RtsUtils.h"
19 #include "Stats.h"
20 #include "BlockAlloc.h"
21 #include "Weak.h"
22 #include "Sanity.h"
23 #include "Arena.h"
24 #include "Capability.h"
25 #include "Schedule.h"
26 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
27 #include "OSMem.h"
28 #include "Trace.h"
29 #include "GC.h"
30 #include "Evac.h"
31
32 #include <string.h>
33
34 #include "ffi.h"
35
36 /*
37 * All these globals require sm_mutex to access in THREADED_RTS mode.
38 */
39 StgClosure *caf_list = NULL;
40 StgClosure *revertible_caf_list = NULL;
41 rtsBool keepCAFs;
42
43 nat large_alloc_lim; /* GC if n_large_blocks in any nursery
44 * reaches this. */
45
46 bdescr *exec_block;
47
48 generation *generations = NULL; /* all the generations */
49 generation *g0 = NULL; /* generation 0, for convenience */
50 generation *oldest_gen = NULL; /* oldest generation, for convenience */
51
52 nursery *nurseries = NULL; /* array of nurseries, size == n_capabilities */
53
54 #ifdef THREADED_RTS
55 /*
56 * Storage manager mutex: protects all the above state from
57 * simultaneous access by two STG threads.
58 */
59 Mutex sm_mutex;
60 #endif
61
62 static void allocNurseries ( void );
63
64 static void
65 initGeneration (generation *gen, int g)
66 {
67 gen->no = g;
68 gen->collections = 0;
69 gen->par_collections = 0;
70 gen->failed_promotions = 0;
71 gen->max_blocks = 0;
72 gen->blocks = NULL;
73 gen->n_blocks = 0;
74 gen->n_words = 0;
75 gen->live_estimate = 0;
76 gen->old_blocks = NULL;
77 gen->n_old_blocks = 0;
78 gen->large_objects = NULL;
79 gen->n_large_blocks = 0;
80 gen->n_new_large_words = 0;
81 gen->scavenged_large_objects = NULL;
82 gen->n_scavenged_large_blocks = 0;
83 gen->mark = 0;
84 gen->compact = 0;
85 gen->bitmap = NULL;
86 #ifdef THREADED_RTS
87 initSpinLock(&gen->sync_large_objects);
88 #endif
89 gen->threads = END_TSO_QUEUE;
90 gen->old_threads = END_TSO_QUEUE;
91 }
92
93 void
94 initStorage( void )
95 {
96 nat g, n;
97
98 if (generations != NULL) {
99 // multi-init protection
100 return;
101 }
102
103 initMBlocks();
104
105 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
106 * doing something reasonable.
107 */
108 /* We use the NOT_NULL variant or gcc warns that the test is always true */
109 ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
110 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
111 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
112
113 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
114 RtsFlags.GcFlags.heapSizeSuggestion >
115 RtsFlags.GcFlags.maxHeapSize) {
116 RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
117 }
118
119 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
120 RtsFlags.GcFlags.minAllocAreaSize >
121 RtsFlags.GcFlags.maxHeapSize) {
122 errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
123 RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
124 }
125
126 initBlockAllocator();
127
128 #if defined(THREADED_RTS)
129 initMutex(&sm_mutex);
130 #endif
131
132 ACQUIRE_SM_LOCK;
133
134 /* allocate generation info array */
135 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
136 * sizeof(struct generation_),
137 "initStorage: gens");
138
139 /* Initialise all generations */
140 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
141 initGeneration(&generations[g], g);
142 }
143
144 /* A couple of convenience pointers */
145 g0 = &generations[0];
146 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
147
148 nurseries = stgMallocBytes(n_capabilities * sizeof(struct nursery_),
149 "initStorage: nurseries");
150
151     /* Set up the destination pointers in each younger generation */
152 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
153 generations[g].to = &generations[g+1];
154 }
155 oldest_gen->to = oldest_gen;
156
157     /* Compaction and mark/sweep apply only to the oldest generation. */
158 if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
159 if (RtsFlags.GcFlags.generations == 1) {
160 errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
161 } else {
162 oldest_gen->mark = 1;
163 if (RtsFlags.GcFlags.compact)
164 oldest_gen->compact = 1;
165 }
166 }
167
168 generations[0].max_blocks = 0;
169
170 /* The allocation area. Policy: keep the allocation area
171 * small to begin with, even if we have a large suggested heap
172 * size. Reason: we're going to do a major collection first, and we
173 * don't want it to be a big one. This vague idea is borne out by
174 * rigorous experimental evidence.
175 */
176 allocNurseries();
177
178 weak_ptr_list = NULL;
179 caf_list = END_OF_STATIC_LIST;
180 revertible_caf_list = END_OF_STATIC_LIST;
181
182 /* initialise the allocate() interface */
183 large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
184
185 exec_block = NULL;
186
187 #ifdef THREADED_RTS
188 initSpinLock(&gc_alloc_block_sync);
189 whitehole_spin = 0;
190 #endif
191
192 N = 0;
193
194 // allocate a block for each mut list
195 for (n = 0; n < n_capabilities; n++) {
196 for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
197 capabilities[n].mut_lists[g] = allocBlock();
198 }
199 }
200
201 initGcThreads();
202
203 IF_DEBUG(gc, statDescribeGens());
204
205 RELEASE_SM_LOCK;
206 }
207
208 void
209 exitStorage (void)
210 {
211 stat_exit(calcAllocated(rtsTrue));
212 }
213
214 void
215 freeStorage (rtsBool free_heap)
216 {
217 stgFree(generations);
218 if (free_heap) freeAllMBlocks();
219 #if defined(THREADED_RTS)
220 closeMutex(&sm_mutex);
221 #endif
222 stgFree(nurseries);
223 freeGcThreads();
224 }
225
226 /* -----------------------------------------------------------------------------
227 CAF management.
228
229 The entry code for every CAF does the following:
230
231 - builds a BLACKHOLE in the heap
232 - pushes an update frame pointing to the BLACKHOLE
233 - calls newCaf, below
234 - updates the CAF with a static indirection to the BLACKHOLE
235
236    Why do we build a BLACKHOLE in the heap rather than just updating
237 the thunk directly? It's so that we only need one kind of update
238 frame - otherwise we'd need a static version of the update frame too.
239
240 newCaf() does the following:
241
242 - it puts the CAF on the oldest generation's mutable list.
243 This is so that we treat the CAF as a root when collecting
244 younger generations.
245
246 For GHCI, we have additional requirements when dealing with CAFs:
247
248 - we must *retain* all dynamically-loaded CAFs ever entered,
249 just in case we need them again.
250 - we must be able to *revert* CAFs that have been evaluated, to
251 their pre-evaluated form.
252
253 To do this, we use an additional CAF list. When newCaf() is
254 called on a dynamically-loaded CAF, we add it to the CAF list
255 instead of the old-generation mutable list, and save away its
256 old info pointer (in caf->saved_info) for later reversion.
257
258 To revert all the CAFs, we traverse the CAF list and reset the
259 info pointer to caf->saved_info, then throw away the CAF list.
260 (see GC.c:revertCAFs()).
261
262 -- SDM 29/1/01
263
264 -------------------------------------------------------------------------- */
265
266 void
267 newCAF(StgRegTable *reg, StgClosure* caf)
268 {
269 if(keepCAFs)
270 {
271 // HACK:
272 // If we are in GHCi _and_ we are using dynamic libraries,
273 // then we can't redirect newCAF calls to newDynCAF (see below),
274 // so we make newCAF behave almost like newDynCAF.
275 // The dynamic libraries might be used by both the interpreted
276 // program and GHCi itself, so they must not be reverted.
277 // This also means that in GHCi with dynamic libraries, CAFs are not
278 // garbage collected. If this turns out to be a problem, we could
279 // do another hack here and do an address range test on caf to figure
280 // out whether it is from a dynamic library.
281 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
282
283 ACQUIRE_SM_LOCK; // caf_list is global, locked by sm_mutex
284 ((StgIndStatic *)caf)->static_link = caf_list;
285 caf_list = caf;
286 RELEASE_SM_LOCK;
287 }
288 else
289 {
290 // Put this CAF on the mutable list for the old generation.
291 ((StgIndStatic *)caf)->saved_info = NULL;
292 if (oldest_gen->no != 0) {
293 recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no);
294 }
295 }
296 }
297
298 // External API for setting the keepCAFs flag. See #3900.
299 void
300 setKeepCAFs (void)
301 {
302 keepCAFs = 1;
303 }
304
305 // An alternate version of newCaf which is used for dynamically loaded
306 // object code in GHCi. In this case we want to retain *all* CAFs in
307 // the object code, because they might be demanded at any time from an
308 // expression evaluated on the command line.
309 // Also, GHCi might want to revert CAFs, so we add these to the
310 // revertible_caf_list.
311 //
312 // The linker hackily arranges that references to newCaf from dynamic
313 // code end up pointing to newDynCAF.
314 void
315 newDynCAF (StgRegTable *reg STG_UNUSED, StgClosure *caf)
316 {
317 ACQUIRE_SM_LOCK;
318
319 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
320 ((StgIndStatic *)caf)->static_link = revertible_caf_list;
321 revertible_caf_list = caf;
322
323 RELEASE_SM_LOCK;
324 }
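
/* A minimal, illustrative sketch (guarded by #if 0, not part of the build)
 * of the reversion walk described in the CAF management comment above.
 * It assumes the list is terminated by END_OF_STATIC_LIST, as initialised
 * in initStorage(); the real implementation is GC.c:revertCAFs().
 */
#if 0
static void revertCAFs_sketch (void)
{
    StgIndStatic *c, *next;

    for (c = (StgIndStatic *)revertible_caf_list;
         c != (StgIndStatic *)END_OF_STATIC_LIST;
         c = next) {
        next = (StgIndStatic *)c->static_link;
        // restore the entry code saved by newDynCAF()
        ((StgClosure *)c)->header.info = c->saved_info;
        c->saved_info = NULL;
    }
    revertible_caf_list = END_OF_STATIC_LIST;   // throw the list away
}
#endif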
325
326 /* -----------------------------------------------------------------------------
327 Nursery management.
328 -------------------------------------------------------------------------- */
329
330 static bdescr *
331 allocNursery (bdescr *tail, nat blocks)
332 {
333 bdescr *bd = NULL;
334 nat i, n;
335
336 // We allocate the nursery as a single contiguous block and then
337 // divide it into single blocks manually. This way we guarantee
338 // that the nursery blocks are adjacent, so that the processor's
339 // automatic prefetching works across nursery blocks. This is a
340 // tiny optimisation (~0.5%), but it's free.
341
342 while (blocks > 0) {
343 n = stg_min(blocks, BLOCKS_PER_MBLOCK);
344 blocks -= n;
345
346 bd = allocGroup(n);
347 for (i = 0; i < n; i++) {
348 initBdescr(&bd[i], g0, g0);
349
350 bd[i].blocks = 1;
351 bd[i].flags = 0;
352
353 if (i > 0) {
354 bd[i].u.back = &bd[i-1];
355 } else {
356 bd[i].u.back = NULL;
357 }
358
359 if (i+1 < n) {
360 bd[i].link = &bd[i+1];
361 } else {
362 bd[i].link = tail;
363 if (tail != NULL) {
364 tail->u.back = &bd[i];
365 }
366 }
367
368 bd[i].free = bd[i].start;
369 }
370
371 tail = &bd[0];
372 }
373
374 return &bd[0];
375 }
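
/* Illustrative only (guarded by #if 0): the list built by allocNursery() is
 * doubly linked via bd->link / bd->u.back, most recently allocated group
 * first.  Walking such a list and checking the back-pointer invariant looks
 * like this ("ns" is a hypothetical nursery pointer):
 */
#if 0
    bdescr *bd;
    nat count = 0;

    for (bd = ns->blocks; bd != NULL; bd = bd->link) {
        ASSERT(bd->link == NULL || bd->link->u.back == bd);
        count += bd->blocks;   // allocNursery() makes single blocks (blocks == 1)
    }
    ASSERT(count == ns->n_blocks);
#endif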
376
377 static void
378 assignNurseriesToCapabilities (void)
379 {
380 nat i;
381
382 for (i = 0; i < n_capabilities; i++) {
383 capabilities[i].r.rNursery = &nurseries[i];
384 capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
385 capabilities[i].r.rCurrentAlloc = NULL;
386 }
387 }
388
389 static void
390 allocNurseries( void )
391 {
392 nat i;
393
394 for (i = 0; i < n_capabilities; i++) {
395 nurseries[i].blocks =
396 allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
397 nurseries[i].n_blocks =
398 RtsFlags.GcFlags.minAllocAreaSize;
399 }
400 assignNurseriesToCapabilities();
401 }
402
403 lnat // words allocated
404 clearNurseries (void)
405 {
406 lnat allocated = 0;
407 nat i;
408 bdescr *bd;
409
410 for (i = 0; i < n_capabilities; i++) {
411 for (bd = nurseries[i].blocks; bd; bd = bd->link) {
412 allocated += (lnat)(bd->free - bd->start);
413 bd->free = bd->start;
414 ASSERT(bd->gen_no == 0);
415 ASSERT(bd->gen == g0);
416 IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
417 }
418 }
419
420 return allocated;
421 }
422
423 void
424 resetNurseries (void)
425 {
426 assignNurseriesToCapabilities();
427
428 }
429
430 lnat
431 countNurseryBlocks (void)
432 {
433 nat i;
434 lnat blocks = 0;
435
436 for (i = 0; i < n_capabilities; i++) {
437 blocks += nurseries[i].n_blocks;
438 }
439 return blocks;
440 }
441
442 static void
443 resizeNursery ( nursery *nursery, nat blocks )
444 {
445 bdescr *bd;
446 nat nursery_blocks;
447
448 nursery_blocks = nursery->n_blocks;
449 if (nursery_blocks == blocks) return;
450
451 if (nursery_blocks < blocks) {
452 debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
453 blocks);
454 nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
455 }
456 else {
457 bdescr *next_bd;
458
459 debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
460 blocks);
461
462 bd = nursery->blocks;
463 while (nursery_blocks > blocks) {
464 next_bd = bd->link;
465 next_bd->u.back = NULL;
466 nursery_blocks -= bd->blocks; // might be a large block
467 freeGroup(bd);
468 bd = next_bd;
469 }
470 nursery->blocks = bd;
471 // might have gone just under, by freeing a large block, so make
472 // up the difference.
473 if (nursery_blocks < blocks) {
474 nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
475 }
476 }
477
478 nursery->n_blocks = blocks;
479 ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
480 }
481
482 //
483 // Resize each of the nurseries to the specified size.
484 //
485 void
486 resizeNurseriesFixed (nat blocks)
487 {
488 nat i;
489 for (i = 0; i < n_capabilities; i++) {
490 resizeNursery(&nurseries[i], blocks);
491 }
492 }
493
494 //
495 // Resize the nurseries to the total specified size.
496 //
497 void
498 resizeNurseries (nat blocks)
499 {
500 // If there are multiple nurseries, then we just divide the number
501 // of available blocks between them.
502 resizeNurseriesFixed(blocks / n_capabilities);
503 }
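
/* Worked example (illustrative): with n_capabilities == 4, a call
 * resizeNurseries(1026) becomes resizeNurseriesFixed(256), so each
 * capability's nursery is resized to 256 blocks; the remainder of the
 * integer division (2 blocks here) is simply not handed out.
 */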
504
505
506 /* -----------------------------------------------------------------------------
507 move_STACK is called to update the TSO structure after it has been
508 moved from one place to another.
509 -------------------------------------------------------------------------- */
510
511 void
512 move_STACK (StgStack *src, StgStack *dest)
513 {
514 ptrdiff_t diff;
515
516 // relocate the stack pointer...
517 diff = (StgPtr)dest - (StgPtr)src; // In *words*
518 dest->sp = (StgPtr)dest->sp + diff;
519 }
520
521 /* -----------------------------------------------------------------------------
522 allocate()
523
524    This allocates memory in the current thread - it is intended for
525    use primarily from STG-land where we have a Capability.  Because
526    it allocates from the Capability's own nursery, it does not need
527    to take the sm_mutex lock in the common case.
528
529 Memory is allocated directly from the nursery if possible (but not
530 from the current nursery block, so as not to interfere with
531 Hp/HpLim).
532 -------------------------------------------------------------------------- */
533
534 StgPtr
535 allocate (Capability *cap, lnat n)
536 {
537 bdescr *bd;
538 StgPtr p;
539
540 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
541 lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
542
543 // Attempting to allocate an object larger than maxHeapSize
544 // should definitely be disallowed. (bug #1791)
545 if (RtsFlags.GcFlags.maxHeapSize > 0 &&
546 req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
547 heapOverflow();
548 // heapOverflow() doesn't exit (see #2592), but we aren't
549 // in a position to do a clean shutdown here: we
550 // either have to allocate the memory or exit now.
551 // Allocating the memory would be bad, because the user
552 // has requested that we not exceed maxHeapSize, so we
553 // just exit.
554 stg_exit(EXIT_HEAPOVERFLOW);
555 }
556
557 ACQUIRE_SM_LOCK
558 bd = allocGroup(req_blocks);
559 dbl_link_onto(bd, &g0->large_objects);
560 g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
561 g0->n_new_large_words += n;
562 RELEASE_SM_LOCK;
563 initBdescr(bd, g0, g0);
564 bd->flags = BF_LARGE;
565 bd->free = bd->start + n;
566 return bd->start;
567 }
568
569 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
570
571 TICK_ALLOC_HEAP_NOCTR(n);
572 CCS_ALLOC(CCCS,n);
573
574 bd = cap->r.rCurrentAlloc;
575 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
576
577         // The CurrentAlloc block is full; we need to find another
578 // one. First, we try taking the next block from the
579 // nursery:
580 bd = cap->r.rCurrentNursery->link;
581
582 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
583 // The nursery is empty, or the next block is already
584 // full: allocate a fresh block (we can't fail here).
585 ACQUIRE_SM_LOCK;
586 bd = allocBlock();
587 cap->r.rNursery->n_blocks++;
588 RELEASE_SM_LOCK;
589 initBdescr(bd, g0, g0);
590 bd->flags = 0;
591 // If we had to allocate a new block, then we'll GC
592 // pretty quickly now, because MAYBE_GC() will
593 // notice that CurrentNursery->link is NULL.
594 } else {
595 // we have a block in the nursery: take it and put
596 // it at the *front* of the nursery list, and use it
597 // to allocate() from.
598 cap->r.rCurrentNursery->link = bd->link;
599 if (bd->link != NULL) {
600 bd->link->u.back = cap->r.rCurrentNursery;
601 }
602 }
603 dbl_link_onto(bd, &cap->r.rNursery->blocks);
604 cap->r.rCurrentAlloc = bd;
605 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
606 }
607 p = bd->free;
608 bd->free += n;
609
610 IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
611 return p;
612 }
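
/* A minimal usage sketch (guarded by #if 0): RTS code that already holds a
 * Capability can call allocate() directly, e.g. to build a one-field
 * closure.  The choice of stg_MUT_VAR_DIRTY_info and the variable
 * init_value are purely illustrative; see RtsAPI.c for real callers.
 */
#if 0
    StgMutVar *mv;

    mv = (StgMutVar *)allocate(cap, sizeofW(StgMutVar));
    SET_HDR((StgClosure *)mv, &stg_MUT_VAR_DIRTY_info, CCS_SYSTEM);
    mv->var = init_value;   // hypothetical StgClosure *init_value
#endif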
613
614 /* ---------------------------------------------------------------------------
615 Allocate a fixed/pinned object.
616
617 We allocate small pinned objects into a single block, allocating a
618 new block when the current one overflows. The block is chained
619    onto the large_objects list of generation 0.
620
621 NOTE: The GC can't in general handle pinned objects. This
622 interface is only safe to use for ByteArrays, which have no
623 pointers and don't require scavenging. It works because the
624 block's descriptor has the BF_LARGE flag set, so the block is
625 treated as a large object and chained onto various lists, rather
626 than the individual objects being copied. However, when it comes
627 to scavenge the block, the GC will only scavenge the first object.
628 The reason is that the GC can't linearly scan a block of pinned
629 objects at the moment (doing so would require using the
630 mostly-copying techniques). But since we're restricting ourselves
631 to pinned ByteArrays, not scavenging is ok.
632
633 This function is called by newPinnedByteArray# which immediately
634 fills the allocated memory with a MutableByteArray#.
635 ------------------------------------------------------------------------- */
636
637 StgPtr
638 allocatePinned (Capability *cap, lnat n)
639 {
640 StgPtr p;
641 bdescr *bd;
642
643 // If the request is for a large object, then allocate()
644 // will give us a pinned object anyway.
645 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
646 p = allocate(cap, n);
647 Bdescr(p)->flags |= BF_PINNED;
648 return p;
649 }
650
651 TICK_ALLOC_HEAP_NOCTR(n);
652 CCS_ALLOC(CCCS,n);
653
654 bd = cap->pinned_object_block;
655
656 // If we don't have a block of pinned objects yet, or the current
657 // one isn't large enough to hold the new object, allocate a new one.
658 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
659 ACQUIRE_SM_LOCK;
660 cap->pinned_object_block = bd = allocBlock();
661 dbl_link_onto(bd, &g0->large_objects);
662 g0->n_large_blocks++;
663 RELEASE_SM_LOCK;
664 initBdescr(bd, g0, g0);
665 bd->flags = BF_PINNED | BF_LARGE;
666 bd->free = bd->start;
667 }
668
669 g0->n_new_large_words += n;
670 p = bd->free;
671 bd->free += n;
672 return p;
673 }
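
/* A minimal usage sketch (guarded by #if 0): callers such as
 * newPinnedByteArray# request header plus payload in one pinned chunk.
 * n_bytes is a hypothetical byte count; ROUNDUP_BYTES_TO_WDS() and
 * sizeofW() are the usual RTS macros.
 */
#if 0
    StgPtr p;
    lnat payload_words = ROUNDUP_BYTES_TO_WDS(n_bytes);

    p = allocatePinned(cap, sizeofW(StgArrWords) + payload_words);
    SET_HDR((StgClosure *)p, &stg_ARR_WORDS_info, CCCS);
    // the block holding p is marked pinned and chained on g0->large_objects
    ASSERT(Bdescr(p)->flags & BF_PINNED);
#endif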
674
675 /* -----------------------------------------------------------------------------
676 Write Barriers
677 -------------------------------------------------------------------------- */
678
679 /*
680 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
681 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
682 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
683 and is put on the mutable list.
684 */
685 void
686 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
687 {
688 Capability *cap = regTableToCapability(reg);
689 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
690 p->header.info = &stg_MUT_VAR_DIRTY_info;
691 recordClosureMutated(cap,p);
692 }
693 }
694
695 // Setting a TSO's link field with a write barrier.
696 // It is *not* necessary to call this function when
697 // * setting the link field to END_TSO_QUEUE
698 // * putting a TSO on the blackhole_queue
699 // * setting the link field of the currently running TSO, as it
700 // will already be dirty.
701 void
702 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
703 {
704 if (tso->dirty == 0) {
705 tso->dirty = 1;
706 recordClosureMutated(cap,(StgClosure*)tso);
707 }
708 tso->_link = target;
709 }
710
711 void
712 setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
713 {
714 if (tso->dirty == 0) {
715 tso->dirty = 1;
716 recordClosureMutated(cap,(StgClosure*)tso);
717 }
718 tso->block_info.prev = target;
719 }
720
721 void
722 dirty_TSO (Capability *cap, StgTSO *tso)
723 {
724 if (tso->dirty == 0) {
725 tso->dirty = 1;
726 recordClosureMutated(cap,(StgClosure*)tso);
727 }
728 }
729
730 void
731 dirty_STACK (Capability *cap, StgStack *stack)
732 {
733 if (stack->dirty == 0) {
734 stack->dirty = 1;
735 recordClosureMutated(cap,(StgClosure*)stack);
736 }
737 }
738
739 /*
740    This is the write barrier for MVARs.  An MVAR_CLEAN object is not
741    on the mutable list; an MVAR_DIRTY is.  When written to, an
742    MVAR_CLEAN turns into an MVAR_DIRTY and is put on the mutable list.
743    The check for MVAR_CLEAN is inlined at the call site for speed;
744    this really does make a difference on concurrency-heavy benchmarks
745    such as Chameneos and cheap-concurrency.
746 */
747 void
748 dirty_MVAR(StgRegTable *reg, StgClosure *p)
749 {
750 recordClosureMutated(regTableToCapability(reg),p);
751 }
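
/* Illustrative only (guarded by #if 0): the check that is "inlined at the
 * call site" amounts to the following shape at each MVar write site (the
 * real call sites are in Cmm code such as PrimOps.cmm; mvar and cap stand
 * for the MVar closure and the current Capability):
 */
#if 0
    if (mvar->header.info == &stg_MVAR_CLEAN_info) {
        mvar->header.info = &stg_MVAR_DIRTY_info;
        dirty_MVAR(&cap->r, (StgClosure *)mvar);
    }
#endif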
752
753 /* -----------------------------------------------------------------------------
754 * Stats and stuff
755 * -------------------------------------------------------------------------- */
756
757 /* -----------------------------------------------------------------------------
758 * calcAllocated()
759 *
760 * Approximate how much we've allocated: number of blocks in the
761  * nursery + blocks allocated via allocate() - unused nursery blocks.
762 * This leaves a little slop at the end of each block.
763 * -------------------------------------------------------------------------- */
764
765 lnat
766 calcAllocated (rtsBool include_nurseries)
767 {
768 nat allocated = 0;
769 bdescr *bd;
770 nat i;
771
772 // When called from GC.c, we already have the allocation count for
773 // the nursery from resetNurseries(), so we don't need to walk
774 // through these block lists again.
775 if (include_nurseries)
776 {
777 for (i = 0; i < n_capabilities; i++) {
778 for (bd = nurseries[i].blocks; bd; bd = bd->link) {
779 allocated += (lnat)(bd->free - bd->start);
780 }
781 }
782 }
783
784 // add in sizes of new large and pinned objects
785 allocated += g0->n_new_large_words;
786
787 return allocated;
788 }
789
790 /* Approximate the amount of live data in the heap. To be called just
791 * after garbage collection (see GarbageCollect()).
792 */
793 lnat calcLiveBlocks (void)
794 {
795 nat g;
796 lnat live = 0;
797 generation *gen;
798
799 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
800 /* approximate amount of live data (doesn't take into account slop
801 * at end of each block).
802 */
803 gen = &generations[g];
804 live += gen->n_large_blocks + gen->n_blocks;
805 }
806 return live;
807 }
808
809 lnat countOccupied (bdescr *bd)
810 {
811 lnat words;
812
813 words = 0;
814 for (; bd != NULL; bd = bd->link) {
815 ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
816 words += bd->free - bd->start;
817 }
818 return words;
819 }
820
821 // Return an accurate count of the live data in the heap.  To be called
822 // just after garbage collection, when generation 0 is effectively empty.
823 lnat calcLiveWords (void)
824 {
825 nat g;
826 lnat live;
827 generation *gen;
828
829 live = 0;
830 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
831 gen = &generations[g];
832 live += gen->n_words + countOccupied(gen->large_objects);
833 }
834 return live;
835 }
836
837 /* Approximate the number of blocks that will be needed at the next
838 * garbage collection.
839 *
840  * Assume: all data currently live will remain live.  Generations
841 * that will be collected next time will therefore need twice as many
842 * blocks since all the data will be copied.
843 */
844 extern lnat
845 calcNeeded(void)
846 {
847 lnat needed = 0;
848 nat g;
849 generation *gen;
850
851 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
852 gen = &generations[g];
853
854 // we need at least this much space
855 needed += gen->n_blocks + gen->n_large_blocks;
856
857 // any additional space needed to collect this gen next time?
858 if (g == 0 || // always collect gen 0
859 (gen->n_blocks + gen->n_large_blocks > gen->max_blocks)) {
860 // we will collect this gen next time
861 if (gen->mark) {
862 // bitmap:
863 needed += gen->n_blocks / BITS_IN(W_);
864 // mark stack:
865 needed += gen->n_blocks / 100;
866 }
867 if (gen->compact) {
868 continue; // no additional space needed for compaction
869 } else {
870 needed += gen->n_blocks;
871 }
872 }
873 }
874 return needed;
875 }
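
/* Worked example (illustrative): a copying generation holding 1000 blocks
 * that is due to be collected contributes 1000 (its current blocks) plus
 * another 1000 of copy space, i.e. 2000 blocks.  If the same generation is
 * marked and compacted instead, it contributes 1000 + 1000/BITS_IN(W_)
 * + 1000/100 blocks, about 1025 on a 64-bit machine.
 */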
876
877 /* ----------------------------------------------------------------------------
878 Executable memory
879
880 Executable memory must be managed separately from non-executable
881 memory. Most OSs these days require you to jump through hoops to
882 dynamically allocate executable memory, due to various security
883 measures.
884
885 Here we provide a small memory allocator for executable memory.
886 Memory is managed with a page granularity; we allocate linearly
887 in the page, and when the page is emptied (all objects on the page
888 are free) we free the page again, not forgetting to make it
889 non-executable.
890
891 TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
892 the linker cannot use allocateExec for loading object code files
893 on Windows. Once allocateExec can handle larger objects, the linker
894 should be modified to use allocateExec instead of VirtualAlloc.
895 ------------------------------------------------------------------------- */
896
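/* A minimal usage sketch (guarded by #if 0): allocateExec() returns a
 * writable pointer and fills in the executable alias.  Callers (e.g. the
 * adjustor code in Adjustor.c) write the code through the former and run
 * and free it via the latter.  code_template and code_size are
 * hypothetical.
 */
#if 0
    void *exec_addr;
    void *wr_addr = allocateExec(code_size, &exec_addr);

    memcpy(wr_addr, code_template, code_size);  // write via the writable mapping
    /* ... jump to / call exec_addr ... */
    freeExec(exec_addr);                        // freeExec takes the executable address
#endif
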
897 #if defined(linux_HOST_OS)
898
899 // On Linux we need to use libffi for allocating executable memory,
900 // because it knows how to work around the restrictions put in place
901 // by SELinux.
902
903 void *allocateExec (nat bytes, void **exec_ret)
904 {
905 void **ret, **exec;
906 ACQUIRE_SM_LOCK;
907 ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
908 RELEASE_SM_LOCK;
909 if (ret == NULL) return ret;
910 *ret = ret; // save the address of the writable mapping, for freeExec().
911 *exec_ret = exec + 1;
912 return (ret + 1);
913 }
914
915 // freeExec gets passed the executable address, not the writable address.
916 void freeExec (void *addr)
917 {
918 void *writable;
919 writable = *((void**)addr - 1);
920 ACQUIRE_SM_LOCK;
921 ffi_closure_free (writable);
922 RELEASE_SM_LOCK
923 }
924
925 #else
926
927 void *allocateExec (nat bytes, void **exec_ret)
928 {
929 void *ret;
930 nat n;
931
932 ACQUIRE_SM_LOCK;
933
934 // round up to words.
935 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
936
937 if (n+1 > BLOCK_SIZE_W) {
938 barf("allocateExec: can't handle large objects");
939 }
940
941 if (exec_block == NULL ||
942 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
943 bdescr *bd;
944 lnat pagesize = getPageSize();
945 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
946 debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
947 bd->gen_no = 0;
948 bd->flags = BF_EXEC;
949 bd->link = exec_block;
950 if (exec_block != NULL) {
951 exec_block->u.back = bd;
952 }
953 bd->u.back = NULL;
954 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
955 exec_block = bd;
956 }
957 *(exec_block->free) = n; // store the size of this chunk
958 exec_block->gen_no += n; // gen_no stores the number of words allocated
959 ret = exec_block->free + 1;
960 exec_block->free += n + 1;
961
962 RELEASE_SM_LOCK
963 *exec_ret = ret;
964 return ret;
965 }
966
967 void freeExec (void *addr)
968 {
969 StgPtr p = (StgPtr)addr - 1;
970 bdescr *bd = Bdescr((StgPtr)p);
971
972 if ((bd->flags & BF_EXEC) == 0) {
973 barf("freeExec: not executable");
974 }
975
976 if (*(StgPtr)p == 0) {
977 barf("freeExec: already free?");
978 }
979
980 ACQUIRE_SM_LOCK;
981
982 bd->gen_no -= *(StgPtr)p;
983 *(StgPtr)p = 0;
984
985 if (bd->gen_no == 0) {
986 // Free the block if it is empty, but not if it is the block at
987 // the head of the queue.
988 if (bd != exec_block) {
989 debugTrace(DEBUG_gc, "free exec block %p", bd->start);
990 dbl_link_remove(bd, &exec_block);
991 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
992 freeGroup(bd);
993 } else {
994 bd->free = bd->start;
995 }
996 }
997
998 RELEASE_SM_LOCK
999 }
1000
1001 #endif /* linux_HOST_OS */
1002
1003 #ifdef DEBUG
1004
1005 // handy function for use in gdb, because Bdescr() is inlined.
1006 extern bdescr *_bdescr( StgPtr p );
1007
1008 bdescr *
1009 _bdescr( StgPtr p )
1010 {
1011 return Bdescr(p);
1012 }
1013
1014 #endif