Add closeMutex and use it on cleanup
[ghc.git] / rts / Storage.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2004
4 *
5 * Storage manager front end
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11 #include "RtsUtils.h"
12 #include "RtsFlags.h"
13 #include "Stats.h"
14 #include "Hooks.h"
15 #include "BlockAlloc.h"
16 #include "MBlock.h"
17 #include "Weak.h"
18 #include "Sanity.h"
19 #include "Arena.h"
20 #include "OSThreads.h"
21 #include "Capability.h"
22 #include "Storage.h"
23 #include "Schedule.h"
24 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
25 #include "OSMem.h"
26 #include "Trace.h"
27
28 #include <stdlib.h>
29 #include <string.h>
30
31 /*
32 * All these globals must be protected by sm_mutex in THREADED_RTS mode.
33 */
34 StgClosure *caf_list = NULL;
35 StgClosure *revertible_caf_list = NULL;
36 rtsBool keepCAFs;
37
38 bdescr *small_alloc_list; /* allocate()d small objects */
39 bdescr *pinned_object_block; /* allocate pinned objects into this block */
40 nat alloc_blocks; /* number of allocate()d blocks since GC */
41 nat alloc_blocks_lim; /* approximate limit on alloc_blocks */
42
43 StgPtr alloc_Hp = NULL; /* next free byte in small_alloc_list */
44 StgPtr alloc_HpLim = NULL; /* end of block at small_alloc_list */
45
46 generation *generations = NULL; /* all the generations */
47 generation *g0 = NULL; /* generation 0, for convenience */
48 generation *oldest_gen = NULL; /* oldest generation, for convenience */
49 step *g0s0 = NULL; /* generation 0, step 0, for convenience */
50
51 ullong total_allocated = 0; /* total memory allocated during run */
52
53 nat n_nurseries = 0; /* == RtsFlags.ParFlags.nNodes, convenience */
54 step *nurseries = NULL; /* array of nurseries, >1 only if THREADED_RTS */
55
56 #ifdef THREADED_RTS
57 /*
58 * Storage manager mutex: protects all the above state from
59 * simultaneous access by two STG threads.
60 */
61 Mutex sm_mutex;
62 /*
63 * This mutex is used by atomicModifyMutVar# only
64 */
65 Mutex atomic_modify_mutvar_mutex;
66 #endif
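
/* A sketch of the locking discipline, for orientation only: code outside
 * the garbage collector brackets access to the globals above with the
 * storage-manager lock, e.g.
 *
 *   ACQUIRE_SM_LOCK;
 *   ... read or update caf_list, small_alloc_list, alloc_blocks, ...
 *   RELEASE_SM_LOCK;
 *
 * (In the non-threaded RTS these macros expand to nothing.)
 */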
67
68
69 /*
70 * Forward references
71 */
72 static void *stgAllocForGMP (size_t size_in_bytes);
73 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
74 static void stgDeallocForGMP (void *ptr, size_t size);
75
76 static void
77 initStep (step *stp, int g, int s)
78 {
79 stp->no = s;
80 stp->blocks = NULL;
81 stp->n_blocks = 0;
82 stp->old_blocks = NULL;
83 stp->n_old_blocks = 0;
84 stp->gen = &generations[g];
85 stp->gen_no = g;
86 stp->hp = NULL;
87 stp->hpLim = NULL;
88 stp->hp_bd = NULL;
89 stp->scavd_hp = NULL;
90 stp->scavd_hpLim = NULL;
91 stp->scan = NULL;
92 stp->scan_bd = NULL;
93 stp->large_objects = NULL;
94 stp->n_large_blocks = 0;
95 stp->new_large_objects = NULL;
96 stp->scavenged_large_objects = NULL;
97 stp->n_scavenged_large_blocks = 0;
98 stp->is_compacted = 0;
99 stp->bitmap = NULL;
100 }
101
102 void
103 initStorage( void )
104 {
105 nat g, s;
106 generation *gen;
107
108 if (generations != NULL) {
109 // multi-init protection
110 return;
111 }
112
113 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
114 * doing something reasonable.
115 */
116 ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info));
117 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
118 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
119
120 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
121 RtsFlags.GcFlags.heapSizeSuggestion >
122 RtsFlags.GcFlags.maxHeapSize) {
123 RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
124 }
125
126 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
127 RtsFlags.GcFlags.minAllocAreaSize >
128 RtsFlags.GcFlags.maxHeapSize) {
129 errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
130 RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
131 }
132
133 initBlockAllocator();
134
135 #if defined(THREADED_RTS)
136 initMutex(&sm_mutex);
137 initMutex(&atomic_modify_mutvar_mutex);
138 #endif
139
140 ACQUIRE_SM_LOCK;
141
142 /* allocate generation info array */
143 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
144 * sizeof(struct generation_),
145 "initStorage: gens");
146
147 /* Initialise all generations */
148 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
149 gen = &generations[g];
150 gen->no = g;
151 gen->mut_list = allocBlock();
152 gen->collections = 0;
153 gen->failed_promotions = 0;
154 gen->max_blocks = 0;
155 }
156
157 /* A couple of convenience pointers */
158 g0 = &generations[0];
159 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
160
161 /* Allocate step structures in each generation */
162 if (RtsFlags.GcFlags.generations > 1) {
163 /* Only for multiple-generations */
164
165 /* Oldest generation: one step */
166 oldest_gen->n_steps = 1;
167 oldest_gen->steps =
168 stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step");
169
170 /* set up all except the oldest generation with 2 steps */
171 for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
172 generations[g].n_steps = RtsFlags.GcFlags.steps;
173 generations[g].steps =
174 stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_),
175 "initStorage: steps");
176 }
177
178 } else {
179 /* single generation, i.e. a two-space collector */
180 g0->n_steps = 1;
181 g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
182 }
183
184 #ifdef THREADED_RTS
185 n_nurseries = n_capabilities;
186 nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
187 "initStorage: nurseries");
188 #else
189 n_nurseries = 1;
190 nurseries = g0->steps; // just share nurseries[0] with g0s0
191 #endif
192
193 /* Initialise all steps */
194 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
195 for (s = 0; s < generations[g].n_steps; s++) {
196 initStep(&generations[g].steps[s], g, s);
197 }
198 }
199
200 #ifdef THREADED_RTS
201 for (s = 0; s < n_nurseries; s++) {
202 initStep(&nurseries[s], 0, s);
203 }
204 #endif
205
206 /* Set up the destination pointers in each younger gen. step */
207 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
208 for (s = 0; s < generations[g].n_steps-1; s++) {
209 generations[g].steps[s].to = &generations[g].steps[s+1];
210 }
211 generations[g].steps[s].to = &generations[g+1].steps[0];
212 }
213 oldest_gen->steps[0].to = &oldest_gen->steps[0];
214
215 #ifdef THREADED_RTS
216 for (s = 0; s < n_nurseries; s++) {
217 nurseries[s].to = generations[0].steps[0].to;
218 }
219 #endif
220
221 /* The oldest generation has one step. */
222 if (RtsFlags.GcFlags.compact) {
223 if (RtsFlags.GcFlags.generations == 1) {
224 errorBelch("WARNING: compaction is incompatible with -G1; disabled");
225 } else {
226 oldest_gen->steps[0].is_compacted = 1;
227 }
228 }
229
230 #ifdef THREADED_RTS
231 if (RtsFlags.GcFlags.generations == 1) {
232 errorBelch("-G1 is incompatible with -threaded");
233 stg_exit(EXIT_FAILURE);
234 }
235 #endif
236
237 /* generation 0 is special: that's the nursery */
238 generations[0].max_blocks = 0;
239
240 /* G0S0: the allocation area. Policy: keep the allocation area
241 * small to begin with, even if we have a large suggested heap
242 * size. Reason: we're going to do a major collection first, and we
243 * don't want it to be a big one. This vague idea is borne out by
244 * rigorous experimental evidence.
245 */
246 g0s0 = &generations[0].steps[0];
247
248 allocNurseries();
249
250 weak_ptr_list = NULL;
251 caf_list = NULL;
252 revertible_caf_list = NULL;
253
254 /* initialise the allocate() interface */
255 small_alloc_list = NULL;
256 alloc_blocks = 0;
257 alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
258
259 /* Tell GNU multi-precision pkg about our custom alloc functions */
260 mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
261
262 IF_DEBUG(gc, statDescribeGens());
263
264 RELEASE_SM_LOCK;
265 }
266
267 void
268 exitStorage (void)
269 {
270 stat_exit(calcAllocated());
271 }
272
273 void
274 freeStorage (void)
275 {
276 nat g;
277
278 for(g = 0; g < RtsFlags.GcFlags.generations; g++)
279 stgFree(generations[g].steps);
280 stgFree(generations);
281 freeAllMBlocks();
282 #if defined(THREADED_RTS)
283 closeMutex(&sm_mutex);
284 closeMutex(&atomic_modify_mutvar_mutex);
285 #endif
286 }
287
288 /* -----------------------------------------------------------------------------
289 CAF management.
290
291 The entry code for every CAF does the following:
292
293 - builds a CAF_BLACKHOLE in the heap
294 - pushes an update frame pointing to the CAF_BLACKHOLE
295 - invokes UPD_CAF(), which:
296 - calls newCaf, below
297 - updates the CAF with a static indirection to the CAF_BLACKHOLE
298
299 Why do we build a BLACKHOLE in the heap rather than just updating
300 the thunk directly? It's so that we only need one kind of update
301 frame - otherwise we'd need a static version of the update frame too.
302
303 newCaf() does the following:
304
305 - it puts the CAF on the oldest generation's mutable list.
306 This is so that we can treat the CAF as a root when collecting
307 younger generations.
308
309 For GHCi, we have additional requirements when dealing with CAFs:
310
311 - we must *retain* all dynamically-loaded CAFs ever entered,
312 just in case we need them again.
313 - we must be able to *revert* CAFs that have been evaluated, to
314 their pre-evaluated form.
315
316 To do this, we use an additional CAF list. When newCaf() is
317 called on a dynamically-loaded CAF, we add it to the CAF list
318 instead of the old-generation mutable list, and save away its
319 old info pointer (in caf->saved_info) for later reversion.
320
321 To revert all the CAFs, we traverse the CAF list and reset the
322 info pointer to caf->saved_info, then throw away the CAF list.
323 (see GC.c:revertCAFs()).
324
325 -- SDM 29/1/01
326
327 -------------------------------------------------------------------------- */
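
/* Illustrative sketch only (the real code lives in GC.c:revertCAFs()):
 * reverting the CAFs on one of these lists amounts to restoring each
 * saved info pointer and then dropping the list.  The helper name below
 * is hypothetical.
 *
 *   static void revertCAFList (StgClosure *list)
 *   {
 *       StgIndStatic *c;
 *       for (c = (StgIndStatic *)list; c != NULL;
 *            c = (StgIndStatic *)c->static_link) {
 *           SET_INFO((StgClosure *)c, c->saved_info);  // undo the update
 *           c->saved_info = NULL;
 *       }
 *   }
 */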
328
329 void
330 newCAF(StgClosure* caf)
331 {
332 ACQUIRE_SM_LOCK;
333
334 if(keepCAFs)
335 {
336 // HACK:
337 // If we are in GHCi _and_ we are using dynamic libraries,
338 // then we can't redirect newCAF calls to newDynCAF (see below),
339 // so we make newCAF behave almost like newDynCAF.
340 // The dynamic libraries might be used by both the interpreted
341 // program and GHCi itself, so they must not be reverted.
342 // This also means that in GHCi with dynamic libraries, CAFs are not
343 // garbage collected. If this turns out to be a problem, we could
344 // do another hack here and do an address range test on caf to figure
345 // out whether it is from a dynamic library.
346 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
347 ((StgIndStatic *)caf)->static_link = caf_list;
348 caf_list = caf;
349 }
350 else
351 {
352 /* Put this CAF on the mutable list for the old generation.
353 * This is a HACK - the IND_STATIC closure doesn't really have
354 * a mut_link field, but we pretend it has - in fact we re-use
355 * the STATIC_LINK field for the time being, because when we
356 * come to do a major GC we won't need the mut_link field
357 * any more and can use it as a STATIC_LINK.
358 */
359 ((StgIndStatic *)caf)->saved_info = NULL;
360 recordMutableGen(caf, oldest_gen);
361 }
362
363 RELEASE_SM_LOCK;
364
365 #ifdef PAR
366 /* If we are PAR or DIST then we never forget a CAF */
367 { globalAddr *newGA;
368 //debugBelch("<##> Globalising CAF %08x %s",caf,info_type(caf));
369 newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
370 ASSERT(newGA);
371 }
372 #endif /* PAR */
373 }
374
375 // An alternate version of newCaf which is used for dynamically loaded
376 // object code in GHCi. In this case we want to retain *all* CAFs in
377 // the object code, because they might be demanded at any time from an
378 // expression evaluated on the command line.
379 // Also, GHCi might want to revert CAFs, so we add these to the
380 // revertible_caf_list.
381 //
382 // The linker hackily arranges that references to newCaf from dynamic
383 // code end up pointing to newDynCAF.
384 void
385 newDynCAF(StgClosure *caf)
386 {
387 ACQUIRE_SM_LOCK;
388
389 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
390 ((StgIndStatic *)caf)->static_link = revertible_caf_list;
391 revertible_caf_list = caf;
392
393 RELEASE_SM_LOCK;
394 }
395
396 /* -----------------------------------------------------------------------------
397 Nursery management.
398 -------------------------------------------------------------------------- */
399
400 static bdescr *
401 allocNursery (step *stp, bdescr *tail, nat blocks)
402 {
403 bdescr *bd;
404 nat i;
405
406 // Allocate a nursery: we allocate fresh blocks one at a time and
407 // cons them on to the front of the list, not forgetting to update
408 // the back pointer on the tail of the list to point to the new block.
409 for (i=0; i < blocks; i++) {
410 // @LDV profiling
411 /*
412 processNursery() in LdvProfile.c assumes that every block group in
413 the nursery contains only a single block. So, if a block group is
414 given multiple blocks, change processNursery() accordingly.
415 */
416 bd = allocBlock();
417 bd->link = tail;
418 // double-link the nursery: we might need to insert blocks
419 if (tail != NULL) {
420 tail->u.back = bd;
421 }
422 bd->step = stp;
423 bd->gen_no = 0;
424 bd->flags = 0;
425 bd->free = bd->start;
426 tail = bd;
427 }
428 tail->u.back = NULL;
429 return tail;
430 }
431
432 static void
433 assignNurseriesToCapabilities (void)
434 {
435 #ifdef THREADED_RTS
436 nat i;
437
438 for (i = 0; i < n_nurseries; i++) {
439 capabilities[i].r.rNursery = &nurseries[i];
440 capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
441 capabilities[i].r.rCurrentAlloc = NULL;
442 }
443 #else /* THREADED_RTS */
444 MainCapability.r.rNursery = &nurseries[0];
445 MainCapability.r.rCurrentNursery = nurseries[0].blocks;
446 MainCapability.r.rCurrentAlloc = NULL;
447 #endif
448 }
449
450 void
451 allocNurseries( void )
452 {
453 nat i;
454
455 for (i = 0; i < n_nurseries; i++) {
456 nurseries[i].blocks =
457 allocNursery(&nurseries[i], NULL,
458 RtsFlags.GcFlags.minAllocAreaSize);
459 nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
460 nurseries[i].old_blocks = NULL;
461 nurseries[i].n_old_blocks = 0;
462 /* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
463 }
464 assignNurseriesToCapabilities();
465 }
466
467 void
468 resetNurseries( void )
469 {
470 nat i;
471 bdescr *bd;
472 step *stp;
473
474 for (i = 0; i < n_nurseries; i++) {
475 stp = &nurseries[i];
476 for (bd = stp->blocks; bd; bd = bd->link) {
477 bd->free = bd->start;
478 ASSERT(bd->gen_no == 0);
479 ASSERT(bd->step == stp);
480 IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
481 }
482 }
483 assignNurseriesToCapabilities();
484 }
485
486 lnat
487 countNurseryBlocks (void)
488 {
489 nat i;
490 lnat blocks = 0;
491
492 for (i = 0; i < n_nurseries; i++) {
493 blocks += nurseries[i].n_blocks;
494 }
495 return blocks;
496 }
497
498 static void
499 resizeNursery ( step *stp, nat blocks )
500 {
501 bdescr *bd;
502 nat nursery_blocks;
503
504 nursery_blocks = stp->n_blocks;
505 if (nursery_blocks == blocks) return;
506
507 if (nursery_blocks < blocks) {
508 debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
509 blocks);
510 stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
511 }
512 else {
513 bdescr *next_bd;
514
515 debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
516 blocks);
517
518 bd = stp->blocks;
519 while (nursery_blocks > blocks) {
520 next_bd = bd->link;
521 next_bd->u.back = NULL;
522 nursery_blocks -= bd->blocks; // might be a large block
523 freeGroup(bd);
524 bd = next_bd;
525 }
526 stp->blocks = bd;
527 // might have gone just under, by freeing a large block, so make
528 // up the difference.
529 if (nursery_blocks < blocks) {
530 stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
531 }
532 }
533
534 stp->n_blocks = blocks;
535 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
536 }
537
538 //
539 // Resize each of the nurseries to the specified size.
540 //
541 void
542 resizeNurseriesFixed (nat blocks)
543 {
544 nat i;
545 for (i = 0; i < n_nurseries; i++) {
546 resizeNursery(&nurseries[i], blocks);
547 }
548 }
549
550 //
551 // Resize the nurseries to the total specified size.
552 //
553 void
554 resizeNurseries (nat blocks)
555 {
556 // If there are multiple nurseries, then we just divide the number
557 // of available blocks between them.
558 resizeNurseriesFixed(blocks / n_nurseries);
559 }
560
561 /* -----------------------------------------------------------------------------
562 The allocate() interface
563
564 allocate(n) always succeeds, and returns a chunk of memory n words
565 long. n can be larger than the size of a block if necessary, in
566 which case a contiguous block group will be allocated.
567 -------------------------------------------------------------------------- */
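
/* Illustrative sketch only: a typical caller builds a complete closure
 * out of the returned words, e.g. an ARR_WORDS with 'words' payload
 * words ('words' is hypothetical; compare stgAllocForGMP() below):
 *
 *   StgArrWords *arr;
 *   arr = (StgArrWords *)allocate(sizeofW(StgArrWords) + words);
 *   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, words);
 */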
568
569 StgPtr
570 allocate( nat n )
571 {
572 bdescr *bd;
573 StgPtr p;
574
575 ACQUIRE_SM_LOCK;
576
577 TICK_ALLOC_HEAP_NOCTR(n);
578 CCS_ALLOC(CCCS,n);
579
580 /* big allocation (>LARGE_OBJECT_THRESHOLD) */
581 /* ToDo: allocate directly into generation 1 */
582 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
583 nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
584 bd = allocGroup(req_blocks);
585 dbl_link_onto(bd, &g0s0->large_objects);
586 g0s0->n_large_blocks += req_blocks;
587 bd->gen_no = 0;
588 bd->step = g0s0;
589 bd->flags = BF_LARGE;
590 bd->free = bd->start + n;
591 alloc_blocks += req_blocks;
592 RELEASE_SM_LOCK;
593 return bd->start;
594
595 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
596 } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
597 if (small_alloc_list) {
598 small_alloc_list->free = alloc_Hp;
599 }
600 bd = allocBlock();
601 bd->link = small_alloc_list;
602 small_alloc_list = bd;
603 bd->gen_no = 0;
604 bd->step = g0s0;
605 bd->flags = 0;
606 alloc_Hp = bd->start;
607 alloc_HpLim = bd->start + BLOCK_SIZE_W;
608 alloc_blocks++;
609 }
610
611 p = alloc_Hp;
612 alloc_Hp += n;
613 RELEASE_SM_LOCK;
614 return p;
615 }
616
617 lnat
618 allocated_bytes( void )
619 {
620 lnat allocated;
621
622 allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp);
623 if (pinned_object_block != NULL) {
624 allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
625 pinned_object_block->free;
626 }
627
628 return allocated;
629 }
630
631 void
632 tidyAllocateLists (void)
633 {
634 if (small_alloc_list != NULL) {
635 ASSERT(alloc_Hp >= small_alloc_list->start &&
636 alloc_Hp <= small_alloc_list->start + BLOCK_SIZE);
637 small_alloc_list->free = alloc_Hp;
638 }
639 }
640
641 /* -----------------------------------------------------------------------------
642 allocateLocal()
643
644 This allocates memory in the current thread - it is intended for
645 use primarily from STG-land where we have a Capability. It is
646 better than allocate() because it doesn't require taking the
647 sm_mutex lock in the common case.
648
649 Memory is allocated directly from the nursery if possible (but not
650 from the current nursery block, so as not to interfere with
651 Hp/HpLim).
652 -------------------------------------------------------------------------- */
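
/* Illustrative sketch only: with a Capability in hand, small objects
 * are built along these lines ('cap' and 'init_value' are hypothetical):
 *
 *   StgMutVar *mv;
 *   mv = (StgMutVar *)allocateLocal(cap, sizeofW(StgMutVar));
 *   SET_HDR(mv, &stg_MUT_VAR_DIRTY_info, CCCS);
 *   mv->var = init_value;
 */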
653
654 StgPtr
655 allocateLocal (Capability *cap, nat n)
656 {
657 bdescr *bd;
658 StgPtr p;
659
660 TICK_ALLOC_HEAP_NOCTR(n);
661 CCS_ALLOC(CCCS,n);
662
663 /* big allocation (>LARGE_OBJECT_THRESHOLD) */
664 /* ToDo: allocate directly into generation 1 */
665 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
666 nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
667 ACQUIRE_SM_LOCK;
668 bd = allocGroup(req_blocks);
669 dbl_link_onto(bd, &g0s0->large_objects);
670 g0s0->n_large_blocks += req_blocks;
671 bd->gen_no = 0;
672 bd->step = g0s0;
673 bd->flags = BF_LARGE;
674 bd->free = bd->start + n;
675 alloc_blocks += req_blocks;
676 RELEASE_SM_LOCK;
677 return bd->start;
678
679 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
680 } else {
681
682 bd = cap->r.rCurrentAlloc;
683 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
684
685 // The CurrentAlloc block is full, we need to find another
686 // one. First, we try taking the next block from the
687 // nursery:
688 bd = cap->r.rCurrentNursery->link;
689
690 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
691 // The nursery is empty, or the next block is already
692 // full: allocate a fresh block (we can't fail here).
693 ACQUIRE_SM_LOCK;
694 bd = allocBlock();
695 cap->r.rNursery->n_blocks++;
696 RELEASE_SM_LOCK;
697 bd->gen_no = 0;
698 bd->step = cap->r.rNursery;
699 bd->flags = 0;
700 } else {
701 // we have a block in the nursery: take it and put
702 // it at the *front* of the nursery list, and use it
703 // to allocate() from.
704 cap->r.rCurrentNursery->link = bd->link;
705 if (bd->link != NULL) {
706 bd->link->u.back = cap->r.rCurrentNursery;
707 }
708 }
709 dbl_link_onto(bd, &cap->r.rNursery->blocks);
710 cap->r.rCurrentAlloc = bd;
711 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
712 }
713 }
714 p = bd->free;
715 bd->free += n;
716 return p;
717 }
718
719 /* ---------------------------------------------------------------------------
720 Allocate a fixed/pinned object.
721
722 We allocate small pinned objects into a single block, allocating a
723 new block when the current one overflows. The block is chained
724 onto the large_object_list of generation 0 step 0.
725
726 NOTE: The GC can't in general handle pinned objects. This
727 interface is only safe to use for ByteArrays, which have no
728 pointers and don't require scavenging. It works because the
729 block's descriptor has the BF_LARGE flag set, so the block is
730 treated as a large object and chained onto various lists, rather
731 than the individual objects being copied. However, when it comes
732 to scavenge the block, the GC will only scavenge the first object.
733 The reason is that the GC can't linearly scan a block of pinned
734 objects at the moment (doing so would require using the
735 mostly-copying techniques). But since we're restricting ourselves
736 to pinned ByteArrays, not scavenging is ok.
737
738 This function is called by newPinnedByteArray# which immediately
739 fills the allocated memory with a MutableByteArray#.
740 ------------------------------------------------------------------------- */
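
/* Illustrative sketch only: roughly what newPinnedByteArray# (written in
 * Cmm) does with the result; 'n_bytes' is hypothetical:
 *
 *   nat payload_words = ROUNDUP_BYTES_TO_WDS(n_bytes);
 *   StgArrWords *arr;
 *   arr = (StgArrWords *)allocatePinned(sizeofW(StgArrWords) + payload_words);
 *   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, payload_words);
 */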
741
742 StgPtr
743 allocatePinned( nat n )
744 {
745 StgPtr p;
746 bdescr *bd = pinned_object_block;
747
748 // If the request is for a large object, then allocate()
749 // will give us a pinned object anyway.
750 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
751 return allocate(n);
752 }
753
754 ACQUIRE_SM_LOCK;
755
756 TICK_ALLOC_HEAP_NOCTR(n);
757 CCS_ALLOC(CCCS,n);
758
759 // we always return 8-byte aligned memory. bd->free must be
760 // 8-byte aligned to begin with, so we just round up n to
761 // the nearest multiple of 8 bytes.
762 if (sizeof(StgWord) == 4) {
763 n = (n+1) & ~1;
764 }
765
766 // If we don't have a block of pinned objects yet, or the current
767 // one isn't large enough to hold the new object, allocate a new one.
768 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
769 pinned_object_block = bd = allocBlock();
770 dbl_link_onto(bd, &g0s0->large_objects);
771 bd->gen_no = 0;
772 bd->step = g0s0;
773 bd->flags = BF_PINNED | BF_LARGE;
774 bd->free = bd->start;
775 alloc_blocks++;
776 }
777
778 p = bd->free;
779 bd->free += n;
780 RELEASE_SM_LOCK;
781 return p;
782 }
783
784 /* -----------------------------------------------------------------------------
785 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
786 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
787 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
788 and is put on the mutable list.
789 -------------------------------------------------------------------------- */
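
/* Illustrative sketch only: a caller writes the new value and invokes
 * the barrier, along the lines of the writeMutVar# primop ('cap', 'mv'
 * and 'new_value' are hypothetical):
 *
 *   mv->var = new_value;
 *   dirty_MUT_VAR(&cap->r, (StgClosure *)mv);
 */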
790
791 void
792 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
793 {
794 Capability *cap = regTableToCapability(reg);
795 bdescr *bd;
796 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
797 p->header.info = &stg_MUT_VAR_DIRTY_info;
798 bd = Bdescr((StgPtr)p);
799 if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
800 }
801 }
802
803 /* -----------------------------------------------------------------------------
804 Allocation functions for GMP.
805
806 These all use the allocate() interface - we can't have any garbage
807 collection going on during a gmp operation, so we use allocate()
808 which always succeeds. The gmp operations which might need to
809 allocate will ask the storage manager (via doYouWantToGC()) whether
810 a garbage collection is required, in case we get into a loop doing
811 only allocate() style allocation.
812 -------------------------------------------------------------------------- */
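
/* For reference, GMP's mp_set_memory_functions() expects hooks of these
 * shapes; that is the contract the three functions below implement (the
 * registration itself happens in initStorage() above):
 *
 *   void *alloc_func   (size_t size_in_bytes);
 *   void *realloc_func (void *ptr, size_t old_size, size_t new_size);
 *   void  free_func    (void *ptr, size_t size);
 */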
813
814 static void *
815 stgAllocForGMP (size_t size_in_bytes)
816 {
817 StgArrWords* arr;
818 nat data_size_in_words, total_size_in_words;
819
820 /* round up to a whole number of words */
821 data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
822 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
823
824 /* allocate and fill it in. */
825 #if defined(THREADED_RTS)
826 arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
827 #else
828 arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
829 #endif
830 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
831
832 /* and return a ptr to the goods inside the array */
833 return arr->payload;
834 }
835
836 static void *
837 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
838 {
839 void *new_stuff_ptr = stgAllocForGMP(new_size);
840 nat i = 0;
841 char *p = (char *) ptr;
842 char *q = (char *) new_stuff_ptr;
843
844 for (; i < old_size; i++, p++, q++) {
845 *q = *p;
846 }
847
848 return(new_stuff_ptr);
849 }
850
851 static void
852 stgDeallocForGMP (void *ptr STG_UNUSED,
853 size_t size STG_UNUSED)
854 {
855 /* easy for us: the garbage collector does the dealloc'n */
856 }
857
858 /* -----------------------------------------------------------------------------
859 * Stats and stuff
860 * -------------------------------------------------------------------------- */
861
862 /* -----------------------------------------------------------------------------
863 * calcAllocated()
864 *
865 * Approximate how much we've allocated: number of blocks in the
866 * nursery + blocks allocated via allocate() - unused nursery blocks.
867 * This leaves a little slop at the end of each block, and doesn't
868 * take into account large objects (ToDo).
869 * -------------------------------------------------------------------------- */
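
/* In outline, the approximation computed below is:
 *
 *   allocated ~= allocated_bytes()                      (allocate()'d words)
 *              + countNurseryBlocks() * BLOCK_SIZE_W    (nursery assumed full)
 *              - words still unused in the nurseries    (subtracted in the loop)
 */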
870
871 lnat
872 calcAllocated( void )
873 {
874 nat allocated;
875 bdescr *bd;
876
877 allocated = allocated_bytes();
878 allocated += countNurseryBlocks() * BLOCK_SIZE_W;
879
880 {
881 #ifdef THREADED_RTS
882 nat i;
883 for (i = 0; i < n_nurseries; i++) {
884 Capability *cap;
885 for ( bd = capabilities[i].r.rCurrentNursery->link;
886 bd != NULL; bd = bd->link ) {
887 allocated -= BLOCK_SIZE_W;
888 }
889 cap = &capabilities[i];
890 if (cap->r.rCurrentNursery->free <
891 cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
892 allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
893 - cap->r.rCurrentNursery->free;
894 }
895 }
896 #else
897 bdescr *current_nursery = MainCapability.r.rCurrentNursery;
898
899 for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
900 allocated -= BLOCK_SIZE_W;
901 }
902 if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
903 allocated -= (current_nursery->start + BLOCK_SIZE_W)
904 - current_nursery->free;
905 }
906 #endif
907 }
908
909 total_allocated += allocated;
910 return allocated;
911 }
912
913 /* Approximate the amount of live data in the heap. To be called just
914 * after garbage collection (see GarbageCollect()).
915 */
916 extern lnat
917 calcLive(void)
918 {
919 nat g, s;
920 lnat live = 0;
921 step *stp;
922
923 if (RtsFlags.GcFlags.generations == 1) {
924 live = (g0s0->n_blocks - 1) * BLOCK_SIZE_W +
925 ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
926 return live;
927 }
928
929 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
930 for (s = 0; s < generations[g].n_steps; s++) {
931 /* approximate amount of live data (doesn't take into account slop
932 * at end of each block).
933 */
934 if (g == 0 && s == 0) {
935 continue;
936 }
937 stp = &generations[g].steps[s];
938 live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
939 if (stp->hp_bd != NULL) {
940 live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
941 / sizeof(W_);
942 }
943 if (stp->scavd_hp != NULL) {
944 live -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp;
945 }
946 }
947 }
948 return live;
949 }
950
951 /* Approximate the number of blocks that will be needed at the next
952 * garbage collection.
953 *
954 * Assume: all data currently live will remain live. Steps that will
955 * be collected next time will therefore need twice as many blocks
956 * since all the data will be copied.
957 */
958 extern lnat
959 calcNeeded(void)
960 {
961 lnat needed = 0;
962 nat g, s;
963 step *stp;
964
965 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
966 for (s = 0; s < generations[g].n_steps; s++) {
967 if (g == 0 && s == 0) { continue; }
968 stp = &generations[g].steps[s];
969 if (generations[g].steps[0].n_blocks +
970 generations[g].steps[0].n_large_blocks
971 > generations[g].max_blocks
972 && stp->is_compacted == 0) {
973 needed += 2 * stp->n_blocks;
974 } else {
975 needed += stp->n_blocks;
976 }
977 }
978 }
979 return needed;
980 }
981
982 /* ----------------------------------------------------------------------------
983 Executable memory
984
985 Executable memory must be managed separately from non-executable
986 memory. Most OSs these days require you to jump through hoops to
987 dynamically allocate executable memory, due to various security
988 measures.
989
990 Here we provide a small memory allocator for executable memory.
991 Memory is managed with a page granularity; we allocate linearly
992 in the page, and when the page is emptied (all objects on the page
993 are free) we free the page again, not forgetting to make it
994 non-executable.
995 ------------------------------------------------------------------------- */
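
/* Illustrative sketch only: typical use, e.g. by the adjustor code
 * ('code_size' and 'code_template' are hypothetical):
 *
 *   void *code = allocateExec(code_size);
 *   memcpy(code, code_template, code_size);   // copy the machine code in
 *   ...
 *   freeExec(code);                           // when the code is no longer needed
 */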
996
997 static bdescr *exec_block;
998
999 void *allocateExec (nat bytes)
1000 {
1001 void *ret;
1002 nat n;
1003
1004 ACQUIRE_SM_LOCK;
1005
1006 // round up to words.
1007 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
1008
1009 if (n+1 > BLOCK_SIZE_W) {
1010 barf("allocateExec: can't handle large objects");
1011 }
1012
1013 if (exec_block == NULL ||
1014 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1015 bdescr *bd;
1016 lnat pagesize = getPageSize();
1017 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1018 debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1019 bd->gen_no = 0;
1020 bd->flags = BF_EXEC;
1021 bd->link = exec_block;
1022 if (exec_block != NULL) {
1023 exec_block->u.back = bd;
1024 }
1025 bd->u.back = NULL;
1026 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1027 exec_block = bd;
1028 }
1029 *(exec_block->free) = n; // store the size of this chunk
1030 exec_block->gen_no += n; // gen_no stores the number of words allocated
1031 ret = exec_block->free + 1;
1032 exec_block->free += n + 1;
1033
1034 RELEASE_SM_LOCK
1035 return ret;
1036 }
1037
1038 void freeExec (void *addr)
1039 {
1040 StgPtr p = (StgPtr)addr - 1;
1041 bdescr *bd = Bdescr((StgPtr)p);
1042
1043 if ((bd->flags & BF_EXEC) == 0) {
1044 barf("freeExec: not executable");
1045 }
1046
1047 if (*(StgPtr)p == 0) {
1048 barf("freeExec: already free?");
1049 }
1050
1051 ACQUIRE_SM_LOCK;
1052
1053 bd->gen_no -= *(StgPtr)p;
1054 *(StgPtr)p = 0;
1055
1056 // Free the block if it is empty, but not if it is the block at
1057 // the head of the queue.
1058 if (bd->gen_no == 0 && bd != exec_block) {
1059 debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1060 if (bd->u.back) {
1061 bd->u.back->link = bd->link;
1062 } else {
1063 exec_block = bd->link;
1064 }
1065 if (bd->link) {
1066 bd->link->u.back = bd->u.back;
1067 }
1068 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1069 freeGroup(bd);
1070 }
1071
1072 RELEASE_SM_LOCK
1073 }
1074
1075 /* -----------------------------------------------------------------------------
1076 Debugging
1077
1078 memInventory() checks for memory leaks by counting up all the
1079 blocks we know about and comparing that to the number of blocks
1080 allegedly floating around in the system.
1081 -------------------------------------------------------------------------- */
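
/* The invariant memInventory() checks, stated as an identity:
 *
 *     live blocks (gens/steps, nurseries, allocate(), arenas, exec)
 *   + blocks on the block allocator's free list
 *  == mblocks_allocated * BLOCKS_PER_MBLOCK
 *
 * Any mismatch indicates a leaked or double-counted block.
 */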
1082
1083 #ifdef DEBUG
1084
1085 static lnat
1086 stepBlocks (step *stp)
1087 {
1088 lnat total_blocks;
1089 bdescr *bd;
1090
1091 total_blocks = stp->n_blocks;
1092 total_blocks += stp->n_old_blocks;
1093 for (bd = stp->large_objects; bd; bd = bd->link) {
1094 total_blocks += bd->blocks;
1095 /* hack for megablock groups: they have an extra block or two in
1096 the second and subsequent megablocks where the block
1097 descriptors would normally go.
1098 */
1099 if (bd->blocks > BLOCKS_PER_MBLOCK) {
1100 total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1101 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1102 }
1103 }
1104 return total_blocks;
1105 }
1106
1107 void
1108 memInventory(void)
1109 {
1110 nat g, s, i;
1111 step *stp;
1112 bdescr *bd;
1113 lnat total_blocks = 0, free_blocks = 0;
1114
1115 /* count the blocks we currently have */
1116
1117 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1118 for (i = 0; i < n_capabilities; i++) {
1119 for (bd = capabilities[i].mut_lists[g]; bd != NULL; bd = bd->link) {
1120 total_blocks += bd->blocks;
1121 }
1122 }
1123 for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
1124 total_blocks += bd->blocks;
1125 }
1126 for (s = 0; s < generations[g].n_steps; s++) {
1127 if (g==0 && s==0) continue;
1128 stp = &generations[g].steps[s];
1129 total_blocks += stepBlocks(stp);
1130 }
1131 }
1132
1133 for (i = 0; i < n_nurseries; i++) {
1134 total_blocks += stepBlocks(&nurseries[i]);
1135 }
1136 #ifdef THREADED_RTS
1137 // We put pinned object blocks in g0s0, so better count blocks there too.
1138 total_blocks += stepBlocks(g0s0);
1139 #endif
1140
1141 /* any blocks held by allocate() */
1142 for (bd = small_alloc_list; bd; bd = bd->link) {
1143 total_blocks += bd->blocks;
1144 }
1145
1146 #ifdef PROFILING
1147 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1148 total_blocks += retainerStackBlocks();
1149 }
1150 #endif
1151
1152 // count the blocks allocated by the arena allocator
1153 total_blocks += arenaBlocks();
1154
1155 // count the blocks containing executable memory
1156 for (bd = exec_block; bd; bd = bd->link) {
1157 total_blocks += bd->blocks;
1158 }
1159
1160 /* count the blocks on the free list */
1161 free_blocks = countFreeList();
1162
1163 if (total_blocks + free_blocks != mblocks_allocated *
1164 BLOCKS_PER_MBLOCK) {
1165 debugBelch("Blocks: %ld live + %ld free = %ld total (%ld around)\n",
1166 total_blocks, free_blocks, total_blocks + free_blocks,
1167 mblocks_allocated * BLOCKS_PER_MBLOCK);
1168 }
1169
1170 ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
1171 }
1172
1173
1174 nat
1175 countBlocks(bdescr *bd)
1176 {
1177 nat n;
1178 for (n=0; bd != NULL; bd=bd->link) {
1179 n += bd->blocks;
1180 }
1181 return n;
1182 }
1183
1184 /* Full heap sanity check. */
1185 void
1186 checkSanity( void )
1187 {
1188 nat g, s;
1189
1190 if (RtsFlags.GcFlags.generations == 1) {
1191 checkHeap(g0s0->blocks);
1192 checkChain(g0s0->large_objects);
1193 } else {
1194
1195 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1196 for (s = 0; s < generations[g].n_steps; s++) {
1197 if (g == 0 && s == 0) { continue; }
1198 ASSERT(countBlocks(generations[g].steps[s].blocks)
1199 == generations[g].steps[s].n_blocks);
1200 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1201 == generations[g].steps[s].n_large_blocks);
1202 checkHeap(generations[g].steps[s].blocks);
1203 checkChain(generations[g].steps[s].large_objects);
1204 if (g > 0) {
1205 checkMutableList(generations[g].mut_list, g);
1206 }
1207 }
1208 }
1209
1210 for (s = 0; s < n_nurseries; s++) {
1211 ASSERT(countBlocks(nurseries[s].blocks)
1212 == nurseries[s].n_blocks);
1213 ASSERT(countBlocks(nurseries[s].large_objects)
1214 == nurseries[s].n_large_blocks);
1215 }
1216
1217 checkFreeListSanity();
1218 }
1219 }
1220
1221 /* Nursery sanity check */
1222 void
1223 checkNurserySanity( step *stp )
1224 {
1225 bdescr *bd, *prev;
1226 nat blocks = 0;
1227
1228 prev = NULL;
1229 for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1230 ASSERT(bd->u.back == prev);
1231 prev = bd;
1232 blocks += bd->blocks;
1233 }
1234 ASSERT(blocks == stp->n_blocks);
1235 }
1236
1237 // handy function for use in gdb, because Bdescr() is inlined.
1238 extern bdescr *_bdescr( StgPtr p );
1239
1240 bdescr *
1241 _bdescr( StgPtr p )
1242 {
1243 return Bdescr(p);
1244 }
1245
1246 #endif