1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2004
4 *
5 * Storage manager front end
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #include "PosixSource.h"
10 #include "Rts.h"
11 #include "RtsUtils.h"
12 #include "RtsFlags.h"
13 #include "Stats.h"
14 #include "Hooks.h"
15 #include "BlockAlloc.h"
16 #include "MBlock.h"
17 #include "Weak.h"
18 #include "Sanity.h"
19 #include "Arena.h"
20 #include "OSThreads.h"
21 #include "Capability.h"
22 #include "Storage.h"
23 #include "Schedule.h"
24 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
25 #include "OSMem.h"
26
27 #include <stdlib.h>
28 #include <string.h>
29
30 /*
31 * All these globals must be accessed with sm_mutex held in THREADED_RTS mode.
32 */
33 StgClosure *caf_list = NULL;
34 StgClosure *revertible_caf_list = NULL;
35 rtsBool keepCAFs;
36
37 bdescr *small_alloc_list; /* allocate()d small objects */
38 bdescr *pinned_object_block; /* allocate pinned objects into this block */
39 nat alloc_blocks; /* number of allocate()d blocks since GC */
40 nat alloc_blocks_lim; /* approximate limit on alloc_blocks */
41
42 StgPtr alloc_Hp = NULL; /* next free byte in small_alloc_list */
43 StgPtr alloc_HpLim = NULL; /* end of block at small_alloc_list */
44
45 generation *generations = NULL; /* all the generations */
46 generation *g0 = NULL; /* generation 0, for convenience */
47 generation *oldest_gen = NULL; /* oldest generation, for convenience */
48 step *g0s0 = NULL; /* generation 0, step 0, for convenience */
49
50 ullong total_allocated = 0; /* total memory allocated during run */
51
52 nat n_nurseries = 0; /* == RtsFlags.ParFlags.nNodes, convenience */
53 step *nurseries = NULL; /* array of nurseries, >1 only if THREADED_RTS */
54
55 #ifdef THREADED_RTS
56 /*
57 * Storage manager mutex: protects all the above state from
58 * simultaneous access by two STG threads.
59 */
60 Mutex sm_mutex;
61 /*
62 * This mutex is used by atomicModifyMutVar# only
63 */
64 Mutex atomic_modify_mutvar_mutex;
65 #endif
66
67
68 /*
69 * Forward references
70 */
71 static void *stgAllocForGMP (size_t size_in_bytes);
72 static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
73 static void stgDeallocForGMP (void *ptr, size_t size);
74
75 static void
76 initStep (step *stp, int g, int s)
77 {
78 stp->no = s;
79 stp->blocks = NULL;
80 stp->n_blocks = 0;
81 stp->old_blocks = NULL;
82 stp->n_old_blocks = 0;
83 stp->gen = &generations[g];
84 stp->gen_no = g;
85 stp->hp = NULL;
86 stp->hpLim = NULL;
87 stp->hp_bd = NULL;
88 stp->scavd_hp = NULL;
89 stp->scavd_hpLim = NULL;
90 stp->scan = NULL;
91 stp->scan_bd = NULL;
92 stp->large_objects = NULL;
93 stp->n_large_blocks = 0;
94 stp->new_large_objects = NULL;
95 stp->scavenged_large_objects = NULL;
96 stp->n_scavenged_large_blocks = 0;
97 stp->is_compacted = 0;
98 stp->bitmap = NULL;
99 }
100
101 void
102 initStorage( void )
103 {
104 nat g, s;
105 generation *gen;
106
107 if (generations != NULL) {
108 // multi-init protection
109 return;
110 }
111
112 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
113 * doing something reasonable.
114 */
115 ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info));
116 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
117 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
118
119 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
120 RtsFlags.GcFlags.heapSizeSuggestion >
121 RtsFlags.GcFlags.maxHeapSize) {
122 RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
123 }
124
125 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
126 RtsFlags.GcFlags.minAllocAreaSize >
127 RtsFlags.GcFlags.maxHeapSize) {
128 errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
129 exit(1);
130 }
131
132 initBlockAllocator();
133
134 #if defined(THREADED_RTS)
135 initMutex(&sm_mutex);
136 initMutex(&atomic_modify_mutvar_mutex);
137 #endif
138
139 ACQUIRE_SM_LOCK;
140
141 /* allocate generation info array */
142 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
143 * sizeof(struct generation_),
144 "initStorage: gens");
145
146 /* Initialise all generations */
147 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
148 gen = &generations[g];
149 gen->no = g;
150 gen->mut_list = allocBlock();
151 gen->collections = 0;
152 gen->failed_promotions = 0;
153 gen->max_blocks = 0;
154 }
155
156 /* A couple of convenience pointers */
157 g0 = &generations[0];
158 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
159
160 /* Allocate step structures in each generation */
161 if (RtsFlags.GcFlags.generations > 1) {
162 /* Only for multiple-generations */
163
164 /* Oldest generation: one step */
165 oldest_gen->n_steps = 1;
166 oldest_gen->steps =
167 stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step");
168
169 /* set up all except the oldest generation with 2 steps */
170 for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
171 generations[g].n_steps = RtsFlags.GcFlags.steps;
172 generations[g].steps =
173 stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_),
174 "initStorage: steps");
175 }
176
177 } else {
178 /* single generation, i.e. a two-space collector */
179 g0->n_steps = 1;
180 g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
181 }
182
183 #ifdef THREADED_RTS
184 n_nurseries = n_capabilities;
185 nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
186 "initStorage: nurseries");
187 #else
188 n_nurseries = 1;
189 nurseries = g0->steps; // just share nurseries[0] with g0s0
190 #endif
191
192 /* Initialise all steps */
193 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
194 for (s = 0; s < generations[g].n_steps; s++) {
195 initStep(&generations[g].steps[s], g, s);
196 }
197 }
198
199 #ifdef THREADED_RTS
200 for (s = 0; s < n_nurseries; s++) {
201 initStep(&nurseries[s], 0, s);
202 }
203 #endif
204
205 /* Set up the destination pointers in each younger gen. step */
206 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
207 for (s = 0; s < generations[g].n_steps-1; s++) {
208 generations[g].steps[s].to = &generations[g].steps[s+1];
209 }
210 generations[g].steps[s].to = &generations[g+1].steps[0];
211 }
212 oldest_gen->steps[0].to = &oldest_gen->steps[0];
213
214 #ifdef THREADED_RTS
215 for (s = 0; s < n_nurseries; s++) {
216 nurseries[s].to = generations[0].steps[0].to;
217 }
218 #endif
219
220 /* The oldest generation has one step. */
221 if (RtsFlags.GcFlags.compact) {
222 if (RtsFlags.GcFlags.generations == 1) {
223 errorBelch("WARNING: compaction is incompatible with -G1; disabled");
224 } else {
225 oldest_gen->steps[0].is_compacted = 1;
226 }
227 }
228
229 #ifdef THREADED_RTS
230 if (RtsFlags.GcFlags.generations == 1) {
231 errorBelch("-G1 is incompatible with -threaded");
232 stg_exit(EXIT_FAILURE);
233 }
234 #endif
235
236 /* generation 0 is special: that's the nursery */
237 generations[0].max_blocks = 0;
238
239 /* G0S0: the allocation area. Policy: keep the allocation area
240 * small to begin with, even if we have a large suggested heap
241 * size. Reason: we're going to do a major collection first, and we
242 * don't want it to be a big one. This vague idea is borne out by
243 * rigorous experimental evidence.
244 */
245 g0s0 = &generations[0].steps[0];
246
247 allocNurseries();
248
249 weak_ptr_list = NULL;
250 caf_list = NULL;
251 revertible_caf_list = NULL;
252
253 /* initialise the allocate() interface */
254 small_alloc_list = NULL;
255 alloc_blocks = 0;
256 alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
257
258 /* Tell GNU multi-precision pkg about our custom alloc functions */
259 mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
260
261 IF_DEBUG(gc, statDescribeGens());
262
263 RELEASE_SM_LOCK;
264 }
265
266 void
267 exitStorage (void)
268 {
269 stat_exit(calcAllocated());
270 }
271
272 void
273 freeStorage (void)
274 {
275 freeAllMBlocks();
276 }
277
278 /* -----------------------------------------------------------------------------
279 CAF management.
280
281 The entry code for every CAF does the following:
282
283 - builds a CAF_BLACKHOLE in the heap
284 - pushes an update frame pointing to the CAF_BLACKHOLE
285 - invokes UPD_CAF(), which:
286 - calls newCaf, below
287 - updates the CAF with a static indirection to the CAF_BLACKHOLE
288
289 Why do we build a BLACKHOLE in the heap rather than just updating
290 the thunk directly? It's so that we only need one kind of update
291 frame - otherwise we'd need a static version of the update frame too.
292
293 newCaf() does the following:
294
295 - it puts the CAF on the oldest generation's mut-once list.
296 This is so that we can treat the CAF as a root when collecting
297 younger generations.
298
299 For GHCi, we have additional requirements when dealing with CAFs:
300
301 - we must *retain* all dynamically-loaded CAFs ever entered,
302 just in case we need them again.
303 - we must be able to *revert* CAFs that have been evaluated, to
304 their pre-evaluated form.
305
306 To do this, we use an additional CAF list. When newCaf() is
307 called on a dynamically-loaded CAF, we add it to the CAF list
308 instead of the old-generation mutable list, and save away its
309 old info pointer (in caf->saved_info) for later reversion.
310
311 To revert all the CAFs, we traverse the CAF list and reset the
312 info pointer to caf->saved_info, then throw away the CAF list.
313 (see GC.c:revertCAFs()).
314
315 -- SDM 29/1/01
316
317 -------------------------------------------------------------------------- */
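
/* A minimal sketch of the reversion step described above (the real code
 * lives in GC.c:revertCAFs(); this is illustrative only and assumes the
 * field names used in this file, traversing the revertible_caf_list
 * maintained by newDynCAF below):
 *
 *   StgIndStatic *c, *next;
 *   for (c = (StgIndStatic *)revertible_caf_list; c != NULL; c = next) {
 *       next = (StgIndStatic *)c->static_link;
 *       SET_INFO((StgClosure *)c, c->saved_info);  // restore pre-evaluation info
 *       c->saved_info = NULL;
 *   }
 *   revertible_caf_list = NULL;                    // throw the list away
 */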
318
319 void
320 newCAF(StgClosure* caf)
321 {
322 ACQUIRE_SM_LOCK;
323
324 if(keepCAFs)
325 {
326 // HACK:
327 // If we are in GHCi _and_ we are using dynamic libraries,
328 // then we can't redirect newCAF calls to newDynCAF (see below),
329 // so we make newCAF behave almost like newDynCAF.
330 // The dynamic libraries might be used by both the interpreted
331 // program and GHCi itself, so they must not be reverted.
332 // This also means that in GHCi with dynamic libraries, CAFs are not
333 // garbage collected. If this turns out to be a problem, we could
334 // do another hack here and do an address range test on caf to figure
335 // out whether it is from a dynamic library.
336 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
337 ((StgIndStatic *)caf)->static_link = caf_list;
338 caf_list = caf;
339 }
340 else
341 {
342 /* Put this CAF on the mutable list for the old generation.
343 * This is a HACK - the IND_STATIC closure doesn't really have
344 * a mut_link field, but we pretend it has - in fact we re-use
345 * the STATIC_LINK field for the time being, because when we
346 * come to do a major GC we won't need the mut_link field
347 * any more and can use it as a STATIC_LINK.
348 */
349 ((StgIndStatic *)caf)->saved_info = NULL;
350 recordMutableGen(caf, oldest_gen);
351 }
352
353 RELEASE_SM_LOCK;
354
355 #ifdef PAR
356 /* If we are PAR or DIST then we never forget a CAF */
357 { globalAddr *newGA;
358 //debugBelch("<##> Globalising CAF %08x %s",caf,info_type(caf));
359 newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
360 ASSERT(newGA);
361 }
362 #endif /* PAR */
363 }
364
365 // An alternate version of newCaf which is used for dynamically loaded
366 // object code in GHCi. In this case we want to retain *all* CAFs in
367 // the object code, because they might be demanded at any time from an
368 // expression evaluated on the command line.
369 // Also, GHCi might want to revert CAFs, so we add these to the
370 // revertible_caf_list.
371 //
372 // The linker hackily arranges that references to newCaf from dynamic
373 // code end up pointing to newDynCAF.
374 void
375 newDynCAF(StgClosure *caf)
376 {
377 ACQUIRE_SM_LOCK;
378
379 ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
380 ((StgIndStatic *)caf)->static_link = revertible_caf_list;
381 revertible_caf_list = caf;
382
383 RELEASE_SM_LOCK;
384 }
385
386 /* -----------------------------------------------------------------------------
387 Nursery management.
388 -------------------------------------------------------------------------- */
389
390 static bdescr *
391 allocNursery (step *stp, bdescr *tail, nat blocks)
392 {
393 bdescr *bd;
394 nat i;
395
396 // Allocate a nursery: we allocate fresh blocks one at a time and
397 // cons them on to the front of the list, not forgetting to update
398 // the back pointer on the tail of the list to point to the new block.
399 for (i=0; i < blocks; i++) {
400 // @LDV profiling
401 /*
402 processNursery() in LdvProfile.c assumes that every block group in
403 the nursery contains only a single block. So, if a block group is
404 given multiple blocks, change processNursery() accordingly.
405 */
406 bd = allocBlock();
407 bd->link = tail;
408 // double-link the nursery: we might need to insert blocks
409 if (tail != NULL) {
410 tail->u.back = bd;
411 }
412 bd->step = stp;
413 bd->gen_no = 0;
414 bd->flags = 0;
415 bd->free = bd->start;
416 tail = bd;
417 }
418 tail->u.back = NULL;
419 return tail;
420 }
421
422 static void
423 assignNurseriesToCapabilities (void)
424 {
425 #ifdef THREADED_RTS
426 nat i;
427
428 for (i = 0; i < n_nurseries; i++) {
429 capabilities[i].r.rNursery = &nurseries[i];
430 capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
431 capabilities[i].r.rCurrentAlloc = NULL;
432 }
433 #else /* THREADED_RTS */
434 MainCapability.r.rNursery = &nurseries[0];
435 MainCapability.r.rCurrentNursery = nurseries[0].blocks;
436 MainCapability.r.rCurrentAlloc = NULL;
437 #endif
438 }
439
440 void
441 allocNurseries( void )
442 {
443 nat i;
444
445 for (i = 0; i < n_nurseries; i++) {
446 nurseries[i].blocks =
447 allocNursery(&nurseries[i], NULL,
448 RtsFlags.GcFlags.minAllocAreaSize);
449 nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
450 nurseries[i].old_blocks = NULL;
451 nurseries[i].n_old_blocks = 0;
452 /* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
453 }
454 assignNurseriesToCapabilities();
455 }
456
457 void
458 resetNurseries( void )
459 {
460 nat i;
461 bdescr *bd;
462 step *stp;
463
464 for (i = 0; i < n_nurseries; i++) {
465 stp = &nurseries[i];
466 for (bd = stp->blocks; bd; bd = bd->link) {
467 bd->free = bd->start;
468 ASSERT(bd->gen_no == 0);
469 ASSERT(bd->step == stp);
470 IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
471 }
472 }
473 assignNurseriesToCapabilities();
474 }
475
476 lnat
477 countNurseryBlocks (void)
478 {
479 nat i;
480 lnat blocks = 0;
481
482 for (i = 0; i < n_nurseries; i++) {
483 blocks += nurseries[i].n_blocks;
484 }
485 return blocks;
486 }
487
488 static void
489 resizeNursery ( step *stp, nat blocks )
490 {
491 bdescr *bd;
492 nat nursery_blocks;
493
494 nursery_blocks = stp->n_blocks;
495 if (nursery_blocks == blocks) return;
496
497 if (nursery_blocks < blocks) {
498 IF_DEBUG(gc, debugBelch("Increasing size of nursery to %d blocks\n",
499 blocks));
500 stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
501 }
502 else {
503 bdescr *next_bd;
504
505 IF_DEBUG(gc, debugBelch("Decreasing size of nursery to %d blocks\n",
506 blocks));
507
508 bd = stp->blocks;
509 while (nursery_blocks > blocks) {
510 next_bd = bd->link;
511 next_bd->u.back = NULL;
512 nursery_blocks -= bd->blocks; // might be a large block
513 freeGroup(bd);
514 bd = next_bd;
515 }
516 stp->blocks = bd;
517 // might have gone just under, by freeing a large block, so make
518 // up the difference.
519 if (nursery_blocks < blocks) {
520 stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
521 }
522 }
523
524 stp->n_blocks = blocks;
525 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
526 }
527
528 //
529 // Resize each of the nurseries to the specified size.
530 //
531 void
532 resizeNurseriesFixed (nat blocks)
533 {
534 nat i;
535 for (i = 0; i < n_nurseries; i++) {
536 resizeNursery(&nurseries[i], blocks);
537 }
538 }
539
540 //
541 // Resize the nurseries to the total specified size.
542 //
543 void
544 resizeNurseries (nat blocks)
545 {
546 // If there are multiple nurseries, then we just divide the number
547 // of available blocks between them.
548 resizeNurseriesFixed(blocks / n_nurseries);
549 }
550
551 /* -----------------------------------------------------------------------------
552 The allocate() interface
553
554 allocate(n) always succeeds, and returns a chunk of memory n words
555 long. n can be larger than the size of a block if necessary, in
556 which case a contiguous block group will be allocated.
557 -------------------------------------------------------------------------- */
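
/* For example, a caller that wants an ARR_WORDS closure with room for
 * nbytes of data might do something like the following (a sketch only;
 * compare stgAllocForGMP() further down, which does essentially this):
 *
 *   nat data_words  = (nbytes + sizeof(W_) - 1) / sizeof(W_);  // round up to words
 *   nat total_words = sizeofW(StgArrWords) + data_words;
 *   StgArrWords *arr = (StgArrWords *)allocate(total_words);
 *   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_words);
 */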
558
559 StgPtr
560 allocate( nat n )
561 {
562 bdescr *bd;
563 StgPtr p;
564
565 ACQUIRE_SM_LOCK;
566
567 TICK_ALLOC_HEAP_NOCTR(n);
568 CCS_ALLOC(CCCS,n);
569
570 /* big allocation (>LARGE_OBJECT_THRESHOLD) */
571 /* ToDo: allocate directly into generation 1 */
572 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
573 nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
574 bd = allocGroup(req_blocks);
575 dbl_link_onto(bd, &g0s0->large_objects);
576 g0s0->n_large_blocks += req_blocks;
577 bd->gen_no = 0;
578 bd->step = g0s0;
579 bd->flags = BF_LARGE;
580 bd->free = bd->start + n;
581 alloc_blocks += req_blocks;
582 RELEASE_SM_LOCK;
583 return bd->start;
584
585 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
586 } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
587 if (small_alloc_list) {
588 small_alloc_list->free = alloc_Hp;
589 }
590 bd = allocBlock();
591 bd->link = small_alloc_list;
592 small_alloc_list = bd;
593 bd->gen_no = 0;
594 bd->step = g0s0;
595 bd->flags = 0;
596 alloc_Hp = bd->start;
597 alloc_HpLim = bd->start + BLOCK_SIZE_W;
598 alloc_blocks++;
599 }
600
601 p = alloc_Hp;
602 alloc_Hp += n;
603 RELEASE_SM_LOCK;
604 return p;
605 }
606
607 lnat
608 allocated_bytes( void )
609 {
610 lnat allocated;
611
612 allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp);
613 if (pinned_object_block != NULL) {
614 allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
615 pinned_object_block->free;
616 }
617
618 return allocated;
619 }
620
621 void
622 tidyAllocateLists (void)
623 {
624 if (small_alloc_list != NULL) {
625 ASSERT(alloc_Hp >= small_alloc_list->start &&
626 alloc_Hp <= small_alloc_list->start + BLOCK_SIZE);
627 small_alloc_list->free = alloc_Hp;
628 }
629 }
630
631 /* -----------------------------------------------------------------------------
632 allocateLocal()
633
634 This allocates memory in the current thread - it is intended for
635 use primarily from STG-land where we have a Capability. It is
636 better than allocate() because it doesn't require taking the
637 sm_mutex lock in the common case.
638
639 Memory is allocated directly from the nursery if possible (but not
640 from the current nursery block, so as not to interfere with
641 Hp/HpLim).
642 -------------------------------------------------------------------------- */
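
/* For example, allocating and initialising a MUT_VAR on the current
 * capability (a sketch, assuming the usual closure macros; `init' is a
 * hypothetical initial value):
 *
 *   StgMutVar *mv = (StgMutVar *)allocateLocal(cap, sizeofW(StgMutVar));
 *   SET_HDR(mv, &stg_MUT_VAR_DIRTY_info, CCCS);
 *   mv->var = init;
 */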
643
644 StgPtr
645 allocateLocal (Capability *cap, nat n)
646 {
647 bdescr *bd;
648 StgPtr p;
649
650 TICK_ALLOC_HEAP_NOCTR(n);
651 CCS_ALLOC(CCCS,n);
652
653 /* big allocation (>LARGE_OBJECT_THRESHOLD) */
654 /* ToDo: allocate directly into generation 1 */
655 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
656 nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
657 ACQUIRE_SM_LOCK;
658 bd = allocGroup(req_blocks);
659 dbl_link_onto(bd, &g0s0->large_objects);
660 g0s0->n_large_blocks += req_blocks;
661 bd->gen_no = 0;
662 bd->step = g0s0;
663 bd->flags = BF_LARGE;
664 bd->free = bd->start + n;
665 alloc_blocks += req_blocks;
666 RELEASE_SM_LOCK;
667 return bd->start;
668
669 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
670 } else {
671
672 bd = cap->r.rCurrentAlloc;
673 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
674
675 // The CurrentAlloc block is full, we need to find another
676 // one. First, we try taking the next block from the
677 // nursery:
678 bd = cap->r.rCurrentNursery->link;
679
680 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
681 // The nursery is empty, or the next block is already
682 // full: allocate a fresh block (we can't fail here).
683 ACQUIRE_SM_LOCK;
684 bd = allocBlock();
685 cap->r.rNursery->n_blocks++;
686 RELEASE_SM_LOCK;
687 bd->gen_no = 0;
688 bd->step = cap->r.rNursery;
689 bd->flags = 0;
690 } else {
691 // we have a block in the nursery: take it and put
692 // it at the *front* of the nursery list, and use it
693 // to allocate() from.
694 cap->r.rCurrentNursery->link = bd->link;
695 if (bd->link != NULL) {
696 bd->link->u.back = cap->r.rCurrentNursery;
697 }
698 }
699 dbl_link_onto(bd, &cap->r.rNursery->blocks);
700 cap->r.rCurrentAlloc = bd;
701 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
702 }
703 }
704 p = bd->free;
705 bd->free += n;
706 return p;
707 }
708
709 /* ---------------------------------------------------------------------------
710 Allocate a fixed/pinned object.
711
712 We allocate small pinned objects into a single block, allocating a
713 new block when the current one overflows. The block is chained
714 onto the large_object_list of generation 0 step 0.
715
716 NOTE: The GC can't in general handle pinned objects. This
717 interface is only safe to use for ByteArrays, which have no
718 pointers and don't require scavenging. It works because the
719 block's descriptor has the BF_LARGE flag set, so the block is
720 treated as a large object and chained onto various lists, rather
721 than the individual objects being copied. However, when it comes
722 to scavenge the block, the GC will only scavenge the first object.
723 The reason is that the GC can't linearly scan a block of pinned
724 objects at the moment (doing so would require using the
725 mostly-copying techniques). But since we're restricting ourselves
726 to pinned ByteArrays, not scavenging is ok.
727
728 This function is called by newPinnedByteArray# which immediately
729 fills the allocated memory with a MutableByteArray#.
730 ------------------------------------------------------------------------- */
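
/* Roughly what newPinnedByteArray# does with the result (a sketch in C
 * syntax; the real code is the cmm implementation of the primop, and
 * ROUNDUP_BYTES_TO_WDS is assumed to be the usual RTS rounding macro):
 *
 *   nat words = ROUNDUP_BYTES_TO_WDS(nbytes);
 *   StgArrWords *arr = (StgArrWords *)allocatePinned(sizeofW(StgArrWords) + words);
 *   SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, words);
 */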
731
732 StgPtr
733 allocatePinned( nat n )
734 {
735 StgPtr p;
736 bdescr *bd = pinned_object_block;
737
738 // If the request is for a large object, then allocate()
739 // will give us a pinned object anyway.
740 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
741 return allocate(n);
742 }
743
744 ACQUIRE_SM_LOCK;
745
746 TICK_ALLOC_HEAP_NOCTR(n);
747 CCS_ALLOC(CCCS,n);
748
749 // we always return 8-byte aligned memory. bd->free must be
750 // 8-byte aligned to begin with, so we just round up n to
751 // the nearest multiple of 8 bytes.
752 if (sizeof(StgWord) == 4) {
753 n = (n+1) & ~1;
754 }
755
756 // If we don't have a block of pinned objects yet, or the current
757 // one isn't large enough to hold the new object, allocate a new one.
758 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
759 pinned_object_block = bd = allocBlock();
760 dbl_link_onto(bd, &g0s0->large_objects);
761 bd->gen_no = 0;
762 bd->step = g0s0;
763 bd->flags = BF_PINNED | BF_LARGE;
764 bd->free = bd->start;
765 alloc_blocks++;
766 }
767
768 p = bd->free;
769 bd->free += n;
770 RELEASE_SM_LOCK;
771 return p;
772 }
773
774 /* -----------------------------------------------------------------------------
775 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
776 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
777 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
778 and is put on the mutable list.
779 -------------------------------------------------------------------------- */
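
/* The writeMutVar# primop is expected to invoke the barrier before the
 * store, roughly like this (a sketch in C syntax; the real code is cmm):
 *
 *   dirty_MUT_VAR(&cap->r, (StgClosure *)mv);  // barrier: maybe move to mutable list
 *   ((StgMutVar *)mv)->var = new_value;        // then perform the write
 */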
780
781 void
782 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
783 {
784 Capability *cap = regTableToCapability(reg);
785 bdescr *bd;
786 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
787 p->header.info = &stg_MUT_VAR_DIRTY_info;
788 bd = Bdescr((StgPtr)p);
789 if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
790 }
791 }
792
793 /* -----------------------------------------------------------------------------
794 Allocation functions for GMP.
795
796 These all use the allocateLocal() interface - we can't have any garbage
797 collection going on during a gmp operation, so we use allocateLocal(),
798 which always succeeds. The gmp operations which might need to
799 allocate will ask the storage manager (via doYouWantToGC()) whether
800 a garbage collection is required, in case we get into a loop doing
801 only allocate() style allocation.
802 -------------------------------------------------------------------------- */
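
/* A caller following the pattern described above might look roughly like
 * this (a sketch; the surrounding scheduling code is elided, and
 * doYouWantToGC() is the storage-manager query mentioned above):
 *
 *   if (doYouWantToGC()) {
 *       // return to the scheduler so a GC can happen before the gmp call
 *   }
 *   mpz_add(result, x, y);   // any allocation goes via stgAllocForGMP()
 */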
803
804 static void *
805 stgAllocForGMP (size_t size_in_bytes)
806 {
807 StgArrWords* arr;
808 nat data_size_in_words, total_size_in_words;
809
810 /* round up to a whole number of words */
811 data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
812 total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
813
814 /* allocate and fill it in. */
815 #if defined(THREADED_RTS)
816 arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
817 #else
818 arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
819 #endif
820 SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
821
822 /* and return a ptr to the goods inside the array */
823 return arr->payload;
824 }
825
826 static void *
827 stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
828 {
829 void *new_stuff_ptr = stgAllocForGMP(new_size);
830 nat i = 0;
831 char *p = (char *) ptr;
832 char *q = (char *) new_stuff_ptr;
833
834 for (; i < old_size; i++, p++, q++) {
835 *q = *p;
836 }
837
838 return(new_stuff_ptr);
839 }
840
841 static void
842 stgDeallocForGMP (void *ptr STG_UNUSED,
843 size_t size STG_UNUSED)
844 {
845 /* easy for us: the garbage collector does the dealloc'n */
846 }
847
848 /* -----------------------------------------------------------------------------
849 * Stats and stuff
850 * -------------------------------------------------------------------------- */
851
852 /* -----------------------------------------------------------------------------
853 * calcAllocated()
854 *
855 * Approximate how much we've allocated: number of blocks in the
856 * nursery + blocks allocated via allocate() - unused nursery blocks.
857 * This leaves a little slop at the end of each block, and doesn't
858 * take into account large objects (ToDo).
859 * -------------------------------------------------------------------------- */
860
861 lnat
862 calcAllocated( void )
863 {
864 nat allocated;
865 bdescr *bd;
866
867 allocated = allocated_bytes();
868 allocated += countNurseryBlocks() * BLOCK_SIZE_W;
869
870 {
871 #ifdef THREADED_RTS
872 nat i;
873 for (i = 0; i < n_nurseries; i++) {
874 Capability *cap;
875 for ( bd = capabilities[i].r.rCurrentNursery->link;
876 bd != NULL; bd = bd->link ) {
877 allocated -= BLOCK_SIZE_W;
878 }
879 cap = &capabilities[i];
880 if (cap->r.rCurrentNursery->free <
881 cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
882 allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
883 - cap->r.rCurrentNursery->free;
884 }
885 }
886 #else
887 bdescr *current_nursery = MainCapability.r.rCurrentNursery;
888
889 for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
890 allocated -= BLOCK_SIZE_W;
891 }
892 if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
893 allocated -= (current_nursery->start + BLOCK_SIZE_W)
894 - current_nursery->free;
895 }
896 #endif
897 }
898
899 total_allocated += allocated;
900 return allocated;
901 }
902
903 /* Approximate the amount of live data in the heap. To be called just
904 * after garbage collection (see GarbageCollect()).
905 */
906 extern lnat
907 calcLive(void)
908 {
909 nat g, s;
910 lnat live = 0;
911 step *stp;
912
913 if (RtsFlags.GcFlags.generations == 1) {
914 live = (g0s0->n_blocks - 1) * BLOCK_SIZE_W +
915 ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
916 return live;
917 }
918
919 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
920 for (s = 0; s < generations[g].n_steps; s++) {
921 /* approximate amount of live data (doesn't take into account slop
922 * at end of each block).
923 */
924 if (g == 0 && s == 0) {
925 continue;
926 }
927 stp = &generations[g].steps[s];
928 live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
929 if (stp->hp_bd != NULL) {
930 live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
931 / sizeof(W_);
932 }
933 if (stp->scavd_hp != NULL) {
934 live -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp;
935 }
936 }
937 }
938 return live;
939 }
940
941 /* Approximate the number of blocks that will be needed at the next
942 * garbage collection.
943 *
944 * Assume: all data currently live will remain live. Steps that will
945 * be collected next time will therefore need twice as many blocks
946 * since all the data will be copied.
947 */
948 extern lnat
949 calcNeeded(void)
950 {
951 lnat needed = 0;
952 nat g, s;
953 step *stp;
954
955 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
956 for (s = 0; s < generations[g].n_steps; s++) {
957 if (g == 0 && s == 0) { continue; }
958 stp = &generations[g].steps[s];
959 if (generations[g].steps[0].n_blocks +
960 generations[g].steps[0].n_large_blocks
961 > generations[g].max_blocks
962 && stp->is_compacted == 0) {
963 needed += 2 * stp->n_blocks;
964 } else {
965 needed += stp->n_blocks;
966 }
967 }
968 }
969 return needed;
970 }
971
972 /* ----------------------------------------------------------------------------
973 Executable memory
974
975 Executable memory must be managed separately from non-executable
976 memory. Most OSs these days require you to jump through hoops to
977 dynamically allocate executable memory, due to various security
978 measures.
979
980 Here we provide a small memory allocator for executable memory.
981 Memory is managed with a page granularity; we allocate linearly
982 in the page, and when the page is emptied (all objects on the page
983 are free) we free the page again, not forgetting to make it
984 non-executable.
985 ------------------------------------------------------------------------- */
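
/* Typical usage, e.g. for foreign-export adjustors (a sketch; the stub
 * template and its size are hypothetical):
 *
 *   void *code = allocateExec(stub_size);
 *   memcpy(code, stub_template, stub_size);
 *   // ... later, when the stub is no longer needed:
 *   freeExec(code);
 */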
986
987 static bdescr *exec_block;
988
989 void *allocateExec (nat bytes)
990 {
991 void *ret;
992 nat n;
993
994 ACQUIRE_SM_LOCK;
995
996 // round up to words.
997 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
998
999 if (n+1 > BLOCK_SIZE_W) {
1000 barf("allocateExec: can't handle large objects");
1001 }
1002
1003 if (exec_block == NULL ||
1004 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1005 bdescr *bd;
1006 lnat pagesize = getPageSize();
1007 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1008 IF_DEBUG(gc, debugBelch("allocate exec block %p\n", bd->start));
1009 bd->gen_no = 0;
1010 bd->flags = BF_EXEC;
1011 bd->link = exec_block;
1012 if (exec_block != NULL) {
1013 exec_block->u.back = bd;
1014 }
1015 bd->u.back = NULL;
1016 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1017 exec_block = bd;
1018 }
1019 *(exec_block->free) = n; // store the size of this chunk
1020 exec_block->gen_no += n; // gen_no stores the number of words allocated
1021 ret = exec_block->free + 1;
1022 exec_block->free += n + 1;
1023
1024 RELEASE_SM_LOCK;
1025 return ret;
1026 }
1027
1028 void freeExec (void *addr)
1029 {
1030 StgPtr p = (StgPtr)addr - 1;
1031 bdescr *bd = Bdescr((StgPtr)p);
1032
1033 if ((bd->flags & BF_EXEC) == 0) {
1034 barf("freeExec: not executable");
1035 }
1036
1037 if (*(StgPtr)p == 0) {
1038 barf("freeExec: already free?");
1039 }
1040
1041 ACQUIRE_SM_LOCK;
1042
1043 bd->gen_no -= *(StgPtr)p;
1044 *(StgPtr)p = 0;
1045
1046 // Free the block if it is empty, but not if it is the block at
1047 // the head of the queue.
1048 if (bd->gen_no == 0 && bd != exec_block) {
1049 IF_DEBUG(gc, debugBelch("free exec block %p\n", bd->start));
1050 if (bd->u.back) {
1051 bd->u.back->link = bd->link;
1052 } else {
1053 exec_block = bd->link;
1054 }
1055 if (bd->link) {
1056 bd->link->u.back = bd->u.back;
1057 }
1058 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1059 freeGroup(bd);
1060 }
1061
1062 RELEASE_SM_LOCK;
1063 }
1064
1065 /* -----------------------------------------------------------------------------
1066 Debugging
1067
1068 memInventory() checks for memory leaks by counting up all the
1069 blocks we know about and comparing that to the number of blocks
1070 allegedly floating around in the system.
1071 -------------------------------------------------------------------------- */
1072
1073 #ifdef DEBUG
1074
1075 static lnat
1076 stepBlocks (step *stp)
1077 {
1078 lnat total_blocks;
1079 bdescr *bd;
1080
1081 total_blocks = stp->n_blocks;
1082 total_blocks += stp->n_old_blocks;
1083 for (bd = stp->large_objects; bd; bd = bd->link) {
1084 total_blocks += bd->blocks;
1085 /* hack for megablock groups: they have an extra block or two in
1086 the second and subsequent megablocks where the block
1087 descriptors would normally go.
1088 */
1089 if (bd->blocks > BLOCKS_PER_MBLOCK) {
1090 total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
1091 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
1092 }
1093 }
1094 return total_blocks;
1095 }
1096
1097 void
1098 memInventory(void)
1099 {
1100 nat g, s, i;
1101 step *stp;
1102 bdescr *bd;
1103 lnat total_blocks = 0, free_blocks = 0;
1104
1105 /* count the blocks we currently have */
1106
1107 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1108 for (i = 0; i < n_capabilities; i++) {
1109 for (bd = capabilities[i].mut_lists[g]; bd != NULL; bd = bd->link) {
1110 total_blocks += bd->blocks;
1111 }
1112 }
1113 for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
1114 total_blocks += bd->blocks;
1115 }
1116 for (s = 0; s < generations[g].n_steps; s++) {
1117 if (g==0 && s==0) continue;
1118 stp = &generations[g].steps[s];
1119 total_blocks += stepBlocks(stp);
1120 }
1121 }
1122
1123 for (i = 0; i < n_nurseries; i++) {
1124 total_blocks += stepBlocks(&nurseries[i]);
1125 }
1126 #ifdef THREADED_RTS
1127 // We put pinned object blocks in g0s0, so better count blocks there too.
1128 total_blocks += stepBlocks(g0s0);
1129 #endif
1130
1131 /* any blocks held by allocate() */
1132 for (bd = small_alloc_list; bd; bd = bd->link) {
1133 total_blocks += bd->blocks;
1134 }
1135
1136 #ifdef PROFILING
1137 if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
1138 total_blocks += retainerStackBlocks();
1139 }
1140 #endif
1141
1142 // count the blocks allocated by the arena allocator
1143 total_blocks += arenaBlocks();
1144
1145 // count the blocks containing executable memory
1146 for (bd = exec_block; bd; bd = bd->link) {
1147 total_blocks += bd->blocks;
1148 }
1149
1150 /* count the blocks on the free list */
1151 free_blocks = countFreeList();
1152
1153 if (total_blocks + free_blocks != mblocks_allocated *
1154 BLOCKS_PER_MBLOCK) {
1155 debugBelch("Blocks: %ld live + %ld free = %ld total (%ld around)\n",
1156 total_blocks, free_blocks, total_blocks + free_blocks,
1157 mblocks_allocated * BLOCKS_PER_MBLOCK);
1158 }
1159
1160 ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
1161 }
1162
1163
1164 nat
1165 countBlocks(bdescr *bd)
1166 {
1167 nat n;
1168 for (n=0; bd != NULL; bd=bd->link) {
1169 n += bd->blocks;
1170 }
1171 return n;
1172 }
1173
1174 /* Full heap sanity check. */
1175 void
1176 checkSanity( void )
1177 {
1178 nat g, s;
1179
1180 if (RtsFlags.GcFlags.generations == 1) {
1181 checkHeap(g0s0->blocks);
1182 checkChain(g0s0->large_objects);
1183 } else {
1184
1185 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1186 for (s = 0; s < generations[g].n_steps; s++) {
1187 if (g == 0 && s == 0) { continue; }
1188 ASSERT(countBlocks(generations[g].steps[s].blocks)
1189 == generations[g].steps[s].n_blocks);
1190 ASSERT(countBlocks(generations[g].steps[s].large_objects)
1191 == generations[g].steps[s].n_large_blocks);
1192 checkHeap(generations[g].steps[s].blocks);
1193 checkChain(generations[g].steps[s].large_objects);
1194 if (g > 0) {
1195 checkMutableList(generations[g].mut_list, g);
1196 }
1197 }
1198 }
1199
1200 for (s = 0; s < n_nurseries; s++) {
1201 ASSERT(countBlocks(nurseries[s].blocks)
1202 == nurseries[s].n_blocks);
1203 ASSERT(countBlocks(nurseries[s].large_objects)
1204 == nurseries[s].n_large_blocks);
1205 }
1206
1207 checkFreeListSanity();
1208 }
1209 }
1210
1211 /* Nursery sanity check */
1212 void
1213 checkNurserySanity( step *stp )
1214 {
1215 bdescr *bd, *prev;
1216 nat blocks = 0;
1217
1218 prev = NULL;
1219 for (bd = stp->blocks; bd != NULL; bd = bd->link) {
1220 ASSERT(bd->u.back == prev);
1221 prev = bd;
1222 blocks += bd->blocks;
1223 }
1224 ASSERT(blocks == stp->n_blocks);
1225 }
1226
1227 // handy function for use in gdb, because Bdescr() is inlined.
1228 extern bdescr *_bdescr( StgPtr p );
1229
1230 bdescr *
1231 _bdescr( StgPtr p )
1232 {
1233 return Bdescr(p);
1234 }
1235
1236 #endif