Add +RTS -n<size>: divide the nursery into chunks
[ghc.git] rts/sm/Storage.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2012
4 *
5 * Storage manager front end
6 *
7 * Documentation on the architecture of the Storage Manager can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16
17 #include "Storage.h"
18 #include "GCThread.h"
19 #include "RtsUtils.h"
20 #include "Stats.h"
21 #include "BlockAlloc.h"
22 #include "Weak.h"
23 #include "Sanity.h"
24 #include "Arena.h"
25 #include "Capability.h"
26 #include "Schedule.h"
27 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
28 #include "OSMem.h"
29 #include "Trace.h"
30 #include "GC.h"
31 #include "Evac.h"
32 #if defined(ios_HOST_OS)
33 #include "Hash.h"
34 #endif
35
36 #include <string.h>
37
38 #include "ffi.h"
39
40 /*
41 * All these globals require sm_mutex to access in THREADED_RTS mode.
42 */
43 StgIndStatic *dyn_caf_list = NULL;
44 StgIndStatic *debug_caf_list = NULL;
45 StgIndStatic *revertible_caf_list = NULL;
46 rtsBool keepCAFs;
47
48 W_ large_alloc_lim;    /* GC if n_new_large_words in gen 0
49                         * reaches this (in words). */
50
51 bdescr *exec_block;
52
53 generation *generations = NULL; /* all the generations */
54 generation *g0 = NULL; /* generation 0, for convenience */
55 generation *oldest_gen = NULL; /* oldest generation, for convenience */
56
57 nursery *nurseries = NULL;     /* array of nurseries, size == n_nurseries */
58 nat n_nurseries;
59 volatile StgWord next_nursery = 0;
60
61 #ifdef THREADED_RTS
62 /*
63 * Storage manager mutex: protects all the above state from
64 * simultaneous access by two STG threads.
65 */
66 Mutex sm_mutex;
67 #endif
68
69 static void allocNurseries (nat from, nat to);
70 static void assignNurseriesToCapabilities (nat from, nat to);
71
72 static void
73 initGeneration (generation *gen, int g)
74 {
75 gen->no = g;
76 gen->collections = 0;
77 gen->par_collections = 0;
78 gen->failed_promotions = 0;
79 gen->max_blocks = 0;
80 gen->blocks = NULL;
81 gen->n_blocks = 0;
82 gen->n_words = 0;
83 gen->live_estimate = 0;
84 gen->old_blocks = NULL;
85 gen->n_old_blocks = 0;
86 gen->large_objects = NULL;
87 gen->n_large_blocks = 0;
88 gen->n_large_words = 0;
89 gen->n_new_large_words = 0;
90 gen->scavenged_large_objects = NULL;
91 gen->n_scavenged_large_blocks = 0;
92 gen->mark = 0;
93 gen->compact = 0;
94 gen->bitmap = NULL;
95 #ifdef THREADED_RTS
96 initSpinLock(&gen->sync);
97 #endif
98 gen->threads = END_TSO_QUEUE;
99 gen->old_threads = END_TSO_QUEUE;
100 gen->weak_ptr_list = NULL;
101 gen->old_weak_ptr_list = NULL;
102 }
103
104 void
105 initStorage (void)
106 {
107 nat g;
108
109 if (generations != NULL) {
110 // multi-init protection
111 return;
112 }
113
114 initMBlocks();
115
116 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
117 * doing something reasonable.
118 */
119 /* We use the NOT_NULL variant or gcc warns that the test is always true */
120 ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
121 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
122 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
123
124 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
125 RtsFlags.GcFlags.heapSizeSuggestion >
126 RtsFlags.GcFlags.maxHeapSize) {
127 RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
128 }
129
130 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
131 RtsFlags.GcFlags.minAllocAreaSize >
132 RtsFlags.GcFlags.maxHeapSize) {
133 errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
134 RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
135 }
136
137 initBlockAllocator();
138
139 #if defined(THREADED_RTS)
140 initMutex(&sm_mutex);
141 #endif
142
143 ACQUIRE_SM_LOCK;
144
145 /* allocate generation info array */
146 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
147 * sizeof(struct generation_),
148 "initStorage: gens");
149
150 /* Initialise all generations */
151 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
152 initGeneration(&generations[g], g);
153 }
154
155 /* A couple of convenience pointers */
156 g0 = &generations[0];
157 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
158
159 /* Set up the destination pointers in each younger gen. step */
160 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
161 generations[g].to = &generations[g+1];
162 }
163 oldest_gen->to = oldest_gen;
164
165     /* Set up mark/compact or mark/sweep for the oldest generation, if requested. */
166 if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
167 if (RtsFlags.GcFlags.generations == 1) {
168 errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
169 } else {
170 oldest_gen->mark = 1;
171 if (RtsFlags.GcFlags.compact)
172 oldest_gen->compact = 1;
173 }
174 }
175
176 generations[0].max_blocks = 0;
177
178 dyn_caf_list = (StgIndStatic*)END_OF_STATIC_LIST;
179 debug_caf_list = (StgIndStatic*)END_OF_STATIC_LIST;
180 revertible_caf_list = (StgIndStatic*)END_OF_STATIC_LIST;
181
182 /* initialise the allocate() interface */
183 large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
184
185 exec_block = NULL;
186
187 #ifdef THREADED_RTS
188 initSpinLock(&gc_alloc_block_sync);
189 #ifdef PROF_SPIN
190 whitehole_spin = 0;
191 #endif
192 #endif
193
194 N = 0;
195
196 next_nursery = 0;
197 storageAddCapabilities(0, n_capabilities);
198
199 IF_DEBUG(gc, statDescribeGens());
200
201 RELEASE_SM_LOCK;
202
203 traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
204 RtsFlags.GcFlags.generations,
205 RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE_W * sizeof(W_),
206 RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W * sizeof(W_),
207 MBLOCK_SIZE_W * sizeof(W_),
208 BLOCK_SIZE_W * sizeof(W_));
209 }
210
211 void storageAddCapabilities (nat from, nat to)
212 {
213 nat n, g, i, new_n_nurseries;
214
215 if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
216 new_n_nurseries = to;
217 } else {
218 memcount total_alloc = to * RtsFlags.GcFlags.minAllocAreaSize;
219 new_n_nurseries =
220 stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
221 }
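    // Illustrative example (not from the original source): with 4 capabilities,
    // -A1m (minAllocAreaSize = 256 four-kilobyte blocks per capability) and
    // -n256k (nurseryChunkSize = 64 blocks), total_alloc is 4 * 256 = 1024
    // blocks, so we create 1024 / 64 = 16 nursery chunks, which the
    // capabilities then claim one at a time via getNewNursery() below.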
222
223 if (from > 0) {
224 nurseries = stgReallocBytes(nurseries,
225 new_n_nurseries * sizeof(struct nursery_),
226 "storageAddCapabilities");
227 } else {
228 nurseries = stgMallocBytes(new_n_nurseries * sizeof(struct nursery_),
229 "storageAddCapabilities");
230 }
231
232 // we've moved the nurseries, so we have to update the rNursery
233 // pointers from the Capabilities.
234 for (i = 0; i < to; i++) {
235 capabilities[i]->r.rNursery = &nurseries[i];
236 }
237
238 /* The allocation area. Policy: keep the allocation area
239 * small to begin with, even if we have a large suggested heap
240 * size. Reason: we're going to do a major collection first, and we
241 * don't want it to be a big one. This vague idea is borne out by
242 * rigorous experimental evidence.
243 */
244 allocNurseries(n_nurseries, new_n_nurseries);
245 n_nurseries = new_n_nurseries;
246
247 /*
248 * Assign each of the new capabilities a nursery. Remember to start from
249 * next_nursery, because we may have already consumed some of the earlier
250 * nurseries.
251 */
252 assignNurseriesToCapabilities(from,to);
253
254 // allocate a block for each mut list
255 for (n = from; n < to; n++) {
256 for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
257 capabilities[n]->mut_lists[g] = allocBlock();
258 }
259 }
260
261 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
262 newThreadLocalKey(&gctKey);
263 #endif
264
265 initGcThreads(from, to);
266 }
267
268
269 void
270 exitStorage (void)
271 {
272 updateNurseriesStats();
273 stat_exit();
274 }
275
276 void
277 freeStorage (rtsBool free_heap)
278 {
279 stgFree(generations);
280 if (free_heap) freeAllMBlocks();
281 #if defined(THREADED_RTS)
282 closeMutex(&sm_mutex);
283 #endif
284 stgFree(nurseries);
285 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
286 freeThreadLocalKey(&gctKey);
287 #endif
288 freeGcThreads();
289 }
290
291 /* -----------------------------------------------------------------------------
292 Note [CAF management].
293
294 The entry code for every CAF does the following:
295
296 - calls newCaf, which builds a CAF_BLACKHOLE on the heap and atomically
297 updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE
298
299 - if newCaf returns zero, it re-enters the CAF (see Note [atomic
300 CAF entry])
301
302 - pushes an update frame pointing to the CAF_BLACKHOLE
303
304    Why do we build a BLACKHOLE in the heap rather than just updating
305 the thunk directly? It's so that we only need one kind of update
306 frame - otherwise we'd need a static version of the update frame
307 too, and various other parts of the RTS that deal with update
308 frames would also need special cases for static update frames.
309
310 newCaf() does the following:
311
312 - atomically locks the CAF (see [atomic CAF entry])
313
314 - it builds a CAF_BLACKHOLE on the heap
315
316 - it updates the CAF with an IND_STATIC pointing to the
317 CAF_BLACKHOLE, atomically.
318
319 - it puts the CAF on the oldest generation's mutable list.
320 This is so that we treat the CAF as a root when collecting
321 younger generations.
322
323 - links the CAF onto the CAF list (see below)
324
325 ------------------
326 Note [atomic CAF entry]
327
328 With THREADED_RTS, newCaf() is required to be atomic (see
329 #5558). This is because if two threads happened to enter the same
330 CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
331 and so the normal threadPaused() machinery for detecting duplicate
332 evaluation will not detect this. Hence in lockCAF() below, we
333 atomically lock the CAF with WHITEHOLE before updating it with
334 IND_STATIC, and return zero if another thread locked the CAF first.
335 In the event that we lost the race, CAF entry code will re-enter
336 the CAF and block on the other thread's CAF_BLACKHOLE.
337
338 ------------------
339 Note [GHCi CAFs]
340
341    For GHCi, we have additional requirements when dealing with CAFs:
342
343 - we must *retain* all dynamically-loaded CAFs ever entered,
344 just in case we need them again.
345 - we must be able to *revert* CAFs that have been evaluated, to
346 their pre-evaluated form.
347
348 To do this, we use an additional CAF list. When newCaf() is
349 called on a dynamically-loaded CAF, we add it to the CAF list
350 instead of the old-generation mutable list, and save away its
351 old info pointer (in caf->saved_info) for later reversion.
352
353 To revert all the CAFs, we traverse the CAF list and reset the
354 info pointer to caf->saved_info, then throw away the CAF list.
355 (see GC.c:revertCAFs()).
356
357 -- SDM 29/1/01
358
359 -------------------------------------------------------------------------- */
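/* ------------------
 * For illustration only: a rough C-like sketch of the compiler-generated
 * CAF entry sequence described in Note [CAF management] above.  The names
 * push_update_frame and enter are placeholders, not real RTS functions:
 *
 *     bh = newCAF(reg, caf);     // lock the CAF, build a CAF_BLACKHOLE,
 *                                // turn the CAF into an IND_STATIC to it
 *     if (bh == NULL) {
 *         // lost the race with another thread: re-enter the updated CAF
 *         return enter(caf);
 *     }
 *     push_update_frame(bh);     // the update frame overwrites bh on return
 *     // ... evaluate the CAF's right-hand side ...
 */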
360
361 STATIC_INLINE StgInd *
362 lockCAF (StgRegTable *reg, StgIndStatic *caf)
363 {
364 const StgInfoTable *orig_info;
365 Capability *cap = regTableToCapability(reg);
366 StgInd *bh;
367
368 orig_info = caf->header.info;
369
370 #ifdef THREADED_RTS
371 const StgInfoTable *cur_info;
372
373 if (orig_info == &stg_IND_STATIC_info ||
374 orig_info == &stg_WHITEHOLE_info) {
375 // already claimed by another thread; re-enter the CAF
376 return NULL;
377 }
378
379 cur_info = (const StgInfoTable *)
380 cas((StgVolatilePtr)&caf->header.info,
381 (StgWord)orig_info,
382 (StgWord)&stg_WHITEHOLE_info);
383
384 if (cur_info != orig_info) {
385 // already claimed by another thread; re-enter the CAF
386 return NULL;
387 }
388
389 // successfully claimed by us; overwrite with IND_STATIC
390 #endif
391
392 // For the benefit of revertCAFs(), save the original info pointer
393 caf->saved_info = orig_info;
394
395 // Allocate the blackhole indirection closure
396 bh = (StgInd *)allocate(cap, sizeofW(*bh));
397 SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
398 bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
399
400 caf->indirectee = (StgClosure *)bh;
401 write_barrier();
402 SET_INFO((StgClosure*)caf,&stg_IND_STATIC_info);
403
404 return bh;
405 }
406
407 StgInd *
408 newCAF(StgRegTable *reg, StgIndStatic *caf)
409 {
410 StgInd *bh;
411
412 bh = lockCAF(reg, caf);
413 if (!bh) return NULL;
414
415 if(keepCAFs)
416 {
417 // Note [dyn_caf_list]
418 // If we are in GHCi _and_ we are using dynamic libraries,
419 // then we can't redirect newCAF calls to newDynCAF (see below),
420 // so we make newCAF behave almost like newDynCAF.
421 // The dynamic libraries might be used by both the interpreted
422 // program and GHCi itself, so they must not be reverted.
423 // This also means that in GHCi with dynamic libraries, CAFs are not
424 // garbage collected. If this turns out to be a problem, we could
425 // do another hack here and do an address range test on caf to figure
426 // out whether it is from a dynamic library.
427
428 ACQUIRE_SM_LOCK; // dyn_caf_list is global, locked by sm_mutex
429 caf->static_link = (StgClosure*)dyn_caf_list;
430 dyn_caf_list = caf;
431 RELEASE_SM_LOCK;
432 }
433 else
434 {
435 // Put this CAF on the mutable list for the old generation.
436 if (oldest_gen->no != 0) {
437 recordMutableCap((StgClosure*)caf,
438 regTableToCapability(reg), oldest_gen->no);
439 }
440
441 #ifdef DEBUG
442 // In the DEBUG rts, we keep track of live CAFs by chaining them
443 // onto a list debug_caf_list. This is so that we can tell if we
444 // ever enter a GC'd CAF, and emit a suitable barf().
445 //
446 // The saved_info field of the CAF is used as the link field for
447 // debug_caf_list, because this field is only used by newDynCAF
448 // for revertible CAFs, and we don't put those on the
449 // debug_caf_list.
450 ACQUIRE_SM_LOCK; // debug_caf_list is global, locked by sm_mutex
451 ((StgIndStatic *)caf)->saved_info = (const StgInfoTable*)debug_caf_list;
452 debug_caf_list = (StgIndStatic*)caf;
453 RELEASE_SM_LOCK;
454 #endif
455 }
456
457 return bh;
458 }
459
460 // External API for setting the keepCAFs flag; see #3900.
461 void
462 setKeepCAFs (void)
463 {
464 keepCAFs = 1;
465 }
466
467 // An alternate version of newCaf which is used for dynamically loaded
468 // object code in GHCi. In this case we want to retain *all* CAFs in
469 // the object code, because they might be demanded at any time from an
470 // expression evaluated on the command line.
471 // Also, GHCi might want to revert CAFs, so we add these to the
472 // revertible_caf_list.
473 //
474 // The linker hackily arranges that references to newCaf from dynamic
475 // code end up pointing to newDynCAF.
476 StgInd *
477 newDynCAF (StgRegTable *reg, StgIndStatic *caf)
478 {
479 StgInd *bh;
480
481 bh = lockCAF(reg, caf);
482 if (!bh) return NULL;
483
484 ACQUIRE_SM_LOCK;
485
486 caf->static_link = (StgClosure*)revertible_caf_list;
487 revertible_caf_list = caf;
488
489 RELEASE_SM_LOCK;
490
491 return bh;
492 }
493
494 /* -----------------------------------------------------------------------------
495 Nursery management.
496 -------------------------------------------------------------------------- */
497
498 static bdescr *
499 allocNursery (bdescr *tail, W_ blocks)
500 {
501 bdescr *bd = NULL;
502 W_ i, n;
503
504     // We allocate the nursery in large contiguous chunks and then
505     // divide each chunk into single blocks manually.  This way we make
506     // sure that the blocks within a chunk are adjacent, so that the
507     // processor's automatic prefetching works across nursery blocks.  This is a
508 // tiny optimisation (~0.5%), but it's free.
509
510 while (blocks > 0) {
511 n = stg_min(BLOCKS_PER_MBLOCK, blocks);
512 // allocLargeChunk will prefer large chunks, but will pick up
513 // small chunks if there are any available. We must allow
514 // single blocks here to avoid fragmentation (#7257)
515 bd = allocLargeChunk(1, n);
516 n = bd->blocks;
517 blocks -= n;
518
519 for (i = 0; i < n; i++) {
520 initBdescr(&bd[i], g0, g0);
521
522 bd[i].blocks = 1;
523 bd[i].flags = 0;
524
525 if (i > 0) {
526 bd[i].u.back = &bd[i-1];
527 } else {
528 bd[i].u.back = NULL;
529 }
530
531 if (i+1 < n) {
532 bd[i].link = &bd[i+1];
533 } else {
534 bd[i].link = tail;
535 if (tail != NULL) {
536 tail->u.back = &bd[i];
537 }
538 }
539
540 bd[i].free = bd[i].start;
541 }
542
543 tail = &bd[0];
544 }
545
546 return &bd[0];
547 }
548
549 STATIC_INLINE void
550 assignNurseryToCapability (Capability *cap, nat n)
551 {
552 cap->r.rNursery = &nurseries[n];
553 cap->r.rCurrentNursery = nurseries[n].blocks;
554 newNurseryBlock(nurseries[n].blocks);
555 cap->r.rCurrentAlloc = NULL;
556 }
557
558 /*
559 * Give each Capability a nursery from the pool. No need to do atomic increments
560  * here; everything must be stopped to call this function.
561 */
562 static void
563 assignNurseriesToCapabilities (nat from, nat to)
564 {
565 nat i;
566
567 for (i = from; i < to; i++) {
568 assignNurseryToCapability(capabilities[i], next_nursery++);
569 }
570 }
571
572 static void
573 allocNurseries (nat from, nat to)
574 {
575 nat i;
576 memcount n_blocks;
577
578 if (RtsFlags.GcFlags.nurseryChunkSize) {
579 n_blocks = RtsFlags.GcFlags.nurseryChunkSize;
580 } else {
581 n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
582 }
583
584 for (i = from; i < to; i++) {
585 nurseries[i].blocks = allocNursery(NULL, n_blocks);
586 nurseries[i].n_blocks = n_blocks;
587 }
588 }
589
590 void
591 resetNurseries (void)
592 {
593 next_nursery = 0;
594 assignNurseriesToCapabilities(0, n_capabilities);
595
596 #ifdef DEBUG
597 bdescr *bd;
598 nat n;
599 for (n = 0; n < n_nurseries; n++) {
600 for (bd = nurseries[n].blocks; bd; bd = bd->link) {
601 ASSERT(bd->gen_no == 0);
602 ASSERT(bd->gen == g0);
603 IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE));
604 }
605 }
606 #endif
607 }
608
609 W_
610 countNurseryBlocks (void)
611 {
612 nat i;
613 W_ blocks = 0;
614
615 for (i = 0; i < n_nurseries; i++) {
616 blocks += nurseries[i].n_blocks;
617 }
618 return blocks;
619 }
620
621 static void
622 resizeNursery (nursery *nursery, W_ blocks)
623 {
624 bdescr *bd;
625 W_ nursery_blocks;
626
627 nursery_blocks = nursery->n_blocks;
628 if (nursery_blocks == blocks) return;
629
630 if (nursery_blocks < blocks) {
631         debugTrace(DEBUG_gc, "increasing size of nursery to %" FMT_Word " blocks",
632                    blocks);
633 nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
634 }
635 else {
636 bdescr *next_bd;
637
638         debugTrace(DEBUG_gc, "decreasing size of nursery to %" FMT_Word " blocks",
639                    blocks);
640
641 bd = nursery->blocks;
642 while (nursery_blocks > blocks) {
643 next_bd = bd->link;
644 next_bd->u.back = NULL;
645 nursery_blocks -= bd->blocks; // might be a large block
646 freeGroup(bd);
647 bd = next_bd;
648 }
649 nursery->blocks = bd;
650 // might have gone just under, by freeing a large block, so make
651 // up the difference.
652 if (nursery_blocks < blocks) {
653 nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
654 }
655 }
656
657 nursery->n_blocks = blocks;
658 ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
659 }
660
661 //
662 // Resize each of the nurseries to the specified size.
663 //
664 static void
665 resizeNurseriesEach (W_ blocks)
666 {
667 nat i;
668
669 for (i = 0; i < n_nurseries; i++) {
670 resizeNursery(&nurseries[i], blocks);
671 }
672 }
673
674 void
675 resizeNurseriesFixed (void)
676 {
677 nat blocks;
678
679 if (RtsFlags.GcFlags.nurseryChunkSize) {
680 blocks = RtsFlags.GcFlags.nurseryChunkSize;
681 } else {
682 blocks = RtsFlags.GcFlags.minAllocAreaSize;
683 }
684
685 resizeNurseriesEach(blocks);
686 }
687
688 //
689 // Resize the nurseries to the total specified size.
690 //
691 void
692 resizeNurseries (W_ blocks)
693 {
694 // If there are multiple nurseries, then we just divide the number
695 // of available blocks between them.
696 resizeNurseriesEach(blocks / n_nurseries);
697 }
698
699 rtsBool
700 getNewNursery (Capability *cap)
701 {
702 StgWord i = atomic_inc(&next_nursery, 1) - 1;
703 if (i >= n_nurseries) {
704 return rtsFalse;
705 }
706 assignNurseryToCapability(cap, i);
707 return rtsTrue;
708 }
709
710 /* -----------------------------------------------------------------------------
711    move_STACK is called to update the StgStack structure after it has been
712 moved from one place to another.
713 -------------------------------------------------------------------------- */
714
715 void
716 move_STACK (StgStack *src, StgStack *dest)
717 {
718 ptrdiff_t diff;
719
720 // relocate the stack pointer...
721 diff = (StgPtr)dest - (StgPtr)src; // In *words*
722 dest->sp = (StgPtr)dest->sp + diff;
723 }
724
725 /* -----------------------------------------------------------------------------
726 StgPtr allocate (Capability *cap, W_ n)
727
728 Allocates an area of memory n *words* large, from the nursery of
729 the supplied Capability, or from the global block pool if the area
730 requested is larger than LARGE_OBJECT_THRESHOLD. Memory is not
731 allocated from the current nursery block, so as not to interfere
732 with Hp/HpLim.
733
734 The address of the allocated memory is returned. allocate() never
735 fails; if it returns, the returned value is a valid address. If
736 the nursery is already full, then another block is allocated from
737 the global block pool. If we need to get memory from the OS and
738 that operation fails, then the whole process will be killed.
739 -------------------------------------------------------------------------- */
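/* A minimal usage sketch (illustrative, not part of the original source):
 * typical RTS code allocates with allocate() and then fills in the header
 * itself, e.g. for a fresh MUT_VAR ('cap' and 'init_value' assumed in scope):
 *
 *     StgMutVar *mv = (StgMutVar *)allocate(cap, sizeofW(StgMutVar));
 *     SET_HDR(mv, &stg_MUT_VAR_CLEAN_info, CCS_SYSTEM);
 *     mv->var = init_value;
 */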
740
741 StgPtr allocate (Capability *cap, W_ n)
742 {
743 bdescr *bd;
744 StgPtr p;
745
746 TICK_ALLOC_HEAP_NOCTR(WDS(n));
747 CCS_ALLOC(cap->r.rCCCS,n);
748 if (cap->r.rCurrentTSO != NULL) {
749 cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
750 }
751
752 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
753 // The largest number of words such that
754 // the computation of req_blocks will not overflow.
755 W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
756 W_ req_blocks;
757
758 if (n > max_words)
759 req_blocks = HS_WORD_MAX; // signal overflow below
760 else
761 req_blocks = (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
762
763 // Attempting to allocate an object larger than maxHeapSize
764 // should definitely be disallowed. (bug #1791)
765 if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
766 req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
767 req_blocks >= HS_INT32_MAX) // avoid overflow when
768 // calling allocGroup() below
769 {
770 heapOverflow();
771 // heapOverflow() doesn't exit (see #2592), but we aren't
772 // in a position to do a clean shutdown here: we
773 // either have to allocate the memory or exit now.
774 // Allocating the memory would be bad, because the user
775 // has requested that we not exceed maxHeapSize, so we
776 // just exit.
777 stg_exit(EXIT_HEAPOVERFLOW);
778 }
779
780 ACQUIRE_SM_LOCK
781 bd = allocGroup(req_blocks);
782 dbl_link_onto(bd, &g0->large_objects);
783 g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
784 g0->n_new_large_words += n;
785 RELEASE_SM_LOCK;
786 initBdescr(bd, g0, g0);
787 bd->flags = BF_LARGE;
788 bd->free = bd->start + n;
789 cap->total_allocated += n;
790 return bd->start;
791 }
792
793 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
794
795 bd = cap->r.rCurrentAlloc;
796 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
797
798 if (bd) finishedNurseryBlock(cap,bd);
799
800 // The CurrentAlloc block is full, we need to find another
801 // one. First, we try taking the next block from the
802 // nursery:
803 bd = cap->r.rCurrentNursery->link;
804
805 if (bd == NULL) {
806 // The nursery is empty: allocate a fresh block (we can't
807 // fail here).
808 ACQUIRE_SM_LOCK;
809 bd = allocBlock();
810 cap->r.rNursery->n_blocks++;
811 RELEASE_SM_LOCK;
812 initBdescr(bd, g0, g0);
813 bd->flags = 0;
814 // If we had to allocate a new block, then we'll GC
815 // pretty quickly now, because MAYBE_GC() will
816 // notice that CurrentNursery->link is NULL.
817 } else {
818 newNurseryBlock(bd);
819 // we have a block in the nursery: take it and put
820 // it at the *front* of the nursery list, and use it
821 // to allocate() from.
822 //
823 // Previously the nursery looked like this:
824 //
825 // CurrentNursery
826 // /
827 // +-+ +-+
828 // nursery -> ... |A| -> |B| -> ...
829 // +-+ +-+
830 //
831 // After doing this, it looks like this:
832 //
833 // CurrentNursery
834 // /
835 // +-+ +-+
836 // nursery -> |B| -> ... -> |A| -> ...
837 // +-+ +-+
838 // |
839 // CurrentAlloc
840 //
841 // The point is to get the block out of the way of the
842 // advancing CurrentNursery pointer, while keeping it
843 // on the nursery list so we don't lose track of it.
844 cap->r.rCurrentNursery->link = bd->link;
845 if (bd->link != NULL) {
846 bd->link->u.back = cap->r.rCurrentNursery;
847 }
848 }
849 dbl_link_onto(bd, &cap->r.rNursery->blocks);
850 cap->r.rCurrentAlloc = bd;
851 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
852 }
853 p = bd->free;
854 bd->free += n;
855
856 IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
857 return p;
858 }
859
860 /* ---------------------------------------------------------------------------
861 Allocate a fixed/pinned object.
862
863 We allocate small pinned objects into a single block, allocating a
864 new block when the current one overflows. The block is chained
865 onto the large_object_list of generation 0.
866
867 NOTE: The GC can't in general handle pinned objects. This
868 interface is only safe to use for ByteArrays, which have no
869 pointers and don't require scavenging. It works because the
870 block's descriptor has the BF_LARGE flag set, so the block is
871 treated as a large object and chained onto various lists, rather
872 than the individual objects being copied. However, when it comes
873 to scavenge the block, the GC will only scavenge the first object.
874 The reason is that the GC can't linearly scan a block of pinned
875 objects at the moment (doing so would require using the
876 mostly-copying techniques). But since we're restricting ourselves
877 to pinned ByteArrays, not scavenging is ok.
878
879 This function is called by newPinnedByteArray# which immediately
880 fills the allocated memory with a MutableByteArray#.
881 ------------------------------------------------------------------------- */
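/* A usage sketch (illustrative, not part of the original source): the memory
 * returned here is immediately turned into an ARR_WORDS closure, roughly
 * ('payload_words' and 'ccs' are placeholders):
 *
 *     StgPtr p = allocatePinned(cap, sizeofW(StgHeader) + 1 + payload_words);
 *     SET_HDR((StgClosure *)p, &stg_ARR_WORDS_info, ccs);
 *     // ... store the byte count and fill in the payload ...
 */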
882
883 StgPtr
884 allocatePinned (Capability *cap, W_ n)
885 {
886 StgPtr p;
887 bdescr *bd;
888
889 // If the request is for a large object, then allocate()
890 // will give us a pinned object anyway.
891 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
892 p = allocate(cap, n);
893 Bdescr(p)->flags |= BF_PINNED;
894 return p;
895 }
896
897 TICK_ALLOC_HEAP_NOCTR(WDS(n));
898 CCS_ALLOC(cap->r.rCCCS,n);
899 if (cap->r.rCurrentTSO != NULL) {
900 cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
901 }
902
903 bd = cap->pinned_object_block;
904
905 // If we don't have a block of pinned objects yet, or the current
906 // one isn't large enough to hold the new object, get a new one.
907 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
908
909 // stash the old block on cap->pinned_object_blocks. On the
910 // next GC cycle these objects will be moved to
911 // g0->large_objects.
912 if (bd != NULL) {
913 // add it to the allocation stats when the block is full
914 finishedNurseryBlock(cap, bd);
915 dbl_link_onto(bd, &cap->pinned_object_blocks);
916 }
917
918 // We need to find another block. We could just allocate one,
919 // but that means taking a global lock and we really want to
920 // avoid that (benchmarks that allocate a lot of pinned
921 // objects scale really badly if we do this).
922 //
923 // So first, we try taking the next block from the nursery, in
924 // the same way as allocate(), but note that we can only take
925 // an *empty* block, because we're about to mark it as
926 // BF_PINNED | BF_LARGE.
927 bd = cap->r.rCurrentNursery->link;
928 if (bd == NULL) { // must be empty!
929 // The nursery is empty, or the next block is non-empty:
930 // allocate a fresh block (we can't fail here).
931
932 // XXX in the case when the next nursery block is
933 // non-empty we aren't exerting any pressure to GC soon,
934 // so if this case ever happens then we could in theory
935 // keep allocating for ever without calling the GC. We
936 // can't bump g0->n_new_large_words because that will be
937 // counted towards allocation, and we're already counting
938             // our pinned objects as allocation in
939 // collect_pinned_object_blocks in the GC.
940 ACQUIRE_SM_LOCK;
941 bd = allocBlock();
942 RELEASE_SM_LOCK;
943 initBdescr(bd, g0, g0);
944 } else {
945 newNurseryBlock(bd);
946 // we have a block in the nursery: steal it
947 cap->r.rCurrentNursery->link = bd->link;
948 if (bd->link != NULL) {
949 bd->link->u.back = cap->r.rCurrentNursery;
950 }
951 cap->r.rNursery->n_blocks -= bd->blocks;
952 }
953
954 cap->pinned_object_block = bd;
955 bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
956
957 // The pinned_object_block remains attached to the capability
958 // until it is full, even if a GC occurs. We want this
959 // behaviour because otherwise the unallocated portion of the
960 // block would be forever slop, and under certain workloads
961 // (allocating a few ByteStrings per GC) we accumulate a lot
962 // of slop.
963 //
964 // So, the pinned_object_block is initially marked
965 // BF_EVACUATED so the GC won't touch it. When it is full,
966 // we place it on the large_objects list, and at the start of
967 // the next GC the BF_EVACUATED flag will be cleared, and the
968 // block will be promoted as usual (if anything in it is
969 // live).
970 }
971
972 p = bd->free;
973 bd->free += n;
974 return p;
975 }
976
977 /* -----------------------------------------------------------------------------
978 Write Barriers
979 -------------------------------------------------------------------------- */
980
981 /*
982 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
983 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
984 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
985 and is put on the mutable list.
986 */
987 void
988 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
989 {
990 Capability *cap = regTableToCapability(reg);
991 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
992 p->header.info = &stg_MUT_VAR_DIRTY_info;
993 recordClosureMutated(cap,p);
994 }
995 }
996
997 void
998 dirty_TVAR(Capability *cap, StgTVar *p)
999 {
1000 if (p->header.info == &stg_TVAR_CLEAN_info) {
1001 p->header.info = &stg_TVAR_DIRTY_info;
1002 recordClosureMutated(cap,(StgClosure*)p);
1003 }
1004 }
1005
1006 // Setting a TSO's link field with a write barrier.
1007 // It is *not* necessary to call this function when
1008 // * setting the link field to END_TSO_QUEUE
1009 // * putting a TSO on the blackhole_queue
1010 // * setting the link field of the currently running TSO, as it
1011 // will already be dirty.
1012 void
1013 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
1014 {
1015 if (tso->dirty == 0) {
1016 tso->dirty = 1;
1017 recordClosureMutated(cap,(StgClosure*)tso);
1018 }
1019 tso->_link = target;
1020 }
1021
1022 void
1023 setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
1024 {
1025 if (tso->dirty == 0) {
1026 tso->dirty = 1;
1027 recordClosureMutated(cap,(StgClosure*)tso);
1028 }
1029 tso->block_info.prev = target;
1030 }
1031
1032 void
1033 dirty_TSO (Capability *cap, StgTSO *tso)
1034 {
1035 if (tso->dirty == 0) {
1036 tso->dirty = 1;
1037 recordClosureMutated(cap,(StgClosure*)tso);
1038 }
1039 }
1040
1041 void
1042 dirty_STACK (Capability *cap, StgStack *stack)
1043 {
1044 if (stack->dirty == 0) {
1045 stack->dirty = 1;
1046 recordClosureMutated(cap,(StgClosure*)stack);
1047 }
1048 }
1049
1050 /*
1051    This is the write barrier for MVARs.  An MVAR_CLEAN object is not
1052    on the mutable list; a MVAR_DIRTY is.  When written to, a
1053    MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
1054    The check for MVAR_CLEAN is inlined at the call site for speed;
1055    this really does make a difference on concurrency-heavy benchmarks
1056    such as Chameneos and cheap-concurrency.
1057 */
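/* For illustration (the real check lives in the call sites, not here):
 * in effect each MVAR write site does
 *
 *     if (GET_INFO((StgClosure *)mvar) == &stg_MVAR_CLEAN_info) {
 *         SET_INFO((StgClosure *)mvar, &stg_MVAR_DIRTY_info);
 *         dirty_MVAR(reg, (StgClosure *)mvar);
 *     }
 */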
1058 void
1059 dirty_MVAR(StgRegTable *reg, StgClosure *p)
1060 {
1061 recordClosureMutated(regTableToCapability(reg),p);
1062 }
1063
1064 /* -----------------------------------------------------------------------------
1065 * Stats and stuff
1066 * -------------------------------------------------------------------------- */
1067
1068 /* -----------------------------------------------------------------------------
1069 * [Note allocation accounting]
1070 *
1071  * - When cap->r.rCurrentNursery moves to a new block in the nursery,
1072 * we add the size of the used portion of the previous block to
1073 * cap->total_allocated. (see finishedNurseryBlock())
1074 *
1075 * - When we start a GC, the allocated portion of CurrentNursery and
1076 * CurrentAlloc are added to cap->total_allocated. (see
1077 * updateNurseriesStats())
1078 *
1079 * -------------------------------------------------------------------------- */
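/* For reference (a sketch; the real definition lives in Storage.h):
 * finishedNurseryBlock(cap, bd) is roughly
 *
 *     cap->total_allocated += bd->free - bd->start;
 *
 * i.e. the used portion of the block, in words, is credited to the
 * capability that filled it.
 */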
1080
1081 //
1082 // Calculate the total allocated memory since the start of the
1083 // program. Also emits events reporting the per-cap allocation
1084 // totals.
1085 //
1086 StgWord
1087 calcTotalAllocated (void)
1088 {
1089 W_ tot_alloc = 0;
1090 W_ n;
1091
1092 for (n = 0; n < n_capabilities; n++) {
1093 tot_alloc += capabilities[n]->total_allocated;
1094
1095 traceEventHeapAllocated(capabilities[n],
1096 CAPSET_HEAP_DEFAULT,
1097 capabilities[n]->total_allocated * sizeof(W_));
1098 }
1099
1100 return tot_alloc;
1101 }
1102
1103 //
1104 // Update the per-cap total_allocated numbers with an approximation of
1105 // the amount of memory used in each cap's nursery.
1106 //
1107 void
1108 updateNurseriesStats (void)
1109 {
1110 nat i;
1111 bdescr *bd;
1112
1113 for (i = 0; i < n_capabilities; i++) {
1114 // The current nursery block and the current allocate block have not
1115 // yet been accounted for in cap->total_allocated, so we add them here.
1116 bd = capabilities[i]->r.rCurrentNursery;
1117 if (bd) finishedNurseryBlock(capabilities[i], bd);
1118 bd = capabilities[i]->r.rCurrentAlloc;
1119 if (bd) finishedNurseryBlock(capabilities[i], bd);
1120 }
1121 }
1122
1123 W_ countOccupied (bdescr *bd)
1124 {
1125 W_ words;
1126
1127 words = 0;
1128 for (; bd != NULL; bd = bd->link) {
1129 ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
1130 words += bd->free - bd->start;
1131 }
1132 return words;
1133 }
1134
1135 W_ genLiveWords (generation *gen)
1136 {
1137 return gen->n_words + gen->n_large_words;
1138 }
1139
1140 W_ genLiveBlocks (generation *gen)
1141 {
1142 return gen->n_blocks + gen->n_large_blocks;
1143 }
1144
1145 W_ gcThreadLiveWords (nat i, nat g)
1146 {
1147 W_ words;
1148
1149 words = countOccupied(gc_threads[i]->gens[g].todo_bd);
1150 words += countOccupied(gc_threads[i]->gens[g].part_list);
1151 words += countOccupied(gc_threads[i]->gens[g].scavd_list);
1152
1153 return words;
1154 }
1155
1156 W_ gcThreadLiveBlocks (nat i, nat g)
1157 {
1158 W_ blocks;
1159
1160 blocks = countBlocks(gc_threads[i]->gens[g].todo_bd);
1161 blocks += gc_threads[i]->gens[g].n_part_blocks;
1162 blocks += gc_threads[i]->gens[g].n_scavd_blocks;
1163
1164 return blocks;
1165 }
1166
1167 // Return an accurate count of the live data in the heap, excluding
1168 // generation 0.
1169 W_ calcLiveWords (void)
1170 {
1171 nat g;
1172 W_ live;
1173
1174 live = 0;
1175 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1176 live += genLiveWords(&generations[g]);
1177 }
1178 return live;
1179 }
1180
1181 W_ calcLiveBlocks (void)
1182 {
1183 nat g;
1184 W_ live;
1185
1186 live = 0;
1187 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1188 live += genLiveBlocks(&generations[g]);
1189 }
1190 return live;
1191 }
1192
1193 /* Determine which generation will be collected next, and approximate
1194 * the maximum amount of memory that will be required to do the GC,
1195 * taking into account data that will be copied, and the space needed
1196 * to store bitmaps and the mark stack. Note: blocks_needed does not
1197 * include the blocks in the nursery.
1198 *
1199  * Assume: all data currently live will remain live.  Generations
1200 * that will be collected next time will therefore need twice as many
1201 * blocks since all the data will be copied.
1202 */
1203 extern W_
1204 calcNeeded (rtsBool force_major, memcount *blocks_needed)
1205 {
1206 W_ needed = 0, blocks;
1207 nat g, N;
1208 generation *gen;
1209
1210 if (force_major) {
1211 N = RtsFlags.GcFlags.generations - 1;
1212 } else {
1213 N = 0;
1214 }
1215
1216 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1217 gen = &generations[g];
1218
1219 blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
1220 + gen->n_large_blocks;
1221
1222 // we need at least this much space
1223 needed += blocks;
1224
1225 // are we collecting this gen?
1226 if (g == 0 || // always collect gen 0
1227 blocks > gen->max_blocks)
1228 {
1229 N = stg_max(N,g);
1230
1231 // we will collect this gen next time
1232 if (gen->mark) {
1233 // bitmap:
1234 needed += gen->n_blocks / BITS_IN(W_);
1235 // mark stack:
1236 needed += gen->n_blocks / 100;
1237 }
1238 if (gen->compact) {
1239 continue; // no additional space needed for compaction
1240 } else {
1241 needed += gen->n_blocks;
1242 }
1243 }
1244 }
1245
1246 if (blocks_needed != NULL) {
1247 *blocks_needed = needed;
1248 }
1249 return N;
1250 }
1251
1252 /* ----------------------------------------------------------------------------
1253 Executable memory
1254
1255 Executable memory must be managed separately from non-executable
1256 memory. Most OSs these days require you to jump through hoops to
1257 dynamically allocate executable memory, due to various security
1258 measures.
1259
1260 Here we provide a small memory allocator for executable memory.
1261 Memory is managed with a page granularity; we allocate linearly
1262 in the page, and when the page is emptied (all objects on the page
1263 are free) we free the page again, not forgetting to make it
1264 non-executable.
1265
1266 TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1267 the linker cannot use allocateExec for loading object code files
1268 on Windows. Once allocateExec can handle larger objects, the linker
1269 should be modified to use allocateExec instead of VirtualAlloc.
1270 ------------------------------------------------------------------------- */
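/* A usage sketch (illustrative, not part of the original source): adjustor
 * creation writes code through the writable alias and runs it through the
 * executable one ('code_size' and 'code_template' are placeholders):
 *
 *     AdjustorExecutable code;
 *     AdjustorWritable   writ = allocateExec(code_size, &code);
 *     memcpy(writ, code_template, code_size);  // patch in real addresses, etc.
 *     flushExec(code_size, code);              // make the CPU see the new code
 *     ...
 *     freeExec(code);                          // free via the *executable* address
 */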
1271
1272 #if (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1273 void sys_icache_invalidate(void *start, size_t len);
1274 #endif
1275
1276 /* On ARM and other platforms, we need to flush the cache after
1277 writing code into memory, so the processor reliably sees it. */
1278 void flushExec (W_ len, AdjustorExecutable exec_addr)
1279 {
1280 #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
1281 /* x86 doesn't need to do anything, so just suppress some warnings. */
1282 (void)len;
1283 (void)exec_addr;
1284 #elif (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1285 /* On iOS we need to use the special 'sys_icache_invalidate' call. */
1286   sys_icache_invalidate(exec_addr, len);
1287 #elif defined(__GNUC__)
1288 /* For all other platforms, fall back to a libgcc builtin. */
1289 unsigned char* begin = (unsigned char*)exec_addr;
1290 unsigned char* end = begin + len;
1291 __clear_cache((void*)begin, (void*)end);
1292 #else
1293 #error Missing support to flush the instruction cache
1294 #endif
1295 }
1296
1297 #if defined(linux_HOST_OS)
1298
1299 // On Linux we need to use libffi for allocating executable memory,
1300 // because it knows how to work around the restrictions put in place
1301 // by SELinux.
1302
1303 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1304 {
1305 void **ret, **exec;
1306 ACQUIRE_SM_LOCK;
1307 ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1308 RELEASE_SM_LOCK;
1309 if (ret == NULL) return ret;
1310 *ret = ret; // save the address of the writable mapping, for freeExec().
1311 *exec_ret = exec + 1;
1312 return (ret + 1);
1313 }
1314
1315 // freeExec gets passed the executable address, not the writable address.
1316 void freeExec (AdjustorExecutable addr)
1317 {
1318 AdjustorWritable writable;
1319 writable = *((void**)addr - 1);
1320 ACQUIRE_SM_LOCK;
1321 ffi_closure_free (writable);
1322 RELEASE_SM_LOCK
1323 }
1324
1325 #elif defined(ios_HOST_OS)
1326
1327 static HashTable* allocatedExecs;
1328
1329 AdjustorWritable allocateExec(W_ bytes, AdjustorExecutable *exec_ret)
1330 {
1331 AdjustorWritable writ;
1332 ffi_closure* cl;
1333 if (bytes != sizeof(ffi_closure)) {
1334 barf("allocateExec: for ffi_closure only");
1335 }
1336 ACQUIRE_SM_LOCK;
1337 cl = writ = ffi_closure_alloc((size_t)bytes, exec_ret);
1338 if (cl != NULL) {
1339 if (allocatedExecs == NULL) {
1340 allocatedExecs = allocHashTable();
1341 }
1342 insertHashTable(allocatedExecs, (StgWord)*exec_ret, writ);
1343 }
1344 RELEASE_SM_LOCK;
1345 return writ;
1346 }
1347
1348 AdjustorWritable execToWritable(AdjustorExecutable exec)
1349 {
1350 AdjustorWritable writ;
1351 ACQUIRE_SM_LOCK;
1352 if (allocatedExecs == NULL ||
1353 (writ = lookupHashTable(allocatedExecs, (StgWord)exec)) == NULL) {
1354 RELEASE_SM_LOCK;
1355 barf("execToWritable: not found");
1356 }
1357 RELEASE_SM_LOCK;
1358 return writ;
1359 }
1360
1361 void freeExec(AdjustorExecutable exec)
1362 {
1363 AdjustorWritable writ;
1364 ffi_closure* cl;
1365 cl = writ = execToWritable(exec);
1366 ACQUIRE_SM_LOCK;
1367 removeHashTable(allocatedExecs, (StgWord)exec, writ);
1368 ffi_closure_free(cl);
1369 RELEASE_SM_LOCK
1370 }
1371
1372 #else
1373
1374 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1375 {
1376 void *ret;
1377 W_ n;
1378
1379 ACQUIRE_SM_LOCK;
1380
1381 // round up to words.
1382 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
1383
1384 if (n+1 > BLOCK_SIZE_W) {
1385 barf("allocateExec: can't handle large objects");
1386 }
1387
1388 if (exec_block == NULL ||
1389 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1390 bdescr *bd;
1391 W_ pagesize = getPageSize();
1392 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1393 debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1394 bd->gen_no = 0;
1395 bd->flags = BF_EXEC;
1396 bd->link = exec_block;
1397 if (exec_block != NULL) {
1398 exec_block->u.back = bd;
1399 }
1400 bd->u.back = NULL;
1401 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1402 exec_block = bd;
1403 }
1404 *(exec_block->free) = n; // store the size of this chunk
1405 exec_block->gen_no += n; // gen_no stores the number of words allocated
1406 ret = exec_block->free + 1;
1407 exec_block->free += n + 1;
1408
1409 RELEASE_SM_LOCK
1410 *exec_ret = ret;
1411 return ret;
1412 }
1413
1414 void freeExec (void *addr)
1415 {
1416 StgPtr p = (StgPtr)addr - 1;
1417 bdescr *bd = Bdescr((StgPtr)p);
1418
1419 if ((bd->flags & BF_EXEC) == 0) {
1420 barf("freeExec: not executable");
1421 }
1422
1423 if (*(StgPtr)p == 0) {
1424 barf("freeExec: already free?");
1425 }
1426
1427 ACQUIRE_SM_LOCK;
1428
1429 bd->gen_no -= *(StgPtr)p;
1430 *(StgPtr)p = 0;
1431
1432 if (bd->gen_no == 0) {
1433 // Free the block if it is empty, but not if it is the block at
1434 // the head of the queue.
1435 if (bd != exec_block) {
1436 debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1437 dbl_link_remove(bd, &exec_block);
1438 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1439 freeGroup(bd);
1440 } else {
1441 bd->free = bd->start;
1442 }
1443 }
1444
1445 RELEASE_SM_LOCK
1446 }
1447
1448 #endif /* switch(HOST_OS) */
1449
1450 #ifdef DEBUG
1451
1452 // handy function for use in gdb, because Bdescr() is inlined.
1453 extern bdescr *_bdescr (StgPtr p);
1454
1455 bdescr *
1456 _bdescr (StgPtr p)
1457 {
1458 return Bdescr(p);
1459 }
1460
1461 #endif