[ghc.git] / rts / sm / Storage.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2012
4 *
5 * Storage manager front end
6 *
7 * Documentation on the architecture of the Storage Manager can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16
17 #include "Storage.h"
18 #include "GCThread.h"
19 #include "RtsUtils.h"
20 #include "Stats.h"
21 #include "BlockAlloc.h"
22 #include "Weak.h"
23 #include "Sanity.h"
24 #include "Arena.h"
25 #include "Capability.h"
26 #include "Schedule.h"
27 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
28 #include "OSMem.h"
29 #include "Trace.h"
30 #include "GC.h"
31 #include "Evac.h"
32 #if defined(ios_HOST_OS)
33 #include "Hash.h"
34 #endif
35
36 #include <string.h>
37
38 #include "ffi.h"
39
40 /*
41 * All these globals require sm_mutex to access in THREADED_RTS mode.
42 */
43 StgIndStatic *dyn_caf_list = NULL;
44 StgIndStatic *debug_caf_list = NULL;
45 StgIndStatic *revertible_caf_list = NULL;
46 rtsBool keepCAFs;
47
48 W_ large_alloc_lim; /* GC if n_large_blocks in any nursery
49 * reaches this. */
50
51 bdescr *exec_block;
52
53 generation *generations = NULL; /* all the generations */
54 generation *g0 = NULL; /* generation 0, for convenience */
55 generation *oldest_gen = NULL; /* oldest generation, for convenience */
56
57 nursery *nurseries = NULL; /* array of nurseries, size == n_capabilities */
58 uint32_t n_nurseries;
59 volatile StgWord next_nursery = 0;
60
61 #ifdef THREADED_RTS
62 /*
63 * Storage manager mutex: protects all the above state from
64 * simultaneous access by two STG threads.
65 */
66 Mutex sm_mutex;
67 #endif
68
69 static void allocNurseries (uint32_t from, uint32_t to);
70 static void assignNurseriesToCapabilities (uint32_t from, uint32_t to);
71
72 static void
73 initGeneration (generation *gen, int g)
74 {
75 gen->no = g;
76 gen->collections = 0;
77 gen->par_collections = 0;
78 gen->failed_promotions = 0;
79 gen->max_blocks = 0;
80 gen->blocks = NULL;
81 gen->n_blocks = 0;
82 gen->n_words = 0;
83 gen->live_estimate = 0;
84 gen->old_blocks = NULL;
85 gen->n_old_blocks = 0;
86 gen->large_objects = NULL;
87 gen->n_large_blocks = 0;
88 gen->n_large_words = 0;
89 gen->n_new_large_words = 0;
90 gen->scavenged_large_objects = NULL;
91 gen->n_scavenged_large_blocks = 0;
92 gen->mark = 0;
93 gen->compact = 0;
94 gen->bitmap = NULL;
95 #ifdef THREADED_RTS
96 initSpinLock(&gen->sync);
97 #endif
98 gen->threads = END_TSO_QUEUE;
99 gen->old_threads = END_TSO_QUEUE;
100 gen->weak_ptr_list = NULL;
101 gen->old_weak_ptr_list = NULL;
102 }
103
104 void
105 initStorage (void)
106 {
107 uint32_t g;
108
109 if (generations != NULL) {
110 // multi-init protection
111 return;
112 }
113
114 initMBlocks();
115
116 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
117 * doing something reasonable.
118 */
119 /* We use the NOT_NULL variant, otherwise gcc warns that the test is always true */
120 ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
121 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
122 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
123
124 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
125 RtsFlags.GcFlags.heapSizeSuggestion >
126 RtsFlags.GcFlags.maxHeapSize) {
127 RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
128 }
129
130 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
131 RtsFlags.GcFlags.minAllocAreaSize >
132 RtsFlags.GcFlags.maxHeapSize) {
133 errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
134 RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
135 }
136
137 initBlockAllocator();
138
139 #if defined(THREADED_RTS)
140 initMutex(&sm_mutex);
141 #endif
142
143 ACQUIRE_SM_LOCK;
144
145 /* allocate generation info array */
146 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
147 * sizeof(struct generation_),
148 "initStorage: gens");
149
150 /* Initialise all generations */
151 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
152 initGeneration(&generations[g], g);
153 }
154
155 /* A couple of convenience pointers */
156 g0 = &generations[0];
157 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
158
159 /* Set up the destination pointers in each younger gen. step */
160 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
161 generations[g].to = &generations[g+1];
162 }
163 oldest_gen->to = oldest_gen;
164
165 /* The oldest generation has one step. */
166 if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
167 if (RtsFlags.GcFlags.generations == 1) {
168 errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
169 } else {
170 oldest_gen->mark = 1;
171 if (RtsFlags.GcFlags.compact)
172 oldest_gen->compact = 1;
173 }
174 }
175
176 generations[0].max_blocks = 0;
177
178 dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
179 debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
180 revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
181
182 if (RtsFlags.GcFlags.largeAllocLim > 0) {
183 large_alloc_lim = RtsFlags.GcFlags.largeAllocLim * BLOCK_SIZE_W;
184 } else {
185 large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
186 }
187
188 exec_block = NULL;
189
190 #ifdef THREADED_RTS
191 initSpinLock(&gc_alloc_block_sync);
192 #ifdef PROF_SPIN
193 whitehole_spin = 0;
194 #endif
195 #endif
196
197 N = 0;
198
199 next_nursery = 0;
200 storageAddCapabilities(0, n_capabilities);
201
202 IF_DEBUG(gc, statDescribeGens());
203
204 RELEASE_SM_LOCK;
205
206 traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
207 RtsFlags.GcFlags.generations,
208 RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE,
209 RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE,
210 MBLOCK_SIZE,
211 BLOCK_SIZE);
212 }
213
214 void storageAddCapabilities (uint32_t from, uint32_t to)
215 {
216 uint32_t n, g, i, new_n_nurseries;
217
218 if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
219 new_n_nurseries = to;
220 } else {
221 memcount total_alloc = to * RtsFlags.GcFlags.minAllocAreaSize;
222 new_n_nurseries =
223 stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
224 }
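    /* A worked example of the chunk calculation above (assuming the standard
     * 4Kbyte block size, so that -A4m means minAllocAreaSize == 1024 blocks
     * and -n1m means nurseryChunkSize == 256 blocks): with 8 capabilities we
     * get total_alloc == 8 * 1024 == 8192 blocks, and hence
     * stg_max(8, 8192 / 256) == 32 nursery chunks.  The stg_max() ensures
     * that every capability can be given at least one nursery.
     */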
225
226 if (from > 0) {
227 nurseries = stgReallocBytes(nurseries,
228 new_n_nurseries * sizeof(struct nursery_),
229 "storageAddCapabilities");
230 } else {
231 nurseries = stgMallocBytes(new_n_nurseries * sizeof(struct nursery_),
232 "storageAddCapabilities");
233 }
234
235 // we've moved the nurseries, so we have to update the rNursery
236 // pointers from the Capabilities.
237 for (i = 0; i < to; i++) {
238 capabilities[i]->r.rNursery = &nurseries[i];
239 }
240
241 /* The allocation area. Policy: keep the allocation area
242 * small to begin with, even if we have a large suggested heap
243 * size. Reason: we're going to do a major collection first, and we
244 * don't want it to be a big one. This vague idea is borne out by
245 * rigorous experimental evidence.
246 */
247 allocNurseries(n_nurseries, new_n_nurseries);
248 n_nurseries = new_n_nurseries;
249
250 /*
251 * Assign each of the new capabilities a nursery. Remember to start from
252 * next_nursery, because we may have already consumed some of the earlier
253 * nurseries.
254 */
255 assignNurseriesToCapabilities(from,to);
256
257 // allocate a block for each mut list
258 for (n = from; n < to; n++) {
259 for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
260 capabilities[n]->mut_lists[g] = allocBlock();
261 }
262 }
263
264 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
265 newThreadLocalKey(&gctKey);
266 #endif
267
268 initGcThreads(from, to);
269 }
270
271
272 void
273 exitStorage (void)
274 {
275 updateNurseriesStats();
276 stat_exit();
277 }
278
279 void
280 freeStorage (rtsBool free_heap)
281 {
282 stgFree(generations);
283 if (free_heap) freeAllMBlocks();
284 #if defined(THREADED_RTS)
285 closeMutex(&sm_mutex);
286 #endif
287 stgFree(nurseries);
288 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
289 freeThreadLocalKey(&gctKey);
290 #endif
291 freeGcThreads();
292 }
293
294 /* -----------------------------------------------------------------------------
295 Note [CAF management].
296
297 The entry code for every CAF does the following:
298
299 - calls newCaf, which builds a CAF_BLACKHOLE on the heap and atomically
300 updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE
301
302 - if newCaf returns zero, it re-enters the CAF (see Note [atomic
303 CAF entry])
304
305 - pushes an update frame pointing to the CAF_BLACKHOLE
306
307 Why do we build a BLACKHOLE in the heap rather than just updating
308 the thunk directly? It's so that we only need one kind of update
309 frame - otherwise we'd need a static version of the update frame
310 too, and various other parts of the RTS that deal with update
311 frames would also need special cases for static update frames.
312
313 newCaf() does the following:
314
315 - atomically locks the CAF (see [atomic CAF entry])
316
317 - it builds a CAF_BLACKHOLE on the heap
318
319 - it updates the CAF with an IND_STATIC pointing to the
320 CAF_BLACKHOLE, atomically.
321
322 - it puts the CAF on the oldest generation's mutable list.
323 This is so that we treat the CAF as a root when collecting
324 younger generations.
325
326 - links the CAF onto the CAF list (see below)
327
328 ------------------
329 Note [atomic CAF entry]
330
331 With THREADED_RTS, newCaf() is required to be atomic (see
332 #5558). This is because if two threads happened to enter the same
333 CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
334 and so the normal threadPaused() machinery for detecting duplicate
335 evaluation will not detect this. Hence in lockCAF() below, we
336 atomically lock the CAF with WHITEHOLE before updating it with
337 IND_STATIC, and return zero if another thread locked the CAF first.
338 In the event that we lost the race, CAF entry code will re-enter
339 the CAF and block on the other thread's CAF_BLACKHOLE.
340
341 ------------------
342 Note [GHCi CAFs]
343
344 For GHCi, we have additional requirements when dealing with CAFs:
345
346 - we must *retain* all dynamically-loaded CAFs ever entered,
347 just in case we need them again.
348 - we must be able to *revert* CAFs that have been evaluated, to
349 their pre-evaluated form.
350
351 To do this, we use an additional CAF list. When newCaf() is
352 called on a dynamically-loaded CAF, we add it to the CAF list
353 instead of the old-generation mutable list, and save away its
354 old info pointer (in caf->saved_info) for later reversion.
355
356 To revert all the CAFs, we traverse the CAF list and reset the
357 info pointer to caf->saved_info, then throw away the CAF list.
358 (see GC.c:revertCAFs()).
359
360 -- SDM 29/1/01
361
362 -------------------------------------------------------------------------- */
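/* A minimal sketch of the reversion walk described in Note [GHCi CAFs] above
 * (the real implementation lives in GC.c:revertCAFs()).  It assumes the
 * UNTAG_STATIC_LIST_PTR macro from ClosureMacros.h to strip the
 * STATIC_FLAG_LIST tag that newRetainedCAF() puts on each entry of
 * revertible_caf_list.
 */
void exampleRevertCAFs (void)
{
    StgIndStatic *c = revertible_caf_list;

    while (c != (StgIndStatic *)END_OF_CAF_LIST) {
        c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR((StgClosure *)c);
        SET_INFO((StgClosure *)c, c->saved_info);  // back to the unevaluated CAF
        c->saved_info = NULL;
        c = (StgIndStatic *)c->static_link;        // next entry (still tagged)
    }
    revertible_caf_list = (StgIndStatic *)END_OF_CAF_LIST; // throw the list away
}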
363
364 STATIC_INLINE StgInd *
365 lockCAF (StgRegTable *reg, StgIndStatic *caf)
366 {
367 const StgInfoTable *orig_info;
368 Capability *cap = regTableToCapability(reg);
369 StgInd *bh;
370
371 orig_info = caf->header.info;
372
373 #ifdef THREADED_RTS
374 const StgInfoTable *cur_info;
375
376 if (orig_info == &stg_IND_STATIC_info ||
377 orig_info == &stg_WHITEHOLE_info) {
378 // already claimed by another thread; re-enter the CAF
379 return NULL;
380 }
381
382 cur_info = (const StgInfoTable *)
383 cas((StgVolatilePtr)&caf->header.info,
384 (StgWord)orig_info,
385 (StgWord)&stg_WHITEHOLE_info);
386
387 if (cur_info != orig_info) {
388 // already claimed by another thread; re-enter the CAF
389 return NULL;
390 }
391
392 // successfully claimed by us; overwrite with IND_STATIC
393 #endif
394
395 // For the benefit of revertCAFs(), save the original info pointer
396 caf->saved_info = orig_info;
397
398 // Allocate the blackhole indirection closure
399 bh = (StgInd *)allocate(cap, sizeofW(*bh));
400 SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
401 bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
402
403 caf->indirectee = (StgClosure *)bh;
404 write_barrier();
405 SET_INFO((StgClosure*)caf,&stg_IND_STATIC_info);
406
407 return bh;
408 }
409
410 StgInd *
411 newCAF(StgRegTable *reg, StgIndStatic *caf)
412 {
413 StgInd *bh;
414
415 bh = lockCAF(reg, caf);
416 if (!bh) return NULL;
417
418 if(keepCAFs)
419 {
420 // Note [dyn_caf_list]
421 // If we are in GHCi _and_ we are using dynamic libraries,
422 // then we can't redirect newCAF calls to newRetainedCAF (see below),
423 // so we make newCAF behave almost like newRetainedCAF.
424 // The dynamic libraries might be used by both the interpreted
425 // program and GHCi itself, so they must not be reverted.
426 // This also means that in GHCi with dynamic libraries, CAFs are not
427 // garbage collected. If this turns out to be a problem, we could
428 // do another hack here and do an address range test on caf to figure
429 // out whether it is from a dynamic library.
430
431 ACQUIRE_SM_LOCK; // dyn_caf_list is global, locked by sm_mutex
432 caf->static_link = (StgClosure*)dyn_caf_list;
433 dyn_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
434 RELEASE_SM_LOCK;
435 }
436 else
437 {
438 // Put this CAF on the mutable list for the old generation.
439 if (oldest_gen->no != 0) {
440 recordMutableCap((StgClosure*)caf,
441 regTableToCapability(reg), oldest_gen->no);
442 }
443
444 #ifdef DEBUG
445 // In the DEBUG rts, we keep track of live CAFs by chaining them
446 // onto a list debug_caf_list. This is so that we can tell if we
447 // ever enter a GC'd CAF, and emit a suitable barf().
448 //
449 // The saved_info field of the CAF is used as the link field for
450 // debug_caf_list, because this field is only used by newRetainedCAF
451 // for revertible CAFs, and we don't put those on the
452 // debug_caf_list.
453 ACQUIRE_SM_LOCK; // debug_caf_list is global, locked by sm_mutex
454 ((StgIndStatic *)caf)->saved_info = (const StgInfoTable*)debug_caf_list;
455 debug_caf_list = (StgIndStatic*)caf;
456 RELEASE_SM_LOCK;
457 #endif
458 }
459
460 return bh;
461 }
462
463 // External API for setting the keepCAFs flag. see #3900.
464 void
465 setKeepCAFs (void)
466 {
467 keepCAFs = 1;
468 }
469
470 // An alternate version of newCAF which is used for dynamically loaded
471 // object code in GHCi. In this case we want to retain *all* CAFs in
472 // the object code, because they might be demanded at any time from an
473 // expression evaluated on the command line.
474 // Also, GHCi might want to revert CAFs, so we add these to the
475 // revertible_caf_list.
476 //
477 // The linker hackily arranges that references to newCAF from dynamic
478 // code end up pointing to newRetainedCAF.
479 //
480 StgInd* newRetainedCAF (StgRegTable *reg, StgIndStatic *caf)
481 {
482 StgInd *bh;
483
484 bh = lockCAF(reg, caf);
485 if (!bh) return NULL;
486
487 ACQUIRE_SM_LOCK;
488
489 caf->static_link = (StgClosure*)revertible_caf_list;
490 revertible_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
491
492 RELEASE_SM_LOCK;
493
494 return bh;
495 }
496
497 // If we are using loadObj/unloadObj in the linker, then we want to
498 //
499 // - retain all CAFs in statically linked code (keepCAFs == rtsTrue),
500 // because we might link a new object that uses any of these CAFs.
501 //
502 // - GC CAFs in dynamically-linked code, so that we can detect when
503 // a dynamically-linked object is unloadable.
504 //
505 // So for this case, we set keepCAFs to rtsTrue, and link newCAF to newGCdCAF
506 // for dynamically-linked code.
507 //
508 StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
509 {
510 StgInd *bh;
511
512 bh = lockCAF(reg, caf);
513 if (!bh) return NULL;
514
515 // Put this CAF on the mutable list for the old generation.
516 if (oldest_gen->no != 0) {
517 recordMutableCap((StgClosure*)caf,
518 regTableToCapability(reg), oldest_gen->no);
519 }
520
521 return bh;
522 }
523
524 /* -----------------------------------------------------------------------------
525 Nursery management.
526 -------------------------------------------------------------------------- */
527
528 static bdescr *
529 allocNursery (bdescr *tail, W_ blocks)
530 {
531 bdescr *bd = NULL;
532 W_ i, n;
533
534 // We allocate the nursery as a single contiguous block and then
535 // divide it into single blocks manually. This way we guarantee
536 // that the nursery blocks are adjacent, so that the processor's
537 // automatic prefetching works across nursery blocks. This is a
538 // tiny optimisation (~0.5%), but it's free.
539
540 while (blocks > 0) {
541 n = stg_min(BLOCKS_PER_MBLOCK, blocks);
542 // allocLargeChunk will prefer large chunks, but will pick up
543 // small chunks if there are any available. We must allow
544 // single blocks here to avoid fragmentation (#7257)
545 bd = allocLargeChunk(1, n);
546 n = bd->blocks;
547 blocks -= n;
548
549 for (i = 0; i < n; i++) {
550 initBdescr(&bd[i], g0, g0);
551
552 bd[i].blocks = 1;
553 bd[i].flags = 0;
554
555 if (i > 0) {
556 bd[i].u.back = &bd[i-1];
557 } else {
558 bd[i].u.back = NULL;
559 }
560
561 if (i+1 < n) {
562 bd[i].link = &bd[i+1];
563 } else {
564 bd[i].link = tail;
565 if (tail != NULL) {
566 tail->u.back = &bd[i];
567 }
568 }
569
570 bd[i].free = bd[i].start;
571 }
572
573 tail = &bd[0];
574 }
575
576 return &bd[0];
577 }
578
579 STATIC_INLINE void
580 assignNurseryToCapability (Capability *cap, uint32_t n)
581 {
582 ASSERT(n < n_nurseries);
583 cap->r.rNursery = &nurseries[n];
584 cap->r.rCurrentNursery = nurseries[n].blocks;
585 newNurseryBlock(nurseries[n].blocks);
586 cap->r.rCurrentAlloc = NULL;
587 }
588
589 /*
590 * Give each Capability a nursery from the pool. No need to do atomic increments
591 * here, everything must be stopped to call this function.
592 */
593 static void
594 assignNurseriesToCapabilities (uint32_t from, uint32_t to)
595 {
596 uint32_t i;
597
598 for (i = from; i < to; i++) {
599 assignNurseryToCapability(capabilities[i], next_nursery++);
600 }
601 }
602
603 static void
604 allocNurseries (uint32_t from, uint32_t to)
605 {
606 uint32_t i;
607 memcount n_blocks;
608
609 if (RtsFlags.GcFlags.nurseryChunkSize) {
610 n_blocks = RtsFlags.GcFlags.nurseryChunkSize;
611 } else {
612 n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
613 }
614
615 for (i = from; i < to; i++) {
616 nurseries[i].blocks = allocNursery(NULL, n_blocks);
617 nurseries[i].n_blocks = n_blocks;
618 }
619 }
620
621 void
622 resetNurseries (void)
623 {
624 next_nursery = 0;
625 assignNurseriesToCapabilities(0, n_capabilities);
626
627 #ifdef DEBUG
628 bdescr *bd;
629 uint32_t n;
630 for (n = 0; n < n_nurseries; n++) {
631 for (bd = nurseries[n].blocks; bd; bd = bd->link) {
632 ASSERT(bd->gen_no == 0);
633 ASSERT(bd->gen == g0);
634 IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE));
635 }
636 }
637 #endif
638 }
639
640 W_
641 countNurseryBlocks (void)
642 {
643 uint32_t i;
644 W_ blocks = 0;
645
646 for (i = 0; i < n_nurseries; i++) {
647 blocks += nurseries[i].n_blocks;
648 }
649 return blocks;
650 }
651
652 static void
653 resizeNursery (nursery *nursery, W_ blocks)
654 {
655 bdescr *bd;
656 W_ nursery_blocks;
657
658 nursery_blocks = nursery->n_blocks;
659 if (nursery_blocks == blocks) return;
660
661 if (nursery_blocks < blocks) {
662 debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
663 blocks);
664 nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
665 }
666 else {
667 bdescr *next_bd;
668
669 debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
670 blocks);
671
672 bd = nursery->blocks;
673 while (nursery_blocks > blocks) {
674 next_bd = bd->link;
675 next_bd->u.back = NULL;
676 nursery_blocks -= bd->blocks; // might be a large block
677 freeGroup(bd);
678 bd = next_bd;
679 }
680 nursery->blocks = bd;
681 // might have gone just under, by freeing a large block, so make
682 // up the difference.
683 if (nursery_blocks < blocks) {
684 nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
685 }
686 }
687
688 nursery->n_blocks = blocks;
689 ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
690 }
691
692 //
693 // Resize each of the nurseries to the specified size.
694 //
695 static void
696 resizeNurseriesEach (W_ blocks)
697 {
698 uint32_t i;
699
700 for (i = 0; i < n_nurseries; i++) {
701 resizeNursery(&nurseries[i], blocks);
702 }
703 }
704
705 void
706 resizeNurseriesFixed (void)
707 {
708 uint32_t blocks;
709
710 if (RtsFlags.GcFlags.nurseryChunkSize) {
711 blocks = RtsFlags.GcFlags.nurseryChunkSize;
712 } else {
713 blocks = RtsFlags.GcFlags.minAllocAreaSize;
714 }
715
716 resizeNurseriesEach(blocks);
717 }
718
719 //
720 // Resize the nurseries to the total specified size.
721 //
722 void
723 resizeNurseries (W_ blocks)
724 {
725 // If there are multiple nurseries, then we just divide the number
726 // of available blocks between them.
727 resizeNurseriesEach(blocks / n_nurseries);
728 }
729
730 rtsBool
731 getNewNursery (Capability *cap)
732 {
733 StgWord i;
734
735 for(;;) {
736 i = next_nursery;
737 if (i >= n_nurseries) {
738 return rtsFalse;
739 }
740 if (cas(&next_nursery, i, i+1) == i) {
741 assignNurseryToCapability(cap, i);
742 return rtsTrue;
743 }
744 }
745 }
746 /* -----------------------------------------------------------------------------
747 move_STACK is called to update the TSO structure after it has been
748 moved from one place to another.
749 -------------------------------------------------------------------------- */
750
751 void
752 move_STACK (StgStack *src, StgStack *dest)
753 {
754 ptrdiff_t diff;
755
756 // relocate the stack pointer...
757 diff = (StgPtr)dest - (StgPtr)src; // In *words*
758 dest->sp = (StgPtr)dest->sp + diff;
759 }
760
761 /* -----------------------------------------------------------------------------
762 StgPtr allocate (Capability *cap, W_ n)
763
764 Allocates an area of memory n *words* large, from the nursery of
765 the supplied Capability, or from the global block pool if the area
766 requested is larger than LARGE_OBJECT_THRESHOLD. Memory is not
767 allocated from the current nursery block, so as not to interfere
768 with Hp/HpLim.
769
770 The address of the allocated memory is returned. allocate() never
771 fails; if it returns, the returned value is a valid address. If
772 the nursery is already full, then another block is allocated from
773 the global block pool. If we need to get memory from the OS and
774 that operation fails, then the whole process will be killed.
775 -------------------------------------------------------------------------- */
776
777 StgPtr
778 allocate (Capability *cap, W_ n)
779 {
780 bdescr *bd;
781 StgPtr p;
782
783 TICK_ALLOC_HEAP_NOCTR(WDS(n));
784 CCS_ALLOC(cap->r.rCCCS,n);
785 if (cap->r.rCurrentTSO != NULL) {
786 // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
787 ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
788 (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
789 - n*sizeof(W_)));
790 }
791
792 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
793 // The largest number of words such that
794 // the computation of req_blocks will not overflow.
795 W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
796 W_ req_blocks;
797
798 if (n > max_words)
799 req_blocks = HS_WORD_MAX; // signal overflow below
800 else
801 req_blocks = (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
802
803 // Attempting to allocate an object larger than maxHeapSize
804 // should definitely be disallowed. (bug #1791)
805 if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
806 req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
807 req_blocks >= HS_INT32_MAX) // avoid overflow when
808 // calling allocGroup() below
809 {
810 heapOverflow();
811 // heapOverflow() doesn't exit (see #2592), but we aren't
812 // in a position to do a clean shutdown here: we
813 // either have to allocate the memory or exit now.
814 // Allocating the memory would be bad, because the user
815 // has requested that we not exceed maxHeapSize, so we
816 // just exit.
817 stg_exit(EXIT_HEAPOVERFLOW);
818 }
819
820 ACQUIRE_SM_LOCK
821 bd = allocGroup(req_blocks);
822 dbl_link_onto(bd, &g0->large_objects);
823 g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
824 g0->n_new_large_words += n;
825 RELEASE_SM_LOCK;
826 initBdescr(bd, g0, g0);
827 bd->flags = BF_LARGE;
828 bd->free = bd->start + n;
829 cap->total_allocated += n;
830 return bd->start;
831 }
832
833 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
834
835 bd = cap->r.rCurrentAlloc;
836 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
837
838 if (bd) finishedNurseryBlock(cap,bd);
839
840 // The CurrentAlloc block is full, we need to find another
841 // one. First, we try taking the next block from the
842 // nursery:
843 bd = cap->r.rCurrentNursery->link;
844
845 if (bd == NULL) {
846 // The nursery is empty: allocate a fresh block (we can't
847 // fail here).
848 ACQUIRE_SM_LOCK;
849 bd = allocBlock();
850 cap->r.rNursery->n_blocks++;
851 RELEASE_SM_LOCK;
852 initBdescr(bd, g0, g0);
853 bd->flags = 0;
854 // If we had to allocate a new block, then we'll GC
855 // pretty quickly now, because MAYBE_GC() will
856 // notice that CurrentNursery->link is NULL.
857 } else {
858 newNurseryBlock(bd);
859 // we have a block in the nursery: take it and put
860 // it at the *front* of the nursery list, and use it
861 // to allocate() from.
862 //
863 // Previously the nursery looked like this:
864 //
865 // CurrentNursery
866 // /
867 // +-+ +-+
868 // nursery -> ... |A| -> |B| -> ...
869 // +-+ +-+
870 //
871 // After doing this, it looks like this:
872 //
873 // CurrentNursery
874 // /
875 // +-+ +-+
876 // nursery -> |B| -> ... -> |A| -> ...
877 // +-+ +-+
878 // |
879 // CurrentAlloc
880 //
881 // The point is to get the block out of the way of the
882 // advancing CurrentNursery pointer, while keeping it
883 // on the nursery list so we don't lose track of it.
884 cap->r.rCurrentNursery->link = bd->link;
885 if (bd->link != NULL) {
886 bd->link->u.back = cap->r.rCurrentNursery;
887 }
888 }
889 dbl_link_onto(bd, &cap->r.rNursery->blocks);
890 cap->r.rCurrentAlloc = bd;
891 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
892 }
893 p = bd->free;
894 bd->free += n;
895
896 IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
897 return p;
898 }
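/* A minimal usage sketch for allocate().  The usual pattern in RTS C code is
 * to grab the words and then immediately write a complete header, so that the
 * new object is well formed before the next GC point.  exampleNewMutVar is a
 * hypothetical helper (roughly what stg_newMutVarzh does in Cmm); CCS_SYSTEM
 * is the cost-centre stack conventionally used for RTS-allocated objects.
 */
StgClosure *exampleNewMutVar (Capability *cap, StgClosure *contents)
{
    StgMutVar *mv = (StgMutVar *)allocate(cap, sizeofW(StgMutVar));

    SET_HDR(mv, &stg_MUT_VAR_DIRTY_info, CCS_SYSTEM); // info ptr (+ prof header)
    mv->var = contents;
    return (StgClosure *)mv;
}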
899
900 /* ---------------------------------------------------------------------------
901 Allocate a fixed/pinned object.
902
903 We allocate small pinned objects into a single block, allocating a
904 new block when the current one overflows. The block is chained
905 onto the large_object_list of generation 0.
906
907 NOTE: The GC can't in general handle pinned objects. This
908 interface is only safe to use for ByteArrays, which have no
909 pointers and don't require scavenging. It works because the
910 block's descriptor has the BF_LARGE flag set, so the block is
911 treated as a large object and chained onto various lists, rather
912 than the individual objects being copied. However, when it comes
913 to scavenge the block, the GC will only scavenge the first object.
914 The reason is that the GC can't linearly scan a block of pinned
915 objects at the moment (doing so would require using the
916 mostly-copying techniques). But since we're restricting ourselves
917 to pinned ByteArrays, not scavenging is ok.
918
919 This function is called by newPinnedByteArray# which immediately
920 fills the allocated memory with a MutableByteArray#.
921 ------------------------------------------------------------------------- */
922
923 StgPtr
924 allocatePinned (Capability *cap, W_ n)
925 {
926 StgPtr p;
927 bdescr *bd;
928
929 // If the request is for a large object, then allocate()
930 // will give us a pinned object anyway.
931 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
932 p = allocate(cap, n);
933 Bdescr(p)->flags |= BF_PINNED;
934 return p;
935 }
936
937 TICK_ALLOC_HEAP_NOCTR(WDS(n));
938 CCS_ALLOC(cap->r.rCCCS,n);
939 if (cap->r.rCurrentTSO != NULL) {
940 // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
941 ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
942 (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
943 - n*sizeof(W_)));
944 }
945
946 bd = cap->pinned_object_block;
947
948 // If we don't have a block of pinned objects yet, or the current
949 // one isn't large enough to hold the new object, get a new one.
950 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
951
952 // stash the old block on cap->pinned_object_blocks. On the
953 // next GC cycle these objects will be moved to
954 // g0->large_objects.
955 if (bd != NULL) {
956 // add it to the allocation stats when the block is full
957 finishedNurseryBlock(cap, bd);
958 dbl_link_onto(bd, &cap->pinned_object_blocks);
959 }
960
961 // We need to find another block. We could just allocate one,
962 // but that means taking a global lock and we really want to
963 // avoid that (benchmarks that allocate a lot of pinned
964 // objects scale really badly if we do this).
965 //
966 // So first, we try taking the next block from the nursery, in
967 // the same way as allocate(), but note that we can only take
968 // an *empty* block, because we're about to mark it as
969 // BF_PINNED | BF_LARGE.
970 bd = cap->r.rCurrentNursery->link;
971 if (bd == NULL) { // must be empty!
972 // The nursery is empty, or the next block is non-empty:
973 // allocate a fresh block (we can't fail here).
974
975 // XXX in the case when the next nursery block is
976 // non-empty we aren't exerting any pressure to GC soon,
977 // so if this case ever happens then we could in theory
978 // keep allocating for ever without calling the GC. We
979 // can't bump g0->n_new_large_words because that will be
980 // counted towards allocation, and we're already counting
981 // our pinned objects as allocation in
982 // collect_pinned_object_blocks in the GC.
983 ACQUIRE_SM_LOCK;
984 bd = allocBlock();
985 RELEASE_SM_LOCK;
986 initBdescr(bd, g0, g0);
987 } else {
988 newNurseryBlock(bd);
989 // we have a block in the nursery: steal it
990 cap->r.rCurrentNursery->link = bd->link;
991 if (bd->link != NULL) {
992 bd->link->u.back = cap->r.rCurrentNursery;
993 }
994 cap->r.rNursery->n_blocks -= bd->blocks;
995 }
996
997 cap->pinned_object_block = bd;
998 bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
999
1000 // The pinned_object_block remains attached to the capability
1001 // until it is full, even if a GC occurs. We want this
1002 // behaviour because otherwise the unallocated portion of the
1003 // block would be forever slop, and under certain workloads
1004 // (allocating a few ByteStrings per GC) we accumulate a lot
1005 // of slop.
1006 //
1007 // So, the pinned_object_block is initially marked
1008 // BF_EVACUATED so the GC won't touch it. When it is full,
1009 // we place it on the large_objects list, and at the start of
1010 // the next GC the BF_EVACUATED flag will be cleared, and the
1011 // block will be promoted as usual (if anything in it is
1012 // live).
1013 }
1014
1015 p = bd->free;
1016 bd->free += n;
1017 return p;
1018 }
1019
1020 /* -----------------------------------------------------------------------------
1021 Write Barriers
1022 -------------------------------------------------------------------------- */
1023
1024 /*
1025 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
1026 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
1027 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
1028 and is put on the mutable list.
1029 */
1030 void
1031 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
1032 {
1033 Capability *cap = regTableToCapability(reg);
1034 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
1035 p->header.info = &stg_MUT_VAR_DIRTY_info;
1036 recordClosureMutated(cap,p);
1037 }
1038 }
1039
1040 void
1041 dirty_TVAR(Capability *cap, StgTVar *p)
1042 {
1043 if (p->header.info == &stg_TVAR_CLEAN_info) {
1044 p->header.info = &stg_TVAR_DIRTY_info;
1045 recordClosureMutated(cap,(StgClosure*)p);
1046 }
1047 }
1048
1049 // Setting a TSO's link field with a write barrier.
1050 // It is *not* necessary to call this function when
1051 // * setting the link field to END_TSO_QUEUE
1052 // * setting the link field of the currently running TSO, as it
1053 // will already be dirty.
1054 void
1055 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
1056 {
1057 if (tso->dirty == 0) {
1058 tso->dirty = 1;
1059 recordClosureMutated(cap,(StgClosure*)tso);
1060 }
1061 tso->_link = target;
1062 }
1063
1064 void
1065 setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
1066 {
1067 if (tso->dirty == 0) {
1068 tso->dirty = 1;
1069 recordClosureMutated(cap,(StgClosure*)tso);
1070 }
1071 tso->block_info.prev = target;
1072 }
1073
1074 void
1075 dirty_TSO (Capability *cap, StgTSO *tso)
1076 {
1077 if (tso->dirty == 0) {
1078 tso->dirty = 1;
1079 recordClosureMutated(cap,(StgClosure*)tso);
1080 }
1081 }
1082
1083 void
1084 dirty_STACK (Capability *cap, StgStack *stack)
1085 {
1086 if (stack->dirty == 0) {
1087 stack->dirty = 1;
1088 recordClosureMutated(cap,(StgClosure*)stack);
1089 }
1090 }
1091
1092 /*
1093 This is the write barrier for MVARs. An MVAR_CLEAN object is not
1094 on the mutable list; an MVAR_DIRTY is. When written to, an
1095 MVAR_CLEAN turns into an MVAR_DIRTY and is put on the mutable list.
1096 The check for MVAR_CLEAN is inlined at the call site for speed;
1097 this really does make a difference on concurrency-heavy benchmarks
1098 such as chameneos and cheap-concurrency.
1099 */
1100 void
1101 dirty_MVAR(StgRegTable *reg, StgClosure *p)
1102 {
1103 recordClosureMutated(regTableToCapability(reg),p);
1104 }
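/* The MVAR_CLEAN check described above lives at the call sites, which are in
 * Cmm (PrimOps.cmm).  The equivalent pattern in C would look like the
 * hypothetical helper below: test for MVAR_CLEAN, flip the header to
 * MVAR_DIRTY, and only then pay for the out-of-line call that puts the
 * closure on the mutable list.  (exampleTouchMVar is illustrative only,
 * not part of the RTS API.)
 */
void exampleTouchMVar (StgRegTable *reg, StgMVar *mvar, StgClosure *value)
{
    mvar->value = value;                       // the mutation itself
    if (mvar->header.info == &stg_MVAR_CLEAN_info) {
        mvar->header.info = &stg_MVAR_DIRTY_info;
        dirty_MVAR(reg, (StgClosure *)mvar);   // record it on the mutable list
    }
}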
1105
1106 /* -----------------------------------------------------------------------------
1107 * Stats and stuff
1108 * -------------------------------------------------------------------------- */
1109
1110 /* -----------------------------------------------------------------------------
1111 * Note [allocation accounting]
1112 *
1113 * - When cap->r.rCurrentNursery moves to a new block in the nursery,
1114 * we add the size of the used portion of the previous block to
1115 * cap->total_allocated. (see finishedNurseryBlock())
1116 *
1117 * - When we start a GC, the allocated portions of CurrentNursery and
1118 * CurrentAlloc are added to cap->total_allocated. (see
1119 * updateNurseriesStats())
1120 *
1121 * -------------------------------------------------------------------------- */
1122
1123 //
1124 // Calculate the total allocated memory since the start of the
1125 // program. Also emits events reporting the per-cap allocation
1126 // totals.
1127 //
1128 StgWord
1129 calcTotalAllocated (void)
1130 {
1131 W_ tot_alloc = 0;
1132 W_ n;
1133
1134 for (n = 0; n < n_capabilities; n++) {
1135 tot_alloc += capabilities[n]->total_allocated;
1136
1137 traceEventHeapAllocated(capabilities[n],
1138 CAPSET_HEAP_DEFAULT,
1139 capabilities[n]->total_allocated * sizeof(W_));
1140 }
1141
1142 return tot_alloc;
1143 }
1144
1145 //
1146 // Update the per-cap total_allocated numbers with an approximation of
1147 // the amount of memory used in each cap's nursery.
1148 //
1149 void
1150 updateNurseriesStats (void)
1151 {
1152 uint32_t i;
1153 bdescr *bd;
1154
1155 for (i = 0; i < n_capabilities; i++) {
1156 // The current nursery block and the current allocate block have not
1157 // yet been accounted for in cap->total_allocated, so we add them here.
1158 bd = capabilities[i]->r.rCurrentNursery;
1159 if (bd) finishedNurseryBlock(capabilities[i], bd);
1160 bd = capabilities[i]->r.rCurrentAlloc;
1161 if (bd) finishedNurseryBlock(capabilities[i], bd);
1162 }
1163 }
1164
1165 W_ countOccupied (bdescr *bd)
1166 {
1167 W_ words;
1168
1169 words = 0;
1170 for (; bd != NULL; bd = bd->link) {
1171 ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
1172 words += bd->free - bd->start;
1173 }
1174 return words;
1175 }
1176
1177 W_ genLiveWords (generation *gen)
1178 {
1179 return gen->n_words + gen->n_large_words;
1180 }
1181
1182 W_ genLiveBlocks (generation *gen)
1183 {
1184 return gen->n_blocks + gen->n_large_blocks;
1185 }
1186
1187 W_ gcThreadLiveWords (uint32_t i, uint32_t g)
1188 {
1189 W_ a, b, c;
1190
1191 a = countOccupied(gc_threads[i]->gens[g].todo_bd);
1192 b = gc_threads[i]->gens[g].n_part_words;
1193 c = gc_threads[i]->gens[g].n_scavd_words;
1194
1195 // debugBelch("cap %d, g%d, %ld %ld %ld\n", i, g, a, b, c);
1196 return a + b + c;
1197 }
1198
1199 W_ gcThreadLiveBlocks (uint32_t i, uint32_t g)
1200 {
1201 W_ blocks;
1202
1203 blocks = countBlocks(gc_threads[i]->gens[g].todo_bd);
1204 blocks += gc_threads[i]->gens[g].n_part_blocks;
1205 blocks += gc_threads[i]->gens[g].n_scavd_blocks;
1206
1207 return blocks;
1208 }
1209
1210 /* Determine which generation will be collected next, and approximate
1211 * the maximum amount of memory that will be required to do the GC,
1212 * taking into account data that will be copied, and the space needed
1213 * to store bitmaps and the mark stack. Note: blocks_needed does not
1214 * include the blocks in the nursery.
1215 *
1216 * Assume: all data currently live will remain live. Generations
1217 * that will be collected next time will therefore need twice as many
1218 * blocks since all the data will be copied.
1219 */
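/* A worked example of the estimate below, for a generation that holds 6400
 * blocks (and no large objects) on a 64-bit machine (BITS_IN(W_) == 64) and
 * is due to be collected:
 *
 *   copying:     6400 + 6400                       = 12800 blocks
 *   compacting:  6400 + 6400/64 + 6400/100         =  6564 blocks
 *   mark/sweep:  6400 + 6400/64 + 6400/100 + 6400  = 12964 blocks
 */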
1220 extern W_
1221 calcNeeded (rtsBool force_major, memcount *blocks_needed)
1222 {
1223 W_ needed = 0, blocks;
1224 uint32_t g, N;
1225 generation *gen;
1226
1227 if (force_major) {
1228 N = RtsFlags.GcFlags.generations - 1;
1229 } else {
1230 N = 0;
1231 }
1232
1233 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1234 gen = &generations[g];
1235
1236 blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
1237 + gen->n_large_blocks;
1238
1239 // we need at least this much space
1240 needed += blocks;
1241
1242 // are we collecting this gen?
1243 if (g == 0 || // always collect gen 0
1244 blocks > gen->max_blocks)
1245 {
1246 N = stg_max(N,g);
1247
1248 // we will collect this gen next time
1249 if (gen->mark) {
1250 // bitmap:
1251 needed += gen->n_blocks / BITS_IN(W_);
1252 // mark stack:
1253 needed += gen->n_blocks / 100;
1254 }
1255 if (gen->compact) {
1256 continue; // no additional space needed for compaction
1257 } else {
1258 needed += gen->n_blocks;
1259 }
1260 }
1261 }
1262
1263 if (blocks_needed != NULL) {
1264 *blocks_needed = needed;
1265 }
1266 return N;
1267 }
1268
1269 /* ----------------------------------------------------------------------------
1270 Executable memory
1271
1272 Executable memory must be managed separately from non-executable
1273 memory. Most OSs these days require you to jump through hoops to
1274 dynamically allocate executable memory, due to various security
1275 measures.
1276
1277 Here we provide a small memory allocator for executable memory.
1278 Memory is managed with a page granularity; we allocate linearly
1279 in the page, and when the page is emptied (all objects on the page
1280 are free) we free the page again, not forgetting to make it
1281 non-executable.
1282
1283 TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1284 the linker cannot use allocateExec for loading object code files
1285 on Windows. Once allocateExec can handle larger objects, the linker
1286 should be modified to use allocateExec instead of VirtualAlloc.
1287 ------------------------------------------------------------------------- */
1288
1289 #if (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1290 void sys_icache_invalidate(void *start, size_t len);
1291 #endif
1292
1293 /* On ARM and other platforms, we need to flush the cache after
1294 writing code into memory, so the processor reliably sees it. */
1295 void flushExec (W_ len, AdjustorExecutable exec_addr)
1296 {
1297 #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
1298 /* x86 doesn't need to do anything, so just suppress some warnings. */
1299 (void)len;
1300 (void)exec_addr;
1301 #elif (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1302 /* On iOS we need to use the special 'sys_icache_invalidate' call. */
1303 sys_icache_invalidate(exec_addr, len);
1304 #elif defined(__GNUC__)
1305 /* For all other platforms, fall back to a libgcc builtin. */
1306 unsigned char* begin = (unsigned char*)exec_addr;
1307 unsigned char* end = begin + len;
1308 __clear_cache((void*)begin, (void*)end);
1309 #else
1310 #error Missing support to flush the instruction cache
1311 #endif
1312 }
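/* A hypothetical usage sketch for flushExec(): after copying freshly
 * generated code into an executable region (for instance one obtained from
 * allocateExec() below), the writer flushes the instruction cache via the
 * executable address before anything is allowed to jump to it.
 */
void exampleInstallCode (AdjustorExecutable code_exec, AdjustorWritable code_writ,
                         const void *src, W_ len)
{
    memcpy(code_writ, src, (size_t)len);  // write through the writable mapping
    flushExec(len, code_exec);            // then make the new code visible
}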
1313
1314 #if defined(linux_HOST_OS)
1315
1316 // On Linux we need to use libffi for allocating executable memory,
1317 // because it knows how to work around the restrictions put in place
1318 // by SELinux.
1319
1320 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1321 {
1322 void **ret, **exec;
1323 ACQUIRE_SM_LOCK;
1324 ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1325 RELEASE_SM_LOCK;
1326 if (ret == NULL) return ret;
1327 *ret = ret; // save the address of the writable mapping, for freeExec().
1328 *exec_ret = exec + 1;
1329 return (ret + 1);
1330 }
1331
1332 // freeExec gets passed the executable address, not the writable address.
1333 void freeExec (AdjustorExecutable addr)
1334 {
1335 AdjustorWritable writable;
1336 writable = *((void**)addr - 1);
1337 ACQUIRE_SM_LOCK;
1338 ffi_closure_free (writable);
1339 RELEASE_SM_LOCK
1340 }
1341
1342 #elif defined(ios_HOST_OS)
1343
1344 static HashTable* allocatedExecs;
1345
1346 AdjustorWritable allocateExec(W_ bytes, AdjustorExecutable *exec_ret)
1347 {
1348 AdjustorWritable writ;
1349 ffi_closure* cl;
1350 if (bytes != sizeof(ffi_closure)) {
1351 barf("allocateExec: for ffi_closure only");
1352 }
1353 ACQUIRE_SM_LOCK;
1354 cl = writ = ffi_closure_alloc((size_t)bytes, exec_ret);
1355 if (cl != NULL) {
1356 if (allocatedExecs == NULL) {
1357 allocatedExecs = allocHashTable();
1358 }
1359 insertHashTable(allocatedExecs, (StgWord)*exec_ret, writ);
1360 }
1361 RELEASE_SM_LOCK;
1362 return writ;
1363 }
1364
1365 AdjustorWritable execToWritable(AdjustorExecutable exec)
1366 {
1367 AdjustorWritable writ;
1368 ACQUIRE_SM_LOCK;
1369 if (allocatedExecs == NULL ||
1370 (writ = lookupHashTable(allocatedExecs, (StgWord)exec)) == NULL) {
1371 RELEASE_SM_LOCK;
1372 barf("execToWritable: not found");
1373 }
1374 RELEASE_SM_LOCK;
1375 return writ;
1376 }
1377
1378 void freeExec(AdjustorExecutable exec)
1379 {
1380 AdjustorWritable writ;
1381 ffi_closure* cl;
1382 cl = writ = execToWritable(exec);
1383 ACQUIRE_SM_LOCK;
1384 removeHashTable(allocatedExecs, (StgWord)exec, writ);
1385 ffi_closure_free(cl);
1386 RELEASE_SM_LOCK
1387 }
1388
1389 #else
1390
1391 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1392 {
1393 void *ret;
1394 W_ n;
1395
1396 ACQUIRE_SM_LOCK;
1397
1398 // round up to words.
1399 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
1400
1401 if (n+1 > BLOCK_SIZE_W) {
1402 barf("allocateExec: can't handle large objects");
1403 }
1404
1405 if (exec_block == NULL ||
1406 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1407 bdescr *bd;
1408 W_ pagesize = getPageSize();
1409 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1410 debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1411 bd->gen_no = 0;
1412 bd->flags = BF_EXEC;
1413 bd->link = exec_block;
1414 if (exec_block != NULL) {
1415 exec_block->u.back = bd;
1416 }
1417 bd->u.back = NULL;
1418 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1419 exec_block = bd;
1420 }
1421 *(exec_block->free) = n; // store the size of this chunk
1422 exec_block->gen_no += n; // gen_no stores the number of words allocated
1423 ret = exec_block->free + 1;
1424 exec_block->free += n + 1;
1425
1426 RELEASE_SM_LOCK
1427 *exec_ret = ret;
1428 return ret;
1429 }
1430
1431 void freeExec (void *addr)
1432 {
1433 StgPtr p = (StgPtr)addr - 1;
1434 bdescr *bd = Bdescr((StgPtr)p);
1435
1436 if ((bd->flags & BF_EXEC) == 0) {
1437 barf("freeExec: not executable");
1438 }
1439
1440 if (*(StgPtr)p == 0) {
1441 barf("freeExec: already free?");
1442 }
1443
1444 ACQUIRE_SM_LOCK;
1445
1446 bd->gen_no -= *(StgPtr)p;
1447 *(StgPtr)p = 0;
1448
1449 if (bd->gen_no == 0) {
1450 // Free the block if it is empty, but not if it is the block at
1451 // the head of the queue.
1452 if (bd != exec_block) {
1453 debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1454 dbl_link_remove(bd, &exec_block);
1455 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1456 freeGroup(bd);
1457 } else {
1458 bd->free = bd->start;
1459 }
1460 }
1461
1462 RELEASE_SM_LOCK
1463 }
1464
1465 #endif /* switch(HOST_OS) */
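/* A usage sketch for the allocateExec()/freeExec() API above, following the
 * libffi closure pattern that Adjustor.c uses.  exampleCallback and
 * exampleMakeTrampoline are hypothetical; the essential point is that all
 * writes go through the writable address, while callers (and freeExec())
 * only ever see the executable address.
 */
static void exampleCallback (ffi_cif *cif STG_UNUSED, void *ret,
                             void **args STG_UNUSED, void *user_data STG_UNUSED)
{
    *(ffi_arg *)ret = 42;   // placeholder behaviour
}

AdjustorExecutable exampleMakeTrampoline (ffi_cif *cif)
{
    AdjustorExecutable code;
    AdjustorWritable writ = allocateExec(sizeof(ffi_closure), &code);

    if (writ == NULL) return NULL;

    if (ffi_prep_closure_loc((ffi_closure *)writ, cif, exampleCallback,
                             NULL /* user_data */, code) != FFI_OK) {
        freeExec(code);     // freeExec expects the executable address
        return NULL;
    }
    return code;
}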
1466
1467 #ifdef DEBUG
1468
1469 // handy function for use in gdb, because Bdescr() is inlined.
1470 extern bdescr *_bdescr (StgPtr p);
1471
1472 bdescr *
1473 _bdescr (StgPtr p)
1474 {
1475 return Bdescr(p);
1476 }
1477
1478 #endif