1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2012
4 *
5 * Storage manager front end
6 *
7 * Documentation on the architecture of the Storage Manager can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16
17 #include "Storage.h"
18 #include "GCThread.h"
19 #include "RtsUtils.h"
20 #include "Stats.h"
21 #include "BlockAlloc.h"
22 #include "Weak.h"
23 #include "Sanity.h"
24 #include "Arena.h"
25 #include "Capability.h"
26 #include "Schedule.h"
27 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
28 #include "OSMem.h"
29 #include "Trace.h"
30 #include "GC.h"
31 #include "Evac.h"
32 #if defined(ios_HOST_OS)
33 #include "Hash.h"
34 #endif
35
36 #include <string.h>
37
38 #include "ffi.h"
39
40 /*
41 * In THREADED_RTS mode, all of these globals may only be accessed while holding sm_mutex.
42 */
43 StgIndStatic *dyn_caf_list = NULL;
44 StgIndStatic *debug_caf_list = NULL;
45 StgIndStatic *revertible_caf_list = NULL;
46 rtsBool keepCAFs;
47
48 W_ large_alloc_lim; /* GC if n_large_blocks in any nursery
49 * reaches this. */
50
51 bdescr *exec_block;
52
53 generation *generations = NULL; /* all the generations */
54 generation *g0 = NULL; /* generation 0, for convenience */
55 generation *oldest_gen = NULL; /* oldest generation, for convenience */
56
57 /*
58 * Array of nurseries, size == n_capabilities
59 *
60 * nursery[i] belongs to NUMA node (i % RtsFlags.GcFlags.nNumaNodes)
61 * This is chosen to be the same convention as capabilities[i], so
62 * that when not using nursery chunks (+RTS -n), we just map
63 * capabilities to nurseries 1:1.
64 */
65 nursery *nurseries = NULL;
66 uint32_t n_nurseries;
67
68 /*
69 * When we are using nursery chunks, we need a separate next_nursery
70 * pointer for each NUMA node.
71 */
72 volatile StgWord next_nursery[MAX_NUMA_NODES];
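
/*
 * Illustrative example (the figures are assumed for exposition, not taken
 * from the code above): with RtsFlags.GcFlags.nNumaNodes == 2 and
 * n_nurseries == 6, the nurseries are striped across the nodes as
 *
 *     node 0: nurseries 0, 2, 4   (next_nursery[0] starts at 0)
 *     node 1: nurseries 1, 3, 5   (next_nursery[1] starts at 1)
 *
 * so a Capability on node n always advances next_nursery[n] by
 * RtsFlags.GcFlags.nNumaNodes; see assignNurseriesToCapabilities() and
 * getNewNursery() below.
 */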
73
74 #ifdef THREADED_RTS
75 /*
76 * Storage manager mutex: protects all the above state from
77 * simultaneous access by two STG threads.
78 */
79 Mutex sm_mutex;
80 #endif
81
82 static void allocNurseries (uint32_t from, uint32_t to);
83 static void assignNurseriesToCapabilities (uint32_t from, uint32_t to);
84
85 static void
86 initGeneration (generation *gen, int g)
87 {
88 gen->no = g;
89 gen->collections = 0;
90 gen->par_collections = 0;
91 gen->failed_promotions = 0;
92 gen->max_blocks = 0;
93 gen->blocks = NULL;
94 gen->n_blocks = 0;
95 gen->n_words = 0;
96 gen->live_estimate = 0;
97 gen->old_blocks = NULL;
98 gen->n_old_blocks = 0;
99 gen->large_objects = NULL;
100 gen->n_large_blocks = 0;
101 gen->n_large_words = 0;
102 gen->n_new_large_words = 0;
103 gen->scavenged_large_objects = NULL;
104 gen->n_scavenged_large_blocks = 0;
105 gen->mark = 0;
106 gen->compact = 0;
107 gen->bitmap = NULL;
108 #ifdef THREADED_RTS
109 initSpinLock(&gen->sync);
110 #endif
111 gen->threads = END_TSO_QUEUE;
112 gen->old_threads = END_TSO_QUEUE;
113 gen->weak_ptr_list = NULL;
114 gen->old_weak_ptr_list = NULL;
115 }
116
117 void
118 initStorage (void)
119 {
120 uint32_t g, n;
121
122 if (generations != NULL) {
123 // multi-init protection
124 return;
125 }
126
127 initMBlocks();
128
129 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
130 * doing something reasonable.
131 */
132 /* We use the NOT_NULL variant, otherwise gcc warns that the test is always true */
133 ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
134 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
135 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
136
137 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
138 RtsFlags.GcFlags.heapSizeSuggestion >
139 RtsFlags.GcFlags.maxHeapSize) {
140 RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
141 }
142
143 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
144 RtsFlags.GcFlags.minAllocAreaSize >
145 RtsFlags.GcFlags.maxHeapSize) {
146 errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
147 RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
148 }
149
150 initBlockAllocator();
151
152 #if defined(THREADED_RTS)
153 initMutex(&sm_mutex);
154 #endif
155
156 ACQUIRE_SM_LOCK;
157
158 /* allocate generation info array */
159 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
160 * sizeof(struct generation_),
161 "initStorage: gens");
162
163 /* Initialise all generations */
164 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
165 initGeneration(&generations[g], g);
166 }
167
168 /* A couple of convenience pointers */
169 g0 = &generations[0];
170 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
171
172 /* Set up the destination pointers in each younger gen. step */
173 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
174 generations[g].to = &generations[g+1];
175 }
176 oldest_gen->to = oldest_gen;
177
178 /* The oldest generation has one step. */
179 if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
180 if (RtsFlags.GcFlags.generations == 1) {
181 errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
182 } else {
183 oldest_gen->mark = 1;
184 if (RtsFlags.GcFlags.compact)
185 oldest_gen->compact = 1;
186 }
187 }
188
189 generations[0].max_blocks = 0;
190
191 dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
192 debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
193 revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
194
195 if (RtsFlags.GcFlags.largeAllocLim > 0) {
196 large_alloc_lim = RtsFlags.GcFlags.largeAllocLim * BLOCK_SIZE_W;
197 } else {
198 large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
199 }
200
201 exec_block = NULL;
202
203 #ifdef THREADED_RTS
204 initSpinLock(&gc_alloc_block_sync);
205 #ifdef PROF_SPIN
206 whitehole_spin = 0;
207 #endif
208 #endif
209
210 N = 0;
211
212 for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
213 next_nursery[n] = n;
214 }
215 storageAddCapabilities(0, n_capabilities);
216
217 IF_DEBUG(gc, statDescribeGens());
218
219 RELEASE_SM_LOCK;
220
221 traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
222 RtsFlags.GcFlags.generations,
223 RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE,
224 RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE,
225 MBLOCK_SIZE,
226 BLOCK_SIZE);
227 }
228
229 void storageAddCapabilities (uint32_t from, uint32_t to)
230 {
231 uint32_t n, g, i, new_n_nurseries;
232
233 if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
234 new_n_nurseries = to;
235 } else {
236 memcount total_alloc = to * RtsFlags.GcFlags.minAllocAreaSize;
237 new_n_nurseries =
238 stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
239 }
240
241 if (from > 0) {
242 nurseries = stgReallocBytes(nurseries,
243 new_n_nurseries * sizeof(struct nursery_),
244 "storageAddCapabilities");
245 } else {
246 nurseries = stgMallocBytes(new_n_nurseries * sizeof(struct nursery_),
247 "storageAddCapabilities");
248 }
249
250 // we've moved the nurseries, so we have to update the rNursery
251 // pointers from the Capabilities.
252 for (i = 0; i < to; i++) {
253 capabilities[i]->r.rNursery = &nurseries[i];
254 }
255
256 /* The allocation area. Policy: keep the allocation area
257 * small to begin with, even if we have a large suggested heap
258 * size. Reason: we're going to do a major collection first, and we
259 * don't want it to be a big one. This vague idea is borne out by
260 * rigorous experimental evidence.
261 */
262 allocNurseries(n_nurseries, new_n_nurseries);
263 n_nurseries = new_n_nurseries;
264
265 /*
266 * Assign each of the new capabilities a nursery. Remember to start from
267 * next_nursery, because we may have already consumed some of the earlier
268 * nurseries.
269 */
270 assignNurseriesToCapabilities(from,to);
271
272 // allocate a block for each mut list
273 for (n = from; n < to; n++) {
274 for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
275 capabilities[n]->mut_lists[g] =
276 allocBlockOnNode(capNoToNumaNode(n));
277 }
278 }
279
280 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
281 newThreadLocalKey(&gctKey);
282 #endif
283
284 initGcThreads(from, to);
285 }
286
287
288 void
289 exitStorage (void)
290 {
291 updateNurseriesStats();
292 stat_exit();
293 }
294
295 void
296 freeStorage (rtsBool free_heap)
297 {
298 stgFree(generations);
299 if (free_heap) freeAllMBlocks();
300 #if defined(THREADED_RTS)
301 closeMutex(&sm_mutex);
302 #endif
303 stgFree(nurseries);
304 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
305 freeThreadLocalKey(&gctKey);
306 #endif
307 freeGcThreads();
308 }
309
310 /* -----------------------------------------------------------------------------
311 Note [CAF management].
312
313 The entry code for every CAF does the following:
314
315 - calls newCaf, which builds a CAF_BLACKHOLE on the heap and atomically
316 updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE
317
318 - if newCaf returns zero, it re-enters the CAF (see Note [atomic
319 CAF entry])
320
321 - pushes an update frame pointing to the CAF_BLACKHOLE
322
323 Why do we build a BLACKHOLE in the heap rather than just updating
324 the thunk directly? It's so that we only need one kind of update
325 frame - otherwise we'd need a static version of the update frame
326 too, and various other parts of the RTS that deal with update
327 frames would also need special cases for static update frames.
328
329 newCaf() does the following:
330
331 - atomically locks the CAF (see [atomic CAF entry])
332
333 - it builds a CAF_BLACKHOLE on the heap
334
335 - it updates the CAF with an IND_STATIC pointing to the
336 CAF_BLACKHOLE, atomically.
337
338 - it puts the CAF on the oldest generation's mutable list.
339 This is so that we treat the CAF as a root when collecting
340 younger generations.
341
342 - links the CAF onto the CAF list (see below)
343
344 ------------------
345 Note [atomic CAF entry]
346
347 With THREADED_RTS, newCaf() is required to be atomic (see
348 #5558). This is because if two threads happened to enter the same
349 CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
350 and so the normal threadPaused() machinery for detecting duplicate
351 evaluation will not detect this. Hence in lockCAF() below, we
352 atomically lock the CAF with WHITEHOLE before updating it with
353 IND_STATIC, and return zero if another thread locked the CAF first.
354 In the event that we lost the race, CAF entry code will re-enter
355 the CAF and block on the other thread's CAF_BLACKHOLE.
356
357 ------------------
358 Note [GHCi CAFs]
359
360 For GHCi, we have additional requirements when dealing with CAFs:
361
362 - we must *retain* all dynamically-loaded CAFs ever entered,
363 just in case we need them again.
364 - we must be able to *revert* CAFs that have been evaluated, to
365 their pre-evaluated form.
366
367 To do this, we use an additional CAF list. When newCaf() is
368 called on a dynamically-loaded CAF, we add it to the CAF list
369 instead of the old-generation mutable list, and save away its
370 old info pointer (in caf->saved_info) for later reversion.
371
372 To revert all the CAFs, we traverse the CAF list and reset the
373 info pointer to caf->saved_info, then throw away the CAF list.
374 (see GC.c:revertCAFs()).
375
376 -- SDM 29/1/01
377
378 -------------------------------------------------------------------------- */
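
/*
 * A rough sketch of the CAF entry protocol described in the notes above,
 * written as C-like pseudo-code purely for illustration (the real entry
 * code is generated by the compiler, not defined in this file; ENTER()
 * and push_update_frame() are stand-ins for that generated code):
 *
 *     StgInd *bh = newCAF(reg, caf);  // lockCAF(): WHITEHOLE, then IND_STATIC
 *     if (bh == NULL) {
 *         // lost the race (see Note [atomic CAF entry]); re-enter the CAF,
 *         // which now blocks on the winning thread's CAF_BLACKHOLE
 *         ENTER(caf);
 *     } else {
 *         push_update_frame(bh);      // one kind of update frame suffices
 *         // ... evaluate the CAF's right-hand side, then update bh ...
 *     }
 */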
379
380 STATIC_INLINE StgInd *
381 lockCAF (StgRegTable *reg, StgIndStatic *caf)
382 {
383 const StgInfoTable *orig_info;
384 Capability *cap = regTableToCapability(reg);
385 StgInd *bh;
386
387 orig_info = caf->header.info;
388
389 #ifdef THREADED_RTS
390 const StgInfoTable *cur_info;
391
392 if (orig_info == &stg_IND_STATIC_info ||
393 orig_info == &stg_WHITEHOLE_info) {
394 // already claimed by another thread; re-enter the CAF
395 return NULL;
396 }
397
398 cur_info = (const StgInfoTable *)
399 cas((StgVolatilePtr)&caf->header.info,
400 (StgWord)orig_info,
401 (StgWord)&stg_WHITEHOLE_info);
402
403 if (cur_info != orig_info) {
404 // already claimed by another thread; re-enter the CAF
405 return NULL;
406 }
407
408 // successfully claimed by us; overwrite with IND_STATIC
409 #endif
410
411 // For the benefit of revertCAFs(), save the original info pointer
412 caf->saved_info = orig_info;
413
414 // Allocate the blackhole indirection closure
415 bh = (StgInd *)allocate(cap, sizeofW(*bh));
416 SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
417 bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
418
419 caf->indirectee = (StgClosure *)bh;
420 write_barrier();
421 SET_INFO((StgClosure*)caf,&stg_IND_STATIC_info);
422
423 return bh;
424 }
425
426 StgInd *
427 newCAF(StgRegTable *reg, StgIndStatic *caf)
428 {
429 StgInd *bh;
430
431 bh = lockCAF(reg, caf);
432 if (!bh) return NULL;
433
434 if(keepCAFs)
435 {
436 // Note [dyn_caf_list]
437 // If we are in GHCi _and_ we are using dynamic libraries,
438 // then we can't redirect newCAF calls to newRetainedCAF (see below),
439 // so we make newCAF behave almost like newRetainedCAF.
440 // The dynamic libraries might be used by both the interpreted
441 // program and GHCi itself, so they must not be reverted.
442 // This also means that in GHCi with dynamic libraries, CAFs are not
443 // garbage collected. If this turns out to be a problem, we could
444 // do another hack here and do an address range test on caf to figure
445 // out whether it is from a dynamic library.
446
447 ACQUIRE_SM_LOCK; // dyn_caf_list is global, locked by sm_mutex
448 caf->static_link = (StgClosure*)dyn_caf_list;
449 dyn_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
450 RELEASE_SM_LOCK;
451 }
452 else
453 {
454 // Put this CAF on the mutable list for the old generation.
455 if (oldest_gen->no != 0) {
456 recordMutableCap((StgClosure*)caf,
457 regTableToCapability(reg), oldest_gen->no);
458 }
459
460 #ifdef DEBUG
461 // In the DEBUG rts, we keep track of live CAFs by chaining them
462 // onto a list debug_caf_list. This is so that we can tell if we
463 // ever enter a GC'd CAF, and emit a suitable barf().
464 //
465 // The saved_info field of the CAF is used as the link field for
466 // debug_caf_list, because this field is only used by newDynCAF
467 // for revertible CAFs, and we don't put those on the
468 // debug_caf_list.
469 ACQUIRE_SM_LOCK; // debug_caf_list is global, locked by sm_mutex
470 ((StgIndStatic *)caf)->saved_info = (const StgInfoTable*)debug_caf_list;
471 debug_caf_list = (StgIndStatic*)caf;
472 RELEASE_SM_LOCK;
473 #endif
474 }
475
476 return bh;
477 }
478
479 // External API for setting the keepCAFs flag. see #3900.
480 void
481 setKeepCAFs (void)
482 {
483 keepCAFs = 1;
484 }
485
486 // An alternate version of newCAF which is used for dynamically loaded
487 // object code in GHCi. In this case we want to retain *all* CAFs in
488 // the object code, because they might be demanded at any time from an
489 // expression evaluated on the command line.
490 // Also, GHCi might want to revert CAFs, so we add these to the
491 // revertible_caf_list.
492 //
493 // The linker hackily arranges that references to newCAF from dynamic
494 // code end up pointing to newRetainedCAF.
495 //
496 StgInd* newRetainedCAF (StgRegTable *reg, StgIndStatic *caf)
497 {
498 StgInd *bh;
499
500 bh = lockCAF(reg, caf);
501 if (!bh) return NULL;
502
503 ACQUIRE_SM_LOCK;
504
505 caf->static_link = (StgClosure*)revertible_caf_list;
506 revertible_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
507
508 RELEASE_SM_LOCK;
509
510 return bh;
511 }
512
513 // If we are using loadObj/unloadObj in the linker, then we want to
514 //
515 // - retain all CAFs in statically linked code (keepCAFs == rtsTrue),
516 // because we might link a new object that uses any of these CAFs.
517 //
518 // - GC CAFs in dynamically-linked code, so that we can detect when
519 // a dynamically-linked object is unloadable.
520 //
521 // So for this case, we set keepCAFs to rtsTrue, and link newCAF to newGCdCAF
522 // for dynamically-linked code.
523 //
524 StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
525 {
526 StgInd *bh;
527
528 bh = lockCAF(reg, caf);
529 if (!bh) return NULL;
530
531 // Put this CAF on the mutable list for the old generation.
532 if (oldest_gen->no != 0) {
533 recordMutableCap((StgClosure*)caf,
534 regTableToCapability(reg), oldest_gen->no);
535 }
536
537 return bh;
538 }
539
540 /* -----------------------------------------------------------------------------
541 Nursery management.
542 -------------------------------------------------------------------------- */
543
544 static bdescr *
545 allocNursery (uint32_t node, bdescr *tail, W_ blocks)
546 {
547 bdescr *bd = NULL;
548 W_ i, n;
549
550 // We allocate the nursery as a single contiguous block and then
551 // divide it into single blocks manually. This way we guarantee
552 // that the nursery blocks are adjacent, so that the processor's
553 // automatic prefetching works across nursery blocks. This is a
554 // tiny optimisation (~0.5%), but it's free.
555
556 while (blocks > 0) {
557 n = stg_min(BLOCKS_PER_MBLOCK, blocks);
558 // allocLargeChunk will prefer large chunks, but will pick up
559 // small chunks if there are any available. We must allow
560 // single blocks here to avoid fragmentation (#7257)
561 bd = allocLargeChunkOnNode(node, 1, n);
562 n = bd->blocks;
563 blocks -= n;
564
565 for (i = 0; i < n; i++) {
566 initBdescr(&bd[i], g0, g0);
567
568 bd[i].blocks = 1;
569 bd[i].flags = 0;
570
571 if (i > 0) {
572 bd[i].u.back = &bd[i-1];
573 } else {
574 bd[i].u.back = NULL;
575 }
576
577 if (i+1 < n) {
578 bd[i].link = &bd[i+1];
579 } else {
580 bd[i].link = tail;
581 if (tail != NULL) {
582 tail->u.back = &bd[i];
583 }
584 }
585
586 bd[i].free = bd[i].start;
587 }
588
589 tail = &bd[0];
590 }
591
592 return &bd[0];
593 }
594
595 STATIC_INLINE void
596 assignNurseryToCapability (Capability *cap, uint32_t n)
597 {
598 ASSERT(n < n_nurseries);
599 cap->r.rNursery = &nurseries[n];
600 cap->r.rCurrentNursery = nurseries[n].blocks;
601 newNurseryBlock(nurseries[n].blocks);
602 cap->r.rCurrentAlloc = NULL;
603 ASSERT(cap->r.rCurrentNursery->node == cap->node);
604 }
605
606 /*
607 * Give each Capability a nursery from the pool. No need to do atomic increments
608 * here; everything must be stopped to call this function.
609 */
610 static void
611 assignNurseriesToCapabilities (uint32_t from, uint32_t to)
612 {
613 uint32_t i, node;
614
615 for (i = from; i < to; i++) {
616 node = capabilities[i]->node;
617 assignNurseryToCapability(capabilities[i], next_nursery[node]);
618 next_nursery[node] += RtsFlags.GcFlags.nNumaNodes;
619 }
620 }
621
622 static void
623 allocNurseries (uint32_t from, uint32_t to)
624 {
625 uint32_t i;
626 memcount n_blocks;
627
628 if (RtsFlags.GcFlags.nurseryChunkSize) {
629 n_blocks = RtsFlags.GcFlags.nurseryChunkSize;
630 } else {
631 n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
632 }
633
634 for (i = from; i < to; i++) {
635 nurseries[i].blocks = allocNursery(capNoToNumaNode(i), NULL, n_blocks);
636 nurseries[i].n_blocks = n_blocks;
637 }
638 }
639
640 void
641 resetNurseries (void)
642 {
643 uint32_t n;
644
645 for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
646 next_nursery[n] = n;
647 }
648 assignNurseriesToCapabilities(0, n_capabilities);
649
650 #ifdef DEBUG
651 bdescr *bd;
652 for (n = 0; n < n_nurseries; n++) {
653 for (bd = nurseries[n].blocks; bd; bd = bd->link) {
654 ASSERT(bd->gen_no == 0);
655 ASSERT(bd->gen == g0);
656 ASSERT(bd->node == capNoToNumaNode(n));
657 IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE));
658 }
659 }
660 #endif
661 }
662
663 W_
664 countNurseryBlocks (void)
665 {
666 uint32_t i;
667 W_ blocks = 0;
668
669 for (i = 0; i < n_nurseries; i++) {
670 blocks += nurseries[i].n_blocks;
671 }
672 return blocks;
673 }
674
675 //
676 // Resize each of the nurseries to the specified size.
677 //
678 static void
679 resizeNurseriesEach (W_ blocks)
680 {
681 uint32_t i, node;
682 bdescr *bd;
683 W_ nursery_blocks;
684 nursery *nursery;
685
686 for (i = 0; i < n_nurseries; i++) {
687 nursery = &nurseries[i];
688 nursery_blocks = nursery->n_blocks;
689 if (nursery_blocks == blocks) continue;
690
691 node = capNoToNumaNode(i);
692 if (nursery_blocks < blocks) {
693 debugTrace(DEBUG_gc, "increasing size of nursery to %" FMT_Word " blocks",
694 blocks);
695 nursery->blocks = allocNursery(node, nursery->blocks,
696 blocks-nursery_blocks);
697 }
698 else
699 {
700 bdescr *next_bd;
701
702 debugTrace(DEBUG_gc, "decreasing size of nursery to %" FMT_Word " blocks",
703 blocks);
704
705 bd = nursery->blocks;
706 while (nursery_blocks > blocks) {
707 next_bd = bd->link;
708 next_bd->u.back = NULL;
709 nursery_blocks -= bd->blocks; // might be a large block
710 freeGroup(bd);
711 bd = next_bd;
712 }
713 nursery->blocks = bd;
714 // might have gone just under, by freeing a large block, so make
715 // up the difference.
716 if (nursery_blocks < blocks) {
717 nursery->blocks = allocNursery(node, nursery->blocks,
718 blocks-nursery_blocks);
719 }
720 }
721 nursery->n_blocks = blocks;
722 ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
723 }
724 }
725
726 void
727 resizeNurseriesFixed (void)
728 {
729 uint32_t blocks;
730
731 if (RtsFlags.GcFlags.nurseryChunkSize) {
732 blocks = RtsFlags.GcFlags.nurseryChunkSize;
733 } else {
734 blocks = RtsFlags.GcFlags.minAllocAreaSize;
735 }
736
737 resizeNurseriesEach(blocks);
738 }
739
740 //
741 // Resize the nurseries to the total specified size.
742 //
743 void
744 resizeNurseries (W_ blocks)
745 {
746 // If there are multiple nurseries, then we just divide the number
747 // of available blocks between them.
748 resizeNurseriesEach(blocks / n_nurseries);
749 }
750
751 rtsBool
752 getNewNursery (Capability *cap)
753 {
754 StgWord i;
755 uint32_t node = cap->node;
756 uint32_t n;
757
758 for(;;) {
759 i = next_nursery[node];
760 if (i < n_nurseries) {
761 if (cas(&next_nursery[node], i,
762 i+RtsFlags.GcFlags.nNumaNodes) == i) {
763 assignNurseryToCapability(cap, i);
764 return rtsTrue;
765 }
766 } else if (RtsFlags.GcFlags.nNumaNodes > 1) {
767 // Try to find an unused nursery chunk on other nodes. We'll get
768 // remote memory, but the rationale is that avoiding GC is better
769 // than avoiding remote memory access.
770 rtsBool lost = rtsFalse;
771 for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
772 if (n == node) continue;
773 i = next_nursery[n];
774 if (i < n_nurseries) {
775 if (cas(&next_nursery[n], i,
776 i+RtsFlags.GcFlags.nNumaNodes) == i) {
777 assignNurseryToCapability(cap, i);
778 return rtsTrue;
779 } else {
780 lost = rtsTrue; /* lost a race */
781 }
782 }
783 }
784 if (!lost) return rtsFalse;
785 } else {
786 return rtsFalse;
787 }
788 }
789 }
790
791 /* -----------------------------------------------------------------------------
792 move_STACK is called to update the TSO structure after it has been
793 moved from one place to another.
794 -------------------------------------------------------------------------- */
795
796 void
797 move_STACK (StgStack *src, StgStack *dest)
798 {
799 ptrdiff_t diff;
800
801 // relocate the stack pointer...
802 diff = (StgPtr)dest - (StgPtr)src; // In *words*
803 dest->sp = (StgPtr)dest->sp + diff;
804 }
805
806 /* -----------------------------------------------------------------------------
807 StgPtr allocate (Capability *cap, W_ n)
808
809 Allocates an area of memory n *words* large, from the nursery of
810 the supplied Capability, or from the global block pool if the area
811 requested is larger than LARGE_OBJECT_THRESHOLD. Memory is not
812 allocated from the current nursery block, so as not to interfere
813 with Hp/HpLim.
814
815 The address of the allocated memory is returned. allocate() never
816 fails; if it returns, the returned value is a valid address. If
817 the nursery is already full, then another block is allocated from
818 the global block pool. If we need to get memory from the OS and
819 that operation fails, then the whole process will be killed.
820 -------------------------------------------------------------------------- */
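
/*
 * Illustrative use (a sketch under the assumption that the caller already
 * holds a Capability; 'init' is an assumed StgClosure*, and this is not
 * code from this file):
 *
 *     StgMutVar *mv = (StgMutVar *)allocate(cap, sizeofW(StgMutVar));
 *     SET_HDR(mv, &stg_MUT_VAR_DIRTY_info, CCS_SYSTEM);
 *     mv->var = init;
 *
 * No NULL check is needed: as documented above, allocate() either returns
 * a valid address or terminates the process.
 */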
821
822 StgPtr
823 allocate (Capability *cap, W_ n)
824 {
825 bdescr *bd;
826 StgPtr p;
827
828 TICK_ALLOC_HEAP_NOCTR(WDS(n));
829 CCS_ALLOC(cap->r.rCCCS,n);
830 if (cap->r.rCurrentTSO != NULL) {
831 // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
832 ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
833 (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
834 - n*sizeof(W_)));
835 }
836
837 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
838 // The largest number of words such that
839 // the computation of req_blocks will not overflow.
840 W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
841 W_ req_blocks;
842
843 if (n > max_words)
844 req_blocks = HS_WORD_MAX; // signal overflow below
845 else
846 req_blocks = (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
847
848 // Attempting to allocate an object larger than maxHeapSize
849 // should definitely be disallowed. (bug #1791)
850 if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
851 req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
852 req_blocks >= HS_INT32_MAX) // avoid overflow when
853 // calling allocGroup() below
854 {
855 heapOverflow();
856 // heapOverflow() doesn't exit (see #2592), but we aren't
857 // in a position to do a clean shutdown here: we
858 // either have to allocate the memory or exit now.
859 // Allocating the memory would be bad, because the user
860 // has requested that we not exceed maxHeapSize, so we
861 // just exit.
862 stg_exit(EXIT_HEAPOVERFLOW);
863 }
864
865 ACQUIRE_SM_LOCK
866 bd = allocGroupOnNode(cap->node,req_blocks);
867 dbl_link_onto(bd, &g0->large_objects);
868 g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
869 g0->n_new_large_words += n;
870 RELEASE_SM_LOCK;
871 initBdescr(bd, g0, g0);
872 bd->flags = BF_LARGE;
873 bd->free = bd->start + n;
874 cap->total_allocated += n;
875 return bd->start;
876 }
877
878 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
879
880 bd = cap->r.rCurrentAlloc;
881 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
882
883 if (bd) finishedNurseryBlock(cap,bd);
884
885 // The CurrentAlloc block is full, we need to find another
886 // one. First, we try taking the next block from the
887 // nursery:
888 bd = cap->r.rCurrentNursery->link;
889
890 if (bd == NULL) {
891 // The nursery is empty: allocate a fresh block (we can't
892 // fail here).
893 ACQUIRE_SM_LOCK;
894 bd = allocBlockOnNode(cap->node);
895 cap->r.rNursery->n_blocks++;
896 RELEASE_SM_LOCK;
897 initBdescr(bd, g0, g0);
898 bd->flags = 0;
899 // If we had to allocate a new block, then we'll GC
900 // pretty quickly now, because MAYBE_GC() will
901 // notice that CurrentNursery->link is NULL.
902 } else {
903 newNurseryBlock(bd);
904 // we have a block in the nursery: take it and put
905 // it at the *front* of the nursery list, and use it
906 // to allocate() from.
907 //
908 // Previously the nursery looked like this:
909 //
910 // CurrentNursery
911 // /
912 // +-+ +-+
913 // nursery -> ... |A| -> |B| -> ...
914 // +-+ +-+
915 //
916 // After doing this, it looks like this:
917 //
918 // CurrentNursery
919 // /
920 // +-+ +-+
921 // nursery -> |B| -> ... -> |A| -> ...
922 // +-+ +-+
923 // |
924 // CurrentAlloc
925 //
926 // The point is to get the block out of the way of the
927 // advancing CurrentNursery pointer, while keeping it
928 // on the nursery list so we don't lose track of it.
929 cap->r.rCurrentNursery->link = bd->link;
930 if (bd->link != NULL) {
931 bd->link->u.back = cap->r.rCurrentNursery;
932 }
933 }
934 dbl_link_onto(bd, &cap->r.rNursery->blocks);
935 cap->r.rCurrentAlloc = bd;
936 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
937 }
938 p = bd->free;
939 bd->free += n;
940
941 IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
942 return p;
943 }
944
945 /* ---------------------------------------------------------------------------
946 Allocate a fixed/pinned object.
947
948 We allocate small pinned objects into a single block, allocating a
949 new block when the current one overflows. The block is chained
950 onto the large_object_list of generation 0.
951
952 NOTE: The GC can't in general handle pinned objects. This
953 interface is only safe to use for ByteArrays, which have no
954 pointers and don't require scavenging. It works because the
955 block's descriptor has the BF_LARGE flag set, so the block is
956 treated as a large object and chained onto various lists, rather
957 than the individual objects being copied. However, when it comes
958 to scavenge the block, the GC will only scavenge the first object.
959 The reason is that the GC can't linearly scan a block of pinned
960 objects at the moment (doing so would require using the
961 mostly-copying techniques). But since we're restricting ourselves
962 to pinned ByteArrays, not scavenging is ok.
963
964 This function is called by newPinnedByteArray# which immediately
965 fills the allocated memory with a MutableByteArray#.
966 ------------------------------------------------------------------------- */
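
/*
 * Sketch of the caller's side, simplified and for illustration only (the
 * real code for newPinnedByteArray# lives in rts/PrimOps.cmm; the exact
 * header setup, 'n' and 'ccs' here are assumptions):
 *
 *     W_ words = sizeofW(StgArrBytes) + ROUNDUP_BYTES_TO_WDS(n);
 *     StgArrBytes *arr = (StgArrBytes *)allocatePinned(cap, words);
 *     SET_HDR(arr, &stg_ARR_WORDS_info, ccs);
 *     arr->bytes = n;
 *
 * The payload never moves, which is why only pointer-free ByteArrays may
 * be allocated this way (see the note above).
 */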
967
968 StgPtr
969 allocatePinned (Capability *cap, W_ n)
970 {
971 StgPtr p;
972 bdescr *bd;
973
974 // If the request is for a large object, then allocate()
975 // will give us a pinned object anyway.
976 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
977 p = allocate(cap, n);
978 Bdescr(p)->flags |= BF_PINNED;
979 return p;
980 }
981
982 TICK_ALLOC_HEAP_NOCTR(WDS(n));
983 CCS_ALLOC(cap->r.rCCCS,n);
984 if (cap->r.rCurrentTSO != NULL) {
985 // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
986 ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
987 (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
988 - n*sizeof(W_)));
989 }
990
991 bd = cap->pinned_object_block;
992
993 // If we don't have a block of pinned objects yet, or the current
994 // one isn't large enough to hold the new object, get a new one.
995 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
996
997 // stash the old block on cap->pinned_object_blocks. On the
998 // next GC cycle these objects will be moved to
999 // g0->large_objects.
1000 if (bd != NULL) {
1001 // add it to the allocation stats when the block is full
1002 finishedNurseryBlock(cap, bd);
1003 dbl_link_onto(bd, &cap->pinned_object_blocks);
1004 }
1005
1006 // We need to find another block. We could just allocate one,
1007 // but that means taking a global lock and we really want to
1008 // avoid that (benchmarks that allocate a lot of pinned
1009 // objects scale really badly if we do this).
1010 //
1011 // So first, we try taking the next block from the nursery, in
1012 // the same way as allocate().
1013 bd = cap->r.rCurrentNursery->link;
1014 if (bd == NULL) {
1015 // The nursery is empty: allocate a fresh block (we can't fail
1016 // here).
1017 ACQUIRE_SM_LOCK;
1018 bd = allocBlockOnNode(cap->node);
1019 RELEASE_SM_LOCK;
1020 initBdescr(bd, g0, g0);
1021 } else {
1022 newNurseryBlock(bd);
1023 // we have a block in the nursery: steal it
1024 cap->r.rCurrentNursery->link = bd->link;
1025 if (bd->link != NULL) {
1026 bd->link->u.back = cap->r.rCurrentNursery;
1027 }
1028 cap->r.rNursery->n_blocks -= bd->blocks;
1029 }
1030
1031 cap->pinned_object_block = bd;
1032 bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
1033
1034 // The pinned_object_block remains attached to the capability
1035 // until it is full, even if a GC occurs. We want this
1036 // behaviour because otherwise the unallocated portion of the
1037 // block would be forever slop, and under certain workloads
1038 // (allocating a few ByteStrings per GC) we accumulate a lot
1039 // of slop.
1040 //
1041 // So, the pinned_object_block is initially marked
1042 // BF_EVACUATED so the GC won't touch it. When it is full,
1043 // we place it on the large_objects list, and at the start of
1044 // the next GC the BF_EVACUATED flag will be cleared, and the
1045 // block will be promoted as usual (if anything in it is
1046 // live).
1047 }
1048
1049 p = bd->free;
1050 bd->free += n;
1051 return p;
1052 }
1053
1054 /* -----------------------------------------------------------------------------
1055 Write Barriers
1056 -------------------------------------------------------------------------- */
1057
1058 /*
1059 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
1060 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
1061 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
1062 and is put on the mutable list.
1063 */
1064 void
1065 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
1066 {
1067 Capability *cap = regTableToCapability(reg);
1068 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
1069 p->header.info = &stg_MUT_VAR_DIRTY_info;
1070 recordClosureMutated(cap,p);
1071 }
1072 }
1073
1074 void
1075 dirty_TVAR(Capability *cap, StgTVar *p)
1076 {
1077 if (p->header.info == &stg_TVAR_CLEAN_info) {
1078 p->header.info = &stg_TVAR_DIRTY_info;
1079 recordClosureMutated(cap,(StgClosure*)p);
1080 }
1081 }
1082
1083 // Setting a TSO's link field with a write barrier.
1084 // It is *not* necessary to call this function when
1085 // * setting the link field to END_TSO_QUEUE
1086 // * setting the link field of the currently running TSO, as it
1087 // will already be dirty.
1088 void
1089 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
1090 {
1091 if (tso->dirty == 0) {
1092 tso->dirty = 1;
1093 recordClosureMutated(cap,(StgClosure*)tso);
1094 }
1095 tso->_link = target;
1096 }
1097
1098 void
1099 setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
1100 {
1101 if (tso->dirty == 0) {
1102 tso->dirty = 1;
1103 recordClosureMutated(cap,(StgClosure*)tso);
1104 }
1105 tso->block_info.prev = target;
1106 }
1107
1108 void
1109 dirty_TSO (Capability *cap, StgTSO *tso)
1110 {
1111 if (tso->dirty == 0) {
1112 tso->dirty = 1;
1113 recordClosureMutated(cap,(StgClosure*)tso);
1114 }
1115 }
1116
1117 void
1118 dirty_STACK (Capability *cap, StgStack *stack)
1119 {
1120 if (stack->dirty == 0) {
1121 stack->dirty = 1;
1122 recordClosureMutated(cap,(StgClosure*)stack);
1123 }
1124 }
1125
1126 /*
1127 This is the write barrier for MVARs. An MVAR_CLEAN object is not
1128 on the mutable list; an MVAR_DIRTY is. When written to, an
1129 MVAR_CLEAN turns into an MVAR_DIRTY and is put on the mutable list.
1130 The check for MVAR_CLEAN is inlined at the call site for speed;
1131 this really does make a difference on concurrency-heavy benchmarks
1132 such as Chameneos and cheap-concurrency.
1133 */
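/*
 * The inlined call-site check mentioned above looks roughly like this,
 * rendered here as C for illustration ('cap' and 'mvar' are assumed; the
 * real check lives in the Cmm code for the MVar primops):
 *
 *     if (GET_INFO((StgClosure *)mvar) == &stg_MVAR_CLEAN_info) {
 *         dirty_MVAR(&cap->r, (StgClosure *)mvar);
 *     }
 *
 * i.e. the C call is only made the first time a clean MVAR is written to.
 */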
1134 void
1135 dirty_MVAR(StgRegTable *reg, StgClosure *p)
1136 {
1137 recordClosureMutated(regTableToCapability(reg),p);
1138 }
1139
1140 /* -----------------------------------------------------------------------------
1141 * Stats and stuff
1142 * -------------------------------------------------------------------------- */
1143
1144 /* -----------------------------------------------------------------------------
1145 * Note [allocation accounting]
1146 *
1147 * - When cap->r.rCurrentNursery moves to a new block in the nursery,
1148 * we add the size of the used portion of the previous block to
1149 * cap->total_allocated. (see finishedNurseryBlock())
1150 *
1151 * - When we start a GC, the allocated portion of CurrentNursery and
1152 * CurrentAlloc are added to cap->total_allocated. (see
1153 * updateNurseriesStats())
1154 *
1155 * -------------------------------------------------------------------------- */
1156
1157 //
1158 // Calculate the total allocated memory since the start of the
1159 // program. Also emits events reporting the per-cap allocation
1160 // totals.
1161 //
1162 StgWord
1163 calcTotalAllocated (void)
1164 {
1165 W_ tot_alloc = 0;
1166 W_ n;
1167
1168 for (n = 0; n < n_capabilities; n++) {
1169 tot_alloc += capabilities[n]->total_allocated;
1170
1171 traceEventHeapAllocated(capabilities[n],
1172 CAPSET_HEAP_DEFAULT,
1173 capabilities[n]->total_allocated * sizeof(W_));
1174 }
1175
1176 return tot_alloc;
1177 }
1178
1179 //
1180 // Update the per-cap total_allocated numbers with an approximation of
1181 // the amount of memory used in each cap's nursery.
1182 //
1183 void
1184 updateNurseriesStats (void)
1185 {
1186 uint32_t i;
1187 bdescr *bd;
1188
1189 for (i = 0; i < n_capabilities; i++) {
1190 // The current nursery block and the current allocate block have not
1191 // yet been accounted for in cap->total_allocated, so we add them here.
1192 bd = capabilities[i]->r.rCurrentNursery;
1193 if (bd) finishedNurseryBlock(capabilities[i], bd);
1194 bd = capabilities[i]->r.rCurrentAlloc;
1195 if (bd) finishedNurseryBlock(capabilities[i], bd);
1196 }
1197 }
1198
1199 W_ countOccupied (bdescr *bd)
1200 {
1201 W_ words;
1202
1203 words = 0;
1204 for (; bd != NULL; bd = bd->link) {
1205 ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
1206 words += bd->free - bd->start;
1207 }
1208 return words;
1209 }
1210
1211 W_ genLiveWords (generation *gen)
1212 {
1213 return gen->n_words + gen->n_large_words;
1214 }
1215
1216 W_ genLiveBlocks (generation *gen)
1217 {
1218 return gen->n_blocks + gen->n_large_blocks;
1219 }
1220
1221 W_ gcThreadLiveWords (uint32_t i, uint32_t g)
1222 {
1223 W_ a, b, c;
1224
1225 a = countOccupied(gc_threads[i]->gens[g].todo_bd);
1226 b = gc_threads[i]->gens[g].n_part_words;
1227 c = gc_threads[i]->gens[g].n_scavd_words;
1228
1229 // debugBelch("cap %d, g%d, %ld %ld %ld\n", i, g, a, b, c);
1230 return a + b + c;
1231 }
1232
1233 W_ gcThreadLiveBlocks (uint32_t i, uint32_t g)
1234 {
1235 W_ blocks;
1236
1237 blocks = countBlocks(gc_threads[i]->gens[g].todo_bd);
1238 blocks += gc_threads[i]->gens[g].n_part_blocks;
1239 blocks += gc_threads[i]->gens[g].n_scavd_blocks;
1240
1241 return blocks;
1242 }
1243
1244 /* Determine which generation will be collected next, and approximate
1245 * the maximum amount of memory that will be required to do the GC,
1246 * taking into account data that will be copied, and the space needed
1247 * to store bitmaps and the mark stack. Note: blocks_needed does not
1248 * include the blocks in the nursery.
1249 *
1250 * Assume: all data currently live will remain live. Generations
1251 * that will be collected next time will therefore need twice as many
1252 * blocks since all the data will be copied.
1253 */
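/*
 * Worked example (figures are illustrative): a copying generation holding
 * 100 blocks that will be collected contributes 100 blocks of existing data
 * plus another 100 blocks of to-space, i.e. 200 blocks to *blocks_needed.
 * If the generation is instead marked (the oldest generation under -c or
 * -w), it also needs 100/BITS_IN(W_) blocks of bitmap and 100/100 = 1 block
 * of mark stack, and the 100 blocks of to-space are only charged when it is
 * not compacting.
 */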
1254 extern W_
1255 calcNeeded (rtsBool force_major, memcount *blocks_needed)
1256 {
1257 W_ needed = 0, blocks;
1258 uint32_t g, N;
1259 generation *gen;
1260
1261 if (force_major) {
1262 N = RtsFlags.GcFlags.generations - 1;
1263 } else {
1264 N = 0;
1265 }
1266
1267 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1268 gen = &generations[g];
1269
1270 blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
1271 + gen->n_large_blocks;
1272
1273 // we need at least this much space
1274 needed += blocks;
1275
1276 // are we collecting this gen?
1277 if (g == 0 || // always collect gen 0
1278 blocks > gen->max_blocks)
1279 {
1280 N = stg_max(N,g);
1281
1282 // we will collect this gen next time
1283 if (gen->mark) {
1284 // bitmap:
1285 needed += gen->n_blocks / BITS_IN(W_);
1286 // mark stack:
1287 needed += gen->n_blocks / 100;
1288 }
1289 if (gen->compact) {
1290 continue; // no additional space needed for compaction
1291 } else {
1292 needed += gen->n_blocks;
1293 }
1294 }
1295 }
1296
1297 if (blocks_needed != NULL) {
1298 *blocks_needed = needed;
1299 }
1300 return N;
1301 }
1302
1303 /* ----------------------------------------------------------------------------
1304 Executable memory
1305
1306 Executable memory must be managed separately from non-executable
1307 memory. Most OSs these days require you to jump through hoops to
1308 dynamically allocate executable memory, due to various security
1309 measures.
1310
1311 Here we provide a small memory allocator for executable memory.
1312 Memory is managed with a page granularity; we allocate linearly
1313 in the page, and when the page is emptied (all objects on the page
1314 are free) we free the page again, not forgetting to make it
1315 non-executable.
1316
1317 TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1318 the linker cannot use allocateExec for loading object code files
1319 on Windows. Once allocateExec can handle larger objects, the linker
1320 should be modified to use allocateExec instead of VirtualAlloc.
1321 ------------------------------------------------------------------------- */
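
/*
 * Illustrative usage (a sketch; the real client is the adjustor code in
 * rts/Adjustor.c, and 'code_template'/'len' are assumptions for the
 * example):
 *
 *     AdjustorExecutable exec;
 *     AdjustorWritable   code = allocateExec(len, &exec);
 *     memcpy(code, code_template, len); // write via the writable alias
 *     flushExec(len, exec);             // make the new code visible to the CPU
 *     // ... call through 'exec', then later pass the *executable* address back:
 *     freeExec(exec);
 */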
1322
1323 #if (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1324 void sys_icache_invalidate(void *start, size_t len);
1325 #endif
1326
1327 /* On ARM and other platforms, we need to flush the cache after
1328 writing code into memory, so the processor reliably sees it. */
1329 void flushExec (W_ len, AdjustorExecutable exec_addr)
1330 {
1331 #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
1332 /* x86 doesn't need to do anything, so just suppress some warnings. */
1333 (void)len;
1334 (void)exec_addr;
1335 #elif (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1336 /* On iOS we need to use the special 'sys_icache_invalidate' call. */
1337 sys_icache_invalidate(exec_addr, ((unsigned char*)exec_addr)+len);
1338 #elif defined(__GNUC__)
1339 /* For all other platforms, fall back to a libgcc builtin. */
1340 unsigned char* begin = (unsigned char*)exec_addr;
1341 unsigned char* end = begin + len;
1342 __clear_cache((void*)begin, (void*)end);
1343 #else
1344 #error Missing support to flush the instruction cache
1345 #endif
1346 }
1347
1348 #if defined(linux_HOST_OS)
1349
1350 // On Linux we need to use libffi for allocating executable memory,
1351 // because it knows how to work around the restrictions put in place
1352 // by SELinux.
1353
1354 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1355 {
1356 void **ret, **exec;
1357 ACQUIRE_SM_LOCK;
1358 ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1359 RELEASE_SM_LOCK;
1360 if (ret == NULL) return ret;
1361 *ret = ret; // save the address of the writable mapping, for freeExec().
1362 *exec_ret = exec + 1;
1363 return (ret + 1);
1364 }
1365
1366 // freeExec gets passed the executable address, not the writable address.
1367 void freeExec (AdjustorExecutable addr)
1368 {
1369 AdjustorWritable writable;
1370 writable = *((void**)addr - 1);
1371 ACQUIRE_SM_LOCK;
1372 ffi_closure_free (writable);
1373 RELEASE_SM_LOCK
1374 }
1375
1376 #elif defined(ios_HOST_OS)
1377
1378 static HashTable* allocatedExecs;
1379
1380 AdjustorWritable allocateExec(W_ bytes, AdjustorExecutable *exec_ret)
1381 {
1382 AdjustorWritable writ;
1383 ffi_closure* cl;
1384 if (bytes != sizeof(ffi_closure)) {
1385 barf("allocateExec: for ffi_closure only");
1386 }
1387 ACQUIRE_SM_LOCK;
1388 cl = writ = ffi_closure_alloc((size_t)bytes, exec_ret);
1389 if (cl != NULL) {
1390 if (allocatedExecs == NULL) {
1391 allocatedExecs = allocHashTable();
1392 }
1393 insertHashTable(allocatedExecs, (StgWord)*exec_ret, writ);
1394 }
1395 RELEASE_SM_LOCK;
1396 return writ;
1397 }
1398
1399 AdjustorWritable execToWritable(AdjustorExecutable exec)
1400 {
1401 AdjustorWritable writ;
1402 ACQUIRE_SM_LOCK;
1403 if (allocatedExecs == NULL ||
1404 (writ = lookupHashTable(allocatedExecs, (StgWord)exec)) == NULL) {
1405 RELEASE_SM_LOCK;
1406 barf("execToWritable: not found");
1407 }
1408 RELEASE_SM_LOCK;
1409 return writ;
1410 }
1411
1412 void freeExec(AdjustorExecutable exec)
1413 {
1414 AdjustorWritable writ;
1415 ffi_closure* cl;
1416 cl = writ = execToWritable(exec);
1417 ACQUIRE_SM_LOCK;
1418 removeHashTable(allocatedExecs, (StgWord)exec, writ);
1419 ffi_closure_free(cl);
1420 RELEASE_SM_LOCK
1421 }
1422
1423 #else
1424
1425 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1426 {
1427 void *ret;
1428 W_ n;
1429
1430 ACQUIRE_SM_LOCK;
1431
1432 // round up to words.
1433 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
1434
1435 if (n+1 > BLOCK_SIZE_W) {
1436 barf("allocateExec: can't handle large objects");
1437 }
1438
1439 if (exec_block == NULL ||
1440 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1441 bdescr *bd;
1442 W_ pagesize = getPageSize();
1443 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1444 debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1445 bd->gen_no = 0;
1446 bd->flags = BF_EXEC;
1447 bd->link = exec_block;
1448 if (exec_block != NULL) {
1449 exec_block->u.back = bd;
1450 }
1451 bd->u.back = NULL;
1452 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1453 exec_block = bd;
1454 }
1455 *(exec_block->free) = n; // store the size of this chunk
1456 exec_block->gen_no += n; // gen_no stores the number of words allocated
1457 ret = exec_block->free + 1;
1458 exec_block->free += n + 1;
1459
1460 RELEASE_SM_LOCK
1461 *exec_ret = ret;
1462 return ret;
1463 }
1464
1465 void freeExec (void *addr)
1466 {
1467 StgPtr p = (StgPtr)addr - 1;
1468 bdescr *bd = Bdescr((StgPtr)p);
1469
1470 if ((bd->flags & BF_EXEC) == 0) {
1471 barf("freeExec: not executable");
1472 }
1473
1474 if (*(StgPtr)p == 0) {
1475 barf("freeExec: already free?");
1476 }
1477
1478 ACQUIRE_SM_LOCK;
1479
1480 bd->gen_no -= *(StgPtr)p;
1481 *(StgPtr)p = 0;
1482
1483 if (bd->gen_no == 0) {
1484 // Free the block if it is empty, but not if it is the block at
1485 // the head of the queue.
1486 if (bd != exec_block) {
1487 debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1488 dbl_link_remove(bd, &exec_block);
1489 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1490 freeGroup(bd);
1491 } else {
1492 bd->free = bd->start;
1493 }
1494 }
1495
1496 RELEASE_SM_LOCK
1497 }
1498
1499 #endif /* switch(HOST_OS) */
1500
1501 #ifdef DEBUG
1502
1503 // handy function for use in gdb, because Bdescr() is inlined.
1504 extern bdescr *_bdescr (StgPtr p);
1505
1506 bdescr *
1507 _bdescr (StgPtr p)
1508 {
1509 return Bdescr(p);
1510 }
1511
1512 #endif