1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2012
4 *
5 * Storage manager front end
6 *
7 * Documentation on the architecture of the Storage Manager can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16
17 #include "Storage.h"
18 #include "GCThread.h"
19 #include "RtsUtils.h"
20 #include "Stats.h"
21 #include "BlockAlloc.h"
22 #include "Weak.h"
23 #include "Sanity.h"
24 #include "Arena.h"
25 #include "Capability.h"
26 #include "Schedule.h"
27 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
28 #include "OSMem.h"
29 #include "Trace.h"
30 #include "GC.h"
31 #include "Evac.h"
32 #if defined(ios_HOST_OS)
33 #include "Hash.h"
34 #endif
35
36 #include <string.h>
37
38 #include "ffi.h"
39
40 /*
41 * All these globals must only be accessed while holding sm_mutex in THREADED_RTS mode.
42 */
43 StgIndStatic *dyn_caf_list = NULL;
44 StgIndStatic *debug_caf_list = NULL;
45 StgIndStatic *revertible_caf_list = NULL;
46 bool keepCAFs;
47
48 W_ large_alloc_lim; /* GC if g0->n_new_large_words (words allocated
49 * as large objects) reaches this. */
50
51 bdescr *exec_block;
52
53 generation *generations = NULL; /* all the generations */
54 generation *g0 = NULL; /* generation 0, for convenience */
55 generation *oldest_gen = NULL; /* oldest generation, for convenience */
56
57 /*
58 * Array of nurseries, size == n_capabilities
59 *
60 * nursery[i] belongs to NUMA node (i % n_numa_nodes)
61 * This is chosen to be the same convention as capabilities[i], so
62 * that when not using nursery chunks (+RTS -n), we just map
63 * capabilities to nurseries 1:1.
64 */
65 nursery *nurseries = NULL;
66 uint32_t n_nurseries;
67
68 /*
69 * When we are using nursery chunks, we need a separate next_nursery
70 * pointer for each NUMA node.
71 */
72 volatile StgWord next_nursery[MAX_NUMA_NODES];
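/*
 * Illustrative example of the round-robin convention above (not code from
 * this file): with n_numa_nodes == 2 and six nursery chunks, the chunks are
 * interleaved across nodes as
 *
 *     node 0: nurseries[0], nurseries[2], nurseries[4]
 *     node 1: nurseries[1], nurseries[3], nurseries[5]
 *
 * Both assignNurseriesToCapabilities() and getNewNursery() advance
 * next_nursery[node] by n_numa_nodes, so a capability normally stays on its
 * own node's chunks (unless it has to steal; see getNewNursery()).
 */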
73
74 #if defined(THREADED_RTS)
75 /*
76 * Storage manager mutex: protects all the above state from
77 * simultaneous access by two STG threads.
78 */
79 Mutex sm_mutex;
80 #endif
81
82 static void allocNurseries (uint32_t from, uint32_t to);
83 static void assignNurseriesToCapabilities (uint32_t from, uint32_t to);
84
85 static void
86 initGeneration (generation *gen, int g)
87 {
88 gen->no = g;
89 gen->collections = 0;
90 gen->par_collections = 0;
91 gen->failed_promotions = 0;
92 gen->max_blocks = 0;
93 gen->blocks = NULL;
94 gen->n_blocks = 0;
95 gen->n_words = 0;
96 gen->live_estimate = 0;
97 gen->old_blocks = NULL;
98 gen->n_old_blocks = 0;
99 gen->large_objects = NULL;
100 gen->n_large_blocks = 0;
101 gen->n_large_words = 0;
102 gen->n_new_large_words = 0;
103 gen->compact_objects = NULL;
104 gen->n_compact_blocks = 0;
105 gen->compact_blocks_in_import = NULL;
106 gen->n_compact_blocks_in_import = 0;
107 gen->scavenged_large_objects = NULL;
108 gen->n_scavenged_large_blocks = 0;
109 gen->live_compact_objects = NULL;
110 gen->n_live_compact_blocks = 0;
113 gen->mark = 0;
114 gen->compact = 0;
115 gen->bitmap = NULL;
116 #if defined(THREADED_RTS)
117 initSpinLock(&gen->sync);
118 #endif
119 gen->threads = END_TSO_QUEUE;
120 gen->old_threads = END_TSO_QUEUE;
121 gen->weak_ptr_list = NULL;
122 gen->old_weak_ptr_list = NULL;
123 }
124
125 void
126 initStorage (void)
127 {
128 uint32_t g, n;
129
130 if (generations != NULL) {
131 // multi-init protection
132 return;
133 }
134
135 initMBlocks();
136
137 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
138 * doing something reasonable.
139 */
140 /* We use the NOT_NULL variant or gcc warns that the test is always true */
141 ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
142 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
143 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
144
145 initBlockAllocator();
146
147 #if defined(THREADED_RTS)
148 initMutex(&sm_mutex);
149 #endif
150
151 ACQUIRE_SM_LOCK;
152
153 /* allocate generation info array */
154 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
155 * sizeof(struct generation_),
156 "initStorage: gens");
157
158 /* Initialise all generations */
159 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
160 initGeneration(&generations[g], g);
161 }
162
163 /* A couple of convenience pointers */
164 g0 = &generations[0];
165 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
166
167 /* Set up the destination pointers in each younger gen. step */
168 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
169 generations[g].to = &generations[g+1];
170 }
171 oldest_gen->to = oldest_gen;
172
173 /* Set up mark-compact or mark-sweep for the oldest generation, if requested. */
174 if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
175 if (RtsFlags.GcFlags.generations == 1) {
176 errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
177 } else {
178 oldest_gen->mark = 1;
179 if (RtsFlags.GcFlags.compact)
180 oldest_gen->compact = 1;
181 }
182 }
183
184 generations[0].max_blocks = 0;
185
186 dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
187 debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
188 revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
189
190 if (RtsFlags.GcFlags.largeAllocLim > 0) {
191 large_alloc_lim = RtsFlags.GcFlags.largeAllocLim * BLOCK_SIZE_W;
192 } else {
193 large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
194 }
195
196 exec_block = NULL;
197
198 #if defined(THREADED_RTS)
199 initSpinLock(&gc_alloc_block_sync);
200 #if defined(PROF_SPIN)
201 whitehole_spin = 0;
202 #endif
203 #endif
204
205 N = 0;
206
207 for (n = 0; n < n_numa_nodes; n++) {
208 next_nursery[n] = n;
209 }
210 storageAddCapabilities(0, n_capabilities);
211
212 IF_DEBUG(gc, statDescribeGens());
213
214 RELEASE_SM_LOCK;
215
216 traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
217 RtsFlags.GcFlags.generations,
218 RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE,
219 RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE,
220 MBLOCK_SIZE,
221 BLOCK_SIZE);
222 }
223
224 void storageAddCapabilities (uint32_t from, uint32_t to)
225 {
226 uint32_t n, g, i, new_n_nurseries;
227
228 if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
229 new_n_nurseries = to;
230 } else {
231 memcount total_alloc = to * RtsFlags.GcFlags.minAllocAreaSize;
232 new_n_nurseries =
233 stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
234 }
235
236 if (from > 0) {
237 nurseries = stgReallocBytes(nurseries,
238 new_n_nurseries * sizeof(struct nursery_),
239 "storageAddCapabilities");
240 } else {
241 nurseries = stgMallocBytes(new_n_nurseries * sizeof(struct nursery_),
242 "storageAddCapabilities");
243 }
244
245 // we've moved the nurseries, so we have to update the rNursery
246 // pointers from the Capabilities.
247 for (i = 0; i < to; i++) {
248 capabilities[i]->r.rNursery = &nurseries[i];
249 }
250
251 /* The allocation area. Policy: keep the allocation area
252 * small to begin with, even if we have a large suggested heap
253 * size. Reason: we're going to do a major collection first, and we
254 * don't want it to be a big one. This vague idea is borne out by
255 * rigorous experimental evidence.
256 */
257 allocNurseries(n_nurseries, new_n_nurseries);
258 n_nurseries = new_n_nurseries;
259
260 /*
261 * Assign each of the new capabilities a nursery. Remember to start from
262 * next_nursery, because we may have already consumed some of the earlier
263 * nurseries.
264 */
265 assignNurseriesToCapabilities(from,to);
266
267 // allocate a block for each mut list
268 for (n = from; n < to; n++) {
269 for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
270 capabilities[n]->mut_lists[g] =
271 allocBlockOnNode(capNoToNumaNode(n));
272 }
273 }
274
275 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
276 newThreadLocalKey(&gctKey);
277 #endif
278
279 initGcThreads(from, to);
280 }
281
282
283 void
284 exitStorage (void)
285 {
286 updateNurseriesStats();
287 stat_exit();
288 }
289
290 void
291 freeStorage (bool free_heap)
292 {
293 stgFree(generations);
294 if (free_heap) freeAllMBlocks();
295 #if defined(THREADED_RTS)
296 closeMutex(&sm_mutex);
297 #endif
298 stgFree(nurseries);
299 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
300 freeThreadLocalKey(&gctKey);
301 #endif
302 freeGcThreads();
303 }
304
305 /* -----------------------------------------------------------------------------
306 Note [CAF management].
307
308 The entry code for every CAF does the following:
309
310 - calls newCAF(), which builds a CAF_BLACKHOLE on the heap and atomically
311 updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE
312
313 - if newCAF() returns zero, it re-enters the CAF (see Note [atomic
314 CAF entry])
315
316 - pushes an update frame pointing to the CAF_BLACKHOLE
317
318 Why do we build a BLACKHOLE in the heap rather than just updating
319 the thunk directly? It's so that we only need one kind of update
320 frame - otherwise we'd need a static version of the update frame
321 too, and various other parts of the RTS that deal with update
322 frames would also need special cases for static update frames.
323
324 newCAF() does the following:
325
326 - atomically locks the CAF (see [atomic CAF entry])
327
328 - it builds a CAF_BLACKHOLE on the heap
329
330 - it updates the CAF with an IND_STATIC pointing to the
331 CAF_BLACKHOLE, atomically.
332
333 - it puts the CAF on the oldest generation's mutable list.
334 This is so that we treat the CAF as a root when collecting
335 younger generations.
336
337 - links the CAF onto the CAF list (see below)
338
339 ------------------
340 Note [atomic CAF entry]
341
342 With THREADED_RTS, newCAF() is required to be atomic (see
343 #5558). This is because if two threads happened to enter the same
344 CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
345 and so the normal threadPaused() machinery for detecting duplicate
346 evaluation will not detect this. Hence in lockCAF() below, we
347 atomically lock the CAF with WHITEHOLE before updating it with
348 IND_STATIC, and return zero if another thread locked the CAF first.
349 In the event that we lost the race, CAF entry code will re-enter
350 the CAF and block on the other thread's CAF_BLACKHOLE.
351
352 ------------------
353 Note [GHCi CAFs]
354
355 For GHCi, we have additional requirements when dealing with CAFs:
356
357 - we must *retain* all dynamically-loaded CAFs ever entered,
358 just in case we need them again.
359 - we must be able to *revert* CAFs that have been evaluated, to
360 their pre-evaluated form.
361
362 To do this, we use an additional CAF list. When newCAF() is
363 called on a dynamically-loaded CAF, we add it to the CAF list
364 instead of the old-generation mutable list, and save away its
365 old info pointer (in caf->saved_info) for later reversion.
366
367 To revert all the CAFs, we traverse the CAF list and reset the
368 info pointer to caf->saved_info, then throw away the CAF list.
369 (see GC.c:revertCAFs()).
370
371 -- SDM 29/1/01
372
373 -------------------------------------------------------------------------- */
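/*
 * A sketch of the CAF entry sequence described in Note [CAF management]
 * above (illustrative pseudocode only; the real code is generated Cmm and
 * the helper names here are made up):
 *
 *     bh = newCAF(BaseReg, caf);
 *     if (bh == NULL) {
 *         // lost the race in lockCAF(): the CAF is now an IND_STATIC,
 *         // so just re-enter it and block on the winner's CAF_BLACKHOLE.
 *         ENTER(caf);
 *     }
 *     PUSH_UPDATE_FRAME(bh);      // the update frame points at the CAF_BLACKHOLE
 *     <evaluate the CAF body>;    // its result will update bh
 */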
374
375 STATIC_INLINE StgInd *
376 lockCAF (StgRegTable *reg, StgIndStatic *caf)
377 {
378 const StgInfoTable *orig_info;
379 Capability *cap = regTableToCapability(reg);
380 StgInd *bh;
381
382 orig_info = caf->header.info;
383
384 #if defined(THREADED_RTS)
385 const StgInfoTable *cur_info;
386
387 if (orig_info == &stg_IND_STATIC_info ||
388 orig_info == &stg_WHITEHOLE_info) {
389 // already claimed by another thread; re-enter the CAF
390 return NULL;
391 }
392
393 cur_info = (const StgInfoTable *)
394 cas((StgVolatilePtr)&caf->header.info,
395 (StgWord)orig_info,
396 (StgWord)&stg_WHITEHOLE_info);
397
398 if (cur_info != orig_info) {
399 // already claimed by another thread; re-enter the CAF
400 return NULL;
401 }
402
403 // successfully claimed by us; overwrite with IND_STATIC
404 #endif
405
406 // For the benefit of revertCAFs(), save the original info pointer
407 caf->saved_info = orig_info;
408
409 // Allocate the blackhole indirection closure
410 bh = (StgInd *)allocate(cap, sizeofW(*bh));
411 SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
412 bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
413
414 caf->indirectee = (StgClosure *)bh;
415 write_barrier();
416 SET_INFO((StgClosure*)caf,&stg_IND_STATIC_info);
417
418 return bh;
419 }
420
421 StgInd *
422 newCAF(StgRegTable *reg, StgIndStatic *caf)
423 {
424 StgInd *bh;
425
426 bh = lockCAF(reg, caf);
427 if (!bh) return NULL;
428
429 if(keepCAFs)
430 {
431 // Note [dyn_caf_list]
432 // If we are in GHCi _and_ we are using dynamic libraries,
433 // then we can't redirect newCAF calls to newRetainedCAF (see below),
434 // so we make newCAF behave almost like newRetainedCAF.
435 // The dynamic libraries might be used by both the interpreted
436 // program and GHCi itself, so they must not be reverted.
437 // This also means that in GHCi with dynamic libraries, CAFs are not
438 // garbage collected. If this turns out to be a problem, we could
439 // do another hack here and do an address range test on caf to figure
440 // out whether it is from a dynamic library.
441
442 ACQUIRE_SM_LOCK; // dyn_caf_list is global, locked by sm_mutex
443 caf->static_link = (StgClosure*)dyn_caf_list;
444 dyn_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
445 RELEASE_SM_LOCK;
446 }
447 else
448 {
449 // Put this CAF on the mutable list for the old generation.
450 if (oldest_gen->no != 0) {
451 recordMutableCap((StgClosure*)caf,
452 regTableToCapability(reg), oldest_gen->no);
453 }
454
455 #if defined(DEBUG)
456 // In the DEBUG rts, we keep track of live CAFs by chaining them
457 // onto a list debug_caf_list. This is so that we can tell if we
458 // ever enter a GC'd CAF, and emit a suitable barf().
459 //
460 // The saved_info field of the CAF is used as the link field for
461 // debug_caf_list, because this field is only used by newRetainedCAF
462 // for revertible CAFs, and we don't put those on the
463 // debug_caf_list.
464 ACQUIRE_SM_LOCK; // debug_caf_list is global, locked by sm_mutex
465 ((StgIndStatic *)caf)->saved_info = (const StgInfoTable*)debug_caf_list;
466 debug_caf_list = (StgIndStatic*)caf;
467 RELEASE_SM_LOCK;
468 #endif
469 }
470
471 return bh;
472 }
473
474 // External API for setting the keepCAFs flag. see #3900.
475 void
476 setKeepCAFs (void)
477 {
478 keepCAFs = 1;
479 }
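/*
 * Usage sketch (illustrative, with assumed call sites): a program that
 * loads object code at run time and wants CAFs retained would call this
 * once, before the linker loads anything, e.g.
 *
 *     setKeepCAFs();
 *     initLinker();
 *     loadObj("Foo.o");
 */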
480
481 // An alternate version of newCAF which is used for dynamically loaded
482 // object code in GHCi. In this case we want to retain *all* CAFs in
483 // the object code, because they might be demanded at any time from an
484 // expression evaluated on the command line.
485 // Also, GHCi might want to revert CAFs, so we add these to the
486 // revertible_caf_list.
487 //
488 // The linker hackily arranges that references to newCAF from dynamic
489 // code end up pointing to newRetainedCAF.
490 //
491 StgInd* newRetainedCAF (StgRegTable *reg, StgIndStatic *caf)
492 {
493 StgInd *bh;
494
495 bh = lockCAF(reg, caf);
496 if (!bh) return NULL;
497
498 ACQUIRE_SM_LOCK;
499
500 caf->static_link = (StgClosure*)revertible_caf_list;
501 revertible_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
502
503 RELEASE_SM_LOCK;
504
505 return bh;
506 }
507
508 // If we are using loadObj/unloadObj in the linker, then we want to
509 //
510 // - retain all CAFs in statically linked code (keepCAFs == true),
511 // because we might link a new object that uses any of these CAFs.
512 //
513 // - GC CAFs in dynamically-linked code, so that we can detect when
514 // a dynamically-linked object is unloadable.
515 //
516 // So for this case, we set keepCAFs to true, and link newCAF to newGCdCAF
517 // for dynamically-linked code.
518 //
519 StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
520 {
521 StgInd *bh;
522
523 bh = lockCAF(reg, caf);
524 if (!bh) return NULL;
525
526 // Put this CAF on the mutable list for the old generation.
527 if (oldest_gen->no != 0) {
528 recordMutableCap((StgClosure*)caf,
529 regTableToCapability(reg), oldest_gen->no);
530 }
531
532 return bh;
533 }
534
535 /* -----------------------------------------------------------------------------
536 Nursery management.
537 -------------------------------------------------------------------------- */
538
539 static bdescr *
540 allocNursery (uint32_t node, bdescr *tail, W_ blocks)
541 {
542 bdescr *bd = NULL;
543 W_ i, n;
544
545 // We allocate the nursery as a single contiguous block and then
546 // divide it into single blocks manually. This way we guarantee
547 // that the nursery blocks are adjacent, so that the processor's
548 // automatic prefetching works across nursery blocks. This is a
549 // tiny optimisation (~0.5%), but it's free.
550
551 while (blocks > 0) {
552 n = stg_min(BLOCKS_PER_MBLOCK, blocks);
553 // allocLargeChunk will prefer large chunks, but will pick up
554 // small chunks if there are any available. We must allow
555 // single blocks here to avoid fragmentation (#7257)
556 bd = allocLargeChunkOnNode(node, 1, n);
557 n = bd->blocks;
558 blocks -= n;
559
560 for (i = 0; i < n; i++) {
561 initBdescr(&bd[i], g0, g0);
562
563 bd[i].blocks = 1;
564 bd[i].flags = 0;
565
566 if (i > 0) {
567 bd[i].u.back = &bd[i-1];
568 } else {
569 bd[i].u.back = NULL;
570 }
571
572 if (i+1 < n) {
573 bd[i].link = &bd[i+1];
574 } else {
575 bd[i].link = tail;
576 if (tail != NULL) {
577 tail->u.back = &bd[i];
578 }
579 }
580
581 bd[i].free = bd[i].start;
582 }
583
584 tail = &bd[0];
585 }
586
587 return &bd[0];
588 }
589
590 STATIC_INLINE void
591 assignNurseryToCapability (Capability *cap, uint32_t n)
592 {
593 ASSERT(n < n_nurseries);
594 cap->r.rNursery = &nurseries[n];
595 cap->r.rCurrentNursery = nurseries[n].blocks;
596 newNurseryBlock(nurseries[n].blocks);
597 cap->r.rCurrentAlloc = NULL;
598 ASSERT(cap->r.rCurrentNursery->node == cap->node);
599 }
600
601 /*
602 * Give each Capability a nursery from the pool. No need to do atomic increments
603 * here, since everything must be stopped to call this function.
604 */
605 static void
606 assignNurseriesToCapabilities (uint32_t from, uint32_t to)
607 {
608 uint32_t i, node;
609
610 for (i = from; i < to; i++) {
611 node = capabilities[i]->node;
612 assignNurseryToCapability(capabilities[i], next_nursery[node]);
613 next_nursery[node] += n_numa_nodes;
614 }
615 }
616
617 static void
618 allocNurseries (uint32_t from, uint32_t to)
619 {
620 uint32_t i;
621 memcount n_blocks;
622
623 if (RtsFlags.GcFlags.nurseryChunkSize) {
624 n_blocks = RtsFlags.GcFlags.nurseryChunkSize;
625 } else {
626 n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
627 }
628
629 for (i = from; i < to; i++) {
630 nurseries[i].blocks = allocNursery(capNoToNumaNode(i), NULL, n_blocks);
631 nurseries[i].n_blocks = n_blocks;
632 }
633 }
634
635 void
636 resetNurseries (void)
637 {
638 uint32_t n;
639
640 for (n = 0; n < n_numa_nodes; n++) {
641 next_nursery[n] = n;
642 }
643 assignNurseriesToCapabilities(0, n_capabilities);
644
645 #if defined(DEBUG)
646 bdescr *bd;
647 for (n = 0; n < n_nurseries; n++) {
648 for (bd = nurseries[n].blocks; bd; bd = bd->link) {
649 ASSERT(bd->gen_no == 0);
650 ASSERT(bd->gen == g0);
651 ASSERT(bd->node == capNoToNumaNode(n));
652 IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE));
653 }
654 }
655 #endif
656 }
657
658 W_
659 countNurseryBlocks (void)
660 {
661 uint32_t i;
662 W_ blocks = 0;
663
664 for (i = 0; i < n_nurseries; i++) {
665 blocks += nurseries[i].n_blocks;
666 }
667 return blocks;
668 }
669
670 //
671 // Resize each of the nurseries to the specified size.
672 //
673 static void
674 resizeNurseriesEach (W_ blocks)
675 {
676 uint32_t i, node;
677 bdescr *bd;
678 W_ nursery_blocks;
679 nursery *nursery;
680
681 for (i = 0; i < n_nurseries; i++) {
682 nursery = &nurseries[i];
683 nursery_blocks = nursery->n_blocks;
684 if (nursery_blocks == blocks) continue;
685
686 node = capNoToNumaNode(i);
687 if (nursery_blocks < blocks) {
688 debugTrace(DEBUG_gc, "increasing size of nursery to %" FMT_Word " blocks",
689 blocks);
690 nursery->blocks = allocNursery(node, nursery->blocks,
691 blocks-nursery_blocks);
692 }
693 else
694 {
695 bdescr *next_bd;
696
697 debugTrace(DEBUG_gc, "decreasing size of nursery to %" FMT_Word " blocks",
698 blocks);
699
700 bd = nursery->blocks;
701 while (nursery_blocks > blocks) {
702 next_bd = bd->link;
703 next_bd->u.back = NULL;
704 nursery_blocks -= bd->blocks; // might be a large block
705 freeGroup(bd);
706 bd = next_bd;
707 }
708 nursery->blocks = bd;
709 // might have gone just under, by freeing a large block, so make
710 // up the difference.
711 if (nursery_blocks < blocks) {
712 nursery->blocks = allocNursery(node, nursery->blocks,
713 blocks-nursery_blocks);
714 }
715 }
716 nursery->n_blocks = blocks;
717 ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
718 }
719 }
720
721 void
722 resizeNurseriesFixed (void)
723 {
724 uint32_t blocks;
725
726 if (RtsFlags.GcFlags.nurseryChunkSize) {
727 blocks = RtsFlags.GcFlags.nurseryChunkSize;
728 } else {
729 blocks = RtsFlags.GcFlags.minAllocAreaSize;
730 }
731
732 resizeNurseriesEach(blocks);
733 }
734
735 //
736 // Resize the nurseries to the total specified size.
737 //
738 void
739 resizeNurseries (W_ blocks)
740 {
741 // If there are multiple nurseries, then we just divide the number
742 // of available blocks between them.
743 resizeNurseriesEach(blocks / n_nurseries);
744 }
745
746 bool
747 getNewNursery (Capability *cap)
748 {
749 StgWord i;
750 uint32_t node = cap->node;
751 uint32_t n;
752
753 for(;;) {
754 i = next_nursery[node];
755 if (i < n_nurseries) {
756 if (cas(&next_nursery[node], i, i+n_numa_nodes) == i) {
757 assignNurseryToCapability(cap, i);
758 return true;
759 }
760 } else if (n_numa_nodes > 1) {
761 // Try to find an unused nursery chunk on other nodes. We'll get
762 // remote memory, but the rationale is that avoiding GC is better
763 // than avoiding remote memory access.
764 bool lost = false;
765 for (n = 0; n < n_numa_nodes; n++) {
766 if (n == node) continue;
767 i = next_nursery[n];
768 if (i < n_nurseries) {
769 if (cas(&next_nursery[n], i, i+n_numa_nodes) == i) {
770 assignNurseryToCapability(cap, i);
771 return true;
772 } else {
773 lost = true; /* lost a race */
774 }
775 }
776 }
777 if (!lost) return false;
778 } else {
779 return false;
780 }
781 }
782 }
783
784 /* -----------------------------------------------------------------------------
785 move_STACK is called to update the TSO structure after it has been
786 moved from one place to another.
787 -------------------------------------------------------------------------- */
788
789 void
790 move_STACK (StgStack *src, StgStack *dest)
791 {
792 ptrdiff_t diff;
793
794 // relocate the stack pointer...
795 diff = (StgPtr)dest - (StgPtr)src; // In *words*
796 dest->sp = (StgPtr)dest->sp + diff;
797 }
798
799 /* -----------------------------------------------------------------------------
800 StgPtr allocate (Capability *cap, W_ n)
801
802 Allocates an area of memory n *words* large, from the nursery of
803 the supplied Capability, or from the global block pool if the area
804 requested is larger than LARGE_OBJECT_THRESHOLD. Memory is not
805 allocated from the current nursery block, so as not to interfere
806 with Hp/HpLim.
807
808 The address of the allocated memory is returned. allocate() never
809 fails; if it returns, the returned value is a valid address. If
810 the nursery is already full, then another block is allocated from
811 the global block pool. If we need to get memory from the OS and
812 that operation fails, then the whole process will be killed.
813 -------------------------------------------------------------------------- */
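/*
 * Minimal usage sketch (assumptions: the caller already owns `cap`, and
 * `nbytes` is the payload size of a ByteArray#-like object being built
 * from C):
 *
 *     W_ words = sizeofW(StgArrBytes) + ROUNDUP_BYTES_TO_WDS(nbytes);
 *     StgArrBytes *arr = (StgArrBytes *)allocate(cap, words);
 *     SET_HDR(arr, &stg_ARR_WORDS_info, CCS_SYSTEM);
 *     arr->bytes = nbytes;
 */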
814
815 StgPtr
816 allocate (Capability *cap, W_ n)
817 {
818 bdescr *bd;
819 StgPtr p;
820
821 TICK_ALLOC_HEAP_NOCTR(WDS(n));
822 CCS_ALLOC(cap->r.rCCCS,n);
823 if (cap->r.rCurrentTSO != NULL) {
824 // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
825 ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
826 (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
827 - n*sizeof(W_)));
828 }
829
830 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
831 // The largest number of words such that
832 // the computation of req_blocks will not overflow.
833 W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
834 W_ req_blocks;
835
836 if (n > max_words)
837 req_blocks = HS_WORD_MAX; // signal overflow below
838 else
839 req_blocks = (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
840
841 // Attempting to allocate an object larger than maxHeapSize
842 // should definitely be disallowed. (bug #1791)
843 if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
844 req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
845 req_blocks >= HS_INT32_MAX) // avoid overflow when
846 // calling allocGroup() below
847 {
848 reportHeapOverflow();
849 // heapOverflow() doesn't exit (see #2592), but we aren't
850 // in a position to do a clean shutdown here: we
851 // either have to allocate the memory or exit now.
852 // Allocating the memory would be bad, because the user
853 // has requested that we not exceed maxHeapSize, so we
854 // just exit.
855 stg_exit(EXIT_HEAPOVERFLOW);
856 }
857
858 ACQUIRE_SM_LOCK
859 bd = allocGroupOnNode(cap->node,req_blocks);
860 dbl_link_onto(bd, &g0->large_objects);
861 g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
862 g0->n_new_large_words += n;
863 RELEASE_SM_LOCK;
864 initBdescr(bd, g0, g0);
865 bd->flags = BF_LARGE;
866 bd->free = bd->start + n;
867 cap->total_allocated += n;
868 return bd->start;
869 }
870
871 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
872
873 bd = cap->r.rCurrentAlloc;
874 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
875
876 if (bd) finishedNurseryBlock(cap,bd);
877
878 // The CurrentAlloc block is full, we need to find another
879 // one. First, we try taking the next block from the
880 // nursery:
881 bd = cap->r.rCurrentNursery->link;
882
883 if (bd == NULL) {
884 // The nursery is empty: allocate a fresh block (we can't
885 // fail here).
886 ACQUIRE_SM_LOCK;
887 bd = allocBlockOnNode(cap->node);
888 cap->r.rNursery->n_blocks++;
889 RELEASE_SM_LOCK;
890 initBdescr(bd, g0, g0);
891 bd->flags = 0;
892 // If we had to allocate a new block, then we'll GC
893 // pretty quickly now, because MAYBE_GC() will
894 // notice that CurrentNursery->link is NULL.
895 } else {
896 newNurseryBlock(bd);
897 // we have a block in the nursery: take it and put
898 // it at the *front* of the nursery list, and use it
899 // to allocate() from.
900 //
901 // Previously the nursery looked like this:
902 //
903 // CurrentNursery
904 // /
905 // +-+ +-+
906 // nursery -> ... |A| -> |B| -> ...
907 // +-+ +-+
908 //
909 // After doing this, it looks like this:
910 //
911 // CurrentNursery
912 // /
913 // +-+ +-+
914 // nursery -> |B| -> ... -> |A| -> ...
915 // +-+ +-+
916 // |
917 // CurrentAlloc
918 //
919 // The point is to get the block out of the way of the
920 // advancing CurrentNursery pointer, while keeping it
921 // on the nursery list so we don't lose track of it.
922 cap->r.rCurrentNursery->link = bd->link;
923 if (bd->link != NULL) {
924 bd->link->u.back = cap->r.rCurrentNursery;
925 }
926 }
927 dbl_link_onto(bd, &cap->r.rNursery->blocks);
928 cap->r.rCurrentAlloc = bd;
929 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
930 }
931 p = bd->free;
932 bd->free += n;
933
934 IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
935 return p;
936 }
937
938 /* ---------------------------------------------------------------------------
939 Allocate a fixed/pinned object.
940
941 We allocate small pinned objects into a single block, allocating a
942 new block when the current one overflows. The block is chained
943 onto the large_object_list of generation 0.
944
945 NOTE: The GC can't in general handle pinned objects. This
946 interface is only safe to use for ByteArrays, which have no
947 pointers and don't require scavenging. It works because the
948 block's descriptor has the BF_LARGE flag set, so the block is
949 treated as a large object and chained onto various lists, rather
950 than the individual objects being copied. However, when it comes
951 to scavenge the block, the GC will only scavenge the first object.
952 The reason is that the GC can't linearly scan a block of pinned
953 objects at the moment (doing so would require using the
954 mostly-copying techniques). But since we're restricting ourselves
955 to pinned ByteArrays, not scavenging is ok.
956
957 This function is called by newPinnedByteArray# which immediately
958 fills the allocated memory with a MutableByteArray#.
959 ------------------------------------------------------------------------- */
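/*
 * Usage sketch (illustrative; ignores the alignment padding that the real
 * primop inserts): newPinnedByteArray# does roughly the C equivalent of
 *
 *     W_ words = sizeofW(StgArrBytes) + ROUNDUP_BYTES_TO_WDS(nbytes);
 *     StgArrBytes *arr = (StgArrBytes *)allocatePinned(cap, words);
 *     SET_HDR(arr, &stg_ARR_WORDS_info, cap->r.rCCCS);
 *     arr->bytes = nbytes;
 */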
960
961 StgPtr
962 allocatePinned (Capability *cap, W_ n)
963 {
964 StgPtr p;
965 bdescr *bd;
966
967 // If the request is for a large object, then allocate()
968 // will give us a pinned object anyway.
969 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
970 p = allocate(cap, n);
971 Bdescr(p)->flags |= BF_PINNED;
972 return p;
973 }
974
975 TICK_ALLOC_HEAP_NOCTR(WDS(n));
976 CCS_ALLOC(cap->r.rCCCS,n);
977 if (cap->r.rCurrentTSO != NULL) {
978 // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
979 ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
980 (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
981 - n*sizeof(W_)));
982 }
983
984 bd = cap->pinned_object_block;
985
986 // If we don't have a block of pinned objects yet, or the current
987 // one isn't large enough to hold the new object, get a new one.
988 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
989
990 // stash the old block on cap->pinned_object_blocks. On the
991 // next GC cycle these objects will be moved to
992 // g0->large_objects.
993 if (bd != NULL) {
994 // add it to the allocation stats when the block is full
995 finishedNurseryBlock(cap, bd);
996 dbl_link_onto(bd, &cap->pinned_object_blocks);
997 }
998
999 // We need to find another block. We could just allocate one,
1000 // but that means taking a global lock and we really want to
1001 // avoid that (benchmarks that allocate a lot of pinned
1002 // objects scale really badly if we do this).
1003 //
1004 // So first, we try taking the next block from the nursery, in
1005 // the same way as allocate().
1006 bd = cap->r.rCurrentNursery->link;
1007 if (bd == NULL) {
1008 // The nursery is empty: allocate a fresh block (we can't fail
1009 // here).
1010 ACQUIRE_SM_LOCK;
1011 bd = allocBlockOnNode(cap->node);
1012 RELEASE_SM_LOCK;
1013 initBdescr(bd, g0, g0);
1014 } else {
1015 newNurseryBlock(bd);
1016 // we have a block in the nursery: steal it
1017 cap->r.rCurrentNursery->link = bd->link;
1018 if (bd->link != NULL) {
1019 bd->link->u.back = cap->r.rCurrentNursery;
1020 }
1021 cap->r.rNursery->n_blocks -= bd->blocks;
1022 }
1023
1024 cap->pinned_object_block = bd;
1025 bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
1026
1027 // The pinned_object_block remains attached to the capability
1028 // until it is full, even if a GC occurs. We want this
1029 // behaviour because otherwise the unallocated portion of the
1030 // block would be forever slop, and under certain workloads
1031 // (allocating a few ByteStrings per GC) we accumulate a lot
1032 // of slop.
1033 //
1034 // So, the pinned_object_block is initially marked
1035 // BF_EVACUATED so the GC won't touch it. When it is full,
1036 // we place it on the large_objects list, and at the start of
1037 // the next GC the BF_EVACUATED flag will be cleared, and the
1038 // block will be promoted as usual (if anything in it is
1039 // live).
1040 }
1041
1042 p = bd->free;
1043 bd->free += n;
1044 return p;
1045 }
1046
1047 /* -----------------------------------------------------------------------------
1048 Write Barriers
1049 -------------------------------------------------------------------------- */
1050
1051 /*
1052 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
1053 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
1054 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
1055 and is put on the mutable list.
1056 */
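/*
 * Call pattern sketch (in C; the real callers are the MutVar primops):
 *
 *     StgMutVar *mv = ...;
 *     mv->var = (StgClosure *)new_value;
 *     dirty_MUT_VAR(&cap->r, (StgClosure *)mv);  // CLEAN -> DIRTY + mutable list
 */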
1057 void
1058 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
1059 {
1060 Capability *cap = regTableToCapability(reg);
1061 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
1062 p->header.info = &stg_MUT_VAR_DIRTY_info;
1063 recordClosureMutated(cap,p);
1064 }
1065 }
1066
1067 void
1068 dirty_TVAR(Capability *cap, StgTVar *p)
1069 {
1070 if (p->header.info == &stg_TVAR_CLEAN_info) {
1071 p->header.info = &stg_TVAR_DIRTY_info;
1072 recordClosureMutated(cap,(StgClosure*)p);
1073 }
1074 }
1075
1076 // Setting a TSO's link field with a write barrier.
1077 // It is *not* necessary to call this function when
1078 // * setting the link field to END_TSO_QUEUE
1079 // * setting the link field of the currently running TSO, as it
1080 // will already be dirty.
1081 void
1082 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
1083 {
1084 if (tso->dirty == 0) {
1085 tso->dirty = 1;
1086 recordClosureMutated(cap,(StgClosure*)tso);
1087 }
1088 tso->_link = target;
1089 }
1090
1091 void
1092 setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
1093 {
1094 if (tso->dirty == 0) {
1095 tso->dirty = 1;
1096 recordClosureMutated(cap,(StgClosure*)tso);
1097 }
1098 tso->block_info.prev = target;
1099 }
1100
1101 void
1102 dirty_TSO (Capability *cap, StgTSO *tso)
1103 {
1104 if (tso->dirty == 0) {
1105 tso->dirty = 1;
1106 recordClosureMutated(cap,(StgClosure*)tso);
1107 }
1108 }
1109
1110 void
1111 dirty_STACK (Capability *cap, StgStack *stack)
1112 {
1113 if (stack->dirty == 0) {
1114 stack->dirty = 1;
1115 recordClosureMutated(cap,(StgClosure*)stack);
1116 }
1117 }
1118
1119 /*
1120 This is the write barrier for MVARs. An MVAR_CLEAN object is not
1121 on the mutable list; an MVAR_DIRTY is. When written to, an
1122 MVAR_CLEAN turns into an MVAR_DIRTY and is put on the mutable list.
1123 The check for MVAR_CLEAN is inlined at the call site for speed;
1124 this really does make a difference on concurrency-heavy benchmarks
1125 such as chameneos and cheap-concurrency.
1126 */
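/*
 * That inlined check looks conceptually like this (sketch; the real call
 * sites are in Cmm):
 *
 *     if (mvar->header.info == &stg_MVAR_CLEAN_info) {
 *         dirty_MVAR(&cap->r, (StgClosure *)mvar);
 *     }
 */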
1127 void
1128 dirty_MVAR(StgRegTable *reg, StgClosure *p)
1129 {
1130 recordClosureMutated(regTableToCapability(reg),p);
1131 }
1132
1133 /* -----------------------------------------------------------------------------
1134 * Stats and stuff
1135 * -------------------------------------------------------------------------- */
1136
1137 /* -----------------------------------------------------------------------------
1138 * Note [allocation accounting]
1139 *
1140 * - When cap->r.rCurrentNursery moves to a new block in the nursery,
1141 * we add the size of the used portion of the previous block to
1142 * cap->total_allocated. (see finishedNurseryBlock())
1143 *
1144 * - When we start a GC, the allocated portion of CurrentNursery and
1145 * CurrentAlloc are added to cap->total_allocated. (see
1146 * updateNurseriesStats())
1147 *
1148 * -------------------------------------------------------------------------- */
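/*
 * Worked example (illustrative): a capability that has completely filled
 * three nursery blocks and half-filled its CurrentAlloc block will, after
 * updateNurseriesStats(), report roughly
 *
 *     total_allocated ~= 3 * BLOCK_SIZE_W + BLOCK_SIZE_W / 2    // in words
 */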
1149
1150 //
1151 // Calculate the total allocated memory since the start of the
1152 // program. Also emits events reporting the per-cap allocation
1153 // totals.
1154 //
1155 StgWord
1156 calcTotalAllocated (void)
1157 {
1158 W_ tot_alloc = 0;
1159 W_ n;
1160
1161 for (n = 0; n < n_capabilities; n++) {
1162 tot_alloc += capabilities[n]->total_allocated;
1163
1164 traceEventHeapAllocated(capabilities[n],
1165 CAPSET_HEAP_DEFAULT,
1166 capabilities[n]->total_allocated * sizeof(W_));
1167 }
1168
1169 return tot_alloc;
1170 }
1171
1172 //
1173 // Update the per-cap total_allocated numbers with an approximation of
1174 // the amount of memory used in each cap's nursery.
1175 //
1176 void
1177 updateNurseriesStats (void)
1178 {
1179 uint32_t i;
1180 bdescr *bd;
1181
1182 for (i = 0; i < n_capabilities; i++) {
1183 // The current nursery block and the current allocate block have not
1184 // yet been accounted for in cap->total_allocated, so we add them here.
1185 bd = capabilities[i]->r.rCurrentNursery;
1186 if (bd) finishedNurseryBlock(capabilities[i], bd);
1187 bd = capabilities[i]->r.rCurrentAlloc;
1188 if (bd) finishedNurseryBlock(capabilities[i], bd);
1189 }
1190 }
1191
1192 W_ countOccupied (bdescr *bd)
1193 {
1194 W_ words;
1195
1196 words = 0;
1197 for (; bd != NULL; bd = bd->link) {
1198 ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
1199 words += bd->free - bd->start;
1200 }
1201 return words;
1202 }
1203
1204 W_ genLiveWords (generation *gen)
1205 {
1206 return gen->n_words + gen->n_large_words +
1207 gen->n_compact_blocks * BLOCK_SIZE_W;
1208 }
1209
1210 W_ genLiveBlocks (generation *gen)
1211 {
1212 return gen->n_blocks + gen->n_large_blocks + gen->n_compact_blocks;
1213 }
1214
1215 W_ gcThreadLiveWords (uint32_t i, uint32_t g)
1216 {
1217 W_ a, b, c;
1218
1219 a = countOccupied(gc_threads[i]->gens[g].todo_bd);
1220 b = gc_threads[i]->gens[g].n_part_words;
1221 c = gc_threads[i]->gens[g].n_scavd_words;
1222
1223 // debugBelch("cap %d, g%d, %ld %ld %ld\n", i, g, a, b, c);
1224 return a + b + c;
1225 }
1226
1227 W_ gcThreadLiveBlocks (uint32_t i, uint32_t g)
1228 {
1229 W_ blocks;
1230
1231 blocks = countBlocks(gc_threads[i]->gens[g].todo_bd);
1232 blocks += gc_threads[i]->gens[g].n_part_blocks;
1233 blocks += gc_threads[i]->gens[g].n_scavd_blocks;
1234
1235 return blocks;
1236 }
1237
1238 /* Determine which generation will be collected next, and approximate
1239 * the maximum amount of memory that will be required to do the GC,
1240 * taking into account data that will be copied, and the space needed
1241 * to store bitmaps and the mark stack. Note: blocks_needed does not
1242 * include the blocks in the nursery.
1243 *
1244 * Assume: all data currently live will remain live. Generations
1245 * that will be collected next time will therefore need twice as many
1246 * blocks since all the data will be copied.
1247 */
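/*
 * Worked example (illustrative): a generation holding 100 blocks that is
 * about to be copy-collected contributes 100 (existing data) + 100 (space
 * for the copies) = 200 blocks to blocks_needed; if it will be compacted
 * instead, only the bitmap (100 / BITS_IN(W_)) and mark-stack (100 / 100)
 * overheads are added on top of the original 100.
 */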
1248 extern W_
1249 calcNeeded (bool force_major, memcount *blocks_needed)
1250 {
1251 W_ needed = 0, blocks;
1252 uint32_t g, N;
1253 generation *gen;
1254
1255 if (force_major) {
1256 N = RtsFlags.GcFlags.generations - 1;
1257 } else {
1258 N = 0;
1259 }
1260
1261 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1262 gen = &generations[g];
1263
1264 blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
1265 + gen->n_large_blocks
1266 + gen->n_compact_blocks;
1267
1268 // we need at least this much space
1269 needed += blocks;
1270
1271 // are we collecting this gen?
1272 if (g == 0 || // always collect gen 0
1273 blocks > gen->max_blocks)
1274 {
1275 N = stg_max(N,g);
1276
1277 // we will collect this gen next time
1278 if (gen->mark) {
1279 // bitmap:
1280 needed += gen->n_blocks / BITS_IN(W_);
1281 // mark stack:
1282 needed += gen->n_blocks / 100;
1283 }
1284 if (gen->compact) {
1285 continue; // no additional space needed for compaction
1286 } else {
1287 needed += gen->n_blocks;
1288 }
1289 }
1290 }
1291
1292 if (blocks_needed != NULL) {
1293 *blocks_needed = needed;
1294 }
1295 return N;
1296 }
1297
1298 StgWord calcTotalLargeObjectsW (void)
1299 {
1300 uint32_t g;
1301 StgWord totalW = 0;
1302
1303 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1304 totalW += generations[g].n_large_words;
1305 }
1306 return totalW;
1307 }
1308
1309 StgWord calcTotalCompactW (void)
1310 {
1311 uint32_t g;
1312 StgWord totalW = 0;
1313
1314 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1315 totalW += generations[g].n_compact_blocks * BLOCK_SIZE_W;
1316 }
1317 return totalW;
1318 }
1319
1320 /* ----------------------------------------------------------------------------
1321 Executable memory
1322
1323 Executable memory must be managed separately from non-executable
1324 memory. Most OSs these days require you to jump through hoops to
1325 dynamically allocate executable memory, due to various security
1326 measures.
1327
1328 Here we provide a small memory allocator for executable memory.
1329 Memory is managed with a page granularity; we allocate linearly
1330 in the page, and when the page is emptied (all objects on the page
1331 are free) we free the page again, not forgetting to make it
1332 non-executable.
1333
1334 TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1335 the linker cannot use allocateExec for loading object code files
1336 on Windows. Once allocateExec can handle larger objects, the linker
1337 should be modified to use allocateExec instead of VirtualAlloc.
1338 ------------------------------------------------------------------------- */
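/*
 * Typical usage sketch (illustrative; roughly how the adjustor code uses
 * this API):
 *
 *     AdjustorExecutable exec;
 *     AdjustorWritable   wr = allocateExec(len, &exec);
 *     memcpy(wr, code, len);     // write machine code via the writable alias
 *     flushExec(len, exec);      // make sure the instruction cache sees it
 *     // ... call through `exec`; later, when no longer needed:
 *     freeExec(exec);
 */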
1339
1340 #if (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1341 #include <libkern/OSCacheControl.h>
1342 #endif
1343
1344 #if defined(__GNUC__)
1345 /* __clear_cache is a libgcc function.
1346 * It existed before __builtin___clear_cache was introduced.
1347 * See Trac #8562.
1348 */
1349 extern void __clear_cache(char * begin, char * end);
1350 #endif /* __GNUC__ */
1351
1352 /* On ARM and other platforms, we need to flush the cache after
1353 writing code into memory, so the processor reliably sees it. */
1354 void flushExec (W_ len, AdjustorExecutable exec_addr)
1355 {
1356 #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
1357 /* x86 doesn't need to do anything, so just suppress some warnings. */
1358 (void)len;
1359 (void)exec_addr;
1360 #elif (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1361 /* On iOS we need to use the special 'sys_icache_invalidate' call. */
1362 sys_icache_invalidate(exec_addr, len);
1363 #elif defined(__GNUC__)
1364 /* For all other platforms, fall back to a libgcc builtin. */
1365 unsigned char* begin = (unsigned char*)exec_addr;
1366 unsigned char* end = begin + len;
1367
1368 /* __builtin___clear_cache is supported since GNU C 4.3.6.
1369 * We pick 4.4 to simplify the condition a bit.
1370 */
1371 # if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
1372 __builtin___clear_cache((void*)begin, (void*)end);
1373 # else
1374 __clear_cache((void*)begin, (void*)end);
1375 # endif
1376 #else
1377 #error Missing support to flush the instruction cache
1378 #endif
1379 }
1380
1381 #if defined(linux_HOST_OS)
1382
1383 // On Linux we need to use libffi for allocating executable memory,
1384 // because it knows how to work around the restrictions put in place
1385 // by SELinux.
1386
1387 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1388 {
1389 void **ret, **exec;
1390 ACQUIRE_SM_LOCK;
1391 ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1392 RELEASE_SM_LOCK;
1393 if (ret == NULL) return ret;
1394 *ret = ret; // save the address of the writable mapping, for freeExec().
1395 *exec_ret = exec + 1;
1396 return (ret + 1);
1397 }
1398
1399 // freeExec gets passed the executable address, not the writable address.
1400 void freeExec (AdjustorExecutable addr)
1401 {
1402 AdjustorWritable writable;
1403 writable = *((void**)addr - 1);
1404 ACQUIRE_SM_LOCK;
1405 ffi_closure_free (writable);
1406 RELEASE_SM_LOCK
1407 }
1408
1409 #elif defined(ios_HOST_OS)
1410
1411 static HashTable* allocatedExecs;
1412
1413 AdjustorWritable allocateExec(W_ bytes, AdjustorExecutable *exec_ret)
1414 {
1415 AdjustorWritable writ;
1416 ffi_closure* cl;
1417 if (bytes != sizeof(ffi_closure)) {
1418 barf("allocateExec: for ffi_closure only");
1419 }
1420 ACQUIRE_SM_LOCK;
1421 cl = writ = ffi_closure_alloc((size_t)bytes, exec_ret);
1422 if (cl != NULL) {
1423 if (allocatedExecs == NULL) {
1424 allocatedExecs = allocHashTable();
1425 }
1426 insertHashTable(allocatedExecs, (StgWord)*exec_ret, writ);
1427 }
1428 RELEASE_SM_LOCK;
1429 return writ;
1430 }
1431
1432 AdjustorWritable execToWritable(AdjustorExecutable exec)
1433 {
1434 AdjustorWritable writ;
1435 ACQUIRE_SM_LOCK;
1436 if (allocatedExecs == NULL ||
1437 (writ = lookupHashTable(allocatedExecs, (StgWord)exec)) == NULL) {
1438 RELEASE_SM_LOCK;
1439 barf("execToWritable: not found");
1440 }
1441 RELEASE_SM_LOCK;
1442 return writ;
1443 }
1444
1445 void freeExec(AdjustorExecutable exec)
1446 {
1447 AdjustorWritable writ;
1448 ffi_closure* cl;
1449 cl = writ = execToWritable(exec);
1450 ACQUIRE_SM_LOCK;
1451 removeHashTable(allocatedExecs, (StgWord)exec, writ);
1452 ffi_closure_free(cl);
1453 RELEASE_SM_LOCK
1454 }
1455
1456 #else
1457
1458 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1459 {
1460 void *ret;
1461 W_ n;
1462
1463 ACQUIRE_SM_LOCK;
1464
1465 // round up to words.
1466 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
1467
1468 if (n+1 > BLOCK_SIZE_W) {
1469 barf("allocateExec: can't handle large objects");
1470 }
1471
1472 if (exec_block == NULL ||
1473 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1474 bdescr *bd;
1475 W_ pagesize = getPageSize();
1476 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1477 debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1478 bd->gen_no = 0;
1479 bd->flags = BF_EXEC;
1480 bd->link = exec_block;
1481 if (exec_block != NULL) {
1482 exec_block->u.back = bd;
1483 }
1484 bd->u.back = NULL;
1485 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, true);
1486 exec_block = bd;
1487 }
1488 *(exec_block->free) = n; // store the size of this chunk
1489 exec_block->gen_no += n; // gen_no stores the number of words allocated
1490 ret = exec_block->free + 1;
1491 exec_block->free += n + 1;
1492
1493 RELEASE_SM_LOCK
1494 *exec_ret = ret;
1495 return ret;
1496 }
1497
1498 void freeExec (void *addr)
1499 {
1500 StgPtr p = (StgPtr)addr - 1;
1501 bdescr *bd = Bdescr((StgPtr)p);
1502
1503 if ((bd->flags & BF_EXEC) == 0) {
1504 barf("freeExec: not executable");
1505 }
1506
1507 if (*(StgPtr)p == 0) {
1508 barf("freeExec: already free?");
1509 }
1510
1511 ACQUIRE_SM_LOCK;
1512
1513 bd->gen_no -= *(StgPtr)p;
1514 *(StgPtr)p = 0;
1515
1516 if (bd->gen_no == 0) {
1517 // Free the block if it is empty, but not if it is the block at
1518 // the head of the queue.
1519 if (bd != exec_block) {
1520 debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1521 dbl_link_remove(bd, &exec_block);
1522 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, false);
1523 freeGroup(bd);
1524 } else {
1525 bd->free = bd->start;
1526 }
1527 }
1528
1529 RELEASE_SM_LOCK
1530 }
1531
1532 #endif /* switch(HOST_OS) */
1533
1534 #if defined(DEBUG)
1535
1536 // handy function for use in gdb, because Bdescr() is inlined.
1537 extern bdescr *_bdescr (StgPtr p);
1538
1539 bdescr *
1540 _bdescr (StgPtr p)
1541 {
1542 return Bdescr(p);
1543 }
1544
1545 #endif