rts/sm/Storage.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2012
4 *
5 * Storage manager front end
6 *
7 * Documentation on the architecture of the Storage Manager can be
8 * found in the online commentary:
9 *
10 * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/storage
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16
17 #include "Storage.h"
18 #include "GCThread.h"
19 #include "RtsUtils.h"
20 #include "Stats.h"
21 #include "BlockAlloc.h"
22 #include "Weak.h"
23 #include "Sanity.h"
24 #include "Arena.h"
25 #include "Capability.h"
26 #include "Schedule.h"
27 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
28 #include "OSMem.h"
29 #include "Trace.h"
30 #include "GC.h"
31 #include "Evac.h"
32 #if defined(ios_HOST_OS)
33 #include "Hash.h"
34 #endif
35
36 #include <string.h>
37
38 #include "ffi.h"
39
40 /*
41 * In THREADED_RTS mode, these globals must only be accessed while holding sm_mutex.
42 */
43 StgIndStatic *dyn_caf_list = NULL;
44 StgIndStatic *debug_caf_list = NULL;
45 StgIndStatic *revertible_caf_list = NULL;
46 bool keepCAFs;
47
48 W_ large_alloc_lim; /* GC if n_new_large_words in gen 0
49 * reaches this (in words). */
50
51 bdescr *exec_block;
52
53 generation *generations = NULL; /* all the generations */
54 generation *g0 = NULL; /* generation 0, for convenience */
55 generation *oldest_gen = NULL; /* oldest generation, for convenience */
56
57 /*
58 * Array of nurseries, size == n_capabilities
59 *
60 * nursery[i] belongs to NUMA node (i % n_numa_nodes)
61 * This is chosen to be the same convention as capabilities[i], so
62 * that when not using nursery chunks (+RTS -n), we just map
63 * capabilities to nurseries 1:1.
64 */
65 nursery *nurseries = NULL;
66 uint32_t n_nurseries;
67
68 /*
69 * When we are using nursery chunks, we need a separate next_nursery
70 * pointer for each NUMA node.
71 */
72 volatile StgWord next_nursery[MAX_NUMA_NODES];
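/*
 * Illustrative example (not normative): with n_numa_nodes == 2 and four
 * capabilities, nurseries 0 and 2 live on node 0 and nurseries 1 and 3 on
 * node 1, so capability i initially gets nursery i.  When nursery chunks
 * (+RTS -n) are in use there are more nurseries than capabilities, and
 * next_nursery[node] walks the indices belonging to that node in steps of
 * n_numa_nodes (see assignNurseriesToCapabilities() and getNewNursery()).
 */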
73
74 #if defined(THREADED_RTS)
75 /*
76 * Storage manager mutex: protects all the above state from
77 * simultaneous access by two STG threads.
78 */
79 Mutex sm_mutex;
80 #endif
81
82 static void allocNurseries (uint32_t from, uint32_t to);
83 static void assignNurseriesToCapabilities (uint32_t from, uint32_t to);
84
85 static void
86 initGeneration (generation *gen, int g)
87 {
88 gen->no = g;
89 gen->collections = 0;
90 gen->par_collections = 0;
91 gen->failed_promotions = 0;
92 gen->max_blocks = 0;
93 gen->blocks = NULL;
94 gen->n_blocks = 0;
95 gen->n_words = 0;
96 gen->live_estimate = 0;
97 gen->old_blocks = NULL;
98 gen->n_old_blocks = 0;
99 gen->large_objects = NULL;
100 gen->n_large_blocks = 0;
101 gen->n_large_words = 0;
102 gen->n_new_large_words = 0;
103 gen->compact_objects = NULL;
104 gen->n_compact_blocks = 0;
105 gen->compact_blocks_in_import = NULL;
106 gen->n_compact_blocks_in_import = 0;
107 gen->scavenged_large_objects = NULL;
108 gen->n_scavenged_large_blocks = 0;
109 gen->live_compact_objects = NULL;
110 gen->n_live_compact_blocks = 0;
111 gen->compact_blocks_in_import = NULL;
112 gen->n_compact_blocks_in_import = 0;
113 gen->mark = 0;
114 gen->compact = 0;
115 gen->bitmap = NULL;
116 #if defined(THREADED_RTS)
117 initSpinLock(&gen->sync);
118 #endif
119 gen->threads = END_TSO_QUEUE;
120 gen->old_threads = END_TSO_QUEUE;
121 gen->weak_ptr_list = NULL;
122 gen->old_weak_ptr_list = NULL;
123 }
124
125 void
126 initStorage (void)
127 {
128 uint32_t g, n;
129
130 if (generations != NULL) {
131 // multi-init protection
132 return;
133 }
134
135 initMBlocks();
136
137 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
138 * doing something reasonable.
139 */
140 /* We use the NOT_NULL variant or gcc warns that the test is always true */
141 ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
142 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
143 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
144
145 initBlockAllocator();
146
147 #if defined(THREADED_RTS)
148 initMutex(&sm_mutex);
149 #endif
150
151 ACQUIRE_SM_LOCK;
152
153 /* allocate generation info array */
154 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
155 * sizeof(struct generation_),
156 "initStorage: gens");
157
158 /* Initialise all generations */
159 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
160 initGeneration(&generations[g], g);
161 }
162
163 /* A couple of convenience pointers */
164 g0 = &generations[0];
165 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
166
167 /* Set up the destination pointers in each younger gen. step */
168 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
169 generations[g].to = &generations[g+1];
170 }
171 oldest_gen->to = oldest_gen;
172
173 /* Configure marking/compaction of the oldest generation, if requested. */
174 if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
175 if (RtsFlags.GcFlags.generations == 1) {
176 errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
177 } else {
178 oldest_gen->mark = 1;
179 if (RtsFlags.GcFlags.compact)
180 oldest_gen->compact = 1;
181 }
182 }
183
184 generations[0].max_blocks = 0;
185
186 dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
187 debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
188 revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
189
190 if (RtsFlags.GcFlags.largeAllocLim > 0) {
191 large_alloc_lim = RtsFlags.GcFlags.largeAllocLim * BLOCK_SIZE_W;
192 } else {
193 large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
194 }
195
196 exec_block = NULL;
197
198 #if defined(THREADED_RTS)
199 initSpinLock(&gc_alloc_block_sync);
200 #endif
201 N = 0;
202
203 for (n = 0; n < n_numa_nodes; n++) {
204 next_nursery[n] = n;
205 }
206 storageAddCapabilities(0, n_capabilities);
207
208 IF_DEBUG(gc, statDescribeGens());
209
210 RELEASE_SM_LOCK;
211
212 traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
213 RtsFlags.GcFlags.generations,
214 RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE,
215 RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE,
216 MBLOCK_SIZE,
217 BLOCK_SIZE);
218 }
219
220 void storageAddCapabilities (uint32_t from, uint32_t to)
221 {
222 uint32_t n, g, i, new_n_nurseries;
223 nursery *old_nurseries;
224
225 if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
226 new_n_nurseries = to;
227 } else {
228 memcount total_alloc = to * RtsFlags.GcFlags.minAllocAreaSize;
229 new_n_nurseries =
230 stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
231 }
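/*
 * Worked example (illustrative flag values): with 4 capabilities, -A4m
 * (minAllocAreaSize = 1024 blocks of 4Kb) and -n1m (nurseryChunkSize =
 * 256 blocks), total_alloc = 4 * 1024 = 4096 blocks, so new_n_nurseries =
 * max(4, 4096 / 256) = 16 chunks shared between the 4 capabilities.
 */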
232
233 old_nurseries = nurseries;
234 if (from > 0) {
235 nurseries = stgReallocBytes(nurseries,
236 new_n_nurseries * sizeof(struct nursery_),
237 "storageAddCapabilities");
238 } else {
239 nurseries = stgMallocBytes(new_n_nurseries * sizeof(struct nursery_),
240 "storageAddCapabilities");
241 }
242
243 // we've moved the nurseries, so we have to update the rNursery
244 // pointers from the Capabilities.
245 for (i = 0; i < from; i++) {
246 uint32_t index = capabilities[i]->r.rNursery - old_nurseries;
247 capabilities[i]->r.rNursery = &nurseries[index];
248 }
249
250 /* The allocation area. Policy: keep the allocation area
251 * small to begin with, even if we have a large suggested heap
252 * size. Reason: we're going to do a major collection first, and we
253 * don't want it to be a big one. This vague idea is borne out by
254 * rigorous experimental evidence.
255 */
256 allocNurseries(n_nurseries, new_n_nurseries);
257 n_nurseries = new_n_nurseries;
258
259 /*
260 * Assign each of the new capabilities a nursery. Remember to start from
261 * next_nursery, because we may have already consumed some of the earlier
262 * nurseries.
263 */
264 assignNurseriesToCapabilities(from,to);
265
266 // allocate a block for each mut list
267 for (n = from; n < to; n++) {
268 for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
269 capabilities[n]->mut_lists[g] =
270 allocBlockOnNode(capNoToNumaNode(n));
271 }
272 }
273
274 #if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0)
275 newThreadLocalKey(&gctKey);
276 #endif
277
278 initGcThreads(from, to);
279 }
280
281
282 void
283 exitStorage (void)
284 {
285 updateNurseriesStats();
286 stat_exit();
287 }
288
289 void
290 freeStorage (bool free_heap)
291 {
292 stgFree(generations);
293 if (free_heap) freeAllMBlocks();
294 #if defined(THREADED_RTS)
295 closeMutex(&sm_mutex);
296 #endif
297 stgFree(nurseries);
298 #if defined(THREADED_RTS) && defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0)
299 freeThreadLocalKey(&gctKey);
300 #endif
301 freeGcThreads();
302 }
303
304 /* -----------------------------------------------------------------------------
305 Note [CAF management].
306
307 The entry code for every CAF does the following:
308
309 - calls newCAF, which builds a CAF_BLACKHOLE on the heap and atomically
310 updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE
311
312 - if newCAF returns zero, it re-enters the CAF (see Note [atomic
313 CAF entry])
314
315 - pushes an update frame pointing to the CAF_BLACKHOLE
316
317 Why do we build a BLACKHOLE in the heap rather than just updating
318 the thunk directly? It's so that we only need one kind of update
319 frame - otherwise we'd need a static version of the update frame
320 too, and various other parts of the RTS that deal with update
321 frames would also need special cases for static update frames.
322
323 newCAF() does the following:
324
325 - atomically locks the CAF (see [atomic CAF entry])
326
327 - it builds a CAF_BLACKHOLE on the heap
328
329 - it updates the CAF with an IND_STATIC pointing to the
330 CAF_BLACKHOLE, atomically.
331
332 - it puts the CAF on the oldest generation's mutable list.
333 This is so that we treat the CAF as a root when collecting
334 younger generations.
335
336 - links the CAF onto the CAF list (see below)
337
338 ------------------
339 Note [atomic CAF entry]
340
341 With THREADED_RTS, newCAF() is required to be atomic (see
342 #5558). This is because if two threads happened to enter the same
343 CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
344 and so the normal threadPaused() machinery for detecting duplicate
345 evaluation will not detect this. Hence in lockCAF() below, we
346 atomically lock the CAF with WHITEHOLE before updating it with
347 IND_STATIC, and return zero if another thread locked the CAF first.
348 In the event that we lost the race, CAF entry code will re-enter
349 the CAF and block on the other thread's CAF_BLACKHOLE.
350
351 ------------------
352 Note [GHCi CAFs]
353
354 For GHCi, we have two additional requirements when dealing with CAFs:
355
356 - we must *retain* all dynamically-loaded CAFs ever entered,
357 just in case we need them again.
358 - we must be able to *revert* CAFs that have been evaluated, to
359 their pre-evaluated form.
360
361 To do this, we use an additional CAF list. When newCAF() is
362 called on a dynamically-loaded CAF, we add it to the CAF list
363 instead of the old-generation mutable list, and save away its
364 old info pointer (in caf->saved_info) for later reversion.
365
366 To revert all the CAFs, we traverse the CAF list and reset the
367 info pointer to caf->saved_info, then throw away the CAF list.
368 (see GC.c:revertCAFs()).
369
370 -- SDM 29/1/01
371
372 -------------------------------------------------------------------------- */
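/* ------------------
   Summary picture (informal; derived from the notes above, not a separate
   mechanism):

       before newCAF()                after newCAF()

       caf (THUNK_STATIC)             caf (IND_STATIC)
                                        |
                                        v
                                      CAF_BLACKHOLE on the heap
                                        (indirectee = owning TSO)

   The update frame pushed by the CAF entry code later overwrites the
   CAF_BLACKHOLE with an indirection to the computed value in the usual way.
   -------------------------------------------------------------------------- */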
373
374 STATIC_INLINE StgInd *
375 lockCAF (StgRegTable *reg, StgIndStatic *caf)
376 {
377 const StgInfoTable *orig_info;
378 Capability *cap = regTableToCapability(reg);
379 StgInd *bh;
380
381 orig_info = caf->header.info;
382
383 #if defined(THREADED_RTS)
384 const StgInfoTable *cur_info;
385
386 if (orig_info == &stg_IND_STATIC_info ||
387 orig_info == &stg_WHITEHOLE_info) {
388 // already claimed by another thread; re-enter the CAF
389 return NULL;
390 }
391
392 cur_info = (const StgInfoTable *)
393 cas((StgVolatilePtr)&caf->header.info,
394 (StgWord)orig_info,
395 (StgWord)&stg_WHITEHOLE_info);
396
397 if (cur_info != orig_info) {
398 // already claimed by another thread; re-enter the CAF
399 return NULL;
400 }
401
402 // successfully claimed by us; overwrite with IND_STATIC
403 #endif
404
405 // For the benefit of revertCAFs(), save the original info pointer
406 caf->saved_info = orig_info;
407
408 // Allocate the blackhole indirection closure
409 bh = (StgInd *)allocate(cap, sizeofW(*bh));
410 bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
411 SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
412 // Ensure that the above writes are visible before we publish the blackhole as the CAF's indirectee.
413 write_barrier();
414
415 caf->indirectee = (StgClosure *)bh;
416 write_barrier();
417 SET_INFO((StgClosure*)caf,&stg_IND_STATIC_info);
418
419 return bh;
420 }
421
422 StgInd *
423 newCAF(StgRegTable *reg, StgIndStatic *caf)
424 {
425 StgInd *bh;
426
427 bh = lockCAF(reg, caf);
428 if (!bh) return NULL;
429
430 if(keepCAFs)
431 {
432 // Note [dyn_caf_list]
433 // If we are in GHCi _and_ we are using dynamic libraries,
434 // then we can't redirect newCAF calls to newRetainedCAF (see below),
435 // so we make newCAF behave almost like newRetainedCAF.
436 // The dynamic libraries might be used by both the interpreted
437 // program and GHCi itself, so they must not be reverted.
438 // This also means that in GHCi with dynamic libraries, CAFs are not
439 // garbage collected. If this turns out to be a problem, we could
440 // do another hack here and do an address range test on caf to figure
441 // out whether it is from a dynamic library.
442
443 ACQUIRE_SM_LOCK; // dyn_caf_list is global, locked by sm_mutex
444 caf->static_link = (StgClosure*)dyn_caf_list;
445 dyn_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
446 RELEASE_SM_LOCK;
447 }
448 else
449 {
450 // Put this CAF on the mutable list for the old generation.
451 if (oldest_gen->no != 0) {
452 recordMutableCap((StgClosure*)caf,
453 regTableToCapability(reg), oldest_gen->no);
454 }
455
456 #if defined(DEBUG)
457 // In the DEBUG rts, we keep track of live CAFs by chaining them
458 // onto a list debug_caf_list. This is so that we can tell if we
459 // ever enter a GC'd CAF, and emit a suitable barf().
460 //
461 // The saved_info field of the CAF is used as the link field for
462 // debug_caf_list, because this field is only used by newDynCAF
463 // for revertible CAFs, and we don't put those on the
464 // debug_caf_list.
465 ACQUIRE_SM_LOCK; // debug_caf_list is global, locked by sm_mutex
466 ((StgIndStatic *)caf)->saved_info = (const StgInfoTable*)debug_caf_list;
467 debug_caf_list = (StgIndStatic*)caf;
468 RELEASE_SM_LOCK;
469 #endif
470 }
471
472 return bh;
473 }
474
475 // External API for setting the keepCAFs flag. See #3900.
476 void
477 setKeepCAFs (void)
478 {
479 keepCAFs = 1;
480 }
481
482 // An alternate version of newCAF which is used for dynamically loaded
483 // object code in GHCi. In this case we want to retain *all* CAFs in
484 // the object code, because they might be demanded at any time from an
485 // expression evaluated on the command line.
486 // Also, GHCi might want to revert CAFs, so we add these to the
487 // revertible_caf_list.
488 //
489 // The linker hackily arranges that references to newCAF from dynamic
490 // code end up pointing to newRetainedCAF.
491 //
492 StgInd* newRetainedCAF (StgRegTable *reg, StgIndStatic *caf)
493 {
494 StgInd *bh;
495
496 bh = lockCAF(reg, caf);
497 if (!bh) return NULL;
498
499 ACQUIRE_SM_LOCK;
500
501 caf->static_link = (StgClosure*)revertible_caf_list;
502 revertible_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
503
504 RELEASE_SM_LOCK;
505
506 return bh;
507 }
508
509 // If we are using loadObj/unloadObj in the linker, then we want to
510 //
511 // - retain all CAFs in statically linked code (keepCAFs == true),
512 // because we might link a new object that uses any of these CAFs.
513 //
514 // - GC CAFs in dynamically-linked code, so that we can detect when
515 // a dynamically-linked object is unloadable.
516 //
517 // So for this case, we set keepCAFs to true, and link newCAF to newGCdCAF
518 // for dynamically-linked code.
519 //
520 StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
521 {
522 StgInd *bh;
523
524 bh = lockCAF(reg, caf);
525 if (!bh) return NULL;
526
527 // Put this CAF on the mutable list for the old generation.
528 if (oldest_gen->no != 0) {
529 recordMutableCap((StgClosure*)caf,
530 regTableToCapability(reg), oldest_gen->no);
531 }
532
533 return bh;
534 }
535
536 /* -----------------------------------------------------------------------------
537 Nursery management.
538 -------------------------------------------------------------------------- */
539
540 static bdescr *
541 allocNursery (uint32_t node, bdescr *tail, W_ blocks)
542 {
543 bdescr *bd = NULL;
544 W_ i, n;
545
546 // We allocate the nursery as a single contiguous block and then
547 // divide it into single blocks manually. This way we guarantee
548 // that the nursery blocks are adjacent, so that the processor's
549 // automatic prefetching works across nursery blocks. This is a
550 // tiny optimisation (~0.5%), but it's free.
551
552 while (blocks > 0) {
553 n = stg_min(BLOCKS_PER_MBLOCK, blocks);
554 // allocLargeChunk will prefer large chunks, but will pick up
555 // small chunks if there are any available. We must allow
556 // single blocks here to avoid fragmentation (#7257)
557 bd = allocLargeChunkOnNode(node, 1, n);
558 n = bd->blocks;
559 blocks -= n;
560
561 for (i = 0; i < n; i++) {
562 initBdescr(&bd[i], g0, g0);
563
564 bd[i].blocks = 1;
565 bd[i].flags = 0;
566
567 if (i > 0) {
568 bd[i].u.back = &bd[i-1];
569 } else {
570 bd[i].u.back = NULL;
571 }
572
573 if (i+1 < n) {
574 bd[i].link = &bd[i+1];
575 } else {
576 bd[i].link = tail;
577 if (tail != NULL) {
578 tail->u.back = &bd[i];
579 }
580 }
581
582 bd[i].free = bd[i].start;
583 }
584
585 tail = &bd[0];
586 }
587
588 return &bd[0];
589 }
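/*
 * Informal picture of what allocNursery() builds: each chunk of up to
 * BLOCKS_PER_MBLOCK blocks is carved into a doubly-linked list of
 * single-block bdescrs,
 *
 *     bd[0] <-> bd[1] <-> ... <-> bd[n-1] <-> tail ...
 *
 * and successive chunks are prepended, so the pointer returned is the head
 * of the whole nursery block list.
 */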
590
591 STATIC_INLINE void
592 assignNurseryToCapability (Capability *cap, uint32_t n)
593 {
594 ASSERT(n < n_nurseries);
595 cap->r.rNursery = &nurseries[n];
596 cap->r.rCurrentNursery = nurseries[n].blocks;
597 newNurseryBlock(nurseries[n].blocks);
598 cap->r.rCurrentAlloc = NULL;
599 ASSERT(cap->r.rCurrentNursery->node == cap->node);
600 }
601
602 /*
603 * Give each Capability a nursery from the pool. No need to do atomic increments
604 * here, everything must be stopped to call this function.
605 */
606 static void
607 assignNurseriesToCapabilities (uint32_t from, uint32_t to)
608 {
609 uint32_t i, node;
610
611 for (i = from; i < to; i++) {
612 node = capabilities[i]->node;
613 assignNurseryToCapability(capabilities[i], next_nursery[node]);
614 next_nursery[node] += n_numa_nodes;
615 }
616 }
617
618 static void
619 allocNurseries (uint32_t from, uint32_t to)
620 {
621 uint32_t i;
622 memcount n_blocks;
623
624 if (RtsFlags.GcFlags.nurseryChunkSize) {
625 n_blocks = RtsFlags.GcFlags.nurseryChunkSize;
626 } else {
627 n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
628 }
629
630 for (i = from; i < to; i++) {
631 nurseries[i].blocks = allocNursery(capNoToNumaNode(i), NULL, n_blocks);
632 nurseries[i].n_blocks = n_blocks;
633 }
634 }
635
636 void
637 resetNurseries (void)
638 {
639 uint32_t n;
640
641 for (n = 0; n < n_numa_nodes; n++) {
642 next_nursery[n] = n;
643 }
644 assignNurseriesToCapabilities(0, n_capabilities);
645
646 #if defined(DEBUG)
647 bdescr *bd;
648 for (n = 0; n < n_nurseries; n++) {
649 for (bd = nurseries[n].blocks; bd; bd = bd->link) {
650 ASSERT(bd->gen_no == 0);
651 ASSERT(bd->gen == g0);
652 ASSERT(bd->node == capNoToNumaNode(n));
653 IF_DEBUG(zero_on_gc, memset(bd->start, 0xaa, BLOCK_SIZE));
654 }
655 }
656 #endif
657 }
658
659 W_
660 countNurseryBlocks (void)
661 {
662 uint32_t i;
663 W_ blocks = 0;
664
665 for (i = 0; i < n_nurseries; i++) {
666 blocks += nurseries[i].n_blocks;
667 }
668 return blocks;
669 }
670
671 //
672 // Resize each of the nurseries to the specified size.
673 //
674 static void
675 resizeNurseriesEach (W_ blocks)
676 {
677 uint32_t i, node;
678 bdescr *bd;
679 W_ nursery_blocks;
680 nursery *nursery;
681
682 for (i = 0; i < n_nurseries; i++) {
683 nursery = &nurseries[i];
684 nursery_blocks = nursery->n_blocks;
685 if (nursery_blocks == blocks) continue;
686
687 node = capNoToNumaNode(i);
688 if (nursery_blocks < blocks) {
689 debugTrace(DEBUG_gc, "increasing size of nursery to %" FMT_Word " blocks",
690 blocks);
691 nursery->blocks = allocNursery(node, nursery->blocks,
692 blocks-nursery_blocks);
693 }
694 else
695 {
696 bdescr *next_bd;
697
698 debugTrace(DEBUG_gc, "decreasing size of nursery to %" FMT_Word " blocks",
699 blocks);
700
701 bd = nursery->blocks;
702 while (nursery_blocks > blocks) {
703 next_bd = bd->link;
704 next_bd->u.back = NULL;
705 nursery_blocks -= bd->blocks; // might be a large block
706 freeGroup(bd);
707 bd = next_bd;
708 }
709 nursery->blocks = bd;
710 // might have gone just under, by freeing a large block, so make
711 // up the difference.
712 if (nursery_blocks < blocks) {
713 nursery->blocks = allocNursery(node, nursery->blocks,
714 blocks-nursery_blocks);
715 }
716 }
717 nursery->n_blocks = blocks;
718 ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
719 }
720 }
721
722 void
723 resizeNurseriesFixed (void)
724 {
725 uint32_t blocks;
726
727 if (RtsFlags.GcFlags.nurseryChunkSize) {
728 blocks = RtsFlags.GcFlags.nurseryChunkSize;
729 } else {
730 blocks = RtsFlags.GcFlags.minAllocAreaSize;
731 }
732
733 resizeNurseriesEach(blocks);
734 }
735
736 //
737 // Resize the nurseries to the total specified size.
738 //
739 void
740 resizeNurseries (W_ blocks)
741 {
742 // If there are multiple nurseries, then we just divide the number
743 // of available blocks between them.
744 resizeNurseriesEach(blocks / n_nurseries);
745 }
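/*
 * For example (illustrative numbers): resizeNurseries(1024) with
 * n_nurseries == 4 resizes each nursery to 256 blocks; any remainder of the
 * division is simply not allocated.
 */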
746
747 bool
748 getNewNursery (Capability *cap)
749 {
750 StgWord i;
751 uint32_t node = cap->node;
752 uint32_t n;
753
754 for(;;) {
755 i = next_nursery[node];
756 if (i < n_nurseries) {
757 if (cas(&next_nursery[node], i, i+n_numa_nodes) == i) {
758 assignNurseryToCapability(cap, i);
759 return true;
760 }
761 } else if (n_numa_nodes > 1) {
762 // Try to find an unused nursery chunk on other nodes. We'll get
763 // remote memory, but the rationale is that avoiding GC is better
764 // than avoiding remote memory access.
765 bool lost = false;
766 for (n = 0; n < n_numa_nodes; n++) {
767 if (n == node) continue;
768 i = next_nursery[n];
769 if (i < n_nurseries) {
770 if (cas(&next_nursery[n], i, i+n_numa_nodes) == i) {
771 assignNurseryToCapability(cap, i);
772 return true;
773 } else {
774 lost = true; /* lost a race */
775 }
776 }
777 }
778 if (!lost) return false;
779 } else {
780 return false;
781 }
782 }
783 }
784
785 /* -----------------------------------------------------------------------------
786 move_STACK is called to update the StgStack structure after it has been
787 moved from one place to another.
788 -------------------------------------------------------------------------- */
789
790 void
791 move_STACK (StgStack *src, StgStack *dest)
792 {
793 ptrdiff_t diff;
794
795 // relocate the stack pointer...
796 diff = (StgPtr)dest - (StgPtr)src; // In *words*
797 dest->sp = (StgPtr)dest->sp + diff;
798 }
799
800 STATIC_INLINE void
801 accountAllocation(Capability *cap, W_ n)
802 {
803 TICK_ALLOC_HEAP_NOCTR(WDS(n));
804 CCS_ALLOC(cap->r.rCCCS,n);
805 if (cap->r.rCurrentTSO != NULL) {
806 // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
807 ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
808 (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
809 - n*sizeof(W_)));
810 }
811
812 }
813
814 /* -----------------------------------------------------------------------------
815 StgPtr allocate (Capability *cap, W_ n)
816
817 Allocates an area of memory n *words* large, from the nursery of
818 the supplied Capability, or from the global block pool if the area
819 requested is larger than LARGE_OBJECT_THRESHOLD. Memory is not
820 allocated from the current nursery block, so as not to interfere
821 with Hp/HpLim.
822
823 The address of the allocated memory is returned. allocate() never
824 fails; if it returns, the returned value is a valid address. If
825 the nursery is already full, then another block is allocated from
826 the global block pool. If we need to get memory from the OS and
827 that operation fails, then the whole process will be killed.
828 -------------------------------------------------------------------------- */
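/*
 * A minimal usage sketch (illustrative only; the real callers are spread
 * throughout the RTS and the generated code): allocating and initialising
 * a small heap object from C looks roughly like
 *
 *     StgMutVar *mv = (StgMutVar *)allocate(cap, sizeofW(StgMutVar));
 *     SET_HDR(mv, &stg_MUT_VAR_DIRTY_info, CCS_SYSTEM);
 *     mv->var = init_value;             // init_value: hypothetical closure
 *
 * allocate() only reserves the space; the caller must write a valid header
 * and payload before the object can be visited by the garbage collector.
 */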
829
830 /*
831 * Allocate n words of heap memory; terminates the program
832 * on heap overflow.
833 */
834 StgPtr
835 allocate (Capability *cap, W_ n)
836 {
837 StgPtr p = allocateMightFail(cap, n);
838 if (p == NULL) {
839 reportHeapOverflow();
840 // heapOverflow() doesn't exit (see #2592), but we aren't
841 // in a position to do a clean shutdown here: we
842 // either have to allocate the memory or exit now.
843 // Allocating the memory would be bad, because the user
844 // has requested that we not exceed maxHeapSize, so we
845 // just exit.
846 stg_exit(EXIT_HEAPOVERFLOW);
847 }
848 return p;
849 }
850
851 /*
852 * Allocate n words of heap memory; returns NULL
853 * on heap overflow.
854 */
855 StgPtr
856 allocateMightFail (Capability *cap, W_ n)
857 {
858 bdescr *bd;
859 StgPtr p;
860
861 if (RTS_UNLIKELY(n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))) {
862 // The largest number of words such that
863 // the computation of req_blocks will not overflow.
864 W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
865 W_ req_blocks;
866
867 if (n > max_words)
868 req_blocks = HS_WORD_MAX; // signal overflow below
869 else
870 req_blocks = (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
871
872 // Attempting to allocate an object larger than maxHeapSize
873 // should definitely be disallowed. (bug #1791)
874 if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
875 req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
876 req_blocks >= HS_INT32_MAX) // avoid overflow when
877 // calling allocGroup() below
878 {
879 return NULL;
880 }
881
882 // Only credit allocation after we've passed the size check above
883 accountAllocation(cap, n);
884
885 ACQUIRE_SM_LOCK
886 bd = allocGroupOnNode(cap->node,req_blocks);
887 dbl_link_onto(bd, &g0->large_objects);
888 g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
889 g0->n_new_large_words += n;
890 RELEASE_SM_LOCK;
891 initBdescr(bd, g0, g0);
892 bd->flags = BF_LARGE;
893 bd->free = bd->start + n;
894 cap->total_allocated += n;
895 return bd->start;
896 }
897
898 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
899
900 accountAllocation(cap, n);
901 bd = cap->r.rCurrentAlloc;
902 if (RTS_UNLIKELY(bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W)) {
903
904 if (bd) finishedNurseryBlock(cap,bd);
905
906 // The CurrentAlloc block is full, we need to find another
907 // one. First, we try taking the next block from the
908 // nursery:
909 bd = cap->r.rCurrentNursery->link;
910
911 if (bd == NULL) {
912 // The nursery is empty: allocate a fresh block (we can't
913 // fail here).
914 ACQUIRE_SM_LOCK;
915 bd = allocBlockOnNode(cap->node);
916 cap->r.rNursery->n_blocks++;
917 RELEASE_SM_LOCK;
918 initBdescr(bd, g0, g0);
919 bd->flags = 0;
920 // If we had to allocate a new block, then we'll GC
921 // pretty quickly now, because MAYBE_GC() will
922 // notice that CurrentNursery->link is NULL.
923 } else {
924 newNurseryBlock(bd);
925 // we have a block in the nursery: take it and put
926 // it at the *front* of the nursery list, and use it
927 // to allocate() from.
928 //
929 // Previously the nursery looked like this:
930 //
931 // CurrentNursery
932 // /
933 // +-+ +-+
934 // nursery -> ... |A| -> |B| -> ...
935 // +-+ +-+
936 //
937 // After doing this, it looks like this:
938 //
939 // CurrentNursery
940 // /
941 // +-+ +-+
942 // nursery -> |B| -> ... -> |A| -> ...
943 // +-+ +-+
944 // |
945 // CurrentAlloc
946 //
947 // The point is to get the block out of the way of the
948 // advancing CurrentNursery pointer, while keeping it
949 // on the nursery list so we don't lose track of it.
950 cap->r.rCurrentNursery->link = bd->link;
951 if (bd->link != NULL) {
952 bd->link->u.back = cap->r.rCurrentNursery;
953 }
954 }
955 dbl_link_onto(bd, &cap->r.rNursery->blocks);
956 cap->r.rCurrentAlloc = bd;
957 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
958 }
959 p = bd->free;
960 bd->free += n;
961
962 IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
963 return p;
964 }
965
966 /* ---------------------------------------------------------------------------
967 Allocate a fixed/pinned object.
968
969 We allocate small pinned objects into a single block, allocating a
970 new block when the current one overflows. The block is chained
971 onto the large_objects list of generation 0 (at the next GC).
972
973 NOTE: The GC can't in general handle pinned objects. This
974 interface is only safe to use for ByteArrays, which have no
975 pointers and don't require scavenging. It works because the
976 block's descriptor has the BF_LARGE flag set, so the block is
977 treated as a large object and chained onto various lists, rather
978 than the individual objects being copied. However, when it comes
979 to scavenge the block, the GC will only scavenge the first object.
980 The reason is that the GC can't linearly scan a block of pinned
981 objects at the moment (doing so would require using the
982 mostly-copying techniques). But since we're restricting ourselves
983 to pinned ByteArrays, not scavenging is ok.
984
985 This function is called by newPinnedByteArray# which immediately
986 fills the allocated memory with a MutableByteArray#. Note that
987 this returns NULL on heap overflow.
988 ------------------------------------------------------------------------- */
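/*
 * Illustrative sketch of a caller (hypothetical; the real caller is the
 * code generated for newPinnedByteArray#): allocate the words for an
 * StgArrBytes, check for heap overflow, then fill in the header and size:
 *
 *     W_ words = sizeofW(StgArrBytes)
 *              + (n_bytes + sizeof(W_) - 1) / sizeof(W_);
 *     StgArrBytes *arr = (StgArrBytes *)allocatePinned(cap, words);
 *     if (arr == NULL) { ... report heap overflow ... }
 *     SET_HDR(arr, &stg_ARR_WORDS_info, CCS_SYSTEM);
 *     arr->bytes = n_bytes;
 */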
989
990 StgPtr
991 allocatePinned (Capability *cap, W_ n)
992 {
993 StgPtr p;
994 bdescr *bd;
995
996 // If the request is for a large object, then allocate()
997 // will give us a pinned object anyway.
998 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
999 p = allocateMightFail(cap, n);
1000 if (p == NULL) {
1001 return NULL;
1002 } else {
1003 Bdescr(p)->flags |= BF_PINNED;
1004 return p;
1005 }
1006 }
1007
1008 accountAllocation(cap, n);
1009 bd = cap->pinned_object_block;
1010
1011 // If we don't have a block of pinned objects yet, or the current
1012 // one isn't large enough to hold the new object, get a new one.
1013 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
1014
1015 // stash the old block on cap->pinned_object_blocks. On the
1016 // next GC cycle these objects will be moved to
1017 // g0->large_objects.
1018 if (bd != NULL) {
1019 // add it to the allocation stats when the block is full
1020 finishedNurseryBlock(cap, bd);
1021 dbl_link_onto(bd, &cap->pinned_object_blocks);
1022 }
1023
1024 // We need to find another block. We could just allocate one,
1025 // but that means taking a global lock and we really want to
1026 // avoid that (benchmarks that allocate a lot of pinned
1027 // objects scale really badly if we do this).
1028 //
1029 // So first, we try taking the next block from the nursery, in
1030 // the same way as allocate().
1031 bd = cap->r.rCurrentNursery->link;
1032 if (bd == NULL) {
1033 // The nursery is empty: allocate a fresh block (we can't fail
1034 // here).
1035 ACQUIRE_SM_LOCK;
1036 bd = allocBlockOnNode(cap->node);
1037 RELEASE_SM_LOCK;
1038 initBdescr(bd, g0, g0);
1039 } else {
1040 newNurseryBlock(bd);
1041 // we have a block in the nursery: steal it
1042 cap->r.rCurrentNursery->link = bd->link;
1043 if (bd->link != NULL) {
1044 bd->link->u.back = cap->r.rCurrentNursery;
1045 }
1046 cap->r.rNursery->n_blocks -= bd->blocks;
1047 }
1048
1049 cap->pinned_object_block = bd;
1050 bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
1051
1052 // The pinned_object_block remains attached to the capability
1053 // until it is full, even if a GC occurs. We want this
1054 // behaviour because otherwise the unallocated portion of the
1055 // block would be forever slop, and under certain workloads
1056 // (allocating a few ByteStrings per GC) we accumulate a lot
1057 // of slop.
1058 //
1059 // So, the pinned_object_block is initially marked
1060 // BF_EVACUATED so the GC won't touch it. When it is full,
1061 // we place it on the large_objects list, and at the start of
1062 // the next GC the BF_EVACUATED flag will be cleared, and the
1063 // block will be promoted as usual (if anything in it is
1064 // live).
1065 }
1066
1067 p = bd->free;
1068 bd->free += n;
1069 return p;
1070 }
1071
1072 /* -----------------------------------------------------------------------------
1073 Write Barriers
1074 -------------------------------------------------------------------------- */
1075
1076 /*
1077 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
1078 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
1079 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
1080 and is put on the mutable list.
1081 */
1082 void
1083 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
1084 {
1085 Capability *cap = regTableToCapability(reg);
1086 // No barrier required here as no other heap object fields are read. See
1087 // note [Heap memory barriers] in SMP.h.
1088 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
1089 p->header.info = &stg_MUT_VAR_DIRTY_info;
1090 recordClosureMutated(cap,p);
1091 }
1092 }
1093
1094 void
1095 dirty_TVAR(Capability *cap, StgTVar *p)
1096 {
1097 // No barrier required here as no other heap object fields are read. See
1098 // note [Heap memory barriers] in SMP.h.
1099 if (p->header.info == &stg_TVAR_CLEAN_info) {
1100 p->header.info = &stg_TVAR_DIRTY_info;
1101 recordClosureMutated(cap,(StgClosure*)p);
1102 }
1103 }
1104
1105 // Setting a TSO's link field with a write barrier.
1106 // It is *not* necessary to call this function when
1107 // * setting the link field to END_TSO_QUEUE
1108 // * setting the link field of the currently running TSO, as it
1109 // will already be dirty.
1110 void
1111 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
1112 {
1113 if (tso->dirty == 0) {
1114 tso->dirty = 1;
1115 recordClosureMutated(cap,(StgClosure*)tso);
1116 }
1117 tso->_link = target;
1118 }
1119
1120 void
1121 setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
1122 {
1123 if (tso->dirty == 0) {
1124 tso->dirty = 1;
1125 recordClosureMutated(cap,(StgClosure*)tso);
1126 }
1127 tso->block_info.prev = target;
1128 }
1129
1130 void
1131 dirty_TSO (Capability *cap, StgTSO *tso)
1132 {
1133 if (tso->dirty == 0) {
1134 tso->dirty = 1;
1135 recordClosureMutated(cap,(StgClosure*)tso);
1136 }
1137 }
1138
1139 void
1140 dirty_STACK (Capability *cap, StgStack *stack)
1141 {
1142 if (stack->dirty == 0) {
1143 stack->dirty = 1;
1144 recordClosureMutated(cap,(StgClosure*)stack);
1145 }
1146 }
1147
1148 /*
1149 This is the write barrier for MVARs. An MVAR_CLEAN object is not
1150 on the mutable list; an MVAR_DIRTY is.  When written to, an
1151 MVAR_CLEAN turns into an MVAR_DIRTY and is put on the mutable list.
1152 The check for MVAR_CLEAN is inlined at the call site for speed;
1153 this really does make a difference on concurrency-heavy benchmarks
1154 such as Chameneos and cheap-concurrency.
1155 */
1156 void
1157 dirty_MVAR(StgRegTable *reg, StgClosure *p)
1158 {
1159 recordClosureMutated(regTableToCapability(reg),p);
1160 }
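/*
 * The expected call-site pattern (illustrative; the real callers are the
 * MVar primops in PrimOps.cmm) is roughly:
 *
 *     if (GET_INFO((StgClosure *)mvar) == &stg_MVAR_CLEAN_info) {
 *         dirty_MVAR(reg, (StgClosure *)mvar);
 *     }
 *     ... mutate the MVar and set its header to stg_MVAR_DIRTY_info ...
 */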
1161
1162 /* -----------------------------------------------------------------------------
1163 * Stats and stuff
1164 * -------------------------------------------------------------------------- */
1165
1166 /* -----------------------------------------------------------------------------
1167 * Note [allocation accounting]
1168 *
1169 * - When cap->r.rCurrentNursery moves to a new block in the nursery,
1170 * we add the size of the used portion of the previous block to
1171 * cap->total_allocated. (see finishedNurseryBlock())
1172 *
1173 * - When we start a GC, the allocated portion of CurrentNursery and
1174 * CurrentAlloc are added to cap->total_allocated. (see
1175 * updateNurseriesStats())
1176 *
1177 * -------------------------------------------------------------------------- */
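/*
 * Informally: cap->total_allocated counts, in words, the used portion of
 * every nursery block the capability has finished with, plus any large
 * objects allocated directly (allocateMightFail() above adds n for the
 * large-object case).  calcTotalAllocated() below just sums these
 * per-capability counters.
 */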
1178
1179 //
1180 // Calculate the total allocated memory since the start of the
1181 // program. Also emits events reporting the per-cap allocation
1182 // totals.
1183 //
1184 StgWord
1185 calcTotalAllocated (void)
1186 {
1187 W_ tot_alloc = 0;
1188 W_ n;
1189
1190 for (n = 0; n < n_capabilities; n++) {
1191 tot_alloc += capabilities[n]->total_allocated;
1192
1193 traceEventHeapAllocated(capabilities[n],
1194 CAPSET_HEAP_DEFAULT,
1195 capabilities[n]->total_allocated * sizeof(W_));
1196 }
1197
1198 return tot_alloc;
1199 }
1200
1201 //
1202 // Update the per-cap total_allocated numbers with an approximation of
1203 // the amount of memory used in each cap's nursery.
1204 //
1205 void
1206 updateNurseriesStats (void)
1207 {
1208 uint32_t i;
1209 bdescr *bd;
1210
1211 for (i = 0; i < n_capabilities; i++) {
1212 // The current nursery block and the current allocate block have not
1213 // yet been accounted for in cap->total_allocated, so we add them here.
1214 bd = capabilities[i]->r.rCurrentNursery;
1215 if (bd) finishedNurseryBlock(capabilities[i], bd);
1216 bd = capabilities[i]->r.rCurrentAlloc;
1217 if (bd) finishedNurseryBlock(capabilities[i], bd);
1218 }
1219 }
1220
1221 W_ countOccupied (bdescr *bd)
1222 {
1223 W_ words;
1224
1225 words = 0;
1226 for (; bd != NULL; bd = bd->link) {
1227 ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
1228 words += bd->free - bd->start;
1229 }
1230 return words;
1231 }
1232
1233 W_ genLiveWords (generation *gen)
1234 {
1235 return gen->n_words + gen->n_large_words +
1236 gen->n_compact_blocks * BLOCK_SIZE_W;
1237 }
1238
1239 W_ genLiveBlocks (generation *gen)
1240 {
1241 return gen->n_blocks + gen->n_large_blocks + gen->n_compact_blocks;
1242 }
1243
1244 W_ gcThreadLiveWords (uint32_t i, uint32_t g)
1245 {
1246 W_ a, b, c;
1247
1248 a = countOccupied(gc_threads[i]->gens[g].todo_bd);
1249 b = gc_threads[i]->gens[g].n_part_words;
1250 c = gc_threads[i]->gens[g].n_scavd_words;
1251
1252 // debugBelch("cap %d, g%d, %ld %ld %ld\n", i, g, a, b, c);
1253 return a + b + c;
1254 }
1255
1256 W_ gcThreadLiveBlocks (uint32_t i, uint32_t g)
1257 {
1258 W_ blocks;
1259
1260 blocks = countBlocks(gc_threads[i]->gens[g].todo_bd);
1261 blocks += gc_threads[i]->gens[g].n_part_blocks;
1262 blocks += gc_threads[i]->gens[g].n_scavd_blocks;
1263
1264 return blocks;
1265 }
1266
1267 /* Determine which generation will be collected next, and approximate
1268 * the maximum amount of memory that will be required to do the GC,
1269 * taking into account data that will be copied, and the space needed
1270 * to store bitmaps and the mark stack. Note: blocks_needed does not
1271 * include the blocks in the nursery.
1272 *
1273 * Assume: all data currently live will remain live. Generations
1274 * that will be collected next time will therefore need twice as many
1275 * blocks since all the data will be copied.
1276 */
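/*
 * Worked example (illustrative numbers): a generation holding 1000 live
 * blocks always contributes those 1000 blocks to blocks_needed.  If it will
 * be collected by copying it contributes another 1000 for the to-space
 * copy; if it will be marked (gen->mark) it instead adds 1000/BITS_IN(W_)
 * blocks for the bitmap and 1000/100 for the mark stack, plus the extra
 * 1000 unless it is also compacted (gen->compact).
 */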
1277 extern W_
1278 calcNeeded (bool force_major, memcount *blocks_needed)
1279 {
1280 W_ needed = 0;
1281 uint32_t N;
1282
1283 if (force_major) {
1284 N = RtsFlags.GcFlags.generations - 1;
1285 } else {
1286 N = 0;
1287 }
1288
1289 for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
1290 generation *gen = &generations[g];
1291
1292 W_ blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
1293 + gen->n_large_blocks
1294 + gen->n_compact_blocks;
1295
1296 // we need at least this much space
1297 needed += blocks;
1298
1299 // are we collecting this gen?
1300 if (g == 0 || // always collect gen 0
1301 blocks > gen->max_blocks)
1302 {
1303 N = stg_max(N,g);
1304
1305 // we will collect this gen next time
1306 if (gen->mark) {
1307 // bitmap:
1308 needed += gen->n_blocks / BITS_IN(W_);
1309 // mark stack:
1310 needed += gen->n_blocks / 100;
1311 }
1312 if (gen->compact) {
1313 continue; // no additional space needed for compaction
1314 } else {
1315 needed += gen->n_blocks;
1316 }
1317 }
1318 }
1319
1320 if (blocks_needed != NULL) {
1321 *blocks_needed = needed;
1322 }
1323 return N;
1324 }
1325
1326 StgWord calcTotalLargeObjectsW (void)
1327 {
1328 uint32_t g;
1329 StgWord totalW = 0;
1330
1331 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1332 totalW += generations[g].n_large_words;
1333 }
1334 return totalW;
1335 }
1336
1337 StgWord calcTotalCompactW (void)
1338 {
1339 uint32_t g;
1340 StgWord totalW = 0;
1341
1342 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1343 totalW += generations[g].n_compact_blocks * BLOCK_SIZE_W;
1344 }
1345 return totalW;
1346 }
1347
1348 /* ----------------------------------------------------------------------------
1349 Executable memory
1350
1351 Executable memory must be managed separately from non-executable
1352 memory. Most OSs these days require you to jump through hoops to
1353 dynamically allocate executable memory, due to various security
1354 measures.
1355
1356 Here we provide a small memory allocator for executable memory.
1357 Memory is managed with a page granularity; we allocate linearly
1358 in the page, and when the page is emptied (all objects on the page
1359 are free) we free the page again, not forgetting to make it
1360 non-executable.
1361
1362 TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1363 the linker cannot use allocateExec for loading object code files
1364 on Windows. Once allocateExec can handle larger objects, the linker
1365 should be modified to use allocateExec instead of VirtualAlloc.
1366 ------------------------------------------------------------------------- */
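/*
 * Illustrative usage sketch (hypothetical caller; the real users are the
 * adjustor / foreign-export machinery):
 *
 *     AdjustorExecutable exec;
 *     AdjustorWritable   wr = allocateExec(code_size, &exec);
 *     memcpy(wr, code, code_size);   // write through the writable alias
 *     flushExec(code_size, exec);    // make sure the CPU sees the new code
 *     ... call through exec ...
 *     freeExec(exec);                // takes the *executable* address
 */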
1367
1368 #if (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1369 #include <libkern/OSCacheControl.h>
1370 #endif
1371
1372 /* __builtin___clear_cache is supported since GNU C 4.3.6.
1373 * We test for 4.4 to simplify the condition a bit.
1374 */
1375 #define GCC_HAS_BUILTIN_CLEAR_CACHE (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
1376
1377 #if defined(__clang__)
1378 /* clang defines __clear_cache as a builtin on some platforms.
1379 * For example on armv7-linux-androideabi. The type slightly
1380 * differs from gcc.
1381 */
1382 extern void __clear_cache(void * begin, void * end);
1383 #elif defined(__GNUC__) && !GCC_HAS_BUILTIN_CLEAR_CACHE
1384 /* __clear_cache is a libgcc function.
1385 * It existed before __builtin___clear_cache was introduced.
1386 * See #8562.
1387 */
1388 extern void __clear_cache(char * begin, char * end);
1389 #endif /* __GNUC__ */
1390
1391 /* On ARM and other platforms, we need to flush the cache after
1392 writing code into memory, so the processor reliably sees it. */
1393 void flushExec (W_ len, AdjustorExecutable exec_addr)
1394 {
1395 #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
1396 /* x86 doesn't need to do anything, so just suppress some warnings. */
1397 (void)len;
1398 (void)exec_addr;
1399 #elif (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1400 /* On iOS we need to use the special 'sys_icache_invalidate' call. */
1401 sys_icache_invalidate(exec_addr, len);
1402 #elif defined(__clang__)
1403 unsigned char* begin = (unsigned char*)exec_addr;
1404 unsigned char* end = begin + len;
1405 # if __has_builtin(__builtin___clear_cache)
1406 __builtin___clear_cache((void*)begin, (void*)end);
1407 # else
1408 __clear_cache((void*)begin, (void*)end);
1409 # endif
1410 #elif defined(__GNUC__)
1411 unsigned char* begin = (unsigned char*)exec_addr;
1412 unsigned char* end = begin + len;
1413 # if GCC_HAS_BUILTIN_CLEAR_CACHE
1414 __builtin___clear_cache((void*)begin, (void*)end);
1415 # else
1416 /* For all other platforms, fall back to a libgcc builtin. */
1417 __clear_cache((void*)begin, (void*)end);
1418 # endif
1419 #else
1420 #error Missing support to flush the instruction cache
1421 #endif
1422 }
1423
1424 #if defined(linux_HOST_OS)
1425
1426 // On Linux we need to use libffi for allocating executable memory,
1427 // because it knows how to work around the restrictions put in place
1428 // by SELinux.
1429
1430 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1431 {
1432 void **ret, **exec;
1433 ACQUIRE_SM_LOCK;
1434 ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1435 RELEASE_SM_LOCK;
1436 if (ret == NULL) return ret;
1437 *ret = ret; // save the address of the writable mapping, for freeExec().
1438 *exec_ret = exec + 1;
1439 return (ret + 1);
1440 }
1441
1442 // freeExec gets passed the executable address, not the writable address.
1443 void freeExec (AdjustorExecutable addr)
1444 {
1445 AdjustorWritable writable;
1446 writable = *((void**)addr - 1);
1447 ACQUIRE_SM_LOCK;
1448 ffi_closure_free (writable);
1449 RELEASE_SM_LOCK
1450 }
1451
1452 #elif defined(ios_HOST_OS)
1453
1454 static HashTable* allocatedExecs;
1455
1456 AdjustorWritable allocateExec(W_ bytes, AdjustorExecutable *exec_ret)
1457 {
1458 AdjustorWritable writ;
1459 ffi_closure* cl;
1460 if (bytes != sizeof(ffi_closure)) {
1461 barf("allocateExec: for ffi_closure only");
1462 }
1463 ACQUIRE_SM_LOCK;
1464 cl = writ = ffi_closure_alloc((size_t)bytes, exec_ret);
1465 if (cl != NULL) {
1466 if (allocatedExecs == NULL) {
1467 allocatedExecs = allocHashTable();
1468 }
1469 insertHashTable(allocatedExecs, (StgWord)*exec_ret, writ);
1470 }
1471 RELEASE_SM_LOCK;
1472 return writ;
1473 }
1474
1475 AdjustorWritable execToWritable(AdjustorExecutable exec)
1476 {
1477 AdjustorWritable writ;
1478 ACQUIRE_SM_LOCK;
1479 if (allocatedExecs == NULL ||
1480 (writ = lookupHashTable(allocatedExecs, (StgWord)exec)) == NULL) {
1481 RELEASE_SM_LOCK;
1482 barf("execToWritable: not found");
1483 }
1484 RELEASE_SM_LOCK;
1485 return writ;
1486 }
1487
1488 void freeExec(AdjustorExecutable exec)
1489 {
1490 AdjustorWritable writ;
1491 ffi_closure* cl;
1492 cl = writ = execToWritable(exec);
1493 ACQUIRE_SM_LOCK;
1494 removeHashTable(allocatedExecs, (StgWord)exec, writ);
1495 ffi_closure_free(cl);
1496 RELEASE_SM_LOCK
1497 }
1498
1499 #else
1500
1501 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1502 {
1503 void *ret;
1504 W_ n;
1505
1506 ACQUIRE_SM_LOCK;
1507
1508 // round up to words.
1509 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
1510
1511 if (n+1 > BLOCK_SIZE_W) {
1512 barf("allocateExec: can't handle large objects");
1513 }
1514
1515 if (exec_block == NULL ||
1516 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1517 bdescr *bd;
1518 W_ pagesize = getPageSize();
1519 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1520 debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1521 bd->gen_no = 0;
1522 bd->flags = BF_EXEC;
1523 bd->link = exec_block;
1524 if (exec_block != NULL) {
1525 exec_block->u.back = bd;
1526 }
1527 bd->u.back = NULL;
1528 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, true);
1529 exec_block = bd;
1530 }
1531 *(exec_block->free) = n; // store the size of this chunk
1532 exec_block->gen_no += n; // gen_no stores the number of words allocated
1533 ret = exec_block->free + 1;
1534 exec_block->free += n + 1;
1535
1536 RELEASE_SM_LOCK
1537 *exec_ret = ret;
1538 return ret;
1539 }
1540
1541 void freeExec (void *addr)
1542 {
1543 StgPtr p = (StgPtr)addr - 1;
1544 bdescr *bd = Bdescr((StgPtr)p);
1545
1546 if ((bd->flags & BF_EXEC) == 0) {
1547 barf("freeExec: not executable");
1548 }
1549
1550 if (*(StgPtr)p == 0) {
1551 barf("freeExec: already free?");
1552 }
1553
1554 ACQUIRE_SM_LOCK;
1555
1556 bd->gen_no -= *(StgPtr)p;
1557 *(StgPtr)p = 0;
1558
1559 if (bd->gen_no == 0) {
1560 // Free the block if it is empty, but not if it is the block at
1561 // the head of the queue.
1562 if (bd != exec_block) {
1563 debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1564 dbl_link_remove(bd, &exec_block);
1565 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, false);
1566 freeGroup(bd);
1567 } else {
1568 bd->free = bd->start;
1569 }
1570 }
1571
1572 RELEASE_SM_LOCK
1573 }
1574
1575 #endif /* switch(HOST_OS) */
1576
1577 #if defined(DEBUG)
1578
1579 // handy function for use in gdb, because Bdescr() is inlined.
1580 extern bdescr *_bdescr (StgPtr p);
1581
1582 bdescr *
1583 _bdescr (StgPtr p)
1584 {
1585 return Bdescr(p);
1586 }
1587
1588 #endif