1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2012
4 *
5 * Storage manager front end
6 *
7 * Documentation on the architecture of the Storage Manager can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16
17 #include "Storage.h"
18 #include "GCThread.h"
19 #include "RtsUtils.h"
20 #include "Stats.h"
21 #include "BlockAlloc.h"
22 #include "Weak.h"
23 #include "Sanity.h"
24 #include "Arena.h"
25 #include "Capability.h"
26 #include "Schedule.h"
27 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
28 #include "OSMem.h"
29 #include "Trace.h"
30 #include "GC.h"
31 #include "Evac.h"
32 #if defined(ios_HOST_OS)
33 #include "Hash.h"
34 #endif
35
36 #include <string.h>
37
38 #include "ffi.h"
39
40 /*
41 * All these globals require sm_mutex to access in THREADED_RTS mode.
42 */
43 StgIndStatic *dyn_caf_list = NULL;
44 StgIndStatic *debug_caf_list = NULL;
45 StgIndStatic *revertible_caf_list = NULL;
46 bool keepCAFs;
47
48 W_ large_alloc_lim; /* GC if n_large_blocks in any nursery
49 * reaches this. */
50
51 bdescr *exec_block;
52
53 generation *generations = NULL; /* all the generations */
54 generation *g0 = NULL; /* generation 0, for convenience */
55 generation *oldest_gen = NULL; /* oldest generation, for convenience */
56
57 /*
58 * Array of nurseries, size == n_capabilities
59 *
60 * nursery[i] belongs to NUMA node (i % n_numa_nodes)
61 * This is chosen to be the same convention as capabilities[i], so
62 * that when not using nursery chunks (+RTS -n), we just map
63 * capabilities to nurseries 1:1.
64 */
65 nursery *nurseries = NULL;
66 uint32_t n_nurseries;
67
68 /*
69 * When we are using nursery chunks, we need a separate next_nursery
70 * pointer for each NUMA node.
71 */
72 volatile StgWord next_nursery[MAX_NUMA_NODES];
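/*
 * Illustrative example (numbers made up): with n_numa_nodes == 2 and six
 * nursery chunks, node 0 owns nurseries {0,2,4} and node 1 owns {1,3,5}.
 * A Capability on node 0 starts at next_nursery[0] and then strides by
 * n_numa_nodes (see assignNurseriesToCapabilities() and getNewNursery()
 * below), so it only ever picks up nurseries resident on its own node.
 */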
73
74 #if defined(THREADED_RTS)
75 /*
76 * Storage manager mutex: protects all the above state from
77 * simultaneous access by two STG threads.
78 */
79 Mutex sm_mutex;
80 #endif
81
82 static void allocNurseries (uint32_t from, uint32_t to);
83 static void assignNurseriesToCapabilities (uint32_t from, uint32_t to);
84
85 static void
86 initGeneration (generation *gen, int g)
87 {
88 gen->no = g;
89 gen->collections = 0;
90 gen->par_collections = 0;
91 gen->failed_promotions = 0;
92 gen->max_blocks = 0;
93 gen->blocks = NULL;
94 gen->n_blocks = 0;
95 gen->n_words = 0;
96 gen->live_estimate = 0;
97 gen->old_blocks = NULL;
98 gen->n_old_blocks = 0;
99 gen->large_objects = NULL;
100 gen->n_large_blocks = 0;
101 gen->n_large_words = 0;
102 gen->n_new_large_words = 0;
103 gen->compact_objects = NULL;
104 gen->n_compact_blocks = 0;
105 gen->compact_blocks_in_import = NULL;
106 gen->n_compact_blocks_in_import = 0;
107 gen->scavenged_large_objects = NULL;
108 gen->n_scavenged_large_blocks = 0;
109 gen->live_compact_objects = NULL;
110 gen->n_live_compact_blocks = 0;
111 gen->compact_blocks_in_import = NULL;
112 gen->n_compact_blocks_in_import = 0;
113 gen->mark = 0;
114 gen->compact = 0;
115 gen->bitmap = NULL;
116 #if defined(THREADED_RTS)
117 initSpinLock(&gen->sync);
118 #endif
119 gen->threads = END_TSO_QUEUE;
120 gen->old_threads = END_TSO_QUEUE;
121 gen->weak_ptr_list = NULL;
122 gen->old_weak_ptr_list = NULL;
123 }
124
125 void
126 initStorage (void)
127 {
128 uint32_t g, n;
129
130 if (generations != NULL) {
131 // multi-init protection
132 return;
133 }
134
135 initMBlocks();
136
137 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
138 * doing something reasonable.
139 */
140 /* We use the NOT_NULL variant, otherwise gcc warns that the test is always true */
141 ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
142 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
143 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
144
145 initBlockAllocator();
146
147 #if defined(THREADED_RTS)
148 initMutex(&sm_mutex);
149 #endif
150
151 ACQUIRE_SM_LOCK;
152
153 /* allocate generation info array */
154 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
155 * sizeof(struct generation_),
156 "initStorage: gens");
157
158 /* Initialise all generations */
159 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
160 initGeneration(&generations[g], g);
161 }
162
163 /* A couple of convenience pointers */
164 g0 = &generations[0];
165 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
166
167 /* Set up the destination pointers in each younger gen. step */
168 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
169 generations[g].to = &generations[g+1];
170 }
171 oldest_gen->to = oldest_gen;
172
173 /* The oldest generation has one step. */
174 if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
175 if (RtsFlags.GcFlags.generations == 1) {
176 errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
177 } else {
178 oldest_gen->mark = 1;
179 if (RtsFlags.GcFlags.compact)
180 oldest_gen->compact = 1;
181 }
182 }
183
184 generations[0].max_blocks = 0;
185
186 dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
187 debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
188 revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
189
190 if (RtsFlags.GcFlags.largeAllocLim > 0) {
191 large_alloc_lim = RtsFlags.GcFlags.largeAllocLim * BLOCK_SIZE_W;
192 } else {
193 large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
194 }
195
196 exec_block = NULL;
197
198 #if defined(THREADED_RTS)
199 initSpinLock(&gc_alloc_block_sync);
200 #endif
201 N = 0;
202
203 for (n = 0; n < n_numa_nodes; n++) {
204 next_nursery[n] = n;
205 }
206 storageAddCapabilities(0, n_capabilities);
207
208 IF_DEBUG(gc, statDescribeGens());
209
210 RELEASE_SM_LOCK;
211
212 traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
213 RtsFlags.GcFlags.generations,
214 RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE,
215 RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE,
216 MBLOCK_SIZE,
217 BLOCK_SIZE);
218 }
219
220 void storageAddCapabilities (uint32_t from, uint32_t to)
221 {
222 uint32_t n, g, i, new_n_nurseries;
223 nursery *old_nurseries;
224
225 if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
226 new_n_nurseries = to;
227 } else {
228 memcount total_alloc = to * RtsFlags.GcFlags.minAllocAreaSize;
229 new_n_nurseries =
230 stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
231 }
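/* Illustrative arithmetic only: with 4 capabilities and +RTS -A16m -n4m,
 * minAllocAreaSize is 4096 blocks and nurseryChunkSize is 1024 blocks, so
 * total_alloc = 4 * 4096 and we create stg_max(4, 16384/1024) = 16 nursery
 * chunks, which are then handed out to capabilities on demand. */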
232
233 old_nurseries = nurseries;
234 if (from > 0) {
235 nurseries = stgReallocBytes(nurseries,
236 new_n_nurseries * sizeof(struct nursery_),
237 "storageAddCapabilities");
238 } else {
239 nurseries = stgMallocBytes(new_n_nurseries * sizeof(struct nursery_),
240 "storageAddCapabilities");
241 }
242
243 // we've moved the nurseries, so we have to update the rNursery
244 // pointers from the Capabilities.
245 for (i = 0; i < from; i++) {
246 uint32_t index = capabilities[i]->r.rNursery - old_nurseries;
247 capabilities[i]->r.rNursery = &nurseries[index];
248 }
249
250 /* The allocation area. Policy: keep the allocation area
251 * small to begin with, even if we have a large suggested heap
252 * size. Reason: we're going to do a major collection first, and we
253 * don't want it to be a big one. This vague idea is borne out by
254 * rigorous experimental evidence.
255 */
256 allocNurseries(n_nurseries, new_n_nurseries);
257 n_nurseries = new_n_nurseries;
258
259 /*
260 * Assign each of the new capabilities a nursery. Remember to start from
261 * next_nursery, because we may have already consumed some of the earlier
262 * nurseries.
263 */
264 assignNurseriesToCapabilities(from,to);
265
266 // allocate a block for each mut list
267 for (n = from; n < to; n++) {
268 for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
269 capabilities[n]->mut_lists[g] =
270 allocBlockOnNode(capNoToNumaNode(n));
271 }
272 }
273
274 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
275 newThreadLocalKey(&gctKey);
276 #endif
277
278 initGcThreads(from, to);
279 }
280
281
282 void
283 exitStorage (void)
284 {
285 updateNurseriesStats();
286 stat_exit();
287 }
288
289 void
290 freeStorage (bool free_heap)
291 {
292 stgFree(generations);
293 if (free_heap) freeAllMBlocks();
294 #if defined(THREADED_RTS)
295 closeMutex(&sm_mutex);
296 #endif
297 stgFree(nurseries);
298 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
299 freeThreadLocalKey(&gctKey);
300 #endif
301 freeGcThreads();
302 }
303
304 /* -----------------------------------------------------------------------------
305 Note [CAF management].
306
307 The entry code for every CAF does the following:
308
309 - calls newCAF, which builds a CAF_BLACKHOLE on the heap and atomically
310 updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE
311
312 - if newCAF returns zero, it re-enters the CAF (see Note [atomic
313 CAF entry])
314
315 - pushes an update frame pointing to the CAF_BLACKHOLE
316
317 Why do we build a BLACKHOLE in the heap rather than just updating
318 the thunk directly? It's so that we only need one kind of update
319 frame - otherwise we'd need a static version of the update frame
320 too, and various other parts of the RTS that deal with update
321 frames would also need special cases for static update frames.
322
323 newCAF() does the following:
324
325 - atomically locks the CAF (see [atomic CAF entry])
326
327 - it builds a CAF_BLACKHOLE on the heap
328
329 - it updates the CAF with an IND_STATIC pointing to the
330 CAF_BLACKHOLE, atomically.
331
332 - it puts the CAF on the oldest generation's mutable list.
333 This is so that we treat the CAF as a root when collecting
334 younger generations.
335
336 - links the CAF onto the CAF list (see below)
337
338 ------------------
339 Note [atomic CAF entry]
340
341 With THREADED_RTS, newCAF() is required to be atomic (see
342 #5558). This is because if two threads happened to enter the same
343 CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
344 and so the normal threadPaused() machinery for detecting duplicate
345 evaluation will not detect this. Hence in lockCAF() below, we
346 atomically lock the CAF with WHITEHOLE before updating it with
347 IND_STATIC, and return zero if another thread locked the CAF first.
348 In the event that we lost the race, CAF entry code will re-enter
349 the CAF and block on the other thread's CAF_BLACKHOLE.
350
351 ------------------
352 Note [GHCi CAFs]
353
354 For GHCi, we have additional requirements when dealing with CAFs:
355
356 - we must *retain* all dynamically-loaded CAFs ever entered,
357 just in case we need them again.
358 - we must be able to *revert* CAFs that have been evaluated, to
359 their pre-evaluated form.
360
361 To do this, we use an additional CAF list. When newCAF() is
362 called on a dynamically-loaded CAF, we add it to the CAF list
363 instead of the old-generation mutable list, and save away its
364 old info pointer (in caf->saved_info) for later reversion.
365
366 To revert all the CAFs, we traverse the CAF list and reset the
367 info pointer to caf->saved_info, then throw away the CAF list.
368 (see GC.c:revertCAFs()).
369
370 -- SDM 29/1/01
371
372 -------------------------------------------------------------------------- */
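/*
 * For orientation only, the entry code generated for a CAF behaves roughly
 * like the pseudocode below (a sketch, not the real generated Cmm;
 * "caf_body" is a placeholder for the CAF's compiled body):
 *
 *     bh = newCAF(BaseReg, caf);
 *     if (bh == NULL) {
 *         // lost the race in lockCAF(): re-enter the updated CAF
 *         jump to the CAF's current entry code;
 *     }
 *     push an update frame whose updatee is bh;
 *     jump caf_body;
 */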
373
374 STATIC_INLINE StgInd *
375 lockCAF (StgRegTable *reg, StgIndStatic *caf)
376 {
377 const StgInfoTable *orig_info;
378 Capability *cap = regTableToCapability(reg);
379 StgInd *bh;
380
381 orig_info = caf->header.info;
382
383 #if defined(THREADED_RTS)
384 const StgInfoTable *cur_info;
385
386 if (orig_info == &stg_IND_STATIC_info ||
387 orig_info == &stg_WHITEHOLE_info) {
388 // already claimed by another thread; re-enter the CAF
389 return NULL;
390 }
391
392 cur_info = (const StgInfoTable *)
393 cas((StgVolatilePtr)&caf->header.info,
394 (StgWord)orig_info,
395 (StgWord)&stg_WHITEHOLE_info);
396
397 if (cur_info != orig_info) {
398 // already claimed by another thread; re-enter the CAF
399 return NULL;
400 }
401
402 // successfully claimed by us; overwrite with IND_STATIC
403 #endif
404
405 // For the benefit of revertCAFs(), save the original info pointer
406 caf->saved_info = orig_info;
407
408 // Allocate the blackhole indirection closure
409 bh = (StgInd *)allocate(cap, sizeofW(*bh));
410 SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
411 bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
412
413 caf->indirectee = (StgClosure *)bh;
414 write_barrier();
415 SET_INFO((StgClosure*)caf,&stg_IND_STATIC_info);
416
417 return bh;
418 }
419
420 StgInd *
421 newCAF(StgRegTable *reg, StgIndStatic *caf)
422 {
423 StgInd *bh;
424
425 bh = lockCAF(reg, caf);
426 if (!bh) return NULL;
427
428 if(keepCAFs)
429 {
430 // Note [dyn_caf_list]
431 // If we are in GHCi _and_ we are using dynamic libraries,
432 // then we can't redirect newCAF calls to newRetainedCAF (see below),
433 // so we make newCAF behave almost like newRetainedCAF.
434 // The dynamic libraries might be used by both the interpreted
435 // program and GHCi itself, so they must not be reverted.
436 // This also means that in GHCi with dynamic libraries, CAFs are not
437 // garbage collected. If this turns out to be a problem, we could
438 // do another hack here and do an address range test on caf to figure
439 // out whether it is from a dynamic library.
440
441 ACQUIRE_SM_LOCK; // dyn_caf_list is global, locked by sm_mutex
442 caf->static_link = (StgClosure*)dyn_caf_list;
443 dyn_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
444 RELEASE_SM_LOCK;
445 }
446 else
447 {
448 // Put this CAF on the mutable list for the old generation.
449 if (oldest_gen->no != 0) {
450 recordMutableCap((StgClosure*)caf,
451 regTableToCapability(reg), oldest_gen->no);
452 }
453
454 #if defined(DEBUG)
455 // In the DEBUG rts, we keep track of live CAFs by chaining them
456 // onto a list debug_caf_list. This is so that we can tell if we
457 // ever enter a GC'd CAF, and emit a suitable barf().
458 //
459 // The saved_info field of the CAF is used as the link field for
460 // debug_caf_list, because this field is only used by newRetainedCAF
461 // for revertible CAFs, and we don't put those on the
462 // debug_caf_list.
463 ACQUIRE_SM_LOCK; // debug_caf_list is global, locked by sm_mutex
464 ((StgIndStatic *)caf)->saved_info = (const StgInfoTable*)debug_caf_list;
465 debug_caf_list = (StgIndStatic*)caf;
466 RELEASE_SM_LOCK;
467 #endif
468 }
469
470 return bh;
471 }
472
473 // External API for setting the keepCAFs flag. see #3900.
474 void
475 setKeepCAFs (void)
476 {
477 keepCAFs = 1;
478 }
479
480 // An alternate version of newCAF which is used for dynamically loaded
481 // object code in GHCi. In this case we want to retain *all* CAFs in
482 // the object code, because they might be demanded at any time from an
483 // expression evaluated on the command line.
484 // Also, GHCi might want to revert CAFs, so we add these to the
485 // revertible_caf_list.
486 //
487 // The linker hackily arranges that references to newCAF from dynamic
488 // code end up pointing to newRetainedCAF.
489 //
490 StgInd* newRetainedCAF (StgRegTable *reg, StgIndStatic *caf)
491 {
492 StgInd *bh;
493
494 bh = lockCAF(reg, caf);
495 if (!bh) return NULL;
496
497 ACQUIRE_SM_LOCK;
498
499 caf->static_link = (StgClosure*)revertible_caf_list;
500 revertible_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
501
502 RELEASE_SM_LOCK;
503
504 return bh;
505 }
506
507 // If we are using loadObj/unloadObj in the linker, then we want to
508 //
509 // - retain all CAFs in statically linked code (keepCAFs == true),
510 // because we might link a new object that uses any of these CAFs.
511 //
512 // - GC CAFs in dynamically-linked code, so that we can detect when
513 // a dynamically-linked object is unloadable.
514 //
515 // So for this case, we set keepCAFs to true, and link newCAF to newGCdCAF
516 // for dynamically-linked code.
517 //
518 StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
519 {
520 StgInd *bh;
521
522 bh = lockCAF(reg, caf);
523 if (!bh) return NULL;
524
525 // Put this CAF on the mutable list for the old generation.
526 if (oldest_gen->no != 0) {
527 recordMutableCap((StgClosure*)caf,
528 regTableToCapability(reg), oldest_gen->no);
529 }
530
531 return bh;
532 }
533
534 /* -----------------------------------------------------------------------------
535 Nursery management.
536 -------------------------------------------------------------------------- */
537
538 static bdescr *
539 allocNursery (uint32_t node, bdescr *tail, W_ blocks)
540 {
541 bdescr *bd = NULL;
542 W_ i, n;
543
544 // We allocate the nursery as a single contiguous block and then
545 // divide it into single blocks manually. This way we guarantee
546 // that the nursery blocks are adjacent, so that the processor's
547 // automatic prefetching works across nursery blocks. This is a
548 // tiny optimisation (~0.5%), but it's free.
549
550 while (blocks > 0) {
551 n = stg_min(BLOCKS_PER_MBLOCK, blocks);
552 // allocLargeChunk will prefer large chunks, but will pick up
553 // small chunks if there are any available. We must allow
554 // single blocks here to avoid fragmentation (#7257)
555 bd = allocLargeChunkOnNode(node, 1, n);
556 n = bd->blocks;
557 blocks -= n;
558
559 for (i = 0; i < n; i++) {
560 initBdescr(&bd[i], g0, g0);
561
562 bd[i].blocks = 1;
563 bd[i].flags = 0;
564
565 if (i > 0) {
566 bd[i].u.back = &bd[i-1];
567 } else {
568 bd[i].u.back = NULL;
569 }
570
571 if (i+1 < n) {
572 bd[i].link = &bd[i+1];
573 } else {
574 bd[i].link = tail;
575 if (tail != NULL) {
576 tail->u.back = &bd[i];
577 }
578 }
579
580 bd[i].free = bd[i].start;
581 }
582
583 tail = &bd[0];
584 }
585
586 return &bd[0];
587 }
588
589 STATIC_INLINE void
590 assignNurseryToCapability (Capability *cap, uint32_t n)
591 {
592 ASSERT(n < n_nurseries);
593 cap->r.rNursery = &nurseries[n];
594 cap->r.rCurrentNursery = nurseries[n].blocks;
595 newNurseryBlock(nurseries[n].blocks);
596 cap->r.rCurrentAlloc = NULL;
597 ASSERT(cap->r.rCurrentNursery->node == cap->node);
598 }
599
600 /*
601 * Give each Capability a nursery from the pool. There is no need for atomic
602 * increments here; everything must be stopped before this function is called.
603 */
604 static void
605 assignNurseriesToCapabilities (uint32_t from, uint32_t to)
606 {
607 uint32_t i, node;
608
609 for (i = from; i < to; i++) {
610 node = capabilities[i]->node;
611 assignNurseryToCapability(capabilities[i], next_nursery[node]);
612 next_nursery[node] += n_numa_nodes;
613 }
614 }
615
616 static void
617 allocNurseries (uint32_t from, uint32_t to)
618 {
619 uint32_t i;
620 memcount n_blocks;
621
622 if (RtsFlags.GcFlags.nurseryChunkSize) {
623 n_blocks = RtsFlags.GcFlags.nurseryChunkSize;
624 } else {
625 n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
626 }
627
628 for (i = from; i < to; i++) {
629 nurseries[i].blocks = allocNursery(capNoToNumaNode(i), NULL, n_blocks);
630 nurseries[i].n_blocks = n_blocks;
631 }
632 }
633
634 void
635 resetNurseries (void)
636 {
637 uint32_t n;
638
639 for (n = 0; n < n_numa_nodes; n++) {
640 next_nursery[n] = n;
641 }
642 assignNurseriesToCapabilities(0, n_capabilities);
643
644 #if defined(DEBUG)
645 bdescr *bd;
646 for (n = 0; n < n_nurseries; n++) {
647 for (bd = nurseries[n].blocks; bd; bd = bd->link) {
648 ASSERT(bd->gen_no == 0);
649 ASSERT(bd->gen == g0);
650 ASSERT(bd->node == capNoToNumaNode(n));
651 IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE));
652 }
653 }
654 #endif
655 }
656
657 W_
658 countNurseryBlocks (void)
659 {
660 uint32_t i;
661 W_ blocks = 0;
662
663 for (i = 0; i < n_nurseries; i++) {
664 blocks += nurseries[i].n_blocks;
665 }
666 return blocks;
667 }
668
669 //
670 // Resize each of the nurseries to the specified size.
671 //
672 static void
673 resizeNurseriesEach (W_ blocks)
674 {
675 uint32_t i, node;
676 bdescr *bd;
677 W_ nursery_blocks;
678 nursery *nursery;
679
680 for (i = 0; i < n_nurseries; i++) {
681 nursery = &nurseries[i];
682 nursery_blocks = nursery->n_blocks;
683 if (nursery_blocks == blocks) continue;
684
685 node = capNoToNumaNode(i);
686 if (nursery_blocks < blocks) {
687 debugTrace(DEBUG_gc, "increasing size of nursery to %" FMT_Word " blocks",
688 blocks);
689 nursery->blocks = allocNursery(node, nursery->blocks,
690 blocks-nursery_blocks);
691 }
692 else
693 {
694 bdescr *next_bd;
695
696 debugTrace(DEBUG_gc, "decreasing size of nursery to %" FMT_Word " blocks",
697 blocks);
698
699 bd = nursery->blocks;
700 while (nursery_blocks > blocks) {
701 next_bd = bd->link;
702 next_bd->u.back = NULL;
703 nursery_blocks -= bd->blocks; // might be a large block
704 freeGroup(bd);
705 bd = next_bd;
706 }
707 nursery->blocks = bd;
708 // might have gone just under, by freeing a large block, so make
709 // up the difference.
710 if (nursery_blocks < blocks) {
711 nursery->blocks = allocNursery(node, nursery->blocks,
712 blocks-nursery_blocks);
713 }
714 }
715 nursery->n_blocks = blocks;
716 ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
717 }
718 }
719
720 void
721 resizeNurseriesFixed (void)
722 {
723 uint32_t blocks;
724
725 if (RtsFlags.GcFlags.nurseryChunkSize) {
726 blocks = RtsFlags.GcFlags.nurseryChunkSize;
727 } else {
728 blocks = RtsFlags.GcFlags.minAllocAreaSize;
729 }
730
731 resizeNurseriesEach(blocks);
732 }
733
734 //
735 // Resize the nurseries to the total specified size.
736 //
737 void
738 resizeNurseries (W_ blocks)
739 {
740 // If there are multiple nurseries, then we just divide the number
741 // of available blocks between them.
742 resizeNurseriesEach(blocks / n_nurseries);
743 }
744
745 bool
746 getNewNursery (Capability *cap)
747 {
748 StgWord i;
749 uint32_t node = cap->node;
750 uint32_t n;
751
752 for(;;) {
753 i = next_nursery[node];
754 if (i < n_nurseries) {
755 if (cas(&next_nursery[node], i, i+n_numa_nodes) == i) {
756 assignNurseryToCapability(cap, i);
757 return true;
758 }
759 } else if (n_numa_nodes > 1) {
760 // Try to find an unused nursery chunk on other nodes. We'll get
761 // remote memory, but the rationale is that avoiding GC is better
762 // than avoiding remote memory access.
763 bool lost = false;
764 for (n = 0; n < n_numa_nodes; n++) {
765 if (n == node) continue;
766 i = next_nursery[n];
767 if (i < n_nurseries) {
768 if (cas(&next_nursery[n], i, i+n_numa_nodes) == i) {
769 assignNurseryToCapability(cap, i);
770 return true;
771 } else {
772 lost = true; /* lost a race */
773 }
774 }
775 }
776 if (!lost) return false;
777 } else {
778 return false;
779 }
780 }
781 }
782
783 /* -----------------------------------------------------------------------------
784 move_STACK is called to update the TSO structure after it has been
785 moved from one place to another.
786 -------------------------------------------------------------------------- */
787
788 void
789 move_STACK (StgStack *src, StgStack *dest)
790 {
791 ptrdiff_t diff;
792
793 // relocate the stack pointer...
794 diff = (StgPtr)dest - (StgPtr)src; // In *words*
795 dest->sp = (StgPtr)dest->sp + diff;
796 }
797
798 STATIC_INLINE void
799 accountAllocation(Capability *cap, W_ n)
800 {
801 TICK_ALLOC_HEAP_NOCTR(WDS(n));
802 CCS_ALLOC(cap->r.rCCCS,n);
803 if (cap->r.rCurrentTSO != NULL) {
804 // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
805 ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
806 (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
807 - n*sizeof(W_)));
808 }
809
810 }
811
812 /* -----------------------------------------------------------------------------
813 StgPtr allocate (Capability *cap, W_ n)
814
815 Allocates an area of memory n *words* large, from the nursery of
816 the supplied Capability, or from the global block pool if the area
817 requested is larger than LARGE_OBJECT_THRESHOLD. Memory is not
818 allocated from the current nursery block, so as not to interfere
819 with Hp/HpLim.
820
821 The address of the allocated memory is returned. allocate() never
822 fails; if it returns, the returned value is a valid address. If
823 the nursery is already full, then another block is allocated from
824 the global block pool. If we need to get memory from the OS and
825 that operation fails, then the whole process will be killed.
826 -------------------------------------------------------------------------- */
827
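/*
 * For reference, a typical C-level caller looks roughly like this (a hedged
 * sketch only; "init" is a placeholder, and most real callers are in
 * generated Cmm code):
 *
 *     StgMutVar *mv = (StgMutVar *)allocate(cap, sizeofW(StgMutVar));
 *     SET_HDR((StgClosure *)mv, &stg_MUT_VAR_DIRTY_info, cap->r.rCCCS);
 *     mv->var = init;
 */
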
828 /*
829 * Allocate n words of heap memory, terminating the program
830 * on heap overflow.
831 */
832 StgPtr
833 allocate (Capability *cap, W_ n)
834 {
835 StgPtr p = allocateMightFail(cap, n);
836 if (p == NULL) {
837 reportHeapOverflow();
838 // heapOverflow() doesn't exit (see #2592), but we aren't
839 // in a position to do a clean shutdown here: we
840 // either have to allocate the memory or exit now.
841 // Allocating the memory would be bad, because the user
842 // has requested that we not exceed maxHeapSize, so we
843 // just exit.
844 stg_exit(EXIT_HEAPOVERFLOW);
845 }
846 return p;
847 }
848
849 /*
850 * Allocate n words of heap memory, returning NULL
851 * on heap overflow.
852 */
853 StgPtr
854 allocateMightFail (Capability *cap, W_ n)
855 {
856 bdescr *bd;
857 StgPtr p;
858
859 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
860 // The largest number of words such that
861 // the computation of req_blocks will not overflow.
862 W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
863 W_ req_blocks;
864
865 if (n > max_words)
866 req_blocks = HS_WORD_MAX; // signal overflow below
867 else
868 req_blocks = (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
869
870 // Attempting to allocate an object larger than maxHeapSize
871 // should definitely be disallowed. (bug #1791)
872 if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
873 req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
874 req_blocks >= HS_INT32_MAX) // avoid overflow when
875 // calling allocGroup() below
876 {
877 return NULL;
878 }
879
880 // Only credit allocation after we've passed the size check above
881 accountAllocation(cap, n);
882
883 ACQUIRE_SM_LOCK
884 bd = allocGroupOnNode(cap->node,req_blocks);
885 dbl_link_onto(bd, &g0->large_objects);
886 g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
887 g0->n_new_large_words += n;
888 RELEASE_SM_LOCK;
889 initBdescr(bd, g0, g0);
890 bd->flags = BF_LARGE;
891 bd->free = bd->start + n;
892 cap->total_allocated += n;
893 return bd->start;
894 }
895
896 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
897
898 accountAllocation(cap, n);
899 bd = cap->r.rCurrentAlloc;
900 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
901
902 if (bd) finishedNurseryBlock(cap,bd);
903
904 // The CurrentAlloc block is full, we need to find another
905 // one. First, we try taking the next block from the
906 // nursery:
907 bd = cap->r.rCurrentNursery->link;
908
909 if (bd == NULL) {
910 // The nursery is empty: allocate a fresh block (we can't
911 // fail here).
912 ACQUIRE_SM_LOCK;
913 bd = allocBlockOnNode(cap->node);
914 cap->r.rNursery->n_blocks++;
915 RELEASE_SM_LOCK;
916 initBdescr(bd, g0, g0);
917 bd->flags = 0;
918 // If we had to allocate a new block, then we'll GC
919 // pretty quickly now, because MAYBE_GC() will
920 // notice that CurrentNursery->link is NULL.
921 } else {
922 newNurseryBlock(bd);
923 // we have a block in the nursery: take it and put
924 // it at the *front* of the nursery list, and use it
925 // to allocate() from.
926 //
927 // Previously the nursery looked like this:
928 //
929 // CurrentNursery
930 // /
931 // +-+ +-+
932 // nursery -> ... |A| -> |B| -> ...
933 // +-+ +-+
934 //
935 // After doing this, it looks like this:
936 //
937 // CurrentNursery
938 // /
939 // +-+ +-+
940 // nursery -> |B| -> ... -> |A| -> ...
941 // +-+ +-+
942 // |
943 // CurrentAlloc
944 //
945 // The point is to get the block out of the way of the
946 // advancing CurrentNursery pointer, while keeping it
947 // on the nursery list so we don't lose track of it.
948 cap->r.rCurrentNursery->link = bd->link;
949 if (bd->link != NULL) {
950 bd->link->u.back = cap->r.rCurrentNursery;
951 }
952 }
953 dbl_link_onto(bd, &cap->r.rNursery->blocks);
954 cap->r.rCurrentAlloc = bd;
955 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
956 }
957 p = bd->free;
958 bd->free += n;
959
960 IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
961 return p;
962 }
963
964 /* ---------------------------------------------------------------------------
965 Allocate a fixed/pinned object.
966
967 We allocate small pinned objects into a single block, allocating a
968 new block when the current one overflows. The block is chained
969 onto the large_object_list of generation 0.
970
971 NOTE: The GC can't in general handle pinned objects. This
972 interface is only safe to use for ByteArrays, which have no
973 pointers and don't require scavenging. It works because the
974 block's descriptor has the BF_LARGE flag set, so the block is
975 treated as a large object and chained onto various lists, rather
976 than the individual objects being copied. However, when it comes
977 to scavenge the block, the GC will only scavenge the first object.
978 The reason is that the GC can't linearly scan a block of pinned
979 objects at the moment (doing so would require using the
980 mostly-copying techniques). But since we're restricting ourselves
981 to pinned ByteArrays, not scavenging is ok.
982
983 This function is called by newPinnedByteArray# which immediately
984 fills the allocated memory with a MutableByteArray#. Note that
985 this returns NULL on heap overflow.
986 ------------------------------------------------------------------------- */
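/*
 * A rough sketch of the caller's side (the real caller is
 * stg_newPinnedByteArrayzh in rts/PrimOps.cmm; the C rendering below is
 * illustrative only, and "payload_words" is a placeholder):
 *
 *     StgArrBytes *arr = (StgArrBytes *)
 *         allocatePinned(cap, sizeofW(StgArrBytes) + payload_words);
 *     if (arr == NULL) { ... report heap overflow ... }
 *     SET_HDR((StgClosure *)arr, &stg_ARR_WORDS_info, cap->r.rCCCS);
 *     arr->bytes = payload_words * sizeof(W_);
 */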
987
988 StgPtr
989 allocatePinned (Capability *cap, W_ n)
990 {
991 StgPtr p;
992 bdescr *bd;
993
994 // If the request is for a large object, then allocate()
995 // will give us a pinned object anyway.
996 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
997 p = allocateMightFail(cap, n);
998 if (p == NULL) {
999 return NULL;
1000 } else {
1001 Bdescr(p)->flags |= BF_PINNED;
1002 return p;
1003 }
1004 }
1005
1006 accountAllocation(cap, n);
1007 bd = cap->pinned_object_block;
1008
1009 // If we don't have a block of pinned objects yet, or the current
1010 // one isn't large enough to hold the new object, get a new one.
1011 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
1012
1013 // stash the old block on cap->pinned_object_blocks. On the
1014 // next GC cycle these objects will be moved to
1015 // g0->large_objects.
1016 if (bd != NULL) {
1017 // add it to the allocation stats when the block is full
1018 finishedNurseryBlock(cap, bd);
1019 dbl_link_onto(bd, &cap->pinned_object_blocks);
1020 }
1021
1022 // We need to find another block. We could just allocate one,
1023 // but that means taking a global lock and we really want to
1024 // avoid that (benchmarks that allocate a lot of pinned
1025 // objects scale really badly if we do this).
1026 //
1027 // So first, we try taking the next block from the nursery, in
1028 // the same way as allocate().
1029 bd = cap->r.rCurrentNursery->link;
1030 if (bd == NULL) {
1031 // The nursery is empty: allocate a fresh block (we can't fail
1032 // here).
1033 ACQUIRE_SM_LOCK;
1034 bd = allocBlockOnNode(cap->node);
1035 RELEASE_SM_LOCK;
1036 initBdescr(bd, g0, g0);
1037 } else {
1038 newNurseryBlock(bd);
1039 // we have a block in the nursery: steal it
1040 cap->r.rCurrentNursery->link = bd->link;
1041 if (bd->link != NULL) {
1042 bd->link->u.back = cap->r.rCurrentNursery;
1043 }
1044 cap->r.rNursery->n_blocks -= bd->blocks;
1045 }
1046
1047 cap->pinned_object_block = bd;
1048 bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
1049
1050 // The pinned_object_block remains attached to the capability
1051 // until it is full, even if a GC occurs. We want this
1052 // behaviour because otherwise the unallocated portion of the
1053 // block would be forever slop, and under certain workloads
1054 // (allocating a few ByteStrings per GC) we accumulate a lot
1055 // of slop.
1056 //
1057 // So, the pinned_object_block is initially marked
1058 // BF_EVACUATED so the GC won't touch it. When it is full,
1059 // we place it on the large_objects list, and at the start of
1060 // the next GC the BF_EVACUATED flag will be cleared, and the
1061 // block will be promoted as usual (if anything in it is
1062 // live).
1063 }
1064
1065 p = bd->free;
1066 bd->free += n;
1067 return p;
1068 }
1069
1070 /* -----------------------------------------------------------------------------
1071 Write Barriers
1072 -------------------------------------------------------------------------- */
1073
1074 /*
1075 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
1076 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
1077 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
1078 and is put on the mutable list.
1079 */
1080 void
1081 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
1082 {
1083 Capability *cap = regTableToCapability(reg);
1084 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
1085 p->header.info = &stg_MUT_VAR_DIRTY_info;
1086 recordClosureMutated(cap,p);
1087 }
1088 }
1089
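/*
 * For reference, a write to an IORef conceptually amounts to the following
 * (a hedged sketch; the real sequence is emitted by the code generator):
 *
 *     mv->var = new_value;
 *     dirty_MUT_VAR(&cap->r, (StgClosure *)mv);
 */
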
1090 void
1091 dirty_TVAR(Capability *cap, StgTVar *p)
1092 {
1093 if (p->header.info == &stg_TVAR_CLEAN_info) {
1094 p->header.info = &stg_TVAR_DIRTY_info;
1095 recordClosureMutated(cap,(StgClosure*)p);
1096 }
1097 }
1098
1099 // Setting a TSO's link field with a write barrier.
1100 // It is *not* necessary to call this function when
1101 // * setting the link field to END_TSO_QUEUE
1102 // * setting the link field of the currently running TSO, as it
1103 // will already be dirty.
1104 void
1105 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
1106 {
1107 if (tso->dirty == 0) {
1108 tso->dirty = 1;
1109 recordClosureMutated(cap,(StgClosure*)tso);
1110 }
1111 tso->_link = target;
1112 }
1113
1114 void
1115 setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
1116 {
1117 if (tso->dirty == 0) {
1118 tso->dirty = 1;
1119 recordClosureMutated(cap,(StgClosure*)tso);
1120 }
1121 tso->block_info.prev = target;
1122 }
1123
1124 void
1125 dirty_TSO (Capability *cap, StgTSO *tso)
1126 {
1127 if (tso->dirty == 0) {
1128 tso->dirty = 1;
1129 recordClosureMutated(cap,(StgClosure*)tso);
1130 }
1131 }
1132
1133 void
1134 dirty_STACK (Capability *cap, StgStack *stack)
1135 {
1136 if (stack->dirty == 0) {
1137 stack->dirty = 1;
1138 recordClosureMutated(cap,(StgClosure*)stack);
1139 }
1140 }
1141
1142 /*
1143 This is the write barrier for MVARs. An MVAR_CLEAN object is not
1144 on the mutable list; an MVAR_DIRTY is. When written to, an
1145 MVAR_CLEAN turns into an MVAR_DIRTY and is put on the mutable list.
1146 The check for MVAR_CLEAN is inlined at the call site for speed;
1147 this really does make a difference on concurrency-heavy benchmarks
1148 such as Chameneos and cheap-concurrency.
1149 */
1150 void
1151 dirty_MVAR(StgRegTable *reg, StgClosure *p)
1152 {
1153 recordClosureMutated(regTableToCapability(reg),p);
1154 }
1155
1156 /* -----------------------------------------------------------------------------
1157 * Stats and stuff
1158 * -------------------------------------------------------------------------- */
1159
1160 /* -----------------------------------------------------------------------------
1161 * Note [allocation accounting]
1162 *
1163 * - When cap->r.rCurrentNursery moves to a new block in the nursery,
1164 * we add the size of the used portion of the previous block to
1165 * cap->total_allocated. (see finishedNurseryBlock())
1166 *
1167 * - When we start a GC, the allocated portion of CurrentNursery and
1168 * CurrentAlloc are added to cap->total_allocated. (see
1169 * updateNurseriesStats())
1170 *
1171 * -------------------------------------------------------------------------- */
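/*
 * For example (illustrative): if a capability has completely filled two
 * nursery blocks and is half way through a third when a GC starts, the two
 * finished blocks have already been added to cap->total_allocated by
 * finishedNurseryBlock(), and updateNurseriesStats() adds the used portion
 * of the third just before the GC statistics are gathered.
 */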
1172
1173 //
1174 // Calculate the total allocated memory since the start of the
1175 // program. Also emits events reporting the per-cap allocation
1176 // totals.
1177 //
1178 StgWord
1179 calcTotalAllocated (void)
1180 {
1181 W_ tot_alloc = 0;
1182 W_ n;
1183
1184 for (n = 0; n < n_capabilities; n++) {
1185 tot_alloc += capabilities[n]->total_allocated;
1186
1187 traceEventHeapAllocated(capabilities[n],
1188 CAPSET_HEAP_DEFAULT,
1189 capabilities[n]->total_allocated * sizeof(W_));
1190 }
1191
1192 return tot_alloc;
1193 }
1194
1195 //
1196 // Update the per-cap total_allocated numbers with an approximation of
1197 // the amount of memory used in each cap's nursery.
1198 //
1199 void
1200 updateNurseriesStats (void)
1201 {
1202 uint32_t i;
1203 bdescr *bd;
1204
1205 for (i = 0; i < n_capabilities; i++) {
1206 // The current nursery block and the current allocate block have not
1207 // yet been accounted for in cap->total_allocated, so we add them here.
1208 bd = capabilities[i]->r.rCurrentNursery;
1209 if (bd) finishedNurseryBlock(capabilities[i], bd);
1210 bd = capabilities[i]->r.rCurrentAlloc;
1211 if (bd) finishedNurseryBlock(capabilities[i], bd);
1212 }
1213 }
1214
1215 W_ countOccupied (bdescr *bd)
1216 {
1217 W_ words;
1218
1219 words = 0;
1220 for (; bd != NULL; bd = bd->link) {
1221 ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
1222 words += bd->free - bd->start;
1223 }
1224 return words;
1225 }
1226
1227 W_ genLiveWords (generation *gen)
1228 {
1229 return gen->n_words + gen->n_large_words +
1230 gen->n_compact_blocks * BLOCK_SIZE_W;
1231 }
1232
1233 W_ genLiveBlocks (generation *gen)
1234 {
1235 return gen->n_blocks + gen->n_large_blocks + gen->n_compact_blocks;
1236 }
1237
1238 W_ gcThreadLiveWords (uint32_t i, uint32_t g)
1239 {
1240 W_ a, b, c;
1241
1242 a = countOccupied(gc_threads[i]->gens[g].todo_bd);
1243 b = gc_threads[i]->gens[g].n_part_words;
1244 c = gc_threads[i]->gens[g].n_scavd_words;
1245
1246 // debugBelch("cap %d, g%d, %ld %ld %ld\n", i, g, a, b, c);
1247 return a + b + c;
1248 }
1249
1250 W_ gcThreadLiveBlocks (uint32_t i, uint32_t g)
1251 {
1252 W_ blocks;
1253
1254 blocks = countBlocks(gc_threads[i]->gens[g].todo_bd);
1255 blocks += gc_threads[i]->gens[g].n_part_blocks;
1256 blocks += gc_threads[i]->gens[g].n_scavd_blocks;
1257
1258 return blocks;
1259 }
1260
1261 /* Determine which generation will be collected next, and approximate
1262 * the maximum amount of memory that will be required to do the GC,
1263 * taking into account data that will be copied, and the space needed
1264 * to store bitmaps and the mark stack. Note: blocks_needed does not
1265 * include the blocks in the nursery.
1266 *
1267 * Assume: all data currently live will remain live. Generations
1268 * that will be collected next time will therefore need twice as many
1269 * blocks since all the data will be copied.
1270 */
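/*
 * Worked example (illustrative numbers only): suppose gen 1 has
 * n_blocks = 1000, no large or compact blocks, max_blocks = 500, and
 * mark = compact = 0.  Since 1000 > max_blocks it will be collected, by
 * copying, so it contributes 1000 (current data) + 1000 (copy space)
 * blocks to *blocks_needed.  With mark = 1 and compact = 1 it would
 * instead contribute 1000 + 1000/BITS_IN(W_) + 1000/100 blocks, because
 * compaction needs no extra copy space.
 */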
1271 extern W_
1272 calcNeeded (bool force_major, memcount *blocks_needed)
1273 {
1274 W_ needed = 0;
1275 uint32_t N;
1276
1277 if (force_major) {
1278 N = RtsFlags.GcFlags.generations - 1;
1279 } else {
1280 N = 0;
1281 }
1282
1283 for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
1284 generation *gen = &generations[g];
1285
1286 W_ blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
1287 + gen->n_large_blocks
1288 + gen->n_compact_blocks;
1289
1290 // we need at least this much space
1291 needed += blocks;
1292
1293 // are we collecting this gen?
1294 if (g == 0 || // always collect gen 0
1295 blocks > gen->max_blocks)
1296 {
1297 N = stg_max(N,g);
1298
1299 // we will collect this gen next time
1300 if (gen->mark) {
1301 // bitmap:
1302 needed += gen->n_blocks / BITS_IN(W_);
1303 // mark stack:
1304 needed += gen->n_blocks / 100;
1305 }
1306 if (gen->compact) {
1307 continue; // no additional space needed for compaction
1308 } else {
1309 needed += gen->n_blocks;
1310 }
1311 }
1312 }
1313
1314 if (blocks_needed != NULL) {
1315 *blocks_needed = needed;
1316 }
1317 return N;
1318 }
1319
1320 StgWord calcTotalLargeObjectsW (void)
1321 {
1322 uint32_t g;
1323 StgWord totalW = 0;
1324
1325 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1326 totalW += generations[g].n_large_words;
1327 }
1328 return totalW;
1329 }
1330
1331 StgWord calcTotalCompactW (void)
1332 {
1333 uint32_t g;
1334 StgWord totalW = 0;
1335
1336 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1337 totalW += generations[g].n_compact_blocks * BLOCK_SIZE_W;
1338 }
1339 return totalW;
1340 }
1341
1342 /* ----------------------------------------------------------------------------
1343 Executable memory
1344
1345 Executable memory must be managed separately from non-executable
1346 memory. Most OSs these days require you to jump through hoops to
1347 dynamically allocate executable memory, due to various security
1348 measures.
1349
1350 Here we provide a small memory allocator for executable memory.
1351 Memory is managed with a page granularity; we allocate linearly
1352 in the page, and when the page is emptied (all objects on the page
1353 are free) we free the page again, not forgetting to make it
1354 non-executable.
1355
1356 TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1357 the linker cannot use allocateExec for loading object code files
1358 on Windows. Once allocateExec can handle larger objects, the linker
1359 should be modified to use allocateExec instead of VirtualAlloc.
1360 ------------------------------------------------------------------------- */
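/*
 * Typical usage, roughly (a hedged sketch; the real callers live in
 * rts/Adjustor.c, and "template" / "len" are placeholders):
 *
 *     AdjustorExecutable exec;
 *     AdjustorWritable code = allocateExec(len, &exec);
 *     memcpy(code, template, len);   // fill in via the writable alias
 *     flushExec(len, exec);          // make sure the icache sees the code
 *     ...
 *     freeExec(exec);                // freed via the *executable* address
 */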
1361
1362 #if (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1363 #include <libkern/OSCacheControl.h>
1364 #endif
1365
1366 #if defined(__clang__)
1367 /* clang defines __clear_cache as a builtin on some platforms.
1368 * For example on armv7-linux-androideabi. Its type signature
1369 * differs slightly from gcc's.
1370 */
1371 extern void __clear_cache(void * begin, void * end);
1372 #elif defined(__GNUC__)
1373 /* __clear_cache is a libgcc function.
1374 * It existed before __builtin___clear_cache was introduced.
1375 * See #8562.
1376 */
1377 extern void __clear_cache(char * begin, char * end);
1378 #endif /* __GNUC__ */
1379
1380 /* On ARM and other platforms, we need to flush the cache after
1381 writing code into memory, so the processor reliably sees it. */
1382 void flushExec (W_ len, AdjustorExecutable exec_addr)
1383 {
1384 #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
1385 /* x86 doesn't need to do anything, so just suppress some warnings. */
1386 (void)len;
1387 (void)exec_addr;
1388 #elif (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1389 /* On iOS we need to use the special 'sys_icache_invalidate' call. */
1390 sys_icache_invalidate(exec_addr, len);
1391 #elif defined(__clang__)
1392 unsigned char* begin = (unsigned char*)exec_addr;
1393 unsigned char* end = begin + len;
1394 # if __has_builtin(__builtin___clear_cache)
1395 __builtin___clear_cache((void*)begin, (void*)end);
1396 # else
1397 __clear_cache((void*)begin, (void*)end);
1398 # endif
1399 #elif defined(__GNUC__)
1400 /* For all other platforms, fall back to a libgcc builtin. */
1401 unsigned char* begin = (unsigned char*)exec_addr;
1402 unsigned char* end = begin + len;
1403 /* __builtin___clear_cache is supported since GNU C 4.3.6.
1404 * We pick 4.4 to simplify the condition a bit.
1405 */
1406 # if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
1407 __builtin___clear_cache((void*)begin, (void*)end);
1408 # else
1409 __clear_cache((void*)begin, (void*)end);
1410 # endif
1411 #else
1412 #error Missing support to flush the instruction cache
1413 #endif
1414 }
1415
1416 #if defined(linux_HOST_OS)
1417
1418 // On Linux we need to use libffi for allocating executable memory,
1419 // because it knows how to work around the restrictions put in place
1420 // by SELinux.
1421
1422 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1423 {
1424 void **ret, **exec;
1425 ACQUIRE_SM_LOCK;
1426 ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1427 RELEASE_SM_LOCK;
1428 if (ret == NULL) return ret;
1429 *ret = ret; // save the address of the writable mapping, for freeExec().
1430 *exec_ret = exec + 1;
1431 return (ret + 1);
1432 }
1433
1434 // freeExec gets passed the executable address, not the writable address.
1435 void freeExec (AdjustorExecutable addr)
1436 {
1437 AdjustorWritable writable;
1438 writable = *((void**)addr - 1);
1439 ACQUIRE_SM_LOCK;
1440 ffi_closure_free (writable);
1441 RELEASE_SM_LOCK
1442 }
1443
1444 #elif defined(ios_HOST_OS)
1445
1446 static HashTable* allocatedExecs;
1447
1448 AdjustorWritable allocateExec(W_ bytes, AdjustorExecutable *exec_ret)
1449 {
1450 AdjustorWritable writ;
1451 ffi_closure* cl;
1452 if (bytes != sizeof(ffi_closure)) {
1453 barf("allocateExec: for ffi_closure only");
1454 }
1455 ACQUIRE_SM_LOCK;
1456 cl = writ = ffi_closure_alloc((size_t)bytes, exec_ret);
1457 if (cl != NULL) {
1458 if (allocatedExecs == NULL) {
1459 allocatedExecs = allocHashTable();
1460 }
1461 insertHashTable(allocatedExecs, (StgWord)*exec_ret, writ);
1462 }
1463 RELEASE_SM_LOCK;
1464 return writ;
1465 }
1466
1467 AdjustorWritable execToWritable(AdjustorExecutable exec)
1468 {
1469 AdjustorWritable writ;
1470 ACQUIRE_SM_LOCK;
1471 if (allocatedExecs == NULL ||
1472 (writ = lookupHashTable(allocatedExecs, (StgWord)exec)) == NULL) {
1473 RELEASE_SM_LOCK;
1474 barf("execToWritable: not found");
1475 }
1476 RELEASE_SM_LOCK;
1477 return writ;
1478 }
1479
1480 void freeExec(AdjustorExecutable exec)
1481 {
1482 AdjustorWritable writ;
1483 ffi_closure* cl;
1484 cl = writ = execToWritable(exec);
1485 ACQUIRE_SM_LOCK;
1486 removeHashTable(allocatedExecs, (StgWord)exec, writ);
1487 ffi_closure_free(cl);
1488 RELEASE_SM_LOCK
1489 }
1490
1491 #else
1492
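/*
 * Fallback allocator: chunks are carved linearly out of exec_block.  Each
 * chunk is one header word holding its payload size in words, followed by
 * the payload; freeExec() reads that header to decrement bd->gen_no, which
 * is (ab)used here as a count of payload words still live in the block.
 */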
1493 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1494 {
1495 void *ret;
1496 W_ n;
1497
1498 ACQUIRE_SM_LOCK;
1499
1500 // round up to words.
1501 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
1502
1503 if (n+1 > BLOCK_SIZE_W) {
1504 barf("allocateExec: can't handle large objects");
1505 }
1506
1507 if (exec_block == NULL ||
1508 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1509 bdescr *bd;
1510 W_ pagesize = getPageSize();
1511 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1512 debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1513 bd->gen_no = 0;
1514 bd->flags = BF_EXEC;
1515 bd->link = exec_block;
1516 if (exec_block != NULL) {
1517 exec_block->u.back = bd;
1518 }
1519 bd->u.back = NULL;
1520 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, true);
1521 exec_block = bd;
1522 }
1523 *(exec_block->free) = n; // store the size of this chunk
1524 exec_block->gen_no += n; // gen_no stores the number of words allocated
1525 ret = exec_block->free + 1;
1526 exec_block->free += n + 1;
1527
1528 RELEASE_SM_LOCK
1529 *exec_ret = ret;
1530 return ret;
1531 }
1532
1533 void freeExec (void *addr)
1534 {
1535 StgPtr p = (StgPtr)addr - 1;
1536 bdescr *bd = Bdescr((StgPtr)p);
1537
1538 if ((bd->flags & BF_EXEC) == 0) {
1539 barf("freeExec: not executable");
1540 }
1541
1542 if (*(StgPtr)p == 0) {
1543 barf("freeExec: already free?");
1544 }
1545
1546 ACQUIRE_SM_LOCK;
1547
1548 bd->gen_no -= *(StgPtr)p;
1549 *(StgPtr)p = 0;
1550
1551 if (bd->gen_no == 0) {
1552 // Free the block if it is empty, but not if it is the block at
1553 // the head of the queue.
1554 if (bd != exec_block) {
1555 debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1556 dbl_link_remove(bd, &exec_block);
1557 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, false);
1558 freeGroup(bd);
1559 } else {
1560 bd->free = bd->start;
1561 }
1562 }
1563
1564 RELEASE_SM_LOCK
1565 }
1566
1567 #endif /* switch(HOST_OS) */
1568
1569 #if defined(DEBUG)
1570
1571 // handy function for use in gdb, because Bdescr() is inlined.
1572 extern bdescr *_bdescr (StgPtr p);
1573
1574 bdescr *
1575 _bdescr (StgPtr p)
1576 {
1577 return Bdescr(p);
1578 }
1579
1580 #endif