Eliminate zero_static_objects_list()
rts/sm/Storage.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2012
4 *
5 * Storage manager front end
6 *
7 * Documentation on the architecture of the Storage Manager can be
8 * found in the online commentary:
9 *
10 * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #include "PosixSource.h"
15 #include "Rts.h"
16
17 #include "Storage.h"
18 #include "GCThread.h"
19 #include "RtsUtils.h"
20 #include "Stats.h"
21 #include "BlockAlloc.h"
22 #include "Weak.h"
23 #include "Sanity.h"
24 #include "Arena.h"
25 #include "Capability.h"
26 #include "Schedule.h"
27 #include "RetainerProfile.h" // for counting memory blocks (memInventory)
28 #include "OSMem.h"
29 #include "Trace.h"
30 #include "GC.h"
31 #include "Evac.h"
32 #if defined(ios_HOST_OS)
33 #include "Hash.h"
34 #endif
35
36 #include <string.h>
37
38 #include "ffi.h"
39
40 /*
41 * All these globals require sm_mutex to access in THREADED_RTS mode.
42 */
43 StgIndStatic *dyn_caf_list = NULL;
44 StgIndStatic *debug_caf_list = NULL;
45 StgIndStatic *revertible_caf_list = NULL;
46 rtsBool keepCAFs;
47
48 W_ large_alloc_lim; /* GC if n_large_blocks in any nursery
49 * reaches this. */
50
51 bdescr *exec_block;
52
53 generation *generations = NULL; /* all the generations */
54 generation *g0 = NULL; /* generation 0, for convenience */
55 generation *oldest_gen = NULL; /* oldest generation, for convenience */
56
57 nursery *nurseries = NULL; /* array of nurseries, size == n_capabilities */
58 nat n_nurseries;
59 volatile StgWord next_nursery = 0;
60
61 #ifdef THREADED_RTS
62 /*
63 * Storage manager mutex: protects all the above state from
64 * simultaneous access by two STG threads.
65 */
66 Mutex sm_mutex;
67 #endif
68
69 static void allocNurseries (nat from, nat to);
70 static void assignNurseriesToCapabilities (nat from, nat to);
71
72 static void
73 initGeneration (generation *gen, int g)
74 {
75 gen->no = g;
76 gen->collections = 0;
77 gen->par_collections = 0;
78 gen->failed_promotions = 0;
79 gen->max_blocks = 0;
80 gen->blocks = NULL;
81 gen->n_blocks = 0;
82 gen->n_words = 0;
83 gen->live_estimate = 0;
84 gen->old_blocks = NULL;
85 gen->n_old_blocks = 0;
86 gen->large_objects = NULL;
87 gen->n_large_blocks = 0;
88 gen->n_large_words = 0;
89 gen->n_new_large_words = 0;
90 gen->scavenged_large_objects = NULL;
91 gen->n_scavenged_large_blocks = 0;
92 gen->mark = 0;
93 gen->compact = 0;
94 gen->bitmap = NULL;
95 #ifdef THREADED_RTS
96 initSpinLock(&gen->sync);
97 #endif
98 gen->threads = END_TSO_QUEUE;
99 gen->old_threads = END_TSO_QUEUE;
100 gen->weak_ptr_list = NULL;
101 gen->old_weak_ptr_list = NULL;
102 }
103
104 void
105 initStorage (void)
106 {
107 nat g;
108
109 if (generations != NULL) {
110 // multi-init protection
111 return;
112 }
113
114 initMBlocks();
115
116 /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
117 * doing something reasonable.
118 */
119 /* We use the NOT_NULL variant; otherwise gcc warns that the test is always true */
120 ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
121 ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
122 ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
123
124 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
125 RtsFlags.GcFlags.heapSizeSuggestion >
126 RtsFlags.GcFlags.maxHeapSize) {
127 RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
128 }
129
130 if (RtsFlags.GcFlags.maxHeapSize != 0 &&
131 RtsFlags.GcFlags.minAllocAreaSize >
132 RtsFlags.GcFlags.maxHeapSize) {
133 errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
134 RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
135 }
136
137 initBlockAllocator();
138
139 #if defined(THREADED_RTS)
140 initMutex(&sm_mutex);
141 #endif
142
143 ACQUIRE_SM_LOCK;
144
145 /* allocate generation info array */
146 generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
147 * sizeof(struct generation_),
148 "initStorage: gens");
149
150 /* Initialise all generations */
151 for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
152 initGeneration(&generations[g], g);
153 }
154
155 /* A couple of convenience pointers */
156 g0 = &generations[0];
157 oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
158
159 /* Set up the destination pointers in each younger gen. step */
160 for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
161 generations[g].to = &generations[g+1];
162 }
163 oldest_gen->to = oldest_gen;
164
165 /* Set up mark/compact/sweep for the oldest generation, if requested. */
166 if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
167 if (RtsFlags.GcFlags.generations == 1) {
168 errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
169 } else {
170 oldest_gen->mark = 1;
171 if (RtsFlags.GcFlags.compact)
172 oldest_gen->compact = 1;
173 }
174 }
175
176 generations[0].max_blocks = 0;
177
178 dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
179 debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
180 revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
181
182 /* initialise the allocate() interface */
183 large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
184
185 exec_block = NULL;
186
187 #ifdef THREADED_RTS
188 initSpinLock(&gc_alloc_block_sync);
189 #ifdef PROF_SPIN
190 whitehole_spin = 0;
191 #endif
192 #endif
193
194 N = 0;
195
196 next_nursery = 0;
197 storageAddCapabilities(0, n_capabilities);
198
199 IF_DEBUG(gc, statDescribeGens());
200
201 RELEASE_SM_LOCK;
202
203 traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
204 RtsFlags.GcFlags.generations,
205 RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE_W * sizeof(W_),
206 RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W * sizeof(W_),
207 MBLOCK_SIZE_W * sizeof(W_),
208 BLOCK_SIZE_W * sizeof(W_));
209 }
210
211 void storageAddCapabilities (nat from, nat to)
212 {
213 nat n, g, i, new_n_nurseries;
214
215 if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
216 new_n_nurseries = to;
217 } else {
218 memcount total_alloc = to * RtsFlags.GcFlags.minAllocAreaSize;
219 new_n_nurseries =
220 stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
221 }
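
    // Worked example (illustrative): with 4 capabilities, -A1m
    // (minAllocAreaSize = 256 blocks of 4Kbytes) and -n256k
    // (nurseryChunkSize = 64 blocks), total_alloc = 4 * 256 = 1024 blocks,
    // so new_n_nurseries = max(4, 1024 / 64) = 16 nursery chunks.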
222
223 if (from > 0) {
224 nurseries = stgReallocBytes(nurseries,
225 new_n_nurseries * sizeof(struct nursery_),
226 "storageAddCapabilities");
227 } else {
228 nurseries = stgMallocBytes(new_n_nurseries * sizeof(struct nursery_),
229 "storageAddCapabilities");
230 }
231
232 // we've moved the nurseries, so we have to update the rNursery
233 // pointers from the Capabilities.
234 for (i = 0; i < to; i++) {
235 capabilities[i]->r.rNursery = &nurseries[i];
236 }
237
238 /* The allocation area. Policy: keep the allocation area
239 * small to begin with, even if we have a large suggested heap
240 * size. Reason: we're going to do a major collection first, and we
241 * don't want it to be a big one. This vague idea is borne out by
242 * rigorous experimental evidence.
243 */
244 allocNurseries(n_nurseries, new_n_nurseries);
245 n_nurseries = new_n_nurseries;
246
247 /*
248 * Assign each of the new capabilities a nursery. Remember to start from
249 * next_nursery, because we may have already consumed some of the earlier
250 * nurseries.
251 */
252 assignNurseriesToCapabilities(from,to);
253
254 // allocate a block for each mut list
255 for (n = from; n < to; n++) {
256 for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
257 capabilities[n]->mut_lists[g] = allocBlock();
258 }
259 }
260
261 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
262 newThreadLocalKey(&gctKey);
263 #endif
264
265 initGcThreads(from, to);
266 }
267
268
269 void
270 exitStorage (void)
271 {
272 updateNurseriesStats();
273 stat_exit();
274 }
275
276 void
277 freeStorage (rtsBool free_heap)
278 {
279 stgFree(generations);
280 if (free_heap) freeAllMBlocks();
281 #if defined(THREADED_RTS)
282 closeMutex(&sm_mutex);
283 #endif
284 stgFree(nurseries);
285 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR) && (CC_SUPPORTS_TLS == 0)
286 freeThreadLocalKey(&gctKey);
287 #endif
288 freeGcThreads();
289 }
290
291 /* -----------------------------------------------------------------------------
292 Note [CAF management].
293
294 The entry code for every CAF does the following:
295
296 - calls newCAF, which builds a CAF_BLACKHOLE on the heap and atomically
297 updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE
298
299 - if newCAF returns zero, it re-enters the CAF (see Note [atomic
300 CAF entry])
301
302 - pushes an update frame pointing to the CAF_BLACKHOLE
303
304 Why do we build a BLACKHOLE in the heap rather than just updating
305 the thunk directly? It's so that we only need one kind of update
306 frame - otherwise we'd need a static version of the update frame
307 too, and various other parts of the RTS that deal with update
308 frames would also need special cases for static update frames.
309
310 newCAF() does the following:
311
312 - atomically locks the CAF (see Note [atomic CAF entry])
313
314 - it builds a CAF_BLACKHOLE on the heap
315
316 - it updates the CAF with an IND_STATIC pointing to the
317 CAF_BLACKHOLE, atomically.
318
319 - it puts the CAF on the oldest generation's mutable list.
320 This is so that we treat the CAF as a root when collecting
321 younger generations.
322
323 - links the CAF onto the CAF list (see below)
324
325 ------------------
326 Note [atomic CAF entry]
327
328 With THREADED_RTS, newCaf() is required to be atomic (see
329 #5558). This is because if two threads happened to enter the same
330 CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
331 and so the normal threadPaused() machinery for detecting duplicate
332 evaluation will not detect this. Hence in lockCAF() below, we
333 atomically lock the CAF with WHITEHOLE before updating it with
334 IND_STATIC, and return zero if another thread locked the CAF first.
335 In the event that we lost the race, CAF entry code will re-enter
336 the CAF and block on the other thread's CAF_BLACKHOLE.
337
338 ------------------
339 Note [GHCi CAFs]
340
341 For GHCi, we have additional requirements when dealing with CAFs:
342
343 - we must *retain* all dynamically-loaded CAFs ever entered,
344 just in case we need them again.
345 - we must be able to *revert* CAFs that have been evaluated, to
346 their pre-evaluated form.
347
348 To do this, we use an additional CAF list. When newCaf() is
349 called on a dynamically-loaded CAF, we add it to the CAF list
350 instead of the old-generation mutable list, and save away its
351 old info pointer (in caf->saved_info) for later reversion.
352
353 To revert all the CAFs, we traverse the CAF list and reset the
354 info pointer to caf->saved_info, then throw away the CAF list.
355 (see GC.c:revertCAFs()).
356
357 -- SDM 29/1/01
358
359 -------------------------------------------------------------------------- */
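
/* A minimal illustrative sketch of the CAF entry sequence described in the
 * Note above, rendered in C. The real entry code is Cmm emitted by the
 * code generator; "push_update_frame" and "enter_closure" are hypothetical
 * stand-ins for the corresponding generated code.
 */
#if 0
static StgClosure *caf_entry_sketch (StgRegTable *reg, StgIndStatic *caf)
{
    // newCAF locks the CAF, builds a CAF_BLACKHOLE, and overwrites the
    // CAF's info pointer with IND_STATIC (see lockCAF below).
    StgInd *bh = newCAF(reg, caf);

    if (bh == NULL) {
        // Another thread claimed the CAF first (Note [atomic CAF entry]):
        // re-enter the closure, which is now an IND_STATIC, and block on
        // the other thread's CAF_BLACKHOLE.
        return enter_closure(reg, (StgClosure *)caf);   // hypothetical
    }

    // Push an update frame pointing at the CAF_BLACKHOLE and then evaluate
    // the CAF's body; the update frame later overwrites bh with the result.
    push_update_frame(reg, (StgClosure *)bh);           // hypothetical
    return (StgClosure *)bh;
}
#endif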
360
361 STATIC_INLINE StgInd *
362 lockCAF (StgRegTable *reg, StgIndStatic *caf)
363 {
364 const StgInfoTable *orig_info;
365 Capability *cap = regTableToCapability(reg);
366 StgInd *bh;
367
368 orig_info = caf->header.info;
369
370 #ifdef THREADED_RTS
371 const StgInfoTable *cur_info;
372
373 if (orig_info == &stg_IND_STATIC_info ||
374 orig_info == &stg_WHITEHOLE_info) {
375 // already claimed by another thread; re-enter the CAF
376 return NULL;
377 }
378
379 cur_info = (const StgInfoTable *)
380 cas((StgVolatilePtr)&caf->header.info,
381 (StgWord)orig_info,
382 (StgWord)&stg_WHITEHOLE_info);
383
384 if (cur_info != orig_info) {
385 // already claimed by another thread; re-enter the CAF
386 return NULL;
387 }
388
389 // successfully claimed by us; overwrite with IND_STATIC
390 #endif
391
392 // For the benefit of revertCAFs(), save the original info pointer
393 caf->saved_info = orig_info;
394
395 // Allocate the blackhole indirection closure
396 bh = (StgInd *)allocate(cap, sizeofW(*bh));
397 SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs);
398 bh->indirectee = (StgClosure *)cap->r.rCurrentTSO;
399
400 caf->indirectee = (StgClosure *)bh;
401 write_barrier();
402 SET_INFO((StgClosure*)caf,&stg_IND_STATIC_info);
403
404 return bh;
405 }
406
407 StgInd *
408 newCAF(StgRegTable *reg, StgIndStatic *caf)
409 {
410 StgInd *bh;
411
412 bh = lockCAF(reg, caf);
413 if (!bh) return NULL;
414
415 if(keepCAFs)
416 {
417 // Note [dyn_caf_list]
418 // If we are in GHCi _and_ we are using dynamic libraries,
419 // then we can't redirect newCAF calls to newRetainedCAF (see below),
420 // so we make newCAF behave almost like newRetainedCAF.
421 // The dynamic libraries might be used by both the interpreted
422 // program and GHCi itself, so they must not be reverted.
423 // This also means that in GHCi with dynamic libraries, CAFs are not
424 // garbage collected. If this turns out to be a problem, we could
425 // do another hack here and do an address range test on caf to figure
426 // out whether it is from a dynamic library.
427
428 ACQUIRE_SM_LOCK; // dyn_caf_list is global, locked by sm_mutex
429 caf->static_link = (StgClosure*)dyn_caf_list;
430 dyn_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
431 RELEASE_SM_LOCK;
432 }
433 else
434 {
435 // Put this CAF on the mutable list for the old generation.
436 if (oldest_gen->no != 0) {
437 recordMutableCap((StgClosure*)caf,
438 regTableToCapability(reg), oldest_gen->no);
439 }
440
441 #ifdef DEBUG
442 // In the DEBUG rts, we keep track of live CAFs by chaining them
443 // onto a list debug_caf_list. This is so that we can tell if we
444 // ever enter a GC'd CAF, and emit a suitable barf().
445 //
446 // The saved_info field of the CAF is used as the link field for
447 // debug_caf_list, because this field is only used by newDynCAF
448 // for revertible CAFs, and we don't put those on the
449 // debug_caf_list.
450 ACQUIRE_SM_LOCK; // debug_caf_list is global, locked by sm_mutex
451 ((StgIndStatic *)caf)->saved_info = (const StgInfoTable*)debug_caf_list;
452 debug_caf_list = (StgIndStatic*)caf;
453 RELEASE_SM_LOCK;
454 #endif
455 }
456
457 return bh;
458 }
459
460 // External API for setting the keepCAFs flag. see #3900.
461 void
462 setKeepCAFs (void)
463 {
464 keepCAFs = 1;
465 }
466
467 // An alternate version of newCAF which is used for dynamically loaded
468 // object code in GHCi. In this case we want to retain *all* CAFs in
469 // the object code, because they might be demanded at any time from an
470 // expression evaluated on the command line.
471 // Also, GHCi might want to revert CAFs, so we add these to the
472 // revertible_caf_list.
473 //
474 // The linker hackily arranges that references to newCAF from dynamic
475 // code end up pointing to newRetainedCAF.
476 //
477 StgInd* newRetainedCAF (StgRegTable *reg, StgIndStatic *caf)
478 {
479 StgInd *bh;
480
481 bh = lockCAF(reg, caf);
482 if (!bh) return NULL;
483
484 ACQUIRE_SM_LOCK;
485
486 caf->static_link = (StgClosure*)revertible_caf_list;
487 revertible_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
488
489 RELEASE_SM_LOCK;
490
491 return bh;
492 }
493
494 // If we are using loadObj/unloadObj in the linker, then we want to
495 //
496 // - retain all CAFs in statically linked code (keepCAFs == rtsTrue),
497 // because we might link a new object that uses any of these CAFs.
498 //
499 // - GC CAFs in dynamically-linked code, so that we can detect when
500 // a dynamically-linked object is unloadable.
501 //
502 // So for this case, we set keepCAFs to rtsTrue, and link newCAF to newGCdCAF
503 // for dynamically-linked code.
504 //
505 StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
506 {
507 StgInd *bh;
508
509 bh = lockCAF(reg, caf);
510 if (!bh) return NULL;
511
512 // Put this CAF on the mutable list for the old generation.
513 if (oldest_gen->no != 0) {
514 recordMutableCap((StgClosure*)caf,
515 regTableToCapability(reg), oldest_gen->no);
516 }
517
518 return bh;
519 }
520
521 /* -----------------------------------------------------------------------------
522 Nursery management.
523 -------------------------------------------------------------------------- */
524
525 static bdescr *
526 allocNursery (bdescr *tail, W_ blocks)
527 {
528 bdescr *bd = NULL;
529 W_ i, n;
530
531 // We allocate the nursery as a single contiguous block and then
532 // divide it into single blocks manually. This way we guarantee
533 // that the nursery blocks are adjacent, so that the processor's
534 // automatic prefetching works across nursery blocks. This is a
535 // tiny optimisation (~0.5%), but it's free.
536
537 while (blocks > 0) {
538 n = stg_min(BLOCKS_PER_MBLOCK, blocks);
539 // allocLargeChunk will prefer large chunks, but will pick up
540 // small chunks if there are any available. We must allow
541 // single blocks here to avoid fragmentation (#7257)
542 bd = allocLargeChunk(1, n);
543 n = bd->blocks;
544 blocks -= n;
545
546 for (i = 0; i < n; i++) {
547 initBdescr(&bd[i], g0, g0);
548
549 bd[i].blocks = 1;
550 bd[i].flags = 0;
551
552 if (i > 0) {
553 bd[i].u.back = &bd[i-1];
554 } else {
555 bd[i].u.back = NULL;
556 }
557
558 if (i+1 < n) {
559 bd[i].link = &bd[i+1];
560 } else {
561 bd[i].link = tail;
562 if (tail != NULL) {
563 tail->u.back = &bd[i];
564 }
565 }
566
567 bd[i].free = bd[i].start;
568 }
569
570 tail = &bd[0];
571 }
572
573 return &bd[0];
574 }
575
576 STATIC_INLINE void
577 assignNurseryToCapability (Capability *cap, nat n)
578 {
579 ASSERT(n < n_nurseries);
580 cap->r.rNursery = &nurseries[n];
581 cap->r.rCurrentNursery = nurseries[n].blocks;
582 newNurseryBlock(nurseries[n].blocks);
583 cap->r.rCurrentAlloc = NULL;
584 }
585
586 /*
587 * Give each Capability a nursery from the pool. There is no need for atomic
588 * increments here: everything must be stopped before this function is called.
589 */
590 static void
591 assignNurseriesToCapabilities (nat from, nat to)
592 {
593 nat i;
594
595 for (i = from; i < to; i++) {
596 assignNurseryToCapability(capabilities[i], next_nursery++);
597 }
598 }
599
600 static void
601 allocNurseries (nat from, nat to)
602 {
603 nat i;
604 memcount n_blocks;
605
606 if (RtsFlags.GcFlags.nurseryChunkSize) {
607 n_blocks = RtsFlags.GcFlags.nurseryChunkSize;
608 } else {
609 n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
610 }
611
612 for (i = from; i < to; i++) {
613 nurseries[i].blocks = allocNursery(NULL, n_blocks);
614 nurseries[i].n_blocks = n_blocks;
615 }
616 }
617
618 void
619 resetNurseries (void)
620 {
621 next_nursery = 0;
622 assignNurseriesToCapabilities(0, n_capabilities);
623
624 #ifdef DEBUG
625 bdescr *bd;
626 nat n;
627 for (n = 0; n < n_nurseries; n++) {
628 for (bd = nurseries[n].blocks; bd; bd = bd->link) {
629 ASSERT(bd->gen_no == 0);
630 ASSERT(bd->gen == g0);
631 IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE));
632 }
633 }
634 #endif
635 }
636
637 W_
638 countNurseryBlocks (void)
639 {
640 nat i;
641 W_ blocks = 0;
642
643 for (i = 0; i < n_nurseries; i++) {
644 blocks += nurseries[i].n_blocks;
645 }
646 return blocks;
647 }
648
649 static void
650 resizeNursery (nursery *nursery, W_ blocks)
651 {
652 bdescr *bd;
653 W_ nursery_blocks;
654
655 nursery_blocks = nursery->n_blocks;
656 if (nursery_blocks == blocks) return;
657
658 if (nursery_blocks < blocks) {
659 debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
660 blocks);
661 nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
662 }
663 else {
664 bdescr *next_bd;
665
666 debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
667 blocks);
668
669 bd = nursery->blocks;
670 while (nursery_blocks > blocks) {
671 next_bd = bd->link;
672 next_bd->u.back = NULL;
673 nursery_blocks -= bd->blocks; // might be a large block
674 freeGroup(bd);
675 bd = next_bd;
676 }
677 nursery->blocks = bd;
678 // might have gone just under, by freeing a large block, so make
679 // up the difference.
680 if (nursery_blocks < blocks) {
681 nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
682 }
683 }
684
685 nursery->n_blocks = blocks;
686 ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
687 }
688
689 //
690 // Resize each of the nurseries to the specified size.
691 //
692 static void
693 resizeNurseriesEach (W_ blocks)
694 {
695 nat i;
696
697 for (i = 0; i < n_nurseries; i++) {
698 resizeNursery(&nurseries[i], blocks);
699 }
700 }
701
702 void
703 resizeNurseriesFixed (void)
704 {
705 nat blocks;
706
707 if (RtsFlags.GcFlags.nurseryChunkSize) {
708 blocks = RtsFlags.GcFlags.nurseryChunkSize;
709 } else {
710 blocks = RtsFlags.GcFlags.minAllocAreaSize;
711 }
712
713 resizeNurseriesEach(blocks);
714 }
715
716 //
717 // Resize the nurseries to the total specified size.
718 //
719 void
720 resizeNurseries (W_ blocks)
721 {
722 // If there are multiple nurseries, then we just divide the number
723 // of available blocks between them.
724 resizeNurseriesEach(blocks / n_nurseries);
725 }
726
727 rtsBool
728 getNewNursery (Capability *cap)
729 {
730 StgWord i;
731
732 for(;;) {
733 i = next_nursery;
734 if (i >= n_nurseries) {
735 return rtsFalse;
736 }
737 if (cas(&next_nursery, i, i+1) == i) {
738 assignNurseryToCapability(cap, i);
739 return rtsTrue;
740 }
741 }
742 }
743 /* -----------------------------------------------------------------------------
744 move_STACK is called to update the StgStack structure after it has been
745 moved from one place to another.
746 -------------------------------------------------------------------------- */
747
748 void
749 move_STACK (StgStack *src, StgStack *dest)
750 {
751 ptrdiff_t diff;
752
753 // relocate the stack pointer...
754 diff = (StgPtr)dest - (StgPtr)src; // In *words*
755 dest->sp = (StgPtr)dest->sp + diff;
756 }
757
758 /* -----------------------------------------------------------------------------
759 StgPtr allocate (Capability *cap, W_ n)
760
761 Allocates an area of memory n *words* large, from the nursery of
762 the supplied Capability, or from the global block pool if the area
763 requested is larger than LARGE_OBJECT_THRESHOLD. Memory is not
764 allocated from the current nursery block, so as not to interfere
765 with Hp/HpLim.
766
767 The address of the allocated memory is returned. allocate() never
768 fails; if it returns, the returned value is a valid address. If
769 the nursery is already full, then another block is allocated from
770 the global block pool. If we need to get memory from the OS and
771 that operation fails, then the whole process will be killed.
772 -------------------------------------------------------------------------- */
773
774 StgPtr
775 allocate (Capability *cap, W_ n)
776 {
777 bdescr *bd;
778 StgPtr p;
779
780 TICK_ALLOC_HEAP_NOCTR(WDS(n));
781 CCS_ALLOC(cap->r.rCCCS,n);
782 if (cap->r.rCurrentTSO != NULL) {
783 // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
784 ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
785 (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
786 - n*sizeof(W_)));
787 }
788
789 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
790 // The largest number of words such that
791 // the computation of req_blocks will not overflow.
792 W_ max_words = (HS_WORD_MAX & ~(BLOCK_SIZE-1)) / sizeof(W_);
793 W_ req_blocks;
794
795 if (n > max_words)
796 req_blocks = HS_WORD_MAX; // signal overflow below
797 else
798 req_blocks = (W_)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
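
        // Worked example (illustrative): on a 64-bit platform with 4Kbyte
        // blocks, a request of n = 1,000,000 words is 8,000,000 bytes,
        // which BLOCK_ROUND_UP takes to 1954 blocks, well below the
        // overflow guards checked next.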
799
800 // Attempting to allocate an object larger than maxHeapSize
801 // should definitely be disallowed. (bug #1791)
802 if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
803 req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
804 req_blocks >= HS_INT32_MAX) // avoid overflow when
805 // calling allocGroup() below
806 {
807 heapOverflow();
808 // heapOverflow() doesn't exit (see #2592), but we aren't
809 // in a position to do a clean shutdown here: we
810 // either have to allocate the memory or exit now.
811 // Allocating the memory would be bad, because the user
812 // has requested that we not exceed maxHeapSize, so we
813 // just exit.
814 stg_exit(EXIT_HEAPOVERFLOW);
815 }
816
817 ACQUIRE_SM_LOCK
818 bd = allocGroup(req_blocks);
819 dbl_link_onto(bd, &g0->large_objects);
820 g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
821 g0->n_new_large_words += n;
822 RELEASE_SM_LOCK;
823 initBdescr(bd, g0, g0);
824 bd->flags = BF_LARGE;
825 bd->free = bd->start + n;
826 cap->total_allocated += n;
827 return bd->start;
828 }
829
830 /* small allocation (<LARGE_OBJECT_THRESHOLD) */
831
832 bd = cap->r.rCurrentAlloc;
833 if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
834
835 if (bd) finishedNurseryBlock(cap,bd);
836
837 // The CurrentAlloc block is full, we need to find another
838 // one. First, we try taking the next block from the
839 // nursery:
840 bd = cap->r.rCurrentNursery->link;
841
842 if (bd == NULL) {
843 // The nursery is empty: allocate a fresh block (we can't
844 // fail here).
845 ACQUIRE_SM_LOCK;
846 bd = allocBlock();
847 cap->r.rNursery->n_blocks++;
848 RELEASE_SM_LOCK;
849 initBdescr(bd, g0, g0);
850 bd->flags = 0;
851 // If we had to allocate a new block, then we'll GC
852 // pretty quickly now, because MAYBE_GC() will
853 // notice that CurrentNursery->link is NULL.
854 } else {
855 newNurseryBlock(bd);
856 // we have a block in the nursery: take it and put
857 // it at the *front* of the nursery list, and use it
858 // to allocate() from.
859 //
860 // Previously the nursery looked like this:
861 //
862 // CurrentNursery
863 // /
864 // +-+ +-+
865 // nursery -> ... |A| -> |B| -> ...
866 // +-+ +-+
867 //
868 // After doing this, it looks like this:
869 //
870 // CurrentNursery
871 // /
872 // +-+ +-+
873 // nursery -> |B| -> ... -> |A| -> ...
874 // +-+ +-+
875 // |
876 // CurrentAlloc
877 //
878 // The point is to get the block out of the way of the
879 // advancing CurrentNursery pointer, while keeping it
880 // on the nursery list so we don't lose track of it.
881 cap->r.rCurrentNursery->link = bd->link;
882 if (bd->link != NULL) {
883 bd->link->u.back = cap->r.rCurrentNursery;
884 }
885 }
886 dbl_link_onto(bd, &cap->r.rNursery->blocks);
887 cap->r.rCurrentAlloc = bd;
888 IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
889 }
890 p = bd->free;
891 bd->free += n;
892
893 IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
894 return p;
895 }
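
/* A minimal usage sketch (illustrative): allocating and initialising a
 * small constructor-like object from C, in the style of the rts_mk*
 * builders in RtsAPI.c. "example_con_info" is a hypothetical info table
 * standing in for a real constructor's.
 */
#if 0
extern const StgInfoTable example_con_info;   // hypothetical

static StgClosure *example_alloc_small (Capability *cap, StgWord w)
{
    // One non-pointer word of payload: well under LARGE_OBJECT_THRESHOLD,
    // so this is served from the Capability's nursery without sm_mutex.
    StgClosure *p = (StgClosure *)allocate(cap, CONSTR_sizeW(0,1));
    SET_HDR(p, &example_con_info, CCS_SYSTEM);
    p->payload[0] = (StgClosure *)w;
    return p;
}
#endif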
896
897 /* ---------------------------------------------------------------------------
898 Allocate a fixed/pinned object.
899
900 We allocate small pinned objects into a single block, allocating a
901 new block when the current one overflows. The block is chained
902 onto the large_object_list of generation 0.
903
904 NOTE: The GC can't in general handle pinned objects. This
905 interface is only safe to use for ByteArrays, which have no
906 pointers and don't require scavenging. It works because the
907 block's descriptor has the BF_LARGE flag set, so the block is
908 treated as a large object and chained onto various lists, rather
909 than the individual objects being copied. However, when it comes
910 to scavenge the block, the GC will only scavenge the first object.
911 The reason is that the GC can't linearly scan a block of pinned
912 objects at the moment (doing so would require using the
913 mostly-copying techniques). But since we're restricting ourselves
914 to pinned ByteArrays, not scavenging is ok.
915
916 This function is called by newPinnedByteArray# which immediately
917 fills the allocated memory with a MutableByteArray#.
918 ------------------------------------------------------------------------- */
919
920 StgPtr
921 allocatePinned (Capability *cap, W_ n)
922 {
923 StgPtr p;
924 bdescr *bd;
925
926 // If the request is for a large object, then allocate()
927 // will give us a pinned object anyway.
928 if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
929 p = allocate(cap, n);
930 Bdescr(p)->flags |= BF_PINNED;
931 return p;
932 }
933
934 TICK_ALLOC_HEAP_NOCTR(WDS(n));
935 CCS_ALLOC(cap->r.rCCCS,n);
936 if (cap->r.rCurrentTSO != NULL) {
937 // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
938 ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
939 (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
940 - n*sizeof(W_)));
941 }
942
943 bd = cap->pinned_object_block;
944
945 // If we don't have a block of pinned objects yet, or the current
946 // one isn't large enough to hold the new object, get a new one.
947 if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
948
949 // stash the old block on cap->pinned_object_blocks. On the
950 // next GC cycle these objects will be moved to
951 // g0->large_objects.
952 if (bd != NULL) {
953 // add it to the allocation stats when the block is full
954 finishedNurseryBlock(cap, bd);
955 dbl_link_onto(bd, &cap->pinned_object_blocks);
956 }
957
958 // We need to find another block. We could just allocate one,
959 // but that means taking a global lock and we really want to
960 // avoid that (benchmarks that allocate a lot of pinned
961 // objects scale really badly if we do this).
962 //
963 // So first, we try taking the next block from the nursery, in
964 // the same way as allocate(), but note that we can only take
965 // an *empty* block, because we're about to mark it as
966 // BF_PINNED | BF_LARGE.
967 bd = cap->r.rCurrentNursery->link;
968 if (bd == NULL) { // must be empty!
969 // The nursery is empty, or the next block is non-empty:
970 // allocate a fresh block (we can't fail here).
971
972 // XXX in the case when the next nursery block is
973 // non-empty we aren't exerting any pressure to GC soon,
974 // so if this case ever happens then we could in theory
975 // keep allocating for ever without calling the GC. We
976 // can't bump g0->n_new_large_words because that will be
977 // counted towards allocation, and we're already counting
978 // our pinned objects as allocation in
979 // collect_pinned_object_blocks in the GC.
980 ACQUIRE_SM_LOCK;
981 bd = allocBlock();
982 RELEASE_SM_LOCK;
983 initBdescr(bd, g0, g0);
984 } else {
985 newNurseryBlock(bd);
986 // we have a block in the nursery: steal it
987 cap->r.rCurrentNursery->link = bd->link;
988 if (bd->link != NULL) {
989 bd->link->u.back = cap->r.rCurrentNursery;
990 }
991 cap->r.rNursery->n_blocks -= bd->blocks;
992 }
993
994 cap->pinned_object_block = bd;
995 bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
996
997 // The pinned_object_block remains attached to the capability
998 // until it is full, even if a GC occurs. We want this
999 // behaviour because otherwise the unallocated portion of the
1000 // block would be forever slop, and under certain workloads
1001 // (allocating a few ByteStrings per GC) we accumulate a lot
1002 // of slop.
1003 //
1004 // So, the pinned_object_block is initially marked
1005 // BF_EVACUATED so the GC won't touch it. When it is full,
1006 // we place it on the large_objects list, and at the start of
1007 // the next GC the BF_EVACUATED flag will be cleared, and the
1008 // block will be promoted as usual (if anything in it is
1009 // live).
1010 }
1011
1012 p = bd->free;
1013 bd->free += n;
1014 return p;
1015 }
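
/* A minimal usage sketch (illustrative): roughly what newPinnedByteArray#
 * arranges, expressed in C. The precise byte-array closure layout and its
 * info table are elided; only the sizing and the call matter here.
 */
#if 0
static StgPtr example_alloc_pinned (Capability *cap, W_ n_bytes)
{
    // Header word(s) + a length field + the payload rounded up to words.
    W_ words = sizeofW(StgHeader) + 1 + ROUNDUP_BYTES_TO_WDS(n_bytes);

    // Comes back inside a BF_PINNED | BF_LARGE block, so the GC will
    // never move it (see the NOTE above: no pointers allowed inside).
    StgPtr p = allocatePinned(cap, words);
    return p;
}
#endif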
1016
1017 /* -----------------------------------------------------------------------------
1018 Write Barriers
1019 -------------------------------------------------------------------------- */
1020
1021 /*
1022 This is the write barrier for MUT_VARs, a.k.a. IORefs. A
1023 MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
1024 is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
1025 and is put on the mutable list.
1026 */
1027 void
1028 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
1029 {
1030 Capability *cap = regTableToCapability(reg);
1031 if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
1032 p->header.info = &stg_MUT_VAR_DIRTY_info;
1033 recordClosureMutated(cap,p);
1034 }
1035 }
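
/* An illustrative sketch of how this barrier is used: the code generated
 * for writeMutVar# performs, in effect, the store followed by the barrier
 * call, as below. (The real code is emitted by the code generator, not
 * written in C.)
 */
#if 0
static void example_write_mutvar (StgRegTable *reg, StgMutVar *mv,
                                  StgClosure *new_value)
{
    mv->var = new_value;                    // overwrite the contents
    dirty_MUT_VAR(reg, (StgClosure *)mv);   // then record the mutation
}
#endif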
1036
1037 void
1038 dirty_TVAR(Capability *cap, StgTVar *p)
1039 {
1040 if (p->header.info == &stg_TVAR_CLEAN_info) {
1041 p->header.info = &stg_TVAR_DIRTY_info;
1042 recordClosureMutated(cap,(StgClosure*)p);
1043 }
1044 }
1045
1046 // Setting a TSO's link field with a write barrier.
1047 // It is *not* necessary to call this function when
1048 // * setting the link field to END_TSO_QUEUE
1049 // * putting a TSO on the blackhole_queue
1050 // * setting the link field of the currently running TSO, as it
1051 // will already be dirty.
1052 void
1053 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
1054 {
1055 if (tso->dirty == 0) {
1056 tso->dirty = 1;
1057 recordClosureMutated(cap,(StgClosure*)tso);
1058 }
1059 tso->_link = target;
1060 }
1061
1062 void
1063 setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
1064 {
1065 if (tso->dirty == 0) {
1066 tso->dirty = 1;
1067 recordClosureMutated(cap,(StgClosure*)tso);
1068 }
1069 tso->block_info.prev = target;
1070 }
1071
1072 void
1073 dirty_TSO (Capability *cap, StgTSO *tso)
1074 {
1075 if (tso->dirty == 0) {
1076 tso->dirty = 1;
1077 recordClosureMutated(cap,(StgClosure*)tso);
1078 }
1079 }
1080
1081 void
1082 dirty_STACK (Capability *cap, StgStack *stack)
1083 {
1084 if (stack->dirty == 0) {
1085 stack->dirty = 1;
1086 recordClosureMutated(cap,(StgClosure*)stack);
1087 }
1088 }
1089
1090 /*
1091 This is the write barrier for MVARs. An MVAR_CLEAN object is not
1092 on the mutable list; an MVAR_DIRTY is. When written to, an
1093 MVAR_CLEAN turns into an MVAR_DIRTY and is put on the mutable list.
1094 The check for MVAR_CLEAN is inlined at the call site for speed;
1095 this really does make a difference on concurrency-heavy benchmarks
1096 such as Chameneos and cheap-concurrency.
1097 */
1098 void
1099 dirty_MVAR(StgRegTable *reg, StgClosure *p)
1100 {
1101 recordClosureMutated(regTableToCapability(reg),p);
1102 }
1103
1104 /* -----------------------------------------------------------------------------
1105 * Stats and stuff
1106 * -------------------------------------------------------------------------- */
1107
1108 /* -----------------------------------------------------------------------------
1109 * Note [allocation accounting]
1110 *
1111 * - When cap->r.rCurrentNursery moves to a new block in the nursery,
1112 * we add the size of the used portion of the previous block to
1113 * cap->total_allocated. (see finishedNurseryBlock())
1114 *
1115 * - When we start a GC, the allocated portion of CurrentNursery and
1116 * CurrentAlloc are added to cap->total_allocated. (see
1117 * updateNurseriesStats())
1118 *
1119 * -------------------------------------------------------------------------- */
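
/* Worked example (illustrative): a 4Kbyte nursery block holds 512 words on
 * a 64-bit platform. If a cap fills such a block up to free - start = 400
 * words and then moves on, finishedNurseryBlock() adds those 400 words to
 * cap->total_allocated; the 112 unused words are not counted.
 */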
1120
1121 //
1122 // Calculate the total allocated memory since the start of the
1123 // program. Also emits events reporting the per-cap allocation
1124 // totals.
1125 //
1126 StgWord
1127 calcTotalAllocated (void)
1128 {
1129 W_ tot_alloc = 0;
1130 W_ n;
1131
1132 for (n = 0; n < n_capabilities; n++) {
1133 tot_alloc += capabilities[n]->total_allocated;
1134
1135 traceEventHeapAllocated(capabilities[n],
1136 CAPSET_HEAP_DEFAULT,
1137 capabilities[n]->total_allocated * sizeof(W_));
1138 }
1139
1140 return tot_alloc;
1141 }
1142
1143 //
1144 // Update the per-cap total_allocated numbers with an approximation of
1145 // the amount of memory used in each cap's nursery.
1146 //
1147 void
1148 updateNurseriesStats (void)
1149 {
1150 nat i;
1151 bdescr *bd;
1152
1153 for (i = 0; i < n_capabilities; i++) {
1154 // The current nursery block and the current allocate block have not
1155 // yet been accounted for in cap->total_allocated, so we add them here.
1156 bd = capabilities[i]->r.rCurrentNursery;
1157 if (bd) finishedNurseryBlock(capabilities[i], bd);
1158 bd = capabilities[i]->r.rCurrentAlloc;
1159 if (bd) finishedNurseryBlock(capabilities[i], bd);
1160 }
1161 }
1162
1163 W_ countOccupied (bdescr *bd)
1164 {
1165 W_ words;
1166
1167 words = 0;
1168 for (; bd != NULL; bd = bd->link) {
1169 ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
1170 words += bd->free - bd->start;
1171 }
1172 return words;
1173 }
1174
1175 W_ genLiveWords (generation *gen)
1176 {
1177 return gen->n_words + gen->n_large_words;
1178 }
1179
1180 W_ genLiveBlocks (generation *gen)
1181 {
1182 return gen->n_blocks + gen->n_large_blocks;
1183 }
1184
1185 W_ gcThreadLiveWords (nat i, nat g)
1186 {
1187 W_ words;
1188
1189 words = countOccupied(gc_threads[i]->gens[g].todo_bd);
1190 words += countOccupied(gc_threads[i]->gens[g].part_list);
1191 words += countOccupied(gc_threads[i]->gens[g].scavd_list);
1192
1193 return words;
1194 }
1195
1196 W_ gcThreadLiveBlocks (nat i, nat g)
1197 {
1198 W_ blocks;
1199
1200 blocks = countBlocks(gc_threads[i]->gens[g].todo_bd);
1201 blocks += gc_threads[i]->gens[g].n_part_blocks;
1202 blocks += gc_threads[i]->gens[g].n_scavd_blocks;
1203
1204 return blocks;
1205 }
1206
1207 // Return an accurate count of the live data in the heap, excluding
1208 // nursery data (which is never counted in gen->n_words or gen->n_blocks).
1209 W_ calcLiveWords (void)
1210 {
1211 nat g;
1212 W_ live;
1213
1214 live = 0;
1215 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1216 live += genLiveWords(&generations[g]);
1217 }
1218 return live;
1219 }
1220
1221 W_ calcLiveBlocks (void)
1222 {
1223 nat g;
1224 W_ live;
1225
1226 live = 0;
1227 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1228 live += genLiveBlocks(&generations[g]);
1229 }
1230 return live;
1231 }
1232
1233 /* Determine which generation will be collected next, and approximate
1234 * the maximum amount of memory that will be required to do the GC,
1235 * taking into account data that will be copied, and the space needed
1236 * to store bitmaps and the mark stack. Note: blocks_needed does not
1237 * include the blocks in the nursery.
1238 *
1239 * Assume: all data currently live will remain live. Generations
1240 * that will be collected next time will therefore need twice as many
1241 * blocks since all the data will be copied.
1242 */
1243 extern W_
1244 calcNeeded (rtsBool force_major, memcount *blocks_needed)
1245 {
1246 W_ needed = 0, blocks;
1247 nat g, N;
1248 generation *gen;
1249
1250 if (force_major) {
1251 N = RtsFlags.GcFlags.generations - 1;
1252 } else {
1253 N = 0;
1254 }
1255
1256 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1257 gen = &generations[g];
1258
1259 blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
1260 + gen->n_large_blocks;
1261
1262 // we need at least this much space
1263 needed += blocks;
1264
1265 // are we collecting this gen?
1266 if (g == 0 || // always collect gen 0
1267 blocks > gen->max_blocks)
1268 {
1269 N = stg_max(N,g);
1270
1271 // we will collect this gen next time
1272 if (gen->mark) {
1273 // bitmap:
1274 needed += gen->n_blocks / BITS_IN(W_);
1275 // mark stack:
1276 needed += gen->n_blocks / 100;
1277 }
1278 if (gen->compact) {
1279 continue; // no additional space needed for compaction
1280 } else {
1281 needed += gen->n_blocks;
1282 }
1283 }
1284 }
1285
1286 if (blocks_needed != NULL) {
1287 *blocks_needed = needed;
1288 }
1289 return N;
1290 }
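
/* Worked example (illustrative): a copying (non-mark, non-compact)
 * generation holding 300 ordinary blocks and no large objects that has
 * exceeded its max_blocks contributes 300 blocks for the data it holds
 * plus another 300 for the to-space it will be copied into, i.e. 600
 * blocks towards *blocks_needed.
 */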
1291
1292 /* ----------------------------------------------------------------------------
1293 Executable memory
1294
1295 Executable memory must be managed separately from non-executable
1296 memory. Most OSs these days require you to jump through hoops to
1297 dynamically allocate executable memory, due to various security
1298 measures.
1299
1300 Here we provide a small memory allocator for executable memory.
1301 Memory is managed with a page granularity; we allocate linearly
1302 in the page, and when the page is emptied (all objects on the page
1303 are free) we free the page again, not forgetting to make it
1304 non-executable.
1305
1306 TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1307 the linker cannot use allocateExec for loading object code files
1308 on Windows. Once allocateExec can handle larger objects, the linker
1309 should be modified to use allocateExec instead of VirtualAlloc.
1310 ------------------------------------------------------------------------- */
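
/* A minimal usage sketch (illustrative) of the interface below, as the
 * adjustor machinery might drive it. "write_adjustor_code" is a
 * hypothetical stand-in for the code that fills in the trampoline.
 */
#if 0
static AdjustorExecutable example_make_adjustor (W_ code_bytes)
{
    AdjustorExecutable exec_addr;
    AdjustorWritable   writ_addr = allocateExec(code_bytes, &exec_addr);
    if (writ_addr == NULL) return NULL;

    write_adjustor_code(writ_addr, code_bytes);  // hypothetical
    flushExec(code_bytes, exec_addr);            // make the new code visible
    return exec_addr;                            // released later via freeExec(exec_addr)
}
#endif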
1311
1312 #if (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1313 void sys_icache_invalidate(void *start, size_t len);
1314 #endif
1315
1316 /* On ARM and other platforms, we need to flush the cache after
1317 writing code into memory, so the processor reliably sees it. */
1318 void flushExec (W_ len, AdjustorExecutable exec_addr)
1319 {
1320 #if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
1321 /* x86 doesn't need to do anything, so just suppress some warnings. */
1322 (void)len;
1323 (void)exec_addr;
1324 #elif (defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) && defined(ios_HOST_OS)
1325 /* On iOS we need to use the special 'sys_icache_invalidate' call. */
1326 sys_icache_invalidate(exec_addr, ((unsigned char*)exec_addr)+len);
1327 #elif defined(__GNUC__)
1328 /* For all other platforms, fall back to a libgcc builtin. */
1329 unsigned char* begin = (unsigned char*)exec_addr;
1330 unsigned char* end = begin + len;
1331 __clear_cache((void*)begin, (void*)end);
1332 #else
1333 #error Missing support to flush the instruction cache
1334 #endif
1335 }
1336
1337 #if defined(linux_HOST_OS)
1338
1339 // On Linux we need to use libffi for allocating executable memory,
1340 // because it knows how to work around the restrictions put in place
1341 // by SELinux.
1342
1343 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1344 {
1345 void **ret, **exec;
1346 ACQUIRE_SM_LOCK;
1347 ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1348 RELEASE_SM_LOCK;
1349 if (ret == NULL) return ret;
1350 *ret = ret; // save the address of the writable mapping, for freeExec().
1351 *exec_ret = exec + 1;
1352 return (ret + 1);
1353 }
1354
1355 // freeExec gets passed the executable address, not the writable address.
1356 void freeExec (AdjustorExecutable addr)
1357 {
1358 AdjustorWritable writable;
1359 writable = *((void**)addr - 1);
1360 ACQUIRE_SM_LOCK;
1361 ffi_closure_free (writable);
1362 RELEASE_SM_LOCK
1363 }
1364
1365 #elif defined(ios_HOST_OS)
1366
1367 static HashTable* allocatedExecs;
1368
1369 AdjustorWritable allocateExec(W_ bytes, AdjustorExecutable *exec_ret)
1370 {
1371 AdjustorWritable writ;
1372 ffi_closure* cl;
1373 if (bytes != sizeof(ffi_closure)) {
1374 barf("allocateExec: for ffi_closure only");
1375 }
1376 ACQUIRE_SM_LOCK;
1377 cl = writ = ffi_closure_alloc((size_t)bytes, exec_ret);
1378 if (cl != NULL) {
1379 if (allocatedExecs == NULL) {
1380 allocatedExecs = allocHashTable();
1381 }
1382 insertHashTable(allocatedExecs, (StgWord)*exec_ret, writ);
1383 }
1384 RELEASE_SM_LOCK;
1385 return writ;
1386 }
1387
1388 AdjustorWritable execToWritable(AdjustorExecutable exec)
1389 {
1390 AdjustorWritable writ;
1391 ACQUIRE_SM_LOCK;
1392 if (allocatedExecs == NULL ||
1393 (writ = lookupHashTable(allocatedExecs, (StgWord)exec)) == NULL) {
1394 RELEASE_SM_LOCK;
1395 barf("execToWritable: not found");
1396 }
1397 RELEASE_SM_LOCK;
1398 return writ;
1399 }
1400
1401 void freeExec(AdjustorExecutable exec)
1402 {
1403 AdjustorWritable writ;
1404 ffi_closure* cl;
1405 cl = writ = execToWritable(exec);
1406 ACQUIRE_SM_LOCK;
1407 removeHashTable(allocatedExecs, (StgWord)exec, writ);
1408 ffi_closure_free(cl);
1409 RELEASE_SM_LOCK
1410 }
1411
1412 #else
1413
1414 AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
1415 {
1416 void *ret;
1417 W_ n;
1418
1419 ACQUIRE_SM_LOCK;
1420
1421 // round up to words.
1422 n = (bytes + sizeof(W_) + 1) / sizeof(W_);
1423
1424 if (n+1 > BLOCK_SIZE_W) {
1425 barf("allocateExec: can't handle large objects");
1426 }
1427
1428 if (exec_block == NULL ||
1429 exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1430 bdescr *bd;
1431 W_ pagesize = getPageSize();
1432 bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1433 debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1434 bd->gen_no = 0;
1435 bd->flags = BF_EXEC;
1436 bd->link = exec_block;
1437 if (exec_block != NULL) {
1438 exec_block->u.back = bd;
1439 }
1440 bd->u.back = NULL;
1441 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1442 exec_block = bd;
1443 }
1444 *(exec_block->free) = n; // store the size of this chunk
1445 exec_block->gen_no += n; // gen_no stores the number of words allocated
1446 ret = exec_block->free + 1;
1447 exec_block->free += n + 1;
1448
1449 RELEASE_SM_LOCK
1450 *exec_ret = ret;
1451 return ret;
1452 }
1453
1454 void freeExec (void *addr)
1455 {
1456 StgPtr p = (StgPtr)addr - 1;
1457 bdescr *bd = Bdescr((StgPtr)p);
1458
1459 if ((bd->flags & BF_EXEC) == 0) {
1460 barf("freeExec: not executable");
1461 }
1462
1463 if (*(StgPtr)p == 0) {
1464 barf("freeExec: already free?");
1465 }
1466
1467 ACQUIRE_SM_LOCK;
1468
1469 bd->gen_no -= *(StgPtr)p;
1470 *(StgPtr)p = 0;
1471
1472 if (bd->gen_no == 0) {
1473 // Free the block if it is empty, but not if it is the block at
1474 // the head of the queue.
1475 if (bd != exec_block) {
1476 debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1477 dbl_link_remove(bd, &exec_block);
1478 setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1479 freeGroup(bd);
1480 } else {
1481 bd->free = bd->start;
1482 }
1483 }
1484
1485 RELEASE_SM_LOCK
1486 }
1487
1488 #endif /* switch(HOST_OS) */
1489
1490 #ifdef DEBUG
1491
1492 // handy function for use in gdb, because Bdescr() is inlined.
1493 extern bdescr *_bdescr (StgPtr p);
1494
1495 bdescr *
1496 _bdescr (StgPtr p)
1497 {
1498 return Bdescr(p);
1499 }
1500
1501 #endif