/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2008
 *
 * The block allocator and free list manager.
 *
 * This is the architecture independent part of the block allocator.
 * It requires only the following support from the operating system:
 *
 *    void *getMBlocks(uint32_t n);
 *
 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
 * an MBLOCK_SIZE boundary.  There are no other restrictions on the
 * addresses of memory returned by getMBlocks().
 *
 * ---------------------------------------------------------------------------*/

#include "PosixSource.h"
#include "Rts.h"

#include "Storage.h"
#include "RtsUtils.h"
#include "BlockAlloc.h"
#include "OSMem.h"

#include <string.h>

static void initMBlock(void *mblock, uint32_t node);

/* -----------------------------------------------------------------------------

  Implementation notes
  ~~~~~~~~~~~~~~~~~~~~

  Terminology:
    - bdescr = block descriptor
    - bgroup = block group (1 or more adjacent blocks)
    - mblock = mega block
    - mgroup = mega group (1 or more adjacent mblocks)

  Invariants on block descriptors
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  bd->start always points to the start of the block.

  bd->free is either:
     - zero for a non-group-head; bd->link points to the head
     - (-1) for the head of a free block group
     - or it points within the block (group)

  bd->blocks is either:
     - zero for a non-group-head; bd->link points to the head
     - number of blocks in this group otherwise

  bd->link either points to a block descriptor or is NULL

  The following fields are not used by the allocator:
    bd->flags
    bd->gen_no
    bd->gen
    bd->dest

  Exceptions: we don't maintain invariants for all the blocks within a
  group on the free list, because it is expensive to modify every
  bdescr in a group when coalescing.  Just the head and last bdescrs
  will be correct for a group on the free list.


  Free lists
  ~~~~~~~~~~

  Preliminaries:
    - most allocations are for a small number of blocks
    - sometimes the OS gives us new memory backwards in the address
      space, sometimes forwards, so we should not be biased towards
      any particular layout in the address space
    - We want to avoid fragmentation
    - We want allocation and freeing to be O(1) or close.

  Coalescing trick: when a bgroup is freed (freeGroup()), we can check
  whether it can be coalesced with other free bgroups by checking the
  bdescrs for the blocks on either side of it.  This means that we can
  coalesce in O(1) time.  Every free bgroup must have its head and tail
  bdescrs initialised; the rest don't matter.

  We keep the free list in buckets, using a heap-sort strategy.
  Bucket N contains blocks with sizes 2^N to 2^(N+1)-1.  The list of
  blocks in each bucket is doubly-linked, so that if a block is
  coalesced we can easily remove it from its current free list.

  To allocate a new block of size S, grab a block from bucket
  log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
  least as big as S, and split it if necessary.  If there are no
  blocks in that bucket, look at bigger buckets until a block is found.
  Allocation is therefore O(logN) time.

  To free a block:
    - coalesce it with neighbours.
    - remove coalesced neighbour(s) from free list(s)
    - add the new (coalesced) block to the front of the appropriate
      bucket, given by log2(S) where S is the size of the block.

  Free is O(1).

  Megablocks
  ~~~~~~~~~~

  Separately from the free list of block groups, which are smaller than
  an mblock, we maintain a free list of mblock groups.  This is the unit
  of memory the operating system gives us, and we may either split mblocks
  into blocks or allocate them directly (when very large contiguous regions
  of memory are required).  mblocks have a different set of invariants
  than blocks:

  bd->start points to the start of the block IF the block is in the first mblock
  bd->blocks and bd->link are only valid IF this block is the first block
    of the first mblock
  No other fields are used (in particular, free is not used, meaning that
    space that is not used by the (single) object is wasted).

  This has implications for the free list as well:
  We cannot play the coalescing trick with mblocks, because there is
  no requirement that the bdescrs in the second and subsequent mblock
  of an mgroup are initialised (the mgroup might be filled with a
  large array, overwriting the bdescrs for example).

  The separate free list for megablocks is thus sorted in *address*
  order, so that we can coalesce.  Allocation in this list is best-fit
  by traversing the whole list: we don't expect this list to be long,
  and allocation/freeing of large blocks is rare; avoiding
  fragmentation is more important than performance here.

  freeGroup() might end up moving a block from free_list to
  free_mblock_list, if after coalescing we end up with a full mblock.

  checkFreeListSanity() checks all the invariants on the free lists.

  --------------------------------------------------------------------------- */

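/* A worked example of the block free lists above (illustrative only; the
 * sizes are made up): suppose a 3-block group is freed, and the group
 * immediately after it in the same megablock is already free with 5
 * blocks, sitting in bucket log_2(5) = 2.  freeGroup() looks at the
 * bdescr just past the freed group, sees that it heads a free group
 * (its free field is -1), unlinks that group from bucket 2, merges the
 * two into a single 8-block group, and pushes the result onto bucket
 * log_2(8) = 3.  Only the head and tail bdescrs of the merged group
 * need to be (re)initialised, per the invariants above.
 */
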
/* ---------------------------------------------------------------------------
   WATCH OUT FOR OVERFLOW

   Be very careful with integer overflow here.  If you have an
   expression like (n_blocks * BLOCK_SIZE), and n_blocks is an int or
   a uint32_t, then it will very likely overflow on a 64-bit platform.
   Always cast to StgWord (or W_ for short) first: ((W_)n_blocks * BLOCK_SIZE).

   --------------------------------------------------------------------------- */

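#if 0
/* Not part of the allocator: a minimal sketch of the overflow-safe idiom
 * the warning above asks for.  group_size_in_bytes() is a hypothetical
 * helper, shown only to contrast the two expressions. */
static W_ group_size_in_bytes (uint32_t n_blocks)
{
    // BAD:  (n_blocks * BLOCK_SIZE) may be evaluated in 32 bits and wrap.
    // GOOD: widen to W_ (StgWord) before multiplying, as below.
    return (W_)n_blocks * BLOCK_SIZE;
}
#endif
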
// free_list[i] contains blocks that are at least size 2^i, and at
// most size 2^(i+1) - 1.
//
// To find the free list in which to place a block, use log_2(size).
// To find a free block of the right size, use log_2_ceil(size).
//
// The largest free list (free_list[NUM_FREE_LISTS-1]) needs to contain sizes
// from half a megablock up to (but not including) a full megablock.

#define NUM_FREE_LISTS (MBLOCK_SHIFT-BLOCK_SHIFT)

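// For concreteness (these numbers assume the common configuration of
// 4 Kbyte blocks and 1 Mbyte megablocks, i.e. BLOCK_SHIFT = 12 and
// MBLOCK_SHIFT = 20; other configurations scale accordingly):
// NUM_FREE_LISTS = 8, so free_list[0] holds single blocks,
// free_list[3] holds groups of 8..15 blocks, and free_list[7] holds
// groups of 128..255 blocks -- half a megablock up to just under a
// full one, as required above.
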
// In THREADED_RTS mode, the free list is protected by sm_mutex.

static bdescr *free_list[MAX_NUMA_NODES][NUM_FREE_LISTS];
static bdescr *free_mblock_list[MAX_NUMA_NODES];

W_ n_alloc_blocks;   // currently allocated blocks
W_ hw_alloc_blocks;  // high-water allocated blocks

W_ n_alloc_blocks_by_node[MAX_NUMA_NODES];

/* -----------------------------------------------------------------------------
   Initialisation
   -------------------------------------------------------------------------- */

void initBlockAllocator(void)
{
    uint32_t i, node;
    for (node = 0; node < MAX_NUMA_NODES; node++) {
        for (i=0; i < NUM_FREE_LISTS; i++) {
            free_list[node][i] = NULL;
        }
        free_mblock_list[node] = NULL;
        n_alloc_blocks_by_node[node] = 0;
    }
    n_alloc_blocks = 0;
    hw_alloc_blocks = 0;
}

/* -----------------------------------------------------------------------------
   Accounting
   -------------------------------------------------------------------------- */

STATIC_INLINE
void recordAllocatedBlocks(uint32_t node, uint32_t n)
{
    n_alloc_blocks += n;
    n_alloc_blocks_by_node[node] += n;
    if (n > 0 && n_alloc_blocks > hw_alloc_blocks) {
        hw_alloc_blocks = n_alloc_blocks;
    }
}

STATIC_INLINE
void recordFreedBlocks(uint32_t node, uint32_t n)
{
    ASSERT(n_alloc_blocks >= n);
    n_alloc_blocks -= n;
    n_alloc_blocks_by_node[node] -= n;
}

/* -----------------------------------------------------------------------------
   Allocation
   -------------------------------------------------------------------------- */

STATIC_INLINE void
initGroup(bdescr *head)
{
    head->free = head->start;
    head->link = NULL;

    // If this is a block group (but not a megablock group), we
    // make the last block of the group point to the head.  This is used
    // when coalescing blocks in freeGroup().  We don't do this for
    // megablock groups because blocks in the second and subsequent
    // mblocks don't have bdescrs; freeing these is handled in a
    // different way by free_mblock_group().
    if (head->blocks > 1 && head->blocks <= BLOCKS_PER_MBLOCK) {
        bdescr *last = head + head->blocks-1;
        last->blocks = 0;
        last->link = head;
    }
}

#if SIZEOF_VOID_P == SIZEOF_LONG
#define CLZW(n) (__builtin_clzl(n))
#else
#define CLZW(n) (__builtin_clzll(n))
#endif

// log base 2 (floor), needs to support up to (2^NUM_FREE_LISTS)-1
STATIC_INLINE uint32_t
log_2(W_ n)
{
    ASSERT(n > 0 && n < (1<<NUM_FREE_LISTS));
#if defined(__GNUC__)
    return CLZW(n) ^ (sizeof(StgWord)*8 - 1);
    // generates good code on x86.  __builtin_clz() compiles to bsr+xor, but
    // we want just bsr, so the xor here cancels out gcc's xor.
#else
    W_ i, x;
    x = n;
    for (i=0; i < NUM_FREE_LISTS; i++) {
        x = x >> 1;
        if (x == 0) return i;
    }
    return NUM_FREE_LISTS;
#endif
}

// log base 2 (ceiling), needs to support up to (2^NUM_FREE_LISTS)-1
STATIC_INLINE uint32_t
log_2_ceil(W_ n)
{
    ASSERT(n > 0 && n < (1<<NUM_FREE_LISTS));
#if defined(__GNUC__)
    uint32_t r = log_2(n);
    return (n & (n-1)) ? r+1 : r;
#else
    W_ i, x;
    x = 1;
    for (i=0; i < NUM_FREE_LISTS; i++) {
        if (x >= n) return i;
        x = x << 1;
    }
    return NUM_FREE_LISTS;
#endif
}

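// Some sample values, to make the relationship between the two
// functions concrete (easily checked by hand):
//   log_2(1) = 0,      log_2(6) = 2,      log_2(8) = 3
//   log_2_ceil(1) = 0,  log_2_ceil(6) = 3, log_2_ceil(8) = 3
// so a freed 6-block group goes into bucket 2, while a request for 6
// blocks starts its search at bucket 3, where every group is >= 6.
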
STATIC_INLINE void
free_list_insert (uint32_t node, bdescr *bd)
{
    uint32_t ln;

    ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
    ln = log_2(bd->blocks);

    dbl_link_onto(bd, &free_list[node][ln]);
}


STATIC_INLINE bdescr *
tail_of (bdescr *bd)
{
    return bd + bd->blocks - 1;
}

// After splitting a group, the last block of each group must have a
// tail that points to the head block, to keep our invariants for
// coalescing.
STATIC_INLINE void
setup_tail (bdescr *bd)
{
    bdescr *tail;
    tail = tail_of(bd);
    if (tail != bd) {
        tail->blocks = 0;
        tail->free = 0;
        tail->link = bd;
    }
}


// Take a free block group bd, and split off a group of size n from
// it.  Adjust the free list as necessary, and return the new group.
static bdescr *
split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln)
{
    bdescr *fg; // free group

    ASSERT(bd->blocks > n);
    dbl_link_remove(bd, &free_list[node][ln]);
    fg = bd + bd->blocks - n; // take n blocks off the end
    fg->blocks = n;
    bd->blocks -= n;
    setup_tail(bd);
    ln = log_2(bd->blocks);
    dbl_link_onto(bd, &free_list[node][ln]);
    return fg;
}

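// Worked example (illustrative sizes only): if bd is a free 12-block
// group in bucket log_2(12) = 3 and split_free_block(bd, node, 3, 3)
// is called, the last 3 blocks become the returned group fg, the
// remaining 9-block group gets a fresh tail via setup_tail(), and it
// is re-inserted into bucket log_2(9) = 3 (here the bucket happens
// not to change; a bigger split could move it to a smaller bucket).
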
/* Only initializes the start pointers on the first megablock and the
 * blocks field of the first bdescr; callers are responsible for calling
 * initGroup afterwards.
 */
static bdescr *
alloc_mega_group (uint32_t node, StgWord mblocks)
{
    bdescr *best, *bd, *prev;
    StgWord n;

    n = MBLOCK_GROUP_BLOCKS(mblocks);

    best = NULL;
    prev = NULL;
    for (bd = free_mblock_list[node]; bd != NULL; prev = bd, bd = bd->link)
    {
        if (bd->blocks == n)
        {
            if (prev) {
                prev->link = bd->link;
            } else {
                free_mblock_list[node] = bd->link;
            }
            return bd;
        }
        else if (bd->blocks > n)
        {
            if (!best || bd->blocks < best->blocks)
            {
                best = bd;
            }
        }
    }

    if (best)
    {
        // we take our chunk off the end here.
        StgWord best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
        bd = FIRST_BDESCR((StgWord8*)MBLOCK_ROUND_DOWN(best) +
                          (best_mblocks-mblocks)*MBLOCK_SIZE);

        best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
        initMBlock(MBLOCK_ROUND_DOWN(bd), node);
    }
    else
    {
        void *mblock;
        if (RtsFlags.GcFlags.numa) {
            mblock = getMBlocksOnNode(node, mblocks);
        } else {
            mblock = getMBlocks(mblocks);
        }
        initMBlock(mblock, node); // only need to init the 1st one
        bd = FIRST_BDESCR(mblock);
    }
    bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
    return bd;
}

bdescr *
allocGroupOnNode (uint32_t node, W_ n)
{
    bdescr *bd, *rem;
    StgWord ln;

    if (n == 0) barf("allocGroup: requested zero blocks");

    if (n >= BLOCKS_PER_MBLOCK)
    {
        StgWord mblocks;

        mblocks = BLOCKS_TO_MBLOCKS(n);

        // n_alloc_blocks doesn't count the extra blocks we get in a
        // megablock group.
        recordAllocatedBlocks(node, mblocks * BLOCKS_PER_MBLOCK);

        bd = alloc_mega_group(node, mblocks);
        // only the bdescrs of the first MB are required to be initialised
        initGroup(bd);
        goto finish;
    }

    recordAllocatedBlocks(node, n);

    ln = log_2_ceil(n);

    while (ln < NUM_FREE_LISTS && free_list[node][ln] == NULL) {
        ln++;
    }

    if (ln == NUM_FREE_LISTS) {
#if 0  /* useful for debugging fragmentation */
        if ((W_)mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W
             - (W_)((n_alloc_blocks - n) * BLOCK_SIZE_W) > (2*1024*1024)/sizeof(W_)) {
            debugBelch("Fragmentation, wanted %d blocks, %ld MB free\n", n, ((mblocks_allocated * BLOCKS_PER_MBLOCK) - n_alloc_blocks) / BLOCKS_PER_MBLOCK);
            RtsFlags.DebugFlags.block_alloc = 1;
            checkFreeListSanity();
        }
#endif

        bd = alloc_mega_group(node,1);
        bd->blocks = n;
        initGroup(bd);          // we know the group will fit
        rem = bd + n;
        rem->blocks = BLOCKS_PER_MBLOCK-n;
        initGroup(rem);         // init the slop
        recordAllocatedBlocks(node,rem->blocks);
        freeGroup(rem);         // add the slop on to the free list
        goto finish;
    }

    bd = free_list[node][ln];

    if (bd->blocks == n)        // exactly the right size!
    {
        dbl_link_remove(bd, &free_list[node][ln]);
        initGroup(bd);
    }
    else if (bd->blocks > n)    // block too big...
    {
        bd = split_free_block(bd, node, n, ln);
        ASSERT(bd->blocks == n);
        initGroup(bd);
    }
    else
    {
        barf("allocGroup: free list corrupted");
    }

finish:
    IF_DEBUG(sanity, memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
    IF_DEBUG(sanity, checkFreeListSanity());
    return bd;
}

STATIC_INLINE
uint32_t nodeWithLeastBlocks (void)
{
    uint32_t node = 0, i;
    uint32_t min_blocks = n_alloc_blocks_by_node[0];
    for (i = 1; i < n_numa_nodes; i++) {
        if (n_alloc_blocks_by_node[i] < min_blocks) {
            min_blocks = n_alloc_blocks_by_node[i];
            node = i;
        }
    }
    return node;
}

bdescr* allocGroup (W_ n)
{
    return allocGroupOnNode(nodeWithLeastBlocks(),n);
}


//
// Allocate a chunk of blocks that is at least min and at most max
// blocks in size.  This API is used by the nursery allocator, which
// prefers contiguous memory but doesn't require it.  When memory is
// fragmented we might have lots of chunks that are less than a full
// megablock, so allowing the nursery allocator to use these reduces
// fragmentation considerably.  e.g. on a GHC build with +RTS -H, I saw
// fragmentation go from 17MB down to 3MB on a single compile.
//
// Further to this: in #7257 there is a program that creates serious
// fragmentation such that the heap is full of tiny <4 block chains.
// The nursery allocator therefore has to use single blocks to avoid
// fragmentation, but we make sure that we allocate large blocks
// preferentially if there are any.
//
bdescr* allocLargeChunkOnNode (uint32_t node, W_ min, W_ max)
{
    bdescr *bd;
    StgWord ln, lnmax;

    if (min >= BLOCKS_PER_MBLOCK) {
        return allocGroupOnNode(node,max);
    }

    ln = log_2_ceil(min);
    lnmax = log_2_ceil(max);

    while (ln < NUM_FREE_LISTS && ln < lnmax && free_list[node][ln] == NULL) {
        ln++;
    }
    if (ln == NUM_FREE_LISTS || ln == lnmax) {
        return allocGroupOnNode(node,max);
    }
    bd = free_list[node][ln];

    if (bd->blocks <= max)      // small enough: take the whole group
    {
        dbl_link_remove(bd, &free_list[node][ln]);
        initGroup(bd);
    }
    else                        // group too big: split off max blocks
    {
        bd = split_free_block(bd, node, max, ln);
        ASSERT(bd->blocks == max);
        initGroup(bd);
    }

    recordAllocatedBlocks(node, bd->blocks);

    IF_DEBUG(sanity, memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
    IF_DEBUG(sanity, checkFreeListSanity());
    return bd;
}

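// Worked example (illustrative sizes only): allocLargeChunkOnNode(node, 1, 128)
// starts at bucket log_2_ceil(1) = 0 and walks up towards
// log_2_ceil(128) = 7 until it finds a non-empty bucket.  If the first
// non-empty bucket is, say, bucket 4 (groups of 16..31 blocks), the
// head group is returned whole, since it is certainly <= 128 blocks;
// only if every bucket below 7 is empty do we fall back to
// allocGroupOnNode(node, 128) and possibly split a larger group.
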
bdescr* allocLargeChunk (W_ min, W_ max)
{
    return allocLargeChunkOnNode(nodeWithLeastBlocks(), min, max);
}

bdescr *
allocGroup_lock(W_ n)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocGroup(n);
    RELEASE_SM_LOCK;
    return bd;
}

bdescr *
allocBlock_lock(void)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocBlock();
    RELEASE_SM_LOCK;
    return bd;
}

bdescr *
allocGroupOnNode_lock(uint32_t node, W_ n)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocGroupOnNode(node,n);
    RELEASE_SM_LOCK;
    return bd;
}

bdescr *
allocBlockOnNode_lock(uint32_t node)
{
    bdescr *bd;
    ACQUIRE_SM_LOCK;
    bd = allocBlockOnNode(node);
    RELEASE_SM_LOCK;
    return bd;
}

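#if 0
/* Not part of the RTS: a minimal usage sketch of the public entry
 * points above.  Outside of the GC, callers normally use the _lock
 * variants so that sm_mutex protects the free lists; the function
 * names used below are real, but example_alloc_and_free() itself is
 * only an illustration. */
static void example_alloc_and_free (void)
{
    // Allocate a group of (at least) 4 contiguous blocks on some node.
    bdescr *bd = allocGroup_lock(4);

    // bd->start points at the usable memory; note the W_ cast, per the
    // overflow warning earlier in this file.
    memset(bd->start, 0, (W_)bd->blocks * BLOCK_SIZE);

    // Hand the group back to the free lists (coalescing as it goes).
    freeGroup_lock(bd);
}
#endif
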
/* -----------------------------------------------------------------------------
   De-Allocation
   -------------------------------------------------------------------------- */

STATIC_INLINE bdescr *
coalesce_mblocks (bdescr *p)
{
    bdescr *q;

    q = p->link;
    if (q != NULL &&
        MBLOCK_ROUND_DOWN(q) ==
        (StgWord8*)MBLOCK_ROUND_DOWN(p) +
        BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
        // can coalesce
        p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
                                        BLOCKS_TO_MBLOCKS(q->blocks));
        p->link = q->link;
        return p;
    }
    return q;
}

static void
free_mega_group (bdescr *mg)
{
    bdescr *bd, *prev;
    uint32_t node;

    // Find the right place in the free list.  free_mblock_list is
    // sorted by *address*, not by size as the free_list is.
    prev = NULL;
    node = mg->node;
    bd = free_mblock_list[node];
    while (bd && bd->start < mg->start) {
        prev = bd;
        bd = bd->link;
    }

    // coalesce backwards
    if (prev)
    {
        mg->link = prev->link;
        prev->link = mg;
        mg = coalesce_mblocks(prev);
    }
    else
    {
        mg->link = free_mblock_list[node];
        free_mblock_list[node] = mg;
    }
    // coalesce forwards
    coalesce_mblocks(mg);

    IF_DEBUG(sanity, checkFreeListSanity());
}


void
freeGroup(bdescr *p)
{
    StgWord ln;
    uint32_t node;

    // not true in multithreaded GC:
    // ASSERT_SM_LOCK();

    ASSERT(p->free != (P_)-1);

    node = p->node;

    p->free = (void *)-1;  /* indicates that this block is free */
    p->gen = NULL;
    p->gen_no = 0;
    /* fill the block group with garbage if sanity checking is on */
    IF_DEBUG(sanity,memset(p->start, 0xaa, (W_)p->blocks * BLOCK_SIZE));

    if (p->blocks == 0) barf("freeGroup: block size is zero");

    if (p->blocks >= BLOCKS_PER_MBLOCK)
    {
        StgWord mblocks;

        mblocks = BLOCKS_TO_MBLOCKS(p->blocks);
        // If this is an mgroup, make sure it has the right number of blocks
        ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks));

        recordFreedBlocks(node, mblocks * BLOCKS_PER_MBLOCK);

        free_mega_group(p);
        return;
    }

    recordFreedBlocks(node, p->blocks);

    // coalesce forwards
    {
        bdescr *next;
        next = p + p->blocks;
        if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
        {
            p->blocks += next->blocks;
            ln = log_2(next->blocks);
            dbl_link_remove(next, &free_list[node][ln]);
            if (p->blocks == BLOCKS_PER_MBLOCK)
            {
                free_mega_group(p);
                return;
            }
            setup_tail(p);
        }
    }

    // coalesce backwards
    if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
    {
        bdescr *prev;
        prev = p - 1;
        if (prev->blocks == 0) prev = prev->link; // find the head

        if (prev->free == (P_)-1)
        {
            ln = log_2(prev->blocks);
            dbl_link_remove(prev, &free_list[node][ln]);
            prev->blocks += p->blocks;
            if (prev->blocks >= BLOCKS_PER_MBLOCK)
            {
                free_mega_group(prev);
                return;
            }
            p = prev;
        }
    }

    setup_tail(p);
    free_list_insert(node,p);

    IF_DEBUG(sanity, checkFreeListSanity());
}

void
freeGroup_lock(bdescr *p)
{
    ACQUIRE_SM_LOCK;
    freeGroup(p);
    RELEASE_SM_LOCK;
}

void
freeChain(bdescr *bd)
{
    bdescr *next_bd;
    while (bd != NULL) {
        next_bd = bd->link;
        freeGroup(bd);
        bd = next_bd;
    }
}

void
freeChain_lock(bdescr *bd)
{
    ACQUIRE_SM_LOCK;
    freeChain(bd);
    RELEASE_SM_LOCK;
}

static void
initMBlock(void *mblock, uint32_t node)
{
    bdescr *bd;
    StgWord8 *block;

    /* the first few bdescrs in an mblock are unused (they would describe
     * the space occupied by the descriptors themselves), so we don't want
     * to put them all on the free list.
     */
    block = FIRST_BLOCK(mblock);
    bd = FIRST_BDESCR(mblock);

    /* Initialise the start field of each block descriptor
     */
    for (; block <= (StgWord8*)LAST_BLOCK(mblock); bd += 1,
             block += BLOCK_SIZE) {
        bd->start = (void*)block;
        bd->node = node;
    }
}

/* -----------------------------------------------------------------------------
   Stats / metrics
   -------------------------------------------------------------------------- */

W_
countBlocks(bdescr *bd)
{
    W_ n;
    for (n=0; bd != NULL; bd=bd->link) {
        n += bd->blocks;
    }
    return n;
}

// (*1) Just like countBlocks, except that we adjust the count for a
// megablock group so that it doesn't include the extra few blocks
// that would be taken up by block descriptors in the second and
// subsequent megablock.  This is so we can tally the count with the
// number of blocks allocated in the system, for memInventory().
W_
countAllocdBlocks(bdescr *bd)
{
    W_ n;
    for (n=0; bd != NULL; bd=bd->link) {
        n += bd->blocks;

        // hack for megablock groups: see (*1) above
        if (bd->blocks > BLOCKS_PER_MBLOCK) {
            n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
                * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
        }
    }
    return n;
}

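// A worked example of the adjustment in countAllocdBlocks (the numbers
// assume the common configuration of 4 Kbyte blocks and 1 Mbyte
// megablocks, where MBLOCK_SIZE/BLOCK_SIZE = 256 and BLOCKS_PER_MBLOCK
// = 252): a 2-megablock group has bd->blocks = 252 + 256 = 508, the
// loop subtracts (256 - 252) * (508/256) = 4, and the group is counted
// as 504 = 2 * BLOCKS_PER_MBLOCK -- exactly what recordAllocatedBlocks()
// charged for it in allocGroupOnNode().
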
void returnMemoryToOS(uint32_t n /* megablocks */)
{
    bdescr *bd;
    uint32_t node;
    StgWord size;

    // ToDo: not fair, we free all the memory starting with node 0.
    for (node = 0; n > 0 && node < n_numa_nodes; node++) {
        bd = free_mblock_list[node];
        while ((n > 0) && (bd != NULL)) {
            size = BLOCKS_TO_MBLOCKS(bd->blocks);
            if (size > n) {
                StgWord newSize = size - n;
                char *freeAddr = MBLOCK_ROUND_DOWN(bd->start);
                freeAddr += newSize * MBLOCK_SIZE;
                bd->blocks = MBLOCK_GROUP_BLOCKS(newSize);
                freeMBlocks(freeAddr, n);
                n = 0;
            }
            else {
                char *freeAddr = MBLOCK_ROUND_DOWN(bd->start);
                n -= size;
                bd = bd->link;
                freeMBlocks(freeAddr, size);
            }
        }
        free_mblock_list[node] = bd;
    }

    // Ask the OS to release any address space portion
    // that was associated with the just released MBlocks
    //
    // Historically, we used to ask the OS directly (via
    // osReleaseFreeMemory()) - now the MBlock layer might
    // have a reason to preserve the address space range,
    // so we keep it
    releaseFreeMemory();

    IF_DEBUG(gc,
        if (n != 0) {
            debugBelch("Wanted to free %d more MBlocks than are freeable\n",
                       n);
        }
    );
}

/* -----------------------------------------------------------------------------
   Debugging
   -------------------------------------------------------------------------- */

#if defined(DEBUG)
static void
check_tail (bdescr *bd)
{
    bdescr *tail = tail_of(bd);

    if (tail != bd)
    {
        ASSERT(tail->blocks == 0);
        ASSERT(tail->free == 0);
        ASSERT(tail->link == bd);
    }
}

void
checkFreeListSanity(void)
{
    bdescr *bd, *prev;
    StgWord ln, min;
    uint32_t node;

    for (node = 0; node < n_numa_nodes; node++) {
        min = 1;
        for (ln = 0; ln < NUM_FREE_LISTS; ln++) {
            IF_DEBUG(block_alloc,
                     debugBelch("free block list [%" FMT_Word "]:\n", ln));

            prev = NULL;
            for (bd = free_list[node][ln]; bd != NULL; prev = bd, bd = bd->link)
            {
                IF_DEBUG(block_alloc,
                         debugBelch("group at %p, length %ld blocks\n",
                                    bd->start, (long)bd->blocks));
                ASSERT(bd->free == (P_)-1);
                ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
                ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
                ASSERT(bd->link != bd); // catch easy loops
                ASSERT(bd->node == node);

                check_tail(bd);

                if (prev)
                    ASSERT(bd->u.back == prev);
                else
                    ASSERT(bd->u.back == NULL);

                {
                    bdescr *next;
                    next = bd + bd->blocks;
                    if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
                    {
                        ASSERT(next->free != (P_)-1);
                    }
                }
            }
            min = min << 1;
        }

        prev = NULL;
        for (bd = free_mblock_list[node]; bd != NULL; prev = bd, bd = bd->link)
        {
            IF_DEBUG(block_alloc,
                     debugBelch("mega group at %p, length %ld blocks\n",
                                bd->start, (long)bd->blocks));

            ASSERT(bd->link != bd); // catch easy loops

            if (bd->link != NULL)
            {
                // make sure the list is sorted
                ASSERT(bd->start < bd->link->start);
            }

            ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
            ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
                   == bd->blocks);

            // make sure we're fully coalesced
            if (bd->link != NULL)
            {
                ASSERT(MBLOCK_ROUND_DOWN(bd->link) !=
                       (StgWord8*)MBLOCK_ROUND_DOWN(bd) +
                       BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
            }
        }
    }
}

W_ /* BLOCKS */
countFreeList(void)
{
    bdescr *bd;
    W_ total_blocks = 0;
    StgWord ln;
    uint32_t node;

    for (node = 0; node < n_numa_nodes; node++) {
        for (ln=0; ln < NUM_FREE_LISTS; ln++) {
            for (bd = free_list[node][ln]; bd != NULL; bd = bd->link) {
                total_blocks += bd->blocks;
            }
        }
        for (bd = free_mblock_list[node]; bd != NULL; bd = bd->link) {
            total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
            // The caller of this function, memInventory(), expects to match
            // the total number of blocks in the system against mblocks *
            // BLOCKS_PER_MBLOCK, so we must subtract the space for the
            // block descriptors from *every* mblock.
        }
    }
    return total_blocks;
}

void
markBlocks (bdescr *bd)
{
    for (; bd != NULL; bd = bd->link) {
        bd->flags |= BF_KNOWN;
    }
}

void
reportUnmarkedBlocks (void)
{
    void *mblock;
    void *state;
    bdescr *bd;

    debugBelch("Unreachable blocks:\n");
    for (mblock = getFirstMBlock(&state); mblock != NULL;
         mblock = getNextMBlock(&state, mblock)) {
        for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
            if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
                debugBelch("  %p\n",bd);
            }
            if (bd->blocks >= BLOCKS_PER_MBLOCK) {
                mblock = (StgWord8*)mblock +
                    (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;
                break;
            } else {
                bd += bd->blocks;
            }
        }
    }
}

#endif