rts/sm/BlockAlloc.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * The block allocator and free list manager.
6 *
7 * This is the architecture independent part of the block allocator.
8 * It requires only the following support from the operating system:
9 *
10 * void *getMBlocks(uint32_t n);
11 *
12 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
13 * an MBLOCK_SIZE boundary. There are no other restrictions on the
14 * addresses of memory returned by getMBlocks().
15 *
16 * ---------------------------------------------------------------------------*/
17
18 #include "PosixSource.h"
19 #include "Rts.h"
20
21 #include "Storage.h"
22 #include "RtsUtils.h"
23 #include "BlockAlloc.h"
24 #include "OSMem.h"
25
26 #include <string.h>
27
28 static void initMBlock(void *mblock, uint32_t node);
29
30 /* -----------------------------------------------------------------------------
31
32 Implementation notes
33 ~~~~~~~~~~~~~~~~~~~~
34
35 Terminology:
36 - bdescr = block descriptor
37 - bgroup = block group (1 or more adjacent blocks)
38 - mblock = mega block
39 - mgroup = mega group (1 or more adjacent mblocks)
40
41 Invariants on block descriptors
42 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
43 bd->start always points to the start of the block.
44
45 bd->free is either:
46 - zero for a non-group-head; bd->link points to the head
47 - (-1) for the head of a free block group
48 - or it points within the block (group)
49
50 bd->blocks is either:
51 - zero for a non-group-head; bd->link points to the head
52 - number of blocks in this group otherwise
53
54 bd->link either points to a block descriptor or is NULL
55
56 The following fields are not used by the allocator:
57 bd->flags
58 bd->gen_no
59 bd->gen
60 bd->dest
61
62 Exceptions: we don't maintain invariants for all the blocks within a
63 group on the free list, because it is expensive to modify every
64 bdescr in a group when coalescing. Just the head and last bdescrs
65 will be correct for a group on the free list.
66
67
68 Free lists
69 ~~~~~~~~~~
70
71 Preliminaries:
72 - most allocations are for a small number of blocks
73 - sometimes the OS gives us new memory backwards in the address
74 space, sometimes forwards, so we should not be biased towards
75 any particular layout in the address space
76 - We want to avoid fragmentation
77 - We want allocation and freeing to be O(1) or close.
78
79 Coalescing trick: when a bgroup is freed (freeGroup()), we can check
80 whether it can be coalesced with other free bgroups by checking the
81 bdescrs for the blocks on either side of it. This means that we can
82 coalesce in O(1) time. Every free bgroup must have its head and tail
83 bdescrs initialised, the rest don't matter.
84
85 We keep the free list in buckets, using a heap-sort strategy.
86   Bucket N contains blocks with sizes 2^N to 2^(N+1)-1. The list of
87 blocks in each bucket is doubly-linked, so that if a block is
88 coalesced we can easily remove it from its current free list.
89
90 To allocate a new block of size S, grab a block from bucket
91 log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
92 least as big as S, and split it if necessary. If there are no
93   blocks in that bucket, look at bigger buckets until a block is found.
94   Allocation is therefore O(log N) time.
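  (Worked example: a free group of 5 blocks lives in bucket log_2(5) = 2,
  which holds sizes 4-7; a request for 5 blocks starts the search at bucket
  log_2_ceil(5) = 3, where every group is large enough, moving on to higher
  buckets if bucket 3 happens to be empty.)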
95
96 To free a block:
97 - coalesce it with neighbours.
98 - remove coalesced neighbour(s) from free list(s)
99 - add the new (coalesced) block to the front of the appropriate
100 bucket, given by log2(S) where S is the size of the block.
101
102 Free is O(1).
103
104 Megablocks
105 ~~~~~~~~~~
106
107 Separately from the free list of block groups, which are smaller than
108 an mblock, we maintain a free list of mblock groups. This is the unit
109 of memory the operating system gives us, and we may either split mblocks
110   into blocks or allocate them directly (for very large contiguous regions
111   of memory). mblocks have a different set of invariants than blocks:
112
113 bd->start points to the start of the block IF the block is in the first mblock
114 bd->blocks and bd->link are only valid IF this block is the first block
115 of the first mblock
116 No other fields are used (in particular, free is not used, meaning that
117   space that is not used by the (single) object is wasted).
118
119 This has implications for the free list as well:
120 We cannot play the coalescing trick with mblocks, because there is
121 no requirement that the bdescrs in the second and subsequent mblock
122 of an mgroup are initialised (the mgroup might be filled with a
123 large array, overwriting the bdescrs for example).
124
125 The separate free list for megablocks is thus sorted in *address*
126 order, so that we can coalesce. Allocation in this list is best-fit
127 by traversing the whole list: we don't expect this list to be long,
128 and allocation/freeing of large blocks is rare; avoiding
129 fragmentation is more important than performance here.
130
131 freeGroup() might end up moving a block from free_list to
132 free_mblock_list, if after coalescing we end up with a full mblock.
133
134 checkFreeListSanity() checks all the invariants on the free lists.
135
136 --------------------------------------------------------------------------- */
137
138 /* ---------------------------------------------------------------------------
139 WATCH OUT FOR OVERFLOW
140
141 Be very careful with integer overflow here. If you have an
142 expression like (n_blocks * BLOCK_SIZE), and n_blocks is an int or
143 a uint32_t, then it will very likely overflow on a 64-bit platform.
144 Always cast to StgWord (or W_ for short) first: ((W_)n_blocks * BLOCK_SIZE).
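   (For instance, with the usual 4kB BLOCK_SIZE a uint32_t product
   n_blocks * BLOCK_SIZE wraps around once n_blocks reaches 2^20, i.e. a
   4GB heap, whereas ((W_)n_blocks * BLOCK_SIZE) is evaluated at full word
   width on a 64-bit platform.)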
145
146 --------------------------------------------------------------------------- */
147
148 // free_list[i] contains blocks that are at least size 2^i, and at
149 // most size 2^(i+1) - 1.
150 //
151 // To find the free list in which to place a block, use log_2(size).
152 // To find a free block of the right size, use log_2_ceil(size).
153 //
154 // The largest free list (free_list[NUM_FREE_LISTS-1]) needs to contain sizes
155 // from half a megablock up to (but not including) a full megablock.
156
157 #define NUM_FREE_LISTS (MBLOCK_SHIFT-BLOCK_SHIFT)
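// (With the usual 4kB blocks and 1MB megablocks this is 20 - 12 = 8 buckets,
// covering free groups from a single block up to just under a megablock.)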
158
159 // In THREADED_RTS mode, the free list is protected by sm_mutex.
160
161 static bdescr *free_list[MAX_NUMA_NODES][NUM_FREE_LISTS];
162 static bdescr *free_mblock_list[MAX_NUMA_NODES];
163
164 W_ n_alloc_blocks; // currently allocated blocks
165 W_ hw_alloc_blocks; // high-water allocated blocks
166
167 W_ n_alloc_blocks_by_node[MAX_NUMA_NODES];
168
169 /* -----------------------------------------------------------------------------
170 Initialisation
171 -------------------------------------------------------------------------- */
172
173 void initBlockAllocator(void)
174 {
175 uint32_t i, node;
176 for (node = 0; node < MAX_NUMA_NODES; node++) {
177 for (i=0; i < NUM_FREE_LISTS; i++) {
178 free_list[node][i] = NULL;
179 }
180 free_mblock_list[node] = NULL;
181 n_alloc_blocks_by_node[node] = 0;
182 }
183 n_alloc_blocks = 0;
184 hw_alloc_blocks = 0;
185 }
186
187 /* -----------------------------------------------------------------------------
188 Accounting
189 -------------------------------------------------------------------------- */
190
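// Bookkeeping helpers for the counters above: the global allocated-block
// count, the per-NUMA-node counts, and the high-water mark.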
191 STATIC_INLINE
192 void recordAllocatedBlocks(uint32_t node, uint32_t n)
193 {
194 n_alloc_blocks += n;
195 n_alloc_blocks_by_node[node] += n;
196 if (n > 0 && n_alloc_blocks > hw_alloc_blocks) {
197 hw_alloc_blocks = n_alloc_blocks;
198 }
199 }
200
201 STATIC_INLINE
202 void recordFreedBlocks(uint32_t node, uint32_t n)
203 {
204 ASSERT(n_alloc_blocks >= n);
205 n_alloc_blocks -= n;
206 n_alloc_blocks_by_node[node] -= n;
207 }
208
209 /* -----------------------------------------------------------------------------
210 Allocation
211 -------------------------------------------------------------------------- */
212
213 STATIC_INLINE bdescr *
214 tail_of (bdescr *bd)
215 {
216 return bd + bd->blocks - 1;
217 }
218
219 STATIC_INLINE void
220 initGroup(bdescr *head)
221 {
222 head->free = head->start;
223 head->link = NULL;
224
225 // If this is a block group (but not a megablock group), we
226 // make the last block of the group point to the head. This is used
227 // when coalescing blocks in freeGroup(). We don't do this for
228 // megablock groups because blocks in the second and subsequent
229 // mblocks don't have bdescrs; freeing these is handled in a
230     // different way by free_mega_group().
231 if (head->blocks > 1 && head->blocks <= BLOCKS_PER_MBLOCK) {
232 bdescr *last = tail_of(head);
233 last->blocks = 0;
234 last->link = head;
235 }
236 }
237
238 #if SIZEOF_VOID_P == SIZEOF_LONG
239 #define CLZW(n) (__builtin_clzl(n))
240 #else
241 #define CLZW(n) (__builtin_clzll(n))
242 #endif
243
244 // log base 2 (floor), needs to support up to (2^NUM_FREE_LISTS)-1
245 STATIC_INLINE uint32_t
246 log_2(W_ n)
247 {
248 ASSERT(n > 0 && n < (1<<NUM_FREE_LISTS));
249 #if defined(__GNUC__)
250 return CLZW(n) ^ (sizeof(StgWord)*8 - 1);
251 // generates good code on x86. __builtin_clz() compiles to bsr+xor, but
252 // we want just bsr, so the xor here cancels out gcc's xor.
253 #else
254 W_ i, x;
255 x = n;
256 for (i=0; i < NUM_FREE_LISTS; i++) {
257 x = x >> 1;
258 if (x == 0) return i;
259 }
260 return NUM_FREE_LISTS;
261 #endif
262 }
263
264 // log base 2 (ceiling), needs to support up to (2^NUM_FREE_LISTS)-1
265 STATIC_INLINE uint32_t
266 log_2_ceil(W_ n)
267 {
268 ASSERT(n > 0 && n < (1<<NUM_FREE_LISTS));
269 #if defined(__GNUC__)
270 uint32_t r = log_2(n);
271 return (n & (n-1)) ? r+1 : r;
272 #else
273 W_ i, x;
274 x = 1;
275     for (i=0; i < NUM_FREE_LISTS; i++) {
276 if (x >= n) return i;
277 x = x << 1;
278 }
279     return NUM_FREE_LISTS;
280 #endif
281 }
282
283 STATIC_INLINE void
284 free_list_insert (uint32_t node, bdescr *bd)
285 {
286 uint32_t ln;
287
288 ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
289 ln = log_2(bd->blocks);
290
291 dbl_link_onto(bd, &free_list[node][ln]);
292 }
293
294 // After splitting a group, the last block of each group must have a
295 // tail that points to the head block, to keep our invariants for
296 // coalescing.
297 STATIC_INLINE void
298 setup_tail (bdescr *bd)
299 {
300 bdescr *tail;
301 tail = tail_of(bd);
302 if (tail != bd) {
303 tail->blocks = 0;
304 tail->free = 0;
305 tail->link = bd;
306 }
307 }
308
309
310 // Take a free block group bd, and split off a group of size n from
311 // it. Adjust the free list as necessary, and return the new group.
312 static bdescr *
313 split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln)
314 {
315 bdescr *fg; // free group
316
317 ASSERT(bd->blocks > n);
318 dbl_link_remove(bd, &free_list[node][ln]);
319 fg = bd + bd->blocks - n; // take n blocks off the end
320 fg->blocks = n;
321 bd->blocks -= n;
322 setup_tail(bd);
323 ln = log_2(bd->blocks);
324 dbl_link_onto(bd, &free_list[node][ln]);
325 return fg;
326 }
327
328 /* Only initializes the start pointers on the first megablock and the
329 * blocks field of the first bdescr; callers are responsible for calling
330 * initGroup afterwards.
331 */
332 static bdescr *
333 alloc_mega_group (uint32_t node, StgWord mblocks)
334 {
335 bdescr *best, *bd, *prev;
336 StgWord n;
337
338 n = MBLOCK_GROUP_BLOCKS(mblocks);
339
340 best = NULL;
341 prev = NULL;
342 for (bd = free_mblock_list[node]; bd != NULL; prev = bd, bd = bd->link)
343 {
344 if (bd->blocks == n)
345 {
346 if (prev) {
347 prev->link = bd->link;
348 } else {
349 free_mblock_list[node] = bd->link;
350 }
351 return bd;
352 }
353 else if (bd->blocks > n)
354 {
355 if (!best || bd->blocks < best->blocks)
356 {
357 best = bd;
358 }
359 }
360 }
361
362 if (best)
363 {
364 // we take our chunk off the end here.
365 StgWord best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
366 bd = FIRST_BDESCR((StgWord8*)MBLOCK_ROUND_DOWN(best) +
367 (best_mblocks-mblocks)*MBLOCK_SIZE);
368
369 best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
370 initMBlock(MBLOCK_ROUND_DOWN(bd), node);
371 }
372 else
373 {
374 void *mblock;
375 if (RtsFlags.GcFlags.numa) {
376 mblock = getMBlocksOnNode(node, mblocks);
377 } else {
378 mblock = getMBlocks(mblocks);
379 }
380 initMBlock(mblock, node); // only need to init the 1st one
381 bd = FIRST_BDESCR(mblock);
382 }
383 bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
384 return bd;
385 }
386
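// Allocate a group of n blocks on the given NUMA node.  Requests of a
// megablock or more are rounded up to whole megablock groups; smaller
// requests are served from the first non-empty free-list bucket at or above
// log_2_ceil(n), splitting a larger free group (or a fresh megablock) when
// necessary.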
387 bdescr *
388 allocGroupOnNode (uint32_t node, W_ n)
389 {
390 bdescr *bd, *rem;
391 StgWord ln;
392
393 if (n == 0) barf("allocGroup: requested zero blocks");
394
395 if (n >= BLOCKS_PER_MBLOCK)
396 {
397 StgWord mblocks;
398
399 mblocks = BLOCKS_TO_MBLOCKS(n);
400
401 // n_alloc_blocks doesn't count the extra blocks we get in a
402 // megablock group.
403 recordAllocatedBlocks(node, mblocks * BLOCKS_PER_MBLOCK);
404
405 bd = alloc_mega_group(node, mblocks);
406 // only the bdescrs of the first MB are required to be initialised
407 initGroup(bd);
408 goto finish;
409 }
410
411 recordAllocatedBlocks(node, n);
412
413 ln = log_2_ceil(n);
414
415 while (ln < NUM_FREE_LISTS && free_list[node][ln] == NULL) {
416 ln++;
417 }
418
419 if (ln == NUM_FREE_LISTS) {
420 #if 0 /* useful for debugging fragmentation */
421 if ((W_)mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W
422 - (W_)((n_alloc_blocks - n) * BLOCK_SIZE_W) > (2*1024*1024)/sizeof(W_)) {
423 debugBelch("Fragmentation, wanted %d blocks, %ld MB free\n", n, ((mblocks_allocated * BLOCKS_PER_MBLOCK) - n_alloc_blocks) / BLOCKS_PER_MBLOCK);
424 RtsFlags.DebugFlags.block_alloc = 1;
425 checkFreeListSanity();
426 }
427 #endif
428
429 bd = alloc_mega_group(node,1);
430 bd->blocks = n;
431 initGroup(bd); // we know the group will fit
432 rem = bd + n;
433 rem->blocks = BLOCKS_PER_MBLOCK-n;
434 initGroup(rem); // init the slop
435 recordAllocatedBlocks(node,rem->blocks);
436 freeGroup(rem); // add the slop on to the free list
437 goto finish;
438 }
439
440 bd = free_list[node][ln];
441
442 if (bd->blocks == n) // exactly the right size!
443 {
444 dbl_link_remove(bd, &free_list[node][ln]);
445 initGroup(bd);
446 }
447 else if (bd->blocks > n) // block too big...
448 {
449 bd = split_free_block(bd, node, n, ln);
450 ASSERT(bd->blocks == n);
451 initGroup(bd);
452 }
453 else
454 {
455 barf("allocGroup: free list corrupted");
456 }
457
458 finish:
459 IF_DEBUG(zero_on_gc, memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
460 IF_DEBUG(sanity, checkFreeListSanity());
461 return bd;
462 }
463
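// Pick the NUMA node that currently has the fewest allocated blocks, so
// that allocation stays roughly balanced across nodes.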
464 STATIC_INLINE
465 uint32_t nodeWithLeastBlocks (void)
466 {
467 uint32_t node = 0, i;
468 uint32_t min_blocks = n_alloc_blocks_by_node[0];
469 for (i = 1; i < n_numa_nodes; i++) {
470 if (n_alloc_blocks_by_node[i] < min_blocks) {
471 min_blocks = n_alloc_blocks_by_node[i];
472 node = i;
473 }
474 }
475 return node;
476 }
477
478 bdescr* allocGroup (W_ n)
479 {
480 return allocGroupOnNode(nodeWithLeastBlocks(),n);
481 }
482
483
484 //
485 // Allocate a chunk of blocks that is at least min and at most max
486 // blocks in size. This API is used by the nursery allocator, which
487 // prefers contiguous memory but doesn't require it. When
488 // memory is fragmented we might have lots of chunks that are
489 // less than a full megablock, so allowing the nursery allocator to
490 // use these reduces fragmentation considerably. e.g. on a GHC build
491 // with +RTS -H, I saw fragmentation go from 17MB down to 3MB on a
492 // single compile.
493 //
494 // Further to this: in #7257 there is a program that creates serious
495 // fragmentation such that the heap is full of tiny <4 block chains.
496 // The nursery allocator therefore has to use single blocks to avoid
497 // fragmentation, but we make sure that we allocate large blocks
498 // preferentially if there are any.
499 //
500 bdescr* allocLargeChunkOnNode (uint32_t node, W_ min, W_ max)
501 {
502 bdescr *bd;
503 StgWord ln, lnmax;
504
505 if (min >= BLOCKS_PER_MBLOCK) {
506 return allocGroupOnNode(node,max);
507 }
508
509 ln = log_2_ceil(min);
510 lnmax = log_2_ceil(max);
511
512 while (ln < NUM_FREE_LISTS && ln < lnmax && free_list[node][ln] == NULL) {
513 ln++;
514 }
515 if (ln == NUM_FREE_LISTS || ln == lnmax) {
516 return allocGroupOnNode(node,max);
517 }
518 bd = free_list[node][ln];
519
520     if (bd->blocks <= max) // whole group is within [min..max]: take it all
521 {
522 dbl_link_remove(bd, &free_list[node][ln]);
523 initGroup(bd);
524 }
525 else // block too big...
526 {
527 bd = split_free_block(bd, node, max, ln);
528 ASSERT(bd->blocks == max);
529 initGroup(bd);
530 }
531
532 recordAllocatedBlocks(node, bd->blocks);
533
534 IF_DEBUG(zero_on_gc, memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
535 IF_DEBUG(sanity, checkFreeListSanity());
536 return bd;
537 }
538
539 bdescr* allocLargeChunk (W_ min, W_ max)
540 {
541 return allocLargeChunkOnNode(nodeWithLeastBlocks(), min, max);
542 }
543
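// Locking variants of the allocation functions above, for callers that do
// not already hold sm_mutex.  A minimal (hypothetical) use from client code:
//
//   bdescr *bd = allocGroup_lock(4);       // four contiguous blocks
//   // ... use bd->start .. bd->start + 4*BLOCK_SIZE_W ...
//   freeGroup_lock(bd);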
544 bdescr *
545 allocGroup_lock(W_ n)
546 {
547 bdescr *bd;
548 ACQUIRE_SM_LOCK;
549 bd = allocGroup(n);
550 RELEASE_SM_LOCK;
551 return bd;
552 }
553
554 bdescr *
555 allocBlock_lock(void)
556 {
557 bdescr *bd;
558 ACQUIRE_SM_LOCK;
559 bd = allocBlock();
560 RELEASE_SM_LOCK;
561 return bd;
562 }
563
564 bdescr *
565 allocGroupOnNode_lock(uint32_t node, W_ n)
566 {
567 bdescr *bd;
568 ACQUIRE_SM_LOCK;
569 bd = allocGroupOnNode(node,n);
570 RELEASE_SM_LOCK;
571 return bd;
572 }
573
574 bdescr *
575 allocBlockOnNode_lock(uint32_t node)
576 {
577 bdescr *bd;
578 ACQUIRE_SM_LOCK;
579 bd = allocBlockOnNode(node);
580 RELEASE_SM_LOCK;
581 return bd;
582 }
583
584 /* -----------------------------------------------------------------------------
585 De-Allocation
586 -------------------------------------------------------------------------- */
587
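// If q, the megablock group following p on the address-sorted free list, is
// contiguous with p in memory, merge q into p and return p; otherwise return
// q unchanged.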
588 STATIC_INLINE bdescr *
589 coalesce_mblocks (bdescr *p)
590 {
591 bdescr *q;
592
593 q = p->link;
594 if (q != NULL &&
595 MBLOCK_ROUND_DOWN(q) ==
596 (StgWord8*)MBLOCK_ROUND_DOWN(p) +
597 BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
598 // can coalesce
599 p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
600 BLOCKS_TO_MBLOCKS(q->blocks));
601 p->link = q->link;
602 return p;
603 }
604 return q;
605 }
606
607 static void
608 free_mega_group (bdescr *mg)
609 {
610 bdescr *bd, *prev;
611 uint32_t node;
612
613 // Find the right place in the free list. free_mblock_list is
614 // sorted by *address*, not by size as the free_list is.
615 prev = NULL;
616 node = mg->node;
617 bd = free_mblock_list[node];
618 while (bd && bd->start < mg->start) {
619 prev = bd;
620 bd = bd->link;
621 }
622
623 // coalesce backwards
624 if (prev)
625 {
626 mg->link = prev->link;
627 prev->link = mg;
628 mg = coalesce_mblocks(prev);
629 }
630 else
631 {
632 mg->link = free_mblock_list[node];
633 free_mblock_list[node] = mg;
634 }
635 // coalesce forwards
636 coalesce_mblocks(mg);
637
638 IF_DEBUG(sanity, checkFreeListSanity());
639 }
640
641
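// Free a block group.  Groups smaller than a megablock are coalesced with
// any free neighbours in the same megablock and put back on the appropriate
// size bucket; a group that already spans (or grows to span) a whole
// megablock is handed to free_mega_group() instead.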
642 void
643 freeGroup(bdescr *p)
644 {
645 StgWord ln;
646 uint32_t node;
647
648 // not true in multithreaded GC:
649 // ASSERT_SM_LOCK();
650
651 ASSERT(p->free != (P_)-1);
652
653 node = p->node;
654
655 p->free = (void *)-1; /* indicates that this block is free */
656 p->gen = NULL;
657 p->gen_no = 0;
658 /* fill the block group with garbage if sanity checking is on */
659 IF_DEBUG(zero_on_gc, memset(p->start, 0xaa, (W_)p->blocks * BLOCK_SIZE));
660
661 if (p->blocks == 0) barf("freeGroup: block size is zero");
662
663 if (p->blocks >= BLOCKS_PER_MBLOCK)
664 {
665 StgWord mblocks;
666
667 mblocks = BLOCKS_TO_MBLOCKS(p->blocks);
668 // If this is an mgroup, make sure it has the right number of blocks
669 ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks));
670
671 recordFreedBlocks(node, mblocks * BLOCKS_PER_MBLOCK);
672
673 free_mega_group(p);
674 return;
675 }
676
677 recordFreedBlocks(node, p->blocks);
678
679 // coalesce forwards
680 {
681 bdescr *next;
682 next = p + p->blocks;
683 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
684 {
685 p->blocks += next->blocks;
686 ln = log_2(next->blocks);
687 dbl_link_remove(next, &free_list[node][ln]);
688 if (p->blocks == BLOCKS_PER_MBLOCK)
689 {
690 free_mega_group(p);
691 return;
692 }
693 setup_tail(p);
694 }
695 }
696
697 // coalesce backwards
698 if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
699 {
700 bdescr *prev;
701 prev = p - 1;
702 if (prev->blocks == 0) prev = prev->link; // find the head
703
704 if (prev->free == (P_)-1)
705 {
706 ln = log_2(prev->blocks);
707 dbl_link_remove(prev, &free_list[node][ln]);
708 prev->blocks += p->blocks;
709 if (prev->blocks >= BLOCKS_PER_MBLOCK)
710 {
711 free_mega_group(prev);
712 return;
713 }
714 p = prev;
715 }
716 }
717
718 setup_tail(p);
719 free_list_insert(node,p);
720
721 IF_DEBUG(sanity, checkFreeListSanity());
722 }
723
724 void
725 freeGroup_lock(bdescr *p)
726 {
727 ACQUIRE_SM_LOCK;
728 freeGroup(p);
729 RELEASE_SM_LOCK;
730 }
731
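// Free every group in a chain linked through bd->link.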
732 void
733 freeChain(bdescr *bd)
734 {
735 bdescr *next_bd;
736 while (bd != NULL) {
737 next_bd = bd->link;
738 freeGroup(bd);
739 bd = next_bd;
740 }
741 }
742
743 void
744 freeChain_lock(bdescr *bd)
745 {
746 ACQUIRE_SM_LOCK;
747 freeChain(bd);
748 RELEASE_SM_LOCK;
749 }
750
751 static void
752 initMBlock(void *mblock, uint32_t node)
753 {
754 bdescr *bd;
755 StgWord8 *block;
756
757     /* the first few Bdescr's in an mblock are unused (they cover the space
758      * taken up by the block descriptors themselves), so we don't put them on the free list.
759      */
760 block = FIRST_BLOCK(mblock);
761 bd = FIRST_BDESCR(mblock);
762
763 /* Initialise the start field of each block descriptor
764 */
765 for (; block <= (StgWord8*)LAST_BLOCK(mblock); bd += 1,
766 block += BLOCK_SIZE) {
767 bd->start = (void*)block;
768 bd->node = node;
769 }
770 }
771
772 /* -----------------------------------------------------------------------------
773 Stats / metrics
774 -------------------------------------------------------------------------- */
775
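// Count the total number of blocks in a chain of block groups linked
// through bd->link.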
776 W_
777 countBlocks(bdescr *bd)
778 {
779 W_ n;
780 for (n=0; bd != NULL; bd=bd->link) {
781 n += bd->blocks;
782 }
783 return n;
784 }
785
786 // (*1) Just like countBlocks, except that we adjust the count for a
787 // megablock group so that it doesn't include the extra few blocks
788 // that would be taken up by block descriptors in the second and
789 // subsequent megablock. This is so we can tally the count with the
790 // number of blocks allocated in the system, for memInventory().
791 W_
792 countAllocdBlocks(bdescr *bd)
793 {
794 W_ n;
795 for (n=0; bd != NULL; bd=bd->link) {
796 n += bd->blocks;
797
798 // hack for megablock groups: see (*1) above
799 if (bd->blocks > BLOCKS_PER_MBLOCK) {
800 n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
801 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
802 }
803 }
804 return n;
805 }
806
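// Give up to n megablocks from the megablock free lists back to the OS.
// Whole free groups are released from the front of each per-node list; if
// only part of a group is needed, its tail megablocks are released and the
// shrunken group stays on the list.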
807 void returnMemoryToOS(uint32_t n /* megablocks */)
808 {
809 bdescr *bd;
810 uint32_t node;
811 StgWord size;
812
813 // ToDo: not fair, we free all the memory starting with node 0.
814 for (node = 0; n > 0 && node < n_numa_nodes; node++) {
815 bd = free_mblock_list[node];
816 while ((n > 0) && (bd != NULL)) {
817 size = BLOCKS_TO_MBLOCKS(bd->blocks);
818 if (size > n) {
819 StgWord newSize = size - n;
820 char *freeAddr = MBLOCK_ROUND_DOWN(bd->start);
821 freeAddr += newSize * MBLOCK_SIZE;
822 bd->blocks = MBLOCK_GROUP_BLOCKS(newSize);
823 freeMBlocks(freeAddr, n);
824 n = 0;
825 }
826 else {
827 char *freeAddr = MBLOCK_ROUND_DOWN(bd->start);
828 n -= size;
829 bd = bd->link;
830 freeMBlocks(freeAddr, size);
831 }
832 }
833 free_mblock_list[node] = bd;
834 }
835
836 // Ask the OS to release any address space portion
837 // that was associated with the just released MBlocks
838 //
839 // Historically, we used to ask the OS directly (via
840 // osReleaseFreeMemory()) - now the MBlock layer might
841 // have a reason to preserve the address space range,
842 // so we keep it
843 releaseFreeMemory();
844
845 IF_DEBUG(gc,
846 if (n != 0) {
847 debugBelch("Wanted to free %d more MBlocks than are freeable\n",
848 n);
849 }
850 );
851 }
852
853 /* -----------------------------------------------------------------------------
854 Debugging
855 -------------------------------------------------------------------------- */
856
857 #if defined(DEBUG)
858 static void
859 check_tail (bdescr *bd)
860 {
861 bdescr *tail = tail_of(bd);
862
863 if (tail != bd)
864 {
865 ASSERT(tail->blocks == 0);
866 ASSERT(tail->free == 0);
867 ASSERT(tail->link == bd);
868 }
869 }
870
871 void
872 checkFreeListSanity(void)
873 {
874 bdescr *bd, *prev;
875 StgWord ln, min;
876 uint32_t node;
877
878 for (node = 0; node < n_numa_nodes; node++) {
879 min = 1;
880 for (ln = 0; ln < NUM_FREE_LISTS; ln++) {
881 IF_DEBUG(block_alloc,
882 debugBelch("free block list [%" FMT_Word "]:\n", ln));
883
884 prev = NULL;
885 for (bd = free_list[node][ln]; bd != NULL; prev = bd, bd = bd->link)
886 {
887 IF_DEBUG(block_alloc,
888 debugBelch("group at %p, length %ld blocks\n",
889 bd->start, (long)bd->blocks));
890 ASSERT(bd->free == (P_)-1);
891 ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
892 ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
893 ASSERT(bd->link != bd); // catch easy loops
894 ASSERT(bd->node == node);
895
896 check_tail(bd);
897
898 if (prev)
899 ASSERT(bd->u.back == prev);
900 else
901 ASSERT(bd->u.back == NULL);
902
903 {
904 bdescr *next;
905 next = bd + bd->blocks;
906 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
907 {
908 ASSERT(next->free != (P_)-1);
909 }
910 }
911 }
912 min = min << 1;
913 }
914
915 prev = NULL;
916 for (bd = free_mblock_list[node]; bd != NULL; prev = bd, bd = bd->link)
917 {
918 IF_DEBUG(block_alloc,
919 debugBelch("mega group at %p, length %ld blocks\n",
920 bd->start, (long)bd->blocks));
921
922 ASSERT(bd->link != bd); // catch easy loops
923
924 if (bd->link != NULL)
925 {
926 // make sure the list is sorted
927 ASSERT(bd->start < bd->link->start);
928 }
929
930 ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
931 ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
932 == bd->blocks);
933
934 // make sure we're fully coalesced
935 if (bd->link != NULL)
936 {
937 ASSERT(MBLOCK_ROUND_DOWN(bd->link) !=
938 (StgWord8*)MBLOCK_ROUND_DOWN(bd) +
939 BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
940 }
941 }
942 }
943 }
944
945 W_ /* BLOCKS */
946 countFreeList(void)
947 {
948 bdescr *bd;
949 W_ total_blocks = 0;
950 StgWord ln;
951 uint32_t node;
952
953 for (node = 0; node < n_numa_nodes; node++) {
954 for (ln=0; ln < NUM_FREE_LISTS; ln++) {
955 for (bd = free_list[node][ln]; bd != NULL; bd = bd->link) {
956 total_blocks += bd->blocks;
957 }
958 }
959 for (bd = free_mblock_list[node]; bd != NULL; bd = bd->link) {
960 total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
961 // The caller of this function, memInventory(), expects to match
962 // the total number of blocks in the system against mblocks *
963 // BLOCKS_PER_MBLOCK, so we must subtract the space for the
964 // block descriptors from *every* mblock.
965 }
966 }
967 return total_blocks;
968 }
969
970 void
971 markBlocks (bdescr *bd)
972 {
973 for (; bd != NULL; bd = bd->link) {
974 bd->flags |= BF_KNOWN;
975 }
976 }
977
978 void
979 reportUnmarkedBlocks (void)
980 {
981 void *mblock;
982 void *state;
983 bdescr *bd;
984
985 debugBelch("Unreachable blocks:\n");
986 for (mblock = getFirstMBlock(&state); mblock != NULL;
987 mblock = getNextMBlock(&state, mblock)) {
988 for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
989 if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
990 debugBelch(" %p\n",bd);
991 }
992 if (bd->blocks >= BLOCKS_PER_MBLOCK) {
993 mblock = (StgWord8*)mblock +
994 (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;
995 break;
996 } else {
997 bd += bd->blocks;
998 }
999 }
1000 }
1001 }
1002
1003 #endif