1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * The block allocator and free list manager.
6 *
7 * This is the architecture independent part of the block allocator.
8 * It requires only the following support from the operating system:
9 *
10 * void *getMBlocks(uint32_t n);
11 *
12 * returns the address of an n*MBLOCK_SIZE region of memory, aligned on
13 * an MBLOCK_SIZE boundary. There are no other restrictions on the
14 * addresses of memory returned by getMBlocks().
15 *
16 * ---------------------------------------------------------------------------*/
17
18 #include "PosixSource.h"
19 #include "Rts.h"
20
21 #include "Storage.h"
22 #include "RtsUtils.h"
23 #include "BlockAlloc.h"
24 #include "OSMem.h"
25
26 #include <string.h>
27
28 static void initMBlock(void *mblock, uint32_t node);
29
30 /* -----------------------------------------------------------------------------
31
32 Implementation notes
33 ~~~~~~~~~~~~~~~~~~~~
34
35 Terminology:
36 - bdescr = block descriptor
37 - bgroup = block group (1 or more adjacent blocks)
38 - mblock = mega block
39 - mgroup = mega group (1 or more adjacent mblocks)
40
41 Invariants on block descriptors
42 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
43 bd->start always points to the start of the block.
44
45 bd->free is either:
46 - zero for a non-group-head; bd->link points to the head
47 - (-1) for the head of a free block group
48 - or it points within the block (group)
49
50 bd->blocks is either:
51 - zero for a non-group-head; bd->link points to the head
52 - number of blocks in this group otherwise
53
54 bd->link either points to a block descriptor or is NULL
55
56 The following fields are not used by the allocator:
57 bd->flags
58 bd->gen_no
59 bd->gen
60 bd->dest
61
62 Exceptions: we don't maintain invariants for all the blocks within a
63 group on the free list, because it is expensive to modify every
64 bdescr in a group when coalescing. Just the head and last bdescrs
65 will be correct for a group on the free list.
66
67
68 Free lists
69 ~~~~~~~~~~
70
71 Preliminaries:
72 - most allocations are for a small number of blocks
73 - sometimes the OS gives us new memory backwards in the address
74 space, sometimes forwards, so we should not be biased towards
75 any particular layout in the address space
76 - We want to avoid fragmentation
77 - We want allocation and freeing to be O(1) or close.
78
79 Coalescing trick: when a bgroup is freed (freeGroup()), we can check
80 whether it can be coalesced with other free bgroups by checking the
81 bdescrs for the blocks on either side of it. This means that we can
82 coalesce in O(1) time. Every free bgroup must have its head and tail
83 bdescrs initialised, the rest don't matter.
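
   A sketch of the neighbour lookup this trick relies on (it mirrors what
   freeGroup() below actually does; p is the head bdescr of the group being
   freed, and p is not the first group in its mblock):

       bdescr *next = p + p->blocks;   // bdescr just after this group
       bdescr *prev = p - 1;           // last bdescr of the preceding group
       if (prev->blocks == 0) {        // a tail bdescr: follow its link
           prev = prev->link;          // to the head of that group
       }

   A neighbour is free iff its head bdescr has free == (P_)-1.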
84
85 We keep the free list in buckets, using a heap-sort strategy.
86    Bucket N contains blocks with sizes 2^N to 2^(N+1)-1.  The list of
87 blocks in each bucket is doubly-linked, so that if a block is
88 coalesced we can easily remove it from its current free list.
89
90 To allocate a new block of size S, grab a block from bucket
91 log2ceiling(S) (i.e. log2() rounded up), in which all blocks are at
92 least as big as S, and split it if necessary. If there are no
93    blocks in that bucket, look at bigger buckets until a block is found.
94    Allocation is therefore O(log N) time.
95
96 To free a block:
97 - coalesce it with neighbours.
98 - remove coalesced neighbour(s) from free list(s)
99 - add the new (coalesced) block to the front of the appropriate
100 bucket, given by log2(S) where S is the size of the block.
101
102 Free is O(1).
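
   For example, a request for S = 5 blocks looks in bucket log2ceiling(5) = 3,
   which holds groups of 8-15 blocks (every one big enough), and splits off 5
   blocks from whichever group it finds there; if bucket 3 is empty, buckets
   4, 5, ... are tried in turn.  When a 5-block group is later freed (and
   cannot be coalesced) it goes onto bucket log2(5) = 2, which holds groups
   of 4-7 blocks.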
103
104 Megablocks
105 ~~~~~~~~~~
106
107 Separately from the free list of block groups, which are smaller than
108 an mblock, we maintain a free list of mblock groups. This is the unit
109 of memory the operating system gives us, and we may either split mblocks
110   into blocks or allocate them directly (for very large contiguous regions
111   of memory).  mblocks have a different set of invariants than blocks:
112
113 bd->start points to the start of the block IF the block is in the first mblock
114 bd->blocks and bd->link are only valid IF this block is the first block
115 of the first mblock
116 No other fields are used (in particular, free is not used, meaning that
117   space that is not used by the (single) object is wasted).
118
119 This has implications for the free list as well:
120 We cannot play the coalescing trick with mblocks, because there is
121   no requirement that the bdescrs in the second and subsequent mblocks
122 of an mgroup are initialised (the mgroup might be filled with a
123 large array, overwriting the bdescrs for example).
124
125 The separate free list for megablocks is thus sorted in *address*
126 order, so that we can coalesce. Allocation in this list is best-fit
127 by traversing the whole list: we don't expect this list to be long,
128 and allocation/freeing of large blocks is rare; avoiding
129 fragmentation is more important than performance here.
130
131 freeGroup() might end up moving a block from free_list to
132 free_mblock_list, if after coalescing we end up with a full mblock.
133
134 checkFreeListSanity() checks all the invariants on the free lists.
135
136 --------------------------------------------------------------------------- */
137
138 /* ---------------------------------------------------------------------------
139 WATCH OUT FOR OVERFLOW
140
141 Be very careful with integer overflow here. If you have an
142 expression like (n_blocks * BLOCK_SIZE), and n_blocks is an int or
143 a uint32_t, then it will very likely overflow on a 64-bit platform.
144 Always cast to StgWord (or W_ for short) first: ((W_)n_blocks * BLOCK_SIZE).
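
   For example (illustrative only):

       W_ bytes = n_blocks * BLOCK_SIZE;        // wrong if n_blocks is 32-bit: the multiply can overflow
       W_ bytes = (W_)n_blocks * BLOCK_SIZE;    // right: widen before multiplying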
145
146 --------------------------------------------------------------------------- */
147
148 // free_list[i] contains blocks that are at least size 2^i, and at
149 // most size 2^(i+1) - 1.
150 //
151 // To find the free list in which to place a block, use log_2(size).
152 // To find a free block of the right size, use log_2_ceil(size).
153 //
154 // The largest free list (free_list[NUM_FREE_LISTS-1]) needs to contain sizes
155 // from half a megablock up to (but not including) a full megablock.
156
157 #define NUM_FREE_LISTS (MBLOCK_SHIFT-BLOCK_SHIFT)
158
159 // In THREADED_RTS mode, the free list is protected by sm_mutex.
160
161 static bdescr *free_list[MAX_NUMA_NODES][NUM_FREE_LISTS];
162 static bdescr *free_mblock_list[MAX_NUMA_NODES];
163
164 W_ n_alloc_blocks; // currently allocated blocks
165 W_ hw_alloc_blocks; // high-water allocated blocks
166
167 W_ n_alloc_blocks_by_node[MAX_NUMA_NODES];
168
169 /* -----------------------------------------------------------------------------
170 Initialisation
171 -------------------------------------------------------------------------- */
172
173 void initBlockAllocator(void)
174 {
175 uint32_t i, node;
176 for (node = 0; node < MAX_NUMA_NODES; node++) {
177 for (i=0; i < NUM_FREE_LISTS; i++) {
178 free_list[node][i] = NULL;
179 }
180 free_mblock_list[node] = NULL;
181 n_alloc_blocks_by_node[node] = 0;
182 }
183 n_alloc_blocks = 0;
184 hw_alloc_blocks = 0;
185 }
186
187 /* -----------------------------------------------------------------------------
188 Accounting
189 -------------------------------------------------------------------------- */
190
191 STATIC_INLINE
192 void recordAllocatedBlocks(uint32_t node, uint32_t n)
193 {
194 n_alloc_blocks += n;
195 n_alloc_blocks_by_node[node] += n;
196 if (n > 0 && n_alloc_blocks > hw_alloc_blocks) {
197 hw_alloc_blocks = n_alloc_blocks;
198 }
199 }
200
201 STATIC_INLINE
202 void recordFreedBlocks(uint32_t node, uint32_t n)
203 {
204 ASSERT(n_alloc_blocks >= n);
205 n_alloc_blocks -= n;
206 n_alloc_blocks_by_node[node] -= n;
207 }
208
209 /* -----------------------------------------------------------------------------
210 Allocation
211 -------------------------------------------------------------------------- */
212
213 STATIC_INLINE bdescr *
214 tail_of (bdescr *bd)
215 {
216 return bd + bd->blocks - 1;
217 }
218
219 STATIC_INLINE void
220 initGroup(bdescr *head)
221 {
222 head->free = head->start;
223 head->link = NULL;
224
225 // If this is a block group (but not a megablock group), we
226 // make the last block of the group point to the head. This is used
227 // when coalescing blocks in freeGroup(). We don't do this for
228 // megablock groups because blocks in the second and subsequent
229 // mblocks don't have bdescrs; freeing these is handled in a
230 // different way by free_mblock_group().
231 if (head->blocks > 1 && head->blocks <= BLOCKS_PER_MBLOCK) {
232 bdescr *last = tail_of(head);
233 last->blocks = 0;
234 last->link = head;
235 }
236 }
237
238 #if SIZEOF_VOID_P == SIZEOF_LONG
239 #define CLZW(n) (__builtin_clzl(n))
240 #else
241 #define CLZW(n) (__builtin_clzll(n))
242 #endif
243
244 // log base 2 (floor), needs to support up to (2^NUM_FREE_LISTS)-1
245 STATIC_INLINE uint32_t
246 log_2(W_ n)
247 {
248 ASSERT(n > 0 && n < (1<<NUM_FREE_LISTS));
249 #if defined(__GNUC__)
250 return CLZW(n) ^ (sizeof(StgWord)*8 - 1);
251 // generates good code on x86. __builtin_clz() compiles to bsr+xor, but
252 // we want just bsr, so the xor here cancels out gcc's xor.
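// (For a W-bit word, log_2(n) = (W-1) - clz(n); W-1 is all ones in its low
// bits, so subtracting clz(n) from it is the same as XORing with it.)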
253 #else
254 W_ i, x;
255 x = n;
256 for (i=0; i < NUM_FREE_LISTS; i++) {
257 x = x >> 1;
258 if (x == 0) return i;
259 }
260 return NUM_FREE_LISTS;
261 #endif
262 }
263
264 // log base 2 (ceiling), needs to support up to (2^NUM_FREE_LISTS)-1
265 STATIC_INLINE uint32_t
266 log_2_ceil(W_ n)
267 {
268 ASSERT(n > 0 && n < (1<<NUM_FREE_LISTS));
269 #if defined(__GNUC__)
270 uint32_t r = log_2(n);
271 return (n & (n-1)) ? r+1 : r;
272 #else
273 W_ i, x;
274 x = 1;
275 for (i=0; i < NUM_FREE_LISTS; i++) {
276 if (x >= n) return i;
277 x = x << 1;
278 }
279 return NUM_FREE_LISTS;
280 #endif
281 }
282
283 STATIC_INLINE void
284 free_list_insert (uint32_t node, bdescr *bd)
285 {
286 uint32_t ln;
287
288 ASSERT(bd->blocks < BLOCKS_PER_MBLOCK);
289 ln = log_2(bd->blocks);
290
291 dbl_link_onto(bd, &free_list[node][ln]);
292 }
293
294 // After splitting a group, each resulting group must have a tail (the
295 // bdescr of its last block) pointing back to the head block, to keep
296 // our coalescing invariants.
297 STATIC_INLINE void
298 setup_tail (bdescr *bd)
299 {
300 bdescr *tail;
301 tail = tail_of(bd);
302 if (tail != bd) {
303 tail->blocks = 0;
304 tail->free = 0;
305 tail->link = bd;
306 }
307 }
308
309
310 // Take a free block group bd, and split off a group of size n from
311 // it. Adjust the free list as necessary, and return the new group.
312 static bdescr *
313 split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln /* log_2_ceil(n) */)
314 {
315 bdescr *fg; // free group
316
317 ASSERT(bd->blocks > n);
318 dbl_link_remove(bd, &free_list[node][ln]);
319 fg = bd + bd->blocks - n; // take n blocks off the end
320 fg->blocks = n;
321 bd->blocks -= n;
322 setup_tail(bd);
323 ln = log_2(bd->blocks);
324 dbl_link_onto(bd, &free_list[node][ln]);
325 return fg;
326 }
327
328 // Take N blocks off the end, free the rest.
329 static bdescr *
330 split_block_high (bdescr *bd, W_ n)
331 {
332 ASSERT(bd->blocks > n);
333
334 bdescr* ret = bd + bd->blocks - n; // take n blocks off the end
335 ret->blocks = n;
336 ret->start = ret->free = bd->start + (bd->blocks - n)*BLOCK_SIZE_W;
337 ret->link = NULL;
338
339 bd->blocks -= n;
340
341 setup_tail(ret);
342 setup_tail(bd);
343 freeGroup(bd);
344
345 return ret;
346 }
347
348 // Like `split_block_high`, but takes n blocks off the beginning rather
349 // than the end.
350 static bdescr *
351 split_block_low (bdescr *bd, W_ n)
352 {
353 ASSERT(bd->blocks > n);
354
355 bdescr* bd_ = bd + n;
356 bd_->blocks = bd->blocks - n;
357 bd_->start = bd_->free = bd->start + n*BLOCK_SIZE_W;
358
359 bd->blocks = n;
360
361 setup_tail(bd_);
362 setup_tail(bd);
363 freeGroup(bd_);
364
365 return bd;
366 }
367
368 /* Only initializes the start pointers on the first megablock and the
369 * blocks field of the first bdescr; callers are responsible for calling
370 * initGroup afterwards.
371 */
372 static bdescr *
373 alloc_mega_group (uint32_t node, StgWord mblocks)
374 {
375 bdescr *best, *bd, *prev;
376 StgWord n;
377
378 n = MBLOCK_GROUP_BLOCKS(mblocks);
379
380 best = NULL;
381 prev = NULL;
382 for (bd = free_mblock_list[node]; bd != NULL; prev = bd, bd = bd->link)
383 {
384 if (bd->blocks == n)
385 {
386 if (prev) {
387 prev->link = bd->link;
388 } else {
389 free_mblock_list[node] = bd->link;
390 }
391 return bd;
392 }
393 else if (bd->blocks > n)
394 {
395 if (!best || bd->blocks < best->blocks)
396 {
397 best = bd;
398 }
399 }
400 }
401
402 if (best)
403 {
404 // we take our chunk off the end here.
405 StgWord best_mblocks = BLOCKS_TO_MBLOCKS(best->blocks);
406 bd = FIRST_BDESCR((StgWord8*)MBLOCK_ROUND_DOWN(best) +
407 (best_mblocks-mblocks)*MBLOCK_SIZE);
408
409 best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks);
410 initMBlock(MBLOCK_ROUND_DOWN(bd), node);
411 }
412 else
413 {
414 void *mblock;
415 if (RtsFlags.GcFlags.numa) {
416 mblock = getMBlocksOnNode(node, mblocks);
417 } else {
418 mblock = getMBlocks(mblocks);
419 }
420 initMBlock(mblock, node); // only need to init the 1st one
421 bd = FIRST_BDESCR(mblock);
422 }
423 bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks);
424 return bd;
425 }
426
427 bdescr *
428 allocGroupOnNode (uint32_t node, W_ n)
429 {
430 bdescr *bd, *rem;
431 StgWord ln;
432
433 if (n == 0) barf("allocGroup: requested zero blocks");
434
435 if (n >= BLOCKS_PER_MBLOCK)
436 {
437 StgWord mblocks;
438
439 mblocks = BLOCKS_TO_MBLOCKS(n);
440
441 // n_alloc_blocks doesn't count the extra blocks we get in a
442 // megablock group.
443 recordAllocatedBlocks(node, mblocks * BLOCKS_PER_MBLOCK);
444
445 bd = alloc_mega_group(node, mblocks);
446 // only the bdescrs of the first MB are required to be initialised
447 initGroup(bd);
448 goto finish;
449 }
450
451 recordAllocatedBlocks(node, n);
452
453 ln = log_2_ceil(n);
454
455 while (ln < NUM_FREE_LISTS && free_list[node][ln] == NULL) {
456 ln++;
457 }
458
459 if (ln == NUM_FREE_LISTS) {
460 #if 0 /* useful for debugging fragmentation */
461 if ((W_)mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W
462 - (W_)((n_alloc_blocks - n) * BLOCK_SIZE_W) > (2*1024*1024)/sizeof(W_)) {
463 debugBelch("Fragmentation, wanted %d blocks, %ld MB free\n", n, ((mblocks_allocated * BLOCKS_PER_MBLOCK) - n_alloc_blocks) / BLOCKS_PER_MBLOCK);
464 RtsFlags.DebugFlags.block_alloc = 1;
465 checkFreeListSanity();
466 }
467 #endif
468
469 bd = alloc_mega_group(node,1);
470 bd->blocks = n;
471 initGroup(bd); // we know the group will fit
472 rem = bd + n;
473 rem->blocks = BLOCKS_PER_MBLOCK-n;
474 initGroup(rem); // init the slop
475 recordAllocatedBlocks(node,rem->blocks);
476 freeGroup(rem); // add the slop on to the free list
477 goto finish;
478 }
479
480 bd = free_list[node][ln];
481
482 if (bd->blocks == n) // exactly the right size!
483 {
484 dbl_link_remove(bd, &free_list[node][ln]);
485 initGroup(bd);
486 }
487 else if (bd->blocks > n) // block too big...
488 {
489 bd = split_free_block(bd, node, n, ln);
490 ASSERT(bd->blocks == n);
491 initGroup(bd);
492 }
493 else
494 {
495 barf("allocGroup: free list corrupted");
496 }
497
498 finish:
499 IF_DEBUG(sanity, memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
500 IF_DEBUG(sanity, checkFreeListSanity());
501 return bd;
502 }
503
504 bdescr *
505 allocAlignedGroupOnNode (uint32_t node, W_ n)
506 {
507 // To make sure we don't allocate megablocks in allocGroup below we need to
508 // check here that we ask for at most BLOCKS_PER_MBLOCK/2 blocks.
509 if (2*n >= BLOCKS_PER_MBLOCK) {
510 barf("allocAlignedGroupOnNode: allocating more than a megablock: %" FMT_Word, 2*n);
511 }
512
513 // allocate enough blocks to have enough space aligned at an n-block boundary,
514 // then free any slop on the low and high sides of that space
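// (A worked example, for illustration only: with n = 4 we allocate
// 2*4 - 1 = 7 blocks. Wherever that 7-block group happens to start, it must
// contain a run of 4 blocks starting on a 4-block boundary; the blocks
// before that run (slop_low) and after it (slop_high) are split off below
// and handed back to the free list.)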
515
516 // number of blocks to allocate to make sure we have enough aligned space
517 uint32_t num_blocks = 2*n - 1;
518 W_ group_size = n * BLOCK_SIZE;
519
520 bdescr *bd = allocGroupOnNode(node, num_blocks);
521
522 // slop on the low side
523 W_ slop_low = 0;
524 if ((uintptr_t)bd->start % group_size != 0) {
525 slop_low = group_size - ((uintptr_t)bd->start % group_size);
526 }
527
528 W_ slop_high = (num_blocks * BLOCK_SIZE) - group_size - slop_low;
529
530 ASSERT((slop_low % BLOCK_SIZE) == 0);
531 ASSERT((slop_high % BLOCK_SIZE) == 0);
532
533 W_ slop_low_blocks = slop_low / BLOCK_SIZE;
534 W_ slop_high_blocks = slop_high / BLOCK_SIZE;
535
536 ASSERT(slop_low_blocks + slop_high_blocks + n == num_blocks);
537
538 #if defined(DEBUG)
539 checkFreeListSanity();
540 W_ free_before = countFreeList();
541 #endif
542
543 if (slop_low_blocks != 0) {
544 bd = split_block_high(bd, num_blocks - slop_low_blocks);
545 ASSERT(countBlocks(bd) == num_blocks - slop_low_blocks);
546 }
547
548 #if defined(DEBUG)
549 ASSERT(countFreeList() == free_before + slop_low_blocks);
550 checkFreeListSanity();
551 #endif
552
553 // At this point the bd should be aligned, but we may have slop on the high side
554 ASSERT((uintptr_t)bd->start % group_size == 0);
555
556 #if defined(DEBUG)
557 free_before = countFreeList();
558 #endif
559
560 if (slop_high_blocks != 0) {
561 bd = split_block_low(bd, n);
562 ASSERT(bd->blocks == n);
563 }
564
565 #if defined(DEBUG)
566 ASSERT(countFreeList() == free_before + slop_high_blocks);
567 checkFreeListSanity();
568 #endif
569
570 // Should still be aligned
571 ASSERT((uintptr_t)bd->start % group_size == 0);
572
573 // Just to make sure I get this right
574 ASSERT(Bdescr(bd->start) == bd);
575
576 return bd;
577 }
578
579 STATIC_INLINE
580 uint32_t nodeWithLeastBlocks (void)
581 {
582 uint32_t node = 0, i;
583 uint32_t min_blocks = n_alloc_blocks_by_node[0];
584 for (i = 1; i < n_numa_nodes; i++) {
585 if (n_alloc_blocks_by_node[i] < min_blocks) {
586 min_blocks = n_alloc_blocks_by_node[i];
587 node = i;
588 }
589 }
590 return node;
591 }
592
593 bdescr* allocGroup (W_ n)
594 {
595 return allocGroupOnNode(nodeWithLeastBlocks(),n);
596 }
597
598
599 //
600 // Allocate a chunk of blocks that is at least min and at most max
601 // blocks in size. This API is used by the nursery allocator, which
602 // prefers contiguous memory but doesn't require it. When
603 // memory is fragmented we might have lots of chunks that are
604 // less than a full megablock, so allowing the nursery allocator to
605 // use these reduces fragmentation considerably. e.g. on a GHC build
606 // with +RTS -H, I saw fragmentation go from 17MB down to 3MB on a
607 // single compile.
608 //
609 // Further to this: in #7257 there is a program that creates serious
610 // fragmentation such that the heap is full of tiny <4 block chains.
611 // The nursery allocator therefore has to be able to make do with single
612 // blocks to avoid fragmentation, but we make sure to hand out the larger
613 // free chunks first when they are available.
614 //
615 bdescr* allocLargeChunkOnNode (uint32_t node, W_ min, W_ max)
616 {
617 bdescr *bd;
618 StgWord ln, lnmax;
619
620 if (min >= BLOCKS_PER_MBLOCK) {
621 return allocGroupOnNode(node,max);
622 }
623
624 ln = log_2_ceil(min);
625 lnmax = log_2_ceil(max);
626
627 while (ln < NUM_FREE_LISTS && ln < lnmax && free_list[node][ln] == NULL) {
628 ln++;
629 }
630 if (ln == NUM_FREE_LISTS || ln == lnmax) {
631 return allocGroupOnNode(node,max);
632 }
633 bd = free_list[node][ln];
634
635 if (bd->blocks <= max) // no bigger than we need: take the whole group
636 {
637 dbl_link_remove(bd, &free_list[node][ln]);
638 initGroup(bd);
639 }
640 else // block too big...
641 {
642 bd = split_free_block(bd, node, max, ln);
643 ASSERT(bd->blocks == max);
644 initGroup(bd);
645 }
646
647 recordAllocatedBlocks(node, bd->blocks);
648
649 IF_DEBUG(sanity, memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE));
650 IF_DEBUG(sanity, checkFreeListSanity());
651 return bd;
652 }
653
654 bdescr* allocLargeChunk (W_ min, W_ max)
655 {
656 return allocLargeChunkOnNode(nodeWithLeastBlocks(), min, max);
657 }
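
/* A minimal sketch (not part of the RTS) of how a nursery-style caller might
 * use allocLargeChunk(): keep asking for "at least one block, ideally more"
 * and chain the resulting groups together until roughly 'wanted' blocks have
 * been gathered. The function name and the quarter-megablock cap are
 * illustrative assumptions only, and the storage manager lock is assumed to
 * be held, as for the other non-_lock allocation entry points.
 */
#if 0
static bdescr *
gatherBlocks (W_ wanted)
{
    bdescr *chain = NULL;
    W_ got = 0;

    while (got < wanted) {
        W_ max = wanted - got;
        if (max > BLOCKS_PER_MBLOCK / 4) {
            max = BLOCKS_PER_MBLOCK / 4;   // example policy: cap each request
        }
        bdescr *bd = allocLargeChunk(1, max);
        got += bd->blocks;
        bd->link = chain;                  // chain the groups via bd->link
        chain = bd;
    }
    return chain;
}
#endif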
658
659 bdescr *
660 allocGroup_lock(W_ n)
661 {
662 bdescr *bd;
663 ACQUIRE_SM_LOCK;
664 bd = allocGroup(n);
665 RELEASE_SM_LOCK;
666 return bd;
667 }
668
669 bdescr *
670 allocBlock_lock(void)
671 {
672 bdescr *bd;
673 ACQUIRE_SM_LOCK;
674 bd = allocBlock();
675 RELEASE_SM_LOCK;
676 return bd;
677 }
678
679 bdescr *
680 allocGroupOnNode_lock(uint32_t node, W_ n)
681 {
682 bdescr *bd;
683 ACQUIRE_SM_LOCK;
684 bd = allocGroupOnNode(node,n);
685 RELEASE_SM_LOCK;
686 return bd;
687 }
688
689 bdescr *
690 allocBlockOnNode_lock(uint32_t node)
691 {
692 bdescr *bd;
693 ACQUIRE_SM_LOCK;
694 bd = allocBlockOnNode(node);
695 RELEASE_SM_LOCK;
696 return bd;
697 }
698
699 /* -----------------------------------------------------------------------------
700 De-Allocation
701 -------------------------------------------------------------------------- */
702
703 STATIC_INLINE bdescr *
704 coalesce_mblocks (bdescr *p)
705 {
706 bdescr *q;
707
708 q = p->link;
709 if (q != NULL &&
710 MBLOCK_ROUND_DOWN(q) ==
711 (StgWord8*)MBLOCK_ROUND_DOWN(p) +
712 BLOCKS_TO_MBLOCKS(p->blocks) * MBLOCK_SIZE) {
713 // can coalesce
714 p->blocks = MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(p->blocks) +
715 BLOCKS_TO_MBLOCKS(q->blocks));
716 p->link = q->link;
717 return p;
718 }
719 return q;
720 }
721
722 static void
723 free_mega_group (bdescr *mg)
724 {
725 bdescr *bd, *prev;
726 uint32_t node;
727
728 // Find the right place in the free list. free_mblock_list is
729 // sorted by *address*, not by size as the free_list is.
730 prev = NULL;
731 node = mg->node;
732 bd = free_mblock_list[node];
733 while (bd && bd->start < mg->start) {
734 prev = bd;
735 bd = bd->link;
736 }
737
738 // coalesce backwards
739 if (prev)
740 {
741 mg->link = prev->link;
742 prev->link = mg;
743 mg = coalesce_mblocks(prev);
744 }
745 else
746 {
747 mg->link = free_mblock_list[node];
748 free_mblock_list[node] = mg;
749 }
750 // coalesce forwards
751 coalesce_mblocks(mg);
752
753 IF_DEBUG(sanity, checkFreeListSanity());
754 }
755
756
757 void
758 freeGroup(bdescr *p)
759 {
760 StgWord ln;
761 uint32_t node;
762
763 // not true in multithreaded GC:
764 // ASSERT_SM_LOCK();
765
766 ASSERT(p->free != (P_)-1);
767
768 node = p->node;
769
770 p->free = (void *)-1; /* indicates that this block is free */
771 p->gen = NULL;
772 p->gen_no = 0;
773 /* fill the block group with garbage if sanity checking is on */
774 IF_DEBUG(sanity,memset(p->start, 0xaa, (W_)p->blocks * BLOCK_SIZE));
775
776 if (p->blocks == 0) barf("freeGroup: block size is zero");
777
778 if (p->blocks >= BLOCKS_PER_MBLOCK)
779 {
780 StgWord mblocks;
781
782 mblocks = BLOCKS_TO_MBLOCKS(p->blocks);
783 // If this is an mgroup, make sure it has the right number of blocks
784 ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks));
785
786 recordFreedBlocks(node, mblocks * BLOCKS_PER_MBLOCK);
787
788 free_mega_group(p);
789 return;
790 }
791
792 recordFreedBlocks(node, p->blocks);
793
794 // coalesce forwards
795 {
796 bdescr *next;
797 next = p + p->blocks;
798 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(p)) && next->free == (P_)-1)
799 {
800 p->blocks += next->blocks;
801 ln = log_2(next->blocks);
802 dbl_link_remove(next, &free_list[node][ln]);
803 if (p->blocks == BLOCKS_PER_MBLOCK)
804 {
805 free_mega_group(p);
806 return;
807 }
808 setup_tail(p);
809 }
810 }
811
812 // coalesce backwards
813 if (p != FIRST_BDESCR(MBLOCK_ROUND_DOWN(p)))
814 {
815 bdescr *prev;
816 prev = p - 1;
817 if (prev->blocks == 0) prev = prev->link; // find the head
818
819 if (prev->free == (P_)-1)
820 {
821 ln = log_2(prev->blocks);
822 dbl_link_remove(prev, &free_list[node][ln]);
823 prev->blocks += p->blocks;
824 if (prev->blocks >= BLOCKS_PER_MBLOCK)
825 {
826 free_mega_group(prev);
827 return;
828 }
829 p = prev;
830 }
831 }
832
833 setup_tail(p);
834 free_list_insert(node,p);
835
836 IF_DEBUG(sanity, checkFreeListSanity());
837 }
838
839 void
840 freeGroup_lock(bdescr *p)
841 {
842 ACQUIRE_SM_LOCK;
843 freeGroup(p);
844 RELEASE_SM_LOCK;
845 }
846
847 void
848 freeChain(bdescr *bd)
849 {
850 bdescr *next_bd;
851 while (bd != NULL) {
852 next_bd = bd->link;
853 freeGroup(bd);
854 bd = next_bd;
855 }
856 }
857
858 void
859 freeChain_lock(bdescr *bd)
860 {
861 ACQUIRE_SM_LOCK;
862 freeChain(bd);
863 RELEASE_SM_LOCK;
864 }
865
866 static void
867 initMBlock(void *mblock, uint32_t node)
868 {
869 bdescr *bd;
870 StgWord8 *block;
871
872 /* the first few bdescrs of an mblock cover the space used by the block
873 * descriptors themselves, so those block-sized slots never go on the
874 * free list.
874 */
875 block = FIRST_BLOCK(mblock);
876 bd = FIRST_BDESCR(mblock);
877
878 /* Initialise the start field of each block descriptor
879 */
880 for (; block <= (StgWord8*)LAST_BLOCK(mblock); bd += 1,
881 block += BLOCK_SIZE) {
882 bd->start = (void*)block;
883 bd->node = node;
884 }
885 }
886
887 /* -----------------------------------------------------------------------------
888 Stats / metrics
889 -------------------------------------------------------------------------- */
890
891 W_
892 countBlocks(bdescr *bd)
893 {
894 W_ n;
895 for (n=0; bd != NULL; bd=bd->link) {
896 n += bd->blocks;
897 }
898 return n;
899 }
900
901 // (*1) Just like countBlocks, except that we adjust the count for a
902 // megablock group so that it doesn't include the extra few blocks
903 // that would be taken up by block descriptors in the second and
904 // subsequent megablock. This is so we can tally the count with the
905 // number of blocks allocated in the system, for memInventory().
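// For example, on a configuration with 4KB blocks and 1MB megablocks
// (illustrative sizes) an mblock has 256 block-sized slots, of which only
// BLOCKS_PER_MBLOCK are usable as blocks; the adjustment below makes a
// 2-mblock group count as 2*BLOCKS_PER_MBLOCK blocks rather than
// BLOCKS_PER_MBLOCK + 256.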
906 W_
907 countAllocdBlocks(bdescr *bd)
908 {
909 W_ n;
910 for (n=0; bd != NULL; bd=bd->link) {
911 n += bd->blocks;
912
913 // hack for megablock groups: see (*1) above
914 if (bd->blocks > BLOCKS_PER_MBLOCK) {
915 n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
916 * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
917 }
918 }
919 return n;
920 }
921
922 void returnMemoryToOS(uint32_t n /* megablocks */)
923 {
924 bdescr *bd;
925 uint32_t node;
926 StgWord size;
927
928 // ToDo: not fair, we free all the memory starting with node 0.
929 for (node = 0; n > 0 && node < n_numa_nodes; node++) {
930 bd = free_mblock_list[node];
931 while ((n > 0) && (bd != NULL)) {
932 size = BLOCKS_TO_MBLOCKS(bd->blocks);
933 if (size > n) {
934 StgWord newSize = size - n;
935 char *freeAddr = MBLOCK_ROUND_DOWN(bd->start);
936 freeAddr += newSize * MBLOCK_SIZE;
937 bd->blocks = MBLOCK_GROUP_BLOCKS(newSize);
938 freeMBlocks(freeAddr, n);
939 n = 0;
940 }
941 else {
942 char *freeAddr = MBLOCK_ROUND_DOWN(bd->start);
943 n -= size;
944 bd = bd->link;
945 freeMBlocks(freeAddr, size);
946 }
947 }
948 free_mblock_list[node] = bd;
949 }
950
951 // Ask the OS to release any address space portion
952 // that was associated with the just released MBlocks
953 //
954 // Historically, we used to ask the OS directly (via
955 // osReleaseFreeMemory()) - now the MBlock layer might
956 // have a reason to preserve the address space range,
957 // so we go through releaseFreeMemory() and let it decide.
958 releaseFreeMemory();
959
960 IF_DEBUG(gc,
961 if (n != 0) {
962 debugBelch("Wanted to free %d more MBlocks than are freeable\n",
963 n);
964 }
965 );
966 }
967
968 /* -----------------------------------------------------------------------------
969 Debugging
970 -------------------------------------------------------------------------- */
971
972 #if defined(DEBUG)
973 static void
974 check_tail (bdescr *bd)
975 {
976 bdescr *tail = tail_of(bd);
977
978 if (tail != bd)
979 {
980 ASSERT(tail->blocks == 0);
981 ASSERT(tail->free == 0);
982 ASSERT(tail->link == bd);
983 }
984 }
985
986 void
987 checkFreeListSanity(void)
988 {
989 bdescr *bd, *prev;
990 StgWord ln, min;
991 uint32_t node;
992
993 for (node = 0; node < n_numa_nodes; node++) {
994 min = 1;
995 for (ln = 0; ln < NUM_FREE_LISTS; ln++) {
996 IF_DEBUG(block_alloc,
997 debugBelch("free block list [%" FMT_Word "]:\n", ln));
998
999 prev = NULL;
1000 for (bd = free_list[node][ln]; bd != NULL; prev = bd, bd = bd->link)
1001 {
1002 IF_DEBUG(block_alloc,
1003 debugBelch("group at %p, length %ld blocks\n",
1004 bd->start, (long)bd->blocks));
1005 ASSERT(bd->free == (P_)-1);
1006 ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK);
1007 ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1));
1008 ASSERT(bd->link != bd); // catch easy loops
1009 ASSERT(bd->node == node);
1010
1011 check_tail(bd);
1012
1013 if (prev)
1014 ASSERT(bd->u.back == prev);
1015 else
1016 ASSERT(bd->u.back == NULL);
1017
1018 {
1019 bdescr *next;
1020 next = bd + bd->blocks;
1021 if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd)))
1022 {
1023 ASSERT(next->free != (P_)-1);
1024 }
1025 }
1026 }
1027 min = min << 1;
1028 }
1029
1030 prev = NULL;
1031 for (bd = free_mblock_list[node]; bd != NULL; prev = bd, bd = bd->link)
1032 {
1033 IF_DEBUG(block_alloc,
1034 debugBelch("mega group at %p, length %ld blocks\n",
1035 bd->start, (long)bd->blocks));
1036
1037 ASSERT(bd->link != bd); // catch easy loops
1038
1039 if (bd->link != NULL)
1040 {
1041 // make sure the list is sorted
1042 ASSERT(bd->start < bd->link->start);
1043 }
1044
1045 ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK);
1046 ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks))
1047 == bd->blocks);
1048
1049 // make sure we're fully coalesced
1050 if (bd->link != NULL)
1051 {
1052 ASSERT(MBLOCK_ROUND_DOWN(bd->link) !=
1053 (StgWord8*)MBLOCK_ROUND_DOWN(bd) +
1054 BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE);
1055 }
1056 }
1057 }
1058 }
1059
1060 W_ /* BLOCKS */
1061 countFreeList(void)
1062 {
1063 bdescr *bd;
1064 W_ total_blocks = 0;
1065 StgWord ln;
1066 uint32_t node;
1067
1068 for (node = 0; node < n_numa_nodes; node++) {
1069 for (ln=0; ln < NUM_FREE_LISTS; ln++) {
1070 for (bd = free_list[node][ln]; bd != NULL; bd = bd->link) {
1071 total_blocks += bd->blocks;
1072 }
1073 }
1074 for (bd = free_mblock_list[node]; bd != NULL; bd = bd->link) {
1075 total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks);
1076 // The caller of this function, memInventory(), expects to match
1077 // the total number of blocks in the system against mblocks *
1078 // BLOCKS_PER_MBLOCK, so we must subtract the space for the
1079 // block descriptors from *every* mblock.
1080 }
1081 }
1082 return total_blocks;
1083 }
1084
1085 void
1086 markBlocks (bdescr *bd)
1087 {
1088 for (; bd != NULL; bd = bd->link) {
1089 bd->flags |= BF_KNOWN;
1090 }
1091 }
1092
1093 void
1094 reportUnmarkedBlocks (void)
1095 {
1096 void *mblock;
1097 void *state;
1098 bdescr *bd;
1099
1100 debugBelch("Unreachable blocks:\n");
1101 for (mblock = getFirstMBlock(&state); mblock != NULL;
1102 mblock = getNextMBlock(&state, mblock)) {
1103 for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
1104 if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
1105 debugBelch(" %p\n",bd);
1106 }
1107 if (bd->blocks >= BLOCKS_PER_MBLOCK) {
1108 mblock = (StgWord8*)mblock +
1109 (BLOCKS_TO_MBLOCKS(bd->blocks) - 1) * MBLOCK_SIZE;
1110 break;
1111 } else {
1112 bd += bd->blocks;
1113 }
1114 }
1115 }
1116 }
1117
1118 #endif