NUMA support
[ghc.git] / rts / sm / MBlock.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * MegaBlock Allocator Interface. This file contains all the dirty
6 * architecture-dependent hackery required to get a chunk of aligned
7 * memory from the operating system.
8 *
9 * ---------------------------------------------------------------------------*/
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "BlockAlloc.h"
16 #include "Trace.h"
17 #include "OSMem.h"
18
19 #include <string.h>
20
21 W_ peak_mblocks_allocated = 0;
22 W_ mblocks_allocated = 0;
23 W_ mpc_misses = 0;
24
25 /* -----------------------------------------------------------------------------
26 The MBlock Map: provides our implementation of HEAP_ALLOCED() and the
27 utilities to walk the really allocated (thus accessible without risk of
28 segfault) heap
29 -------------------------------------------------------------------------- */
30
31 /*
32 There are two different cases here: either we use "large address
33 space" (which really means two-step allocation), so we have to
34 manage which memory is good (= accessible without fear of segfault)
35 and which is not owned by us, or we use the older method and get
36 good memory straight from the system.
37
38 Both code paths need to provide:
39
40 void *getFirstMBlock(void ** state)
41 return the first (lowest address) mblock
42 that was actually committed
43
44 void *getNextMBlock(void ** state, void * mblock)
45 return the first (lowest address) mblock
46 that was committed, after the given one
47
   For both these calls, @state is an in-out parameter that points to
   an opaque state threading the calls together. The calls should only
   be used in an iteration fashion. Pass NULL if @state is not
   interesting, or pass a pointer to NULL if you don't have a state.
52
53 void *getCommittedMBlocks(uint32_t n)
54 return @n new mblocks, ready to be used (reserved and committed)
55
   void decommitMBlocks(char *addr, uint32_t n)
       release memory for @n mblocks, starting at the given address
58
59 void releaseFreeMemory()
60 potentially release any address space that was associated
61 with recently decommitted blocks
62 */
63
64 #ifdef USE_LARGE_ADDRESS_SPACE
65
66 // Large address space means we use two-step allocation: reserve
67 // something large upfront, and then commit as needed
68 // (This is normally only useful on 64-bit, where we can assume
69 // that reserving 1TB is possible)
70 //
71 // There is no block map in this case, but there is a free list
72 // of blocks that were committed and decommitted at least once,
73 // which we use to choose which block to commit next in the already
74 // reserved space.
75 //
76 // We cannot let the OS choose it as we do in the
77 // non large address space case, because the committing wants to
78 // know the exact address upfront.
79 //
80 // The free list is coalesced and ordered, which means that
81 // allocate and free are worst-case O(n), but benchmarks have shown
82 // that this is not a significant problem, because large (>=2MB)
83 // allocations are infrequent and their time is mostly insignificant
84 // compared to the time to use that memory.
85 //
86 // The free list is stored in malloc()'d memory, unlike the other free
87 // lists in BlockAlloc.c which are stored in block descriptors,
88 // because we cannot touch the contents of decommitted mblocks.
89
/* A node in the sorted, coalesced list of decommitted ranges inside
   the reserved address space.  Stored in malloc()'d memory (not in
   block descriptors) because decommitted mblocks cannot be touched. */
typedef struct free_list {
    struct free_list *prev;   // previous (lower address) range, or NULL
    struct free_list *next;   // next (higher address) range, or NULL
    W_ address;               // start address of the free range
    W_ size;                  // size of the range in bytes
} free_list;
96
// Head of the sorted, coalesced free list (lowest address first), or NULL.
static free_list *free_list_head;
// Highest address ever committed; fresh allocations bump this upward,
// and decommitting the topmost range lowers it again.
static W_ mblock_high_watermark;
/*
 * it is quite important that these are in the same cache line as they
 * are both needed by HEAP_ALLOCED. Moreover, we need to ensure that they
 * don't share a cache line with anything else to prevent false sharing.
 */
struct mblock_address_range mblock_address_space = { 0, 0, {} };
105
106 static void *getAllocatedMBlock(free_list **start_iter, W_ startingAt)
107 {
108 free_list *iter;
109 W_ p = startingAt;
110
111 for (iter = *start_iter; iter != NULL; iter = iter->next)
112 {
113 if (p < iter->address)
114 break;
115
116 if (p == iter->address)
117 p += iter->size;
118 }
119
120 *start_iter = iter;
121
122 if (p >= mblock_high_watermark)
123 return NULL;
124
125 return (void*)p;
126 }
127
128 void * getFirstMBlock(void **state STG_UNUSED)
129 {
130 free_list *fake_state;
131 free_list **casted_state;
132
133 if (state)
134 casted_state = (free_list**)state;
135 else
136 casted_state = &fake_state;
137
138 *casted_state = free_list_head;
139 return getAllocatedMBlock(casted_state, mblock_address_space.begin);
140 }
141
142 void * getNextMBlock(void **state STG_UNUSED, void *mblock)
143 {
144 free_list *fake_state = free_list_head;
145 free_list **casted_state;
146
147 if (state)
148 casted_state = (free_list**)state;
149 else
150 casted_state = &fake_state;
151
152 return getAllocatedMBlock(casted_state, (W_)mblock + MBLOCK_SIZE);
153 }
154
/* Try to satisfy a request for @n contiguous mblocks from the free
 * list of previously decommitted ranges (first fit).  The request is
 * carved off the low end of the chosen range, which is re-committed.
 * Returns NULL if no free range is large enough.
 */
static void *getReusableMBlocks(uint32_t n)
{
    struct free_list *iter;
    W_ size = MBLOCK_SIZE * (W_)n;

    for (iter = free_list_head; iter != NULL; iter = iter->next) {
        void *addr;

        if (iter->size < size)
            continue;

        // Take the low end of this free range.
        addr = (void*)iter->address;
        iter->address += size;
        iter->size -= size;
        if (iter->size == 0) {
            // The range was consumed entirely: unlink and free the node.
            struct free_list *prev, *next;

            prev = iter->prev;
            next = iter->next;
            if (prev == NULL) {
                ASSERT(free_list_head == iter);
                free_list_head = next;
            } else {
                prev->next = next;
            }
            if (next != NULL) {
                next->prev = prev;
            }
            stgFree(iter);
        }

        // The memory was decommitted when freed, so commit it again.
        osCommitMemory(addr, size);
        return addr;
    }

    return NULL;
}
192
/* Allocate @n mblocks of never-used address space by bumping the high
 * watermark of the reserved region.  Terminates the program with a
 * heap-overflow exit code if the reserved space is exhausted.
 */
static void *getFreshMBlocks(uint32_t n)
{
    W_ size = MBLOCK_SIZE * (W_)n;
    void *addr = (void*)mblock_high_watermark;

    if (mblock_high_watermark + size > mblock_address_space.end)
    {
        // whoa, 1 TB of heap?
        errorBelch("out of memory");
        stg_exit(EXIT_HEAPOVERFLOW);
    }

    osCommitMemory(addr, size);
    mblock_high_watermark += size;
    return addr;
}
209
210 static void *getCommittedMBlocks(uint32_t n)
211 {
212 void *p;
213
214 p = getReusableMBlocks(n);
215 if (p == NULL) {
216 p = getFreshMBlocks(n);
217 }
218
219 ASSERT(p != NULL && p != (void*)-1);
220 return p;
221 }
222
/* Return @n mblocks starting at @addr to the free list, decommitting
 * the memory.  The free list is kept sorted by address and fully
 * coalesced, so we walk it to find the insertion point and merge with
 * adjacent ranges where possible; ranges that reach the high watermark
 * are given back by simply lowering the watermark.
 */
static void decommitMBlocks(char *addr, uint32_t n)
{
    struct free_list *iter, *prev;
    W_ size = MBLOCK_SIZE * (W_)n;
    W_ address = (W_)addr;

    osDecommitMemory(addr, size);

    prev = NULL;
    for (iter = free_list_head; iter != NULL; iter = iter->next)
    {
        prev = iter;

        // This free range lies strictly below the freed span: keep going.
        if (iter->address + iter->size < address)
            continue;

        if (iter->address + iter->size == address) {
            // The freed span extends this range upwards.
            iter->size += size;

            if (address + size == mblock_high_watermark) {
                // The merged range now touches the high watermark:
                // release the whole range by lowering the watermark
                // and dropping the node from the list.
                mblock_high_watermark -= iter->size;
                if (iter->prev) {
                    iter->prev->next = NULL;
                } else {
                    ASSERT(iter == free_list_head);
                    free_list_head = NULL;
                }
                stgFree(iter);
                return;
            }

            if (iter->next &&
                iter->next->address == iter->address + iter->size) {
                // The grown range now touches its successor: merge them.
                struct free_list *next;

                next = iter->next;
                iter->size += next->size;
                iter->next = next->next;

                if (iter->next) {
                    iter->next->prev = iter;

                    /* We don't need to consolidate more */
                    ASSERT(iter->next->address > iter->address + iter->size);
                }

                stgFree(next);
            }
            return;
        } else if (address + size == iter->address) {
            // The freed span extends this range downwards.
            iter->address = address;
            iter->size += size;

            /* We don't need to consolidate backwards
               (because otherwise it would have been handled by
               the previous iteration) */
            if (iter->prev) {
                ASSERT(iter->prev->address + iter->prev->size < iter->address);
            }
            return;
        } else {
            // Not adjacent to any existing range: insert a new node
            // just before iter to keep the list sorted.
            struct free_list *new_iter;

            /* All other cases have been handled */
            ASSERT(iter->address > address + size);

            new_iter = stgMallocBytes(sizeof(struct free_list), "freeMBlocks");
            new_iter->address = address;
            new_iter->size = size;
            new_iter->next = iter;
            new_iter->prev = iter->prev;
            if (new_iter->prev) {
                new_iter->prev->next = new_iter;
            } else {
                ASSERT(iter == free_list_head);
                free_list_head = new_iter;
            }
            iter->prev = new_iter;
            return;
        }
    }

    /* We're past the last free list entry, so we must
       be the highest allocation so far
     */
    ASSERT(address + size <= mblock_high_watermark);

    /* Fast path the case of releasing high or all memory */
    if (address + size == mblock_high_watermark) {
        mblock_high_watermark -= size;
    } else {
        // Append a new node at the tail of the free list.
        struct free_list *new_iter;

        new_iter = stgMallocBytes(sizeof(struct free_list), "freeMBlocks");
        new_iter->address = address;
        new_iter->size = size;
        new_iter->next = NULL;
        new_iter->prev = prev;
        if (new_iter->prev) {
            ASSERT(new_iter->prev->next == NULL);
            new_iter->prev->next = new_iter;
        } else {
            ASSERT(free_list_head == NULL);
            free_list_head = new_iter;
        }
    }
}
330
331 void releaseFreeMemory(void)
332 {
333 // This function exists for releasing address space
334 // on Windows 32 bit
335 //
336 // Do nothing if USE_LARGE_ADDRESS_SPACE, we never want
337 // to release address space
338
339 debugTrace(DEBUG_gc, "mblock_high_watermark: %p\n", mblock_high_watermark);
340 }
341
342 #else // !USE_LARGE_ADDRESS_SPACE
343
344 #if SIZEOF_VOID_P == 4
345 StgWord8 mblock_map[MBLOCK_MAP_SIZE]; // initially all zeros
346
/* Record in the flat 32-bit mblock map whether the mblock containing
 * @p belongs to the heap (@i = 1) or not (@i = 0). */
static void
setHeapAlloced(void *p, StgWord8 i)
{
    mblock_map[MBLOCK_MAP_ENTRY(p)] = i;
}
352
353 #elif SIZEOF_VOID_P == 8
354
355 MBlockMap **mblock_maps = NULL;
356
357 uint32_t mblock_map_count = 0;
358
359 MbcCacheLine mblock_cache[MBC_ENTRIES];
360
361 static MBlockMap *
362 findMBlockMap(const void *p)
363 {
364 uint32_t i;
365 StgWord32 hi = (StgWord32) (((StgWord)p) >> 32);
366 for( i = 0; i < mblock_map_count; i++ )
367 {
368 if(mblock_maps[i]->addrHigh32 == hi)
369 {
370 return mblock_maps[i];
371 }
372 }
373 return NULL;
374 }
375
/* Slow path of HEAP_ALLOCED(): called when the mblock cache has no
 * valid entry for @mblock.  Consults the full mblock map, refills the
 * corresponding cache slot (mblock tag shifted left by one, with the
 * HEAP_ALLOCED result in the low bit), and returns that result.
 * Counts each map consultation for an allocated region in mpc_misses.
 */
StgBool HEAP_ALLOCED_miss(StgWord mblock, const void *p)
{
    MBlockMap *map;
    MBlockMapLine value;
    uint32_t entry_no;

    entry_no = mblock & (MBC_ENTRIES-1);

    map = findMBlockMap(p);
    if (map)
    {
        mpc_misses++;
        value = map->lines[MBLOCK_MAP_LINE(p)];
        // cache slot holds (tag << 1) | result
        mblock_cache[entry_no] = (mblock<<1) | value;
        return value;
    }
    else
    {
        // p's region has no map at all: cache a negative result
        mblock_cache[entry_no] = (mblock<<1);
        return 0;
    }
}
398
/* Record in the 64-bit mblock map (and the lookup cache) whether the
 * mblock at @p is part of the heap (@i = 1) or not (@i = 0).  Creates
 * a new zeroed map on demand for a previously unseen 4GB region.
 */
static void
setHeapAlloced(void *p, StgWord8 i)
{
    MBlockMap *map = findMBlockMap(p);
    if(map == NULL)
    {
        // First mblock in this 4GB region: grow the array of maps and
        // allocate a zero-initialized map for it.  stgReallocBytes /
        // stgMallocBytes abort on failure, so no NULL checks here.
        mblock_map_count++;
        mblock_maps = stgReallocBytes(mblock_maps,
                                      sizeof(MBlockMap*) * mblock_map_count,
                                      "markHeapAlloced(1)");
        map = mblock_maps[mblock_map_count-1] =
            stgMallocBytes(sizeof(MBlockMap),"markHeapAlloced(2)");
        memset(map,0,sizeof(MBlockMap));
        map->addrHigh32 = (StgWord32) (((StgWord)p) >> 32);
    }

    map->lines[MBLOCK_MAP_LINE(p)] = i;

    {
        // Keep the mblock cache coherent with the map update:
        // slot holds (mblock tag << 1) | allocated bit.
        StgWord mblock;
        uint32_t entry_no;

        mblock = (StgWord)p >> MBLOCK_SHIFT;
        entry_no = mblock & (MBC_ENTRIES-1);
        mblock_cache[entry_no] = (mblock << 1) + i;
    }
}
426
427 #endif
428
/* Mark the mblock at @p as part of the heap in the HEAP_ALLOCED map. */
static void
markHeapAlloced(void *p)
{
    setHeapAlloced(p, 1);
}
434
/* Mark the mblock at @p as no longer part of the heap. */
static void
markHeapUnalloced(void *p)
{
    setHeapAlloced(p, 0);
}
440
441 #if SIZEOF_VOID_P == 4
442
/* Convert an index into the flat 32-bit mblock map back into the
 * corresponding mblock base address. */
STATIC_INLINE
void * mapEntryToMBlock(uint32_t i)
{
    return (void *)((StgWord)i << MBLOCK_SHIFT);
}
448
449 void * getFirstMBlock(void **state STG_UNUSED)
450 {
451 uint32_t i;
452
453 for (i = 0; i < MBLOCK_MAP_SIZE; i++) {
454 if (mblock_map[i]) return mapEntryToMBlock(i);
455 }
456 return NULL;
457 }
458
459 void * getNextMBlock(void **state STG_UNUSED, void *mblock)
460 {
461 uint32_t i;
462
463 for (i = MBLOCK_MAP_ENTRY(mblock) + 1; i < MBLOCK_MAP_SIZE; i++) {
464 if (mblock_map[i]) return mapEntryToMBlock(i);
465 }
466 return NULL;
467 }
468
469 #elif SIZEOF_VOID_P == 8
470
/* Return the next allocated mblock after @p by walking the per-4GB
 * mblock maps, starting from the map that contains @p.  Returns NULL
 * when no later allocated mblock exists.
 *
 * NOTE(review): maps are visited in creation order, not address
 * order, so "next" is relative to map order — confirm callers only
 * rely on visiting every mblock exactly once.
 */
void * getNextMBlock(void **state STG_UNUSED, void *p)
{
    MBlockMap *map;
    uint32_t off, j;
    uint32_t line_no;
    MBlockMapLine line;

    // Locate the map covering p's 4GB region; if p is not in any map
    // there is nothing to iterate from.
    for (j = 0; j < mblock_map_count; j++) {
        map = mblock_maps[j];
        if (map->addrHigh32 == (StgWord)p >> 32) break;
    }
    if (j == mblock_map_count) return NULL;

    for (; j < mblock_map_count; j++) {
        map = mblock_maps[j];
        if (map->addrHigh32 == (StgWord)p >> 32) {
            // In p's own map: resume the bit scan just after p itself.
            line_no = MBLOCK_MAP_LINE(p);
            off = (((StgWord)p >> MBLOCK_SHIFT) & (MBC_LINE_SIZE-1)) + 1;
            // + 1 because we want the *next* mblock
        } else {
            // In any subsequent map: scan from the very beginning.
            line_no = 0; off = 0;
        }
        for (; line_no < MBLOCK_MAP_ENTRIES; line_no++) {
            line = map->lines[line_no];
            for (; off < MBC_LINE_SIZE; off++) {
                if (line & (1<<off)) {
                    // Reconstruct the mblock address from region,
                    // line number and bit offset within the line.
                    return (void*)(((StgWord)map->addrHigh32 << 32) +
                            line_no * MBC_LINE_SIZE * MBLOCK_SIZE +
                            off * MBLOCK_SIZE);
                }
            }
            off = 0;
        }
    }
    return NULL;
}
507
508 void * getFirstMBlock(void **state STG_UNUSED)
509 {
510 MBlockMap *map = mblock_maps[0];
511 uint32_t line_no, off;
512 MbcCacheLine line;
513
514 for (line_no = 0; line_no < MBLOCK_MAP_ENTRIES; line_no++) {
515 line = map->lines[line_no];
516 if (line) {
517 for (off = 0; off < MBC_LINE_SIZE; off++) {
518 if (line & (1<<off)) {
519 return (void*)(((StgWord)map->addrHigh32 << 32) +
520 line_no * MBC_LINE_SIZE * MBLOCK_SIZE +
521 off * MBLOCK_SIZE);
522 }
523 }
524 }
525 }
526 return NULL;
527 }
528
529 #endif // SIZEOF_VOID_P == 8
530
531 static void *getCommittedMBlocks(uint32_t n)
532 {
533 // The OS layer returns committed memory directly
534 void *ret = osGetMBlocks(n);
535 uint32_t i;
536
537 // fill in the table
538 for (i = 0; i < n; i++) {
539 markHeapAlloced( (StgWord8*)ret + i * MBLOCK_SIZE );
540 }
541
542 return ret;
543 }
544
545 static void decommitMBlocks(void *p, uint32_t n)
546 {
547 osFreeMBlocks(p, n);
548 uint32_t i;
549
550 for (i = 0; i < n; i++) {
551 markHeapUnalloced( (StgWord8*)p + i * MBLOCK_SIZE );
552 }
553 }
554
/* Ask the OS layer to release address space associated with freed
 * mblocks (per the comment in the large-address-space variant, this
 * matters chiefly on Windows 32-bit). */
void releaseFreeMemory(void)
{
    osReleaseFreeMemory();
}
559
560 #endif /* !USE_LARGE_ADDRESS_SPACE */
561
562 /* -----------------------------------------------------------------------------
563 Allocate new mblock(s)
564 -------------------------------------------------------------------------- */
565
/* Allocate a single megablock; convenience wrapper for getMBlocks. */
void *
getMBlock(void)
{
    return getMBlocks(1);
}
571
572 // The external interface: allocate 'n' mblocks, and return the
573 // address.
574
575 void *
576 getMBlocks(uint32_t n)
577 {
578 void *ret;
579
580 ret = getCommittedMBlocks(n);
581
582 debugTrace(DEBUG_gc, "allocated %d megablock(s) at %p",n,ret);
583
584 mblocks_allocated += n;
585 peak_mblocks_allocated = stg_max(peak_mblocks_allocated, mblocks_allocated);
586
587 return ret;
588 }
589
/* Allocate @n megablocks and bind them to the OS NUMA node that
 * @node maps to via RtsFlags.GcFlags.numaMap.  When the DEBUG RTS is
 * only faking NUMA, the binding step is skipped. */
void *
getMBlocksOnNode(uint32_t node, uint32_t n)
{
    void *addr = getMBlocks(n);
#ifdef DEBUG
    if (RtsFlags.DebugFlags.numa) return addr; // faking NUMA
#endif
    osBindMBlocksToNode(addr, n * MBLOCK_SIZE, RtsFlags.GcFlags.numaMap[node]);
    return addr;
}
600
/* Allocate a single megablock on the given NUMA node. */
void *
getMBlockOnNode(uint32_t node)
{
    return getMBlocksOnNode(node, 1);
}
606
/* Free @n megablocks starting at @addr: update the allocation counter
 * and decommit the memory (returning it to the free list or the OS,
 * depending on the allocation scheme). */
void
freeMBlocks(void *addr, uint32_t n)
{
    debugTrace(DEBUG_gc, "freeing %d megablock(s) at %p",n,addr);

    mblocks_allocated -= n;

    decommitMBlocks(addr, n);
}
616
/* Release every megablock and all allocator metadata; used when the
 * RTS shuts down its heap. */
void
freeAllMBlocks(void)
{
    debugTrace(DEBUG_gc, "freeing all megablocks");

#ifdef USE_LARGE_ADDRESS_SPACE
    {
        // Free the malloc()'d free-list nodes; the heap memory itself
        // is released wholesale by osReleaseHeapMemory below.
        struct free_list *iter, *next;

        for (iter = free_list_head; iter != NULL; iter = next)
        {
            next = iter->next;
            stgFree(iter);
        }
    }

    osReleaseHeapMemory();

    // Poison the address-space bounds and watermark so that any later
    // query against them cannot match a real address.
    mblock_address_space.begin = (W_)-1;
    mblock_address_space.end = (W_)-1;
    mblock_high_watermark = (W_)-1;
#else
    osFreeAllMBlocks();

#if SIZEOF_VOID_P == 8
    // Free each per-4GB-region mblock map, then the array holding them.
    uint32_t n;
    for (n = 0; n < mblock_map_count; n++) {
        stgFree(mblock_maps[n]);
    }
    stgFree(mblock_maps);
#endif

#endif
}
651
/* One-time initialization of the megablock allocator: set up the OS
 * memory layer, then either reserve the large address-space region
 * (two-step allocation) or invalidate the 64-bit mblock cache. */
void
initMBlocks(void)
{
    osMemInit();

#ifdef USE_LARGE_ADDRESS_SPACE
    {
        W_ size;
#if aarch64_HOST_ARCH
        size = (W_)1 << 38; // 1/4 TByte
#else
        size = (W_)1 << 40; // 1 TByte
#endif
        // osReserveHeapMemory may shrink @size to what it could reserve.
        void *addr = osReserveHeapMemory(&size);

        mblock_address_space.begin = (W_)addr;
        mblock_address_space.end = (W_)addr + size;
        mblock_high_watermark = (W_)addr;
    }
#elif SIZEOF_VOID_P == 8
    // Fill the cache with all-ones entries so no slot matches a real
    // mblock tag; every first lookup takes the HEAP_ALLOCED_miss path.
    memset(mblock_cache,0xff,sizeof(mblock_cache));
#endif
}