/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-1999
 *
 * MegaBlock Allocator Interface.  This file contains all the dirty
 * architecture-dependent hackery required to get a chunk of aligned
 * memory from the operating system.
 *
 * ---------------------------------------------------------------------------*/

/* This is non-posix compliant. */
/* #include "PosixSource.h" */

#include "Rts.h"
#include "RtsUtils.h"
#include "RtsFlags.h"
#include "MBlock.h"
#include "BlockAlloc.h"
#include "Trace.h"

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifndef mingw32_HOST_OS
# ifdef HAVE_SYS_MMAN_H
# include <sys/mman.h>
# endif
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#if HAVE_WINDOWS_H
#include <windows.h>
#endif
#if darwin_HOST_OS
#include <mach/vm_map.h>
#endif

#include <errno.h>

lnat mblocks_allocated = 0;

/* -----------------------------------------------------------------------------
   The MBlock Map: provides our implementation of HEAP_ALLOCED()
   -------------------------------------------------------------------------- */
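
/* A sketch of how HEAP_ALLOCED() resolves on the two word sizes (the
   exact macros live in MBlock.h; this is illustrative, not normative):

     32-bit: one flat byte map indexed by megablock number, i.e.
             mblock_map[(StgWord)p >> MBLOCK_SHIFT].
     64-bit: a flat map would be far too large, so we keep one MBlockMap
             per 4GB region (keyed by the high 32 bits of the address),
             cache the most recently used map in mblock_cache, and fall
             back to slowIsHeapAlloced() below on a cache miss.
*/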

#if SIZEOF_VOID_P == 4
StgWord8 mblock_map[MBLOCK_MAP_SIZE]; // initially all zeros
#elif SIZEOF_VOID_P == 8
static MBlockMap dummy_mblock_map;
MBlockMap *mblock_cache = &dummy_mblock_map;
int mblock_map_count = 0;
MBlockMap **mblock_maps = NULL;

static MBlockMap *
findMBlockMap(void *p)
{
    int i;
    StgWord32 hi = (StgWord32) (((StgWord)p) >> 32);
    for( i = 0; i < mblock_map_count; i++ )
    {
        if(mblock_maps[i]->addrHigh32 == hi)
        {
            return mblock_maps[i];
        }
    }
    return NULL;
}

StgBool
slowIsHeapAlloced(void *p)
{
    MBlockMap *map = findMBlockMap(p);
    if(map)
    {
        mblock_cache = map;
        return map->mblocks[MBLOCK_MAP_ENTRY(p)];
    }
    else
        return 0;
}
#endif

static void
markHeapAlloced(void *p)
{
#if SIZEOF_VOID_P == 4
    mblock_map[MBLOCK_MAP_ENTRY(p)] = 1;
#elif SIZEOF_VOID_P == 8
    MBlockMap *map = findMBlockMap(p);
    if(map == NULL)
    {
        mblock_map_count++;
        mblock_maps = realloc(mblock_maps,
                              sizeof(MBlockMap*) * mblock_map_count);
        map = mblock_maps[mblock_map_count-1] = calloc(1,sizeof(MBlockMap));
        map->addrHigh32 = (StgWord32) (((StgWord)p) >> 32);
    }
    map->mblocks[MBLOCK_MAP_ENTRY(p)] = 1;
    mblock_cache = map;
#endif
}

/* -----------------------------------------------------------------------------
   Allocate new mblock(s)
   -------------------------------------------------------------------------- */

void *
getMBlock(void)
{
    return getMBlocks(1);
}
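
/* Usage sketch (hypothetical caller; the real client is the block
   allocator in BlockAlloc.c): every chunk returned is MBLOCK_SIZE bytes
   per megablock and MBLOCK_SIZE-aligned, so callers may rely on the
   alignment invariant:

       void *mb = getMBlocks(1);               // one aligned megablock
       ASSERT(((W_)mb & MBLOCK_MASK) == 0);    // holds on every return
*/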

/* -----------------------------------------------------------------------------
   The mmap() method

   On Unix-like systems, we use mmap() to allocate our memory.  We
   want memory in chunks of MBLOCK_SIZE, and aligned on an MBLOCK_SIZE
   boundary.  The mmap() interface doesn't give us this level of
   control, so we have to use some heuristics.

   In the general case, if we want a block of n megablocks, then we
   allocate n+1 and trim off the slop from either side (using
   munmap()) to get an aligned chunk of size n.  The next time round,
   however, we try to allocate directly after the previously allocated
   chunk, on the grounds that this address is aligned and likely to be
   free.  If it turns out that we were wrong, we have to munmap() and
   try again using the general method.
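
   A worked example (illustrative; assuming MBLOCK_SIZE = 0x100000 and
   hence MBLOCK_MASK = 0xfffff): to get one aligned megablock we mmap()
   two, and suppose the OS hands back p = 0x50234000.  Then

       slop = p & MBLOCK_MASK = 0x34000

   so we munmap() [p, p + MBLOCK_SIZE - slop) at the front and
   [p + 0x200000 - slop, p + 0x200000) at the back, leaving exactly one
   aligned megablock at p + MBLOCK_SIZE - slop = 0x50300000.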

   Note on posix_memalign(): this interface is available on recent
   systems and appears to provide exactly what we want.  However, it
   turns out not to be as good as our mmap() implementation, because
   it wastes extra space (using double the address space, in a test on
   x86_64/Linux).  The problem seems to be that posix_memalign()
   returns memory that can be free()'d, so the library must store
   extra information along with the allocated block, thus messing up
   the alignment.  Hence, we don't use posix_memalign() for now.

   -------------------------------------------------------------------------- */

#if !defined(mingw32_HOST_OS) && !defined(cygwin32_HOST_OS)

// A wrapper around mmap(), to abstract away from OS differences in
// the mmap() interface.

static void *
my_mmap (void *addr, lnat size)
{
    void *ret;

#if defined(solaris2_HOST_OS) || defined(irix_HOST_OS)
    {
        int fd = open("/dev/zero",O_RDONLY);
        ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        close(fd);
    }
#elif hpux_HOST_OS
    ret = mmap(addr, size, PROT_READ | PROT_WRITE,
               MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#elif darwin_HOST_OS
    // Without MAP_FIXED, Apple's mmap ignores addr.
    // With MAP_FIXED, it overwrites already mapped regions, which is
    // dangerous.  mmap(0, ... MAP_FIXED ...) is worst of all: it unmaps
    // the program text and replaces it with zeroes, causing instant
    // death.
    // This behaviour seems to be conformant with IEEE Std 1003.1-2001.
    // Let's just use the underlying Mach Microkernel calls directly,
    // they're much nicer.

    kern_return_t err;
    ret = addr;
    if(addr) // try to allocate at address
        err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, FALSE);
    if(!addr || err) // try to allocate anywhere
        err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, TRUE);

    if(err) {
        // don't know what the error codes mean exactly; assume it's
        // not our problem, though.
        errorBelch("memory allocation failed (requested %lu bytes)", size);
        stg_exit(EXIT_FAILURE);
    } else {
        vm_protect(mach_task_self(),ret,size,FALSE,VM_PROT_READ|VM_PROT_WRITE);
    }
#else
    ret = mmap(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
               MAP_ANON | MAP_PRIVATE, -1, 0);
#endif

    if (ret == (void *)-1) {
        if (errno == ENOMEM ||
            (errno == EINVAL && sizeof(void*)==4 && size >= 0xc0000000)) {
            // If we request more than 3Gig, then we get EINVAL
            // instead of ENOMEM (at least on Linux).
            errorBelch("out of memory (requested %lu bytes)", size);
            stg_exit(EXIT_FAILURE);
        } else {
            barf("getMBlock: mmap: %s", strerror(errno));
        }
    }

    return ret;
}

// Implements the general case: allocate a chunk of memory of 'size'
// mblocks.

static void *
gen_map_mblocks (lnat size)
{
    int slop;
    void *ret;

    // Try to map a larger block, and take the aligned portion from
    // it (unmap the rest).
    size += MBLOCK_SIZE;
    ret = my_mmap(0, size);

    // unmap the slop bits around the chunk we allocated
    slop = (W_)ret & MBLOCK_MASK;

    if (munmap(ret, MBLOCK_SIZE - slop) == -1) {
        barf("gen_map_mblocks: munmap failed");
    }
    if (slop > 0 && munmap(ret+size-slop, slop) == -1) {
        barf("gen_map_mblocks: munmap failed");
    }

    // ToDo: if we happened to get an aligned block, then don't
    // unmap the excess, just use it.  For this to work, you
    // need to keep in mind the following:
    //  * Calling my_mmap() with an 'addr' arg pointing to
    //    already my_mmap()ed space is OK and won't fail.
    //  * If my_mmap() can't satisfy the request at the
    //    given 'next_request' address in getMBlocks(), make sure
    //    you unmap the extra mblock mmap()ed here (or simply
    //    satisfy yourself that the slop introduced isn't worth
    //    salvaging).
    //

    // next time, try after the block we just got.
    ret += MBLOCK_SIZE - slop;
    return ret;
}


// The external interface: allocate 'n' mblocks, and return the
// address.

void *
getMBlocks(nat n)
{
    static caddr_t next_request = (caddr_t)HEAP_BASE;
    caddr_t ret;
    lnat size = MBLOCK_SIZE * n;
    nat i;

    if (next_request == 0) {
        // use gen_map_mblocks the first time.
        ret = gen_map_mblocks(size);
    } else {
        ret = my_mmap(next_request, size);

        if (((W_)ret & MBLOCK_MASK) != 0) {
            // misaligned block!
#if 0 // defined(DEBUG)
            errorBelch("warning: getMBlock: misaligned block %p returned when allocating %d megablock(s) at %p", ret, n, next_request);
#endif

            // unmap this block...
            if (munmap(ret, size) == -1) {
                barf("getMBlock: munmap failed");
            }
            // and do it the hard way
            ret = gen_map_mblocks(size);
        }
    }

    // Next time, we'll try to allocate right after the block we just got.
    // ToDo: check that we haven't already grabbed the memory at next_request
    next_request = ret + size;

    debugTrace(DEBUG_gc, "allocated %d megablock(s) at %p",n,ret);

    // fill in the table
    for (i = 0; i < n; i++) {
        markHeapAlloced( ret + i * MBLOCK_SIZE );
    }

    mblocks_allocated += n;

    return ret;
}

void
freeAllMBlocks(void)
{
    /* XXX Do something here */
}

#else /* defined(mingw32_HOST_OS) || defined(cygwin32_HOST_OS) */

/*
   On Win32 platforms we make use of the two-phased virtual memory API
   to allocate mega blocks.  We proceed as follows:

   Reserve a large chunk of VM (256M at present, or what the user asked
   for via the -M option), but don't supply a base address that's aligned
   on a MB boundary.  Instead we round up to the nearest mblock boundary
   inside the chunk of VM we're handed back from the OS (at the moment we
   just leave the 'slop' at the beginning of the reserved chunk unused -
   ToDo: reuse it).

   Reserving memory doesn't allocate physical storage (not even in the
   page file); this is done later on by committing pages (or mega-blocks
   in our case).
*/
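
/* Worked example (illustrative; assuming MBLOCK_SIZE = 0x100000 and
   hence MBLOCK_MASK = 0xfffff): if VirtualAlloc reserves a region
   starting at 0x00234000, we round up to the first mblock boundary
   inside it:

       base_mblocks = (0x00234000 & ~0xfffff) + 0x100000 = 0x00300000

   and the 0xcc000 bytes of slop at the front stay reserved but unused. */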

static char* base_non_committed = (char*)0;
static char* end_non_committed = (char*)0;

static void *membase;

/* Default is to reserve 256M of VM to minimise the slop cost. */
#define SIZE_RESERVED_POOL  ( 256 * 1024 * 1024 )

/* Number of bytes reserved */
static unsigned long size_reserved_pool = SIZE_RESERVED_POOL;

void *
getMBlocks(nat n)
{
    static char* base_mblocks = (char*)0;
    static char* next_request = (char*)0;
    void* ret = (void*)0;
    nat i;

    lnat size = MBLOCK_SIZE * n;

    if ( (base_non_committed == 0) || (next_request + size > end_non_committed) ) {
        if (base_non_committed) {
            /* Tacky, but if no user-provided -M option is in effect,
             * set it to the default (==256M) in time for the heap overflow PSA.
             */
            if (RtsFlags.GcFlags.maxHeapSize == 0) {
                RtsFlags.GcFlags.maxHeapSize = size_reserved_pool / BLOCK_SIZE;
            }
            heapOverflow();
        }
        if (RtsFlags.GcFlags.maxHeapSize != 0) {
            size_reserved_pool = BLOCK_SIZE * RtsFlags.GcFlags.maxHeapSize;
            if (size_reserved_pool < MBLOCK_SIZE) {
                size_reserved_pool = 2*MBLOCK_SIZE;
            }
        }
        base_non_committed = VirtualAlloc ( NULL
                                          , size_reserved_pool
                                          , MEM_RESERVE
                                          , PAGE_READWRITE
                                          );
        membase = base_non_committed;
        if ( base_non_committed == 0 ) {
            errorBelch("getMBlocks: VirtualAlloc MEM_RESERVE %lu failed with: %ld\n", size_reserved_pool, GetLastError());
            ret=(void*)-1;
        } else {
            end_non_committed = (char*)base_non_committed + (unsigned long)size_reserved_pool;
            /* The returned pointer is not aligned on a mega-block boundary.  Round it up. */
            base_mblocks = (char*)((unsigned long)base_non_committed & (unsigned long)~MBLOCK_MASK) + MBLOCK_SIZE;
# if 0
            debugBelch("getMBlocks: Dropping %d bytes off of 256M chunk\n",
                       (unsigned)base_mblocks - (unsigned)base_non_committed);
# endif

            if ( ((char*)base_mblocks + size) > end_non_committed ) {
                debugBelch("getMBlocks: oops, reserved too small a region to start with.");
                ret=(void*)-1;
            } else {
                next_request = base_mblocks;
            }
        }
    }
    /* Commit the mega block(s) to phys mem */
    if ( ret != (void*)-1 ) {
        ret = VirtualAlloc(next_request, size, MEM_COMMIT, PAGE_READWRITE);
        if (ret == NULL) {
            debugBelch("getMBlocks: VirtualAlloc MEM_COMMIT %lu failed with: %ld\n", size, GetLastError());
            ret=(void*)-1;
        }
    }

    // Check the failure sentinel first: (void*)-1 would otherwise trip
    // the alignment test below and produce a misleading error message.
    if (ret == (void*)-1) {
        barf("getMBlocks: unknown memory allocation failure on Win32.");
    }

    if (((W_)ret & MBLOCK_MASK) != 0) {
        barf("getMBlocks: misaligned block returned");
    }

    debugTrace(DEBUG_gc, "allocated %d megablock(s) at 0x%x",n,(nat)ret);
    next_request = (char*)next_request + size;

    mblocks_allocated += n;

    // fill in the table
    for (i = 0; i < n; i++) {
        markHeapAlloced( ret + i * MBLOCK_SIZE );
    }

    return ret;
}

void
freeAllMBlocks(void)
{
    BOOL rc;

    rc = VirtualFree(membase, 0, MEM_RELEASE);

    if (rc == FALSE) {
        debugBelch("freeAllMBlocks: VirtualFree failed with: %ld\n", GetLastError());
    }
}

/* Hand back the physical memory that is allocated to a mega-block.
   ToDo: chain the released mega-block onto some list so that
         getMBlocks() can get at it.

   Currently unused.
*/
#if 0
void
freeMBlock(void* p, nat n)
{
    BOOL rc;

    rc = VirtualFree(p, n * MBLOCK_SIZE , MEM_DECOMMIT );

    if (rc == FALSE) {
# ifdef DEBUG
        debugBelch("freeMBlock: VirtualFree failed with: %ld\n", GetLastError());
# endif
    }

}
#endif

#endif