Signals: Ensure libdw session is freed
[ghc.git] / rts / posix / OSMem.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The University of Glasgow 2006-2007
4 *
5 * OS-specific memory management
6 *
7 * ---------------------------------------------------------------------------*/
8
9 // This is non-posix compliant.
10 // #include "PosixSource.h"
11
12 #include "Rts.h"
13
14 #include "RtsUtils.h"
15 #include "sm/OSMem.h"
16 #include "sm/HeapAlloc.h"
17
18 #ifdef HAVE_UNISTD_H
19 #include <unistd.h>
20 #endif
21 #ifdef HAVE_SYS_TYPES_H
22 #include <sys/types.h>
23 #endif
24 #ifdef HAVE_SYS_MMAN_H
25 #include <sys/mman.h>
26 #endif
27 #ifdef HAVE_STRING_H
28 #include <string.h>
29 #endif
30 #ifdef HAVE_FCNTL_H
31 #include <fcntl.h>
32 #endif
33
34 #include <errno.h>
35
36 #if darwin_HOST_OS || ios_HOST_OS
37 #include <mach/mach.h>
38 #include <mach/vm_map.h>
39 #include <sys/sysctl.h>
40 #endif
41
// Address at which we will next try to mmap() a fresh run of megablocks.
// Seeded from RtsFlags in osMemInit(), and advanced past each block we
// hand out in osGetMBlocks() so successive requests tend to be contiguous
// (and therefore already MBLOCK_SIZE-aligned).
static caddr_t next_request = 0;
43
// One-time initialisation of the OS memory layer: start allocating at the
// user-configured heap base address (may be 0, in which case the first
// allocation falls through to the general gen_map_mblocks() path).
void osMemInit(void)
{
    next_request = (caddr_t)RtsFlags.GcFlags.heapBase;
}
48
49 /* -----------------------------------------------------------------------------
50 The mmap() method
51
52 On Unix-like systems, we use mmap() to allocate our memory. We
53 want memory in chunks of MBLOCK_SIZE, and aligned on an MBLOCK_SIZE
54 boundary. The mmap() interface doesn't give us this level of
55 control, so we have to use some heuristics.
56
57 In the general case, if we want a block of n megablocks, then we
58 allocate n+1 and trim off the slop from either side (using
59 munmap()) to get an aligned chunk of size n. However, the next
60 time we'll try to allocate directly after the previously allocated
61 chunk, on the grounds that this is aligned and likely to be free.
62 If it turns out that we were wrong, we have to munmap() and try
63 again using the general method.
64
65 Note on posix_memalign(): this interface is available on recent
66 systems and appears to provide exactly what we want. However, it
67 turns out not to be as good as our mmap() implementation, because
68 it wastes extra space (using double the address space, in a test on
69 x86_64/Linux). The problem seems to be that posix_memalign()
70 returns memory that can be free()'d, so the library must store
71 extra information along with the allocated block, thus messing up
72 the alignment. Hence, we don't use posix_memalign() for now.
73
74 -------------------------------------------------------------------------- */
75
76 /*
77 A wrapper around mmap(), to abstract away from OS differences in
78 the mmap() interface.
79
80 It supports the following operations:
81 - reserve: find a new chunk of available address space, and make it so
82 that we own it (no other library will get it), but don't actually
83 allocate memory for it
84 the addr is a hint for where to place the memory (and most
of the time the OS happily ignores it!)
86 - commit: given a chunk of address space that we know we own, make sure
87 there is some memory backing it
88 the addr is not a hint, it must point into previously reserved
89 address space, or bad things happen
90 - reserve&commit: do both at the same time
91
92 The naming is chosen from the Win32 API (VirtualAlloc) which does the
93 same thing and has done so forever, while support for this in Unix systems
94 has only been added recently and is hidden in the posix portability mess.
95 It is confusing because to get the reserve behavior we need MAP_NORESERVE
96 (which tells the kernel not to allocate backing space), but heh...
97 */
// Operation selector for my_mmap(); combinable as bit flags.  The
// reserve/commit terminology follows Win32 VirtualAlloc, as explained
// in the comment above.
enum
{
    MEM_RESERVE = 1,
    MEM_COMMIT = 2,
    MEM_RESERVE_AND_COMMIT = MEM_RESERVE | MEM_COMMIT
};
104
/*
 * Low-level allocation wrapper implementing the reserve/commit protocol
 * described above.
 *
 *   addr      placement hint (reserve) or exact target address (commit)
 *   size      number of bytes requested
 *   operation MEM_RESERVE, MEM_COMMIT, or MEM_RESERVE_AND_COMMIT
 *
 * On failure this function does not return: it calls stg_exit() for
 * out-of-memory conditions and barf() for unexpected mmap() errors.
 */
static void *
my_mmap (void *addr, W_ size, int operation)
{
    void *ret;

#if darwin_HOST_OS
    // Without MAP_FIXED, Apple's mmap ignores addr.
    // With MAP_FIXED, it overwrites already mapped regions, which is
    // not what we want.
    // mmap(0, ... MAP_FIXED ...) is worst of all: It unmaps the program text
    // and replaces it with zeroes, causing instant death.
    // This behaviour seems to be conformant with IEEE Std 1003.1-2001.
    // Let's just use the underlying Mach Microkernel calls directly,
    // they're much nicer.

    kern_return_t err = 0;
    ret = addr;

    if(operation & MEM_RESERVE)
    {
        if(addr) // try to allocate at address
            err = vm_allocate(mach_task_self(),(vm_address_t*) &ret,
                              size, FALSE);
        if(!addr || err) // try to allocate anywhere
            err = vm_allocate(mach_task_self(),(vm_address_t*) &ret,
                              size, TRUE);
    }

    if(err) {
        // don't know what the error codes mean exactly, assume it's
        // not our problem though.
        errorBelch("memory allocation failed (requested %" FMT_Word " bytes)",
                   size);
        stg_exit(EXIT_FAILURE);
    }

    if(operation & MEM_COMMIT) {
        // vm_allocate gives PROT_NONE-like reservations; committing here
        // means making the pages readable and writable.
        vm_protect(mach_task_self(), (vm_address_t)ret, size, FALSE,
                   VM_PROT_READ|VM_PROT_WRITE);
    }

#else

    // Reserve-only mappings get PROT_NONE + MAP_NORESERVE (no backing
    // store); committing re-maps with MAP_FIXED over the reservation to
    // attach read/write memory.
    int prot, flags;
    if (operation & MEM_COMMIT)
        prot = PROT_READ | PROT_WRITE;
    else
        prot = PROT_NONE;
    if (operation == MEM_RESERVE)
        flags = MAP_NORESERVE;
    else if (operation == MEM_COMMIT)
        flags = MAP_FIXED;
    else
        flags = 0;

#if defined(irix_HOST_OS)
    {
        // IRIX has no anonymous mmap; map /dev/zero instead when reserving.
        if (operation & MEM_RESERVE)
        {
            int fd = open("/dev/zero",O_RDONLY);
            ret = mmap(addr, size, prot, flags | MAP_PRIVATE, fd, 0);
            close(fd);
        }
        else
        {
            ret = mmap(addr, size, prot, flags | MAP_PRIVATE, -1, 0);
        }
    }
#elif hpux_HOST_OS
    ret = mmap(addr, size, prot, flags | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#elif linux_HOST_OS
    ret = mmap(addr, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0);
    if (ret == (void *)-1 && errno == EPERM) {
        // Linux may return EPERM if it tried to give us
        // a chunk of address space below mmap_min_addr,
        // See Trac #7500.
        if (addr != 0 && (operation & MEM_RESERVE)) {
            // Try again with no hint address.
            // It's not clear that this can ever actually help,
            // but since our alternative is to abort, we may as well try.
            ret = mmap(0, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0);
        }
        if (ret == (void *)-1 && errno == EPERM) {
            // Linux is not willing to give us any mapping,
            // so treat this as an out-of-memory condition
            // (really out of virtual address space).
            errno = ENOMEM;
        }
    }
#else
    ret = mmap(addr, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0);
#endif
#endif

    if (ret == (void *)-1) {
        if (errno == ENOMEM ||
            (errno == EINVAL && sizeof(void*)==4 && size >= 0xc0000000)) {
            // If we request more than 3Gig, then we get EINVAL
            // instead of ENOMEM (at least on Linux).
            errorBelch("out of memory (requested %" FMT_Word " bytes)", size);
            stg_exit(EXIT_FAILURE);
        } else {
            barf("getMBlock: mmap: %s", strerror(errno));
        }
    }

    return ret;
}
212
// Implements the general case: allocate a chunk of memory of 'size'
// bytes (a whole number of megablocks -- note: bytes, not an mblock
// count; the caller in osGetMBlocks() passes MBLOCK_SIZE * n), aligned
// on an MBLOCK_SIZE boundary.

static void *
gen_map_mblocks (W_ size)
{
    int slop;
    StgWord8 *ret;

    // Try to map a larger block, and take the aligned portion from
    // it (unmap the rest).
    size += MBLOCK_SIZE;
    ret = my_mmap(0, size, MEM_RESERVE_AND_COMMIT);

    // unmap the slop bits around the chunk we allocated:
    // 'slop' is how far past an mblock boundary the mapping starts.
    slop = (W_)ret & MBLOCK_MASK;

    // Front slop: [ret, ret + MBLOCK_SIZE - slop).  If slop == 0 the
    // mapping was already aligned and this releases the entire extra
    // megablock from the front.
    if (munmap((void*)ret, MBLOCK_SIZE - slop) == -1) {
      barf("gen_map_mblocks: munmap failed");
    }
    // Back slop: the final 'slop' bytes, [ret + size - slop, ret + size),
    // only present when the mapping was misaligned.
    if (slop > 0 && munmap((void*)(ret+size-slop), slop) == -1) {
      barf("gen_map_mblocks: munmap failed");
    }

    // ToDo: if we happened to get an aligned block, then don't
    // unmap the excess, just use it. For this to work, you
    // need to keep in mind the following:
    // * Calling my_mmap() with an 'addr' arg pointing to
    // already my_mmap()ed space is OK and won't fail.
    // * If my_mmap() can't satisfy the request at the
    // given 'next_request' address in getMBlocks(), that
    // you unmap the extra mblock mmap()ed here (or simply
    // satisfy yourself that the slop introduced isn't worth
    // salvaging.)
    //

    // next time, try after the block we just got.
    ret += MBLOCK_SIZE - slop;
    return ret;
}
253
254 void *
255 osGetMBlocks(nat n)
256 {
257 caddr_t ret;
258 W_ size = MBLOCK_SIZE * (W_)n;
259
260 if (next_request == 0) {
261 // use gen_map_mblocks the first time.
262 ret = gen_map_mblocks(size);
263 } else {
264 ret = my_mmap(next_request, size, MEM_RESERVE_AND_COMMIT);
265
266 if (((W_)ret & MBLOCK_MASK) != 0) {
267 // misaligned block!
268 #if 0 // defined(DEBUG)
269 errorBelch("warning: getMBlock: misaligned block %p returned "
270 "when allocating %d megablock(s) at %p",
271 ret, n, next_request);
272 #endif
273
274 // unmap this block...
275 if (munmap(ret, size) == -1) {
276 barf("getMBlock: munmap failed");
277 }
278 // and do it the hard way
279 ret = gen_map_mblocks(size);
280 }
281 }
282 // Next time, we'll try to allocate right after the block we just got.
283 // ToDo: check that we haven't already grabbed the memory at next_request
284 next_request = ret + size;
285
286 return ret;
287 }
288
289 void osFreeMBlocks(char *addr, nat n)
290 {
291 munmap(addr, n * MBLOCK_SIZE);
292 }
293
// Hook for returning retained free memory to the OS.  On POSIX,
// osFreeMBlocks() already munmap()s freed megablocks directly, so
// there is nothing left to release here.
void osReleaseFreeMemory(void) {
    /* Nothing to do on POSIX */
}
297
298 void osFreeAllMBlocks(void)
299 {
300 void *mblock;
301 void *state;
302
303 for (mblock = getFirstMBlock(&state);
304 mblock != NULL;
305 mblock = getNextMBlock(&state, mblock)) {
306 munmap(mblock, MBLOCK_SIZE);
307 }
308 }
309
310 W_ getPageSize (void)
311 {
312 static W_ pageSize = 0;
313 if (pageSize) {
314 return pageSize;
315 } else {
316 long ret;
317 ret = sysconf(_SC_PAGESIZE);
318 if (ret == -1) {
319 barf("getPageSize: cannot get page size");
320 }
321 pageSize = ret;
322 return ret;
323 }
324 }
325
/* Returns the total physical memory of the machine in bytes, caching
   the answer after the first successful query.
   Returns 0 if physical memory size cannot be identified */
StgWord64 getPhysicalMemorySize (void)
{
    static StgWord64 physMemSize = 0;
    if (!physMemSize) {
#if defined(darwin_HOST_OS) || defined(ios_HOST_OS)
        /* So, darwin doesn't support _SC_PHYS_PAGES, but it does
           support getting the raw memory size in bytes through
           sysctlbyname(hw.memsize); */
        size_t len = sizeof(physMemSize);
        int ret = -1;

        /* Note hw.memsize is in bytes, so no need to multiply by page size. */
        ret = sysctlbyname("hw.memsize", &physMemSize, &len, NULL, 0);
        if (ret == -1) {
            physMemSize = 0;
            return 0;
        }
#else
        /* We'll politely assume we have a system supporting _SC_PHYS_PAGES
         * otherwise. */
        W_ pageSize = getPageSize();
        long ret = sysconf(_SC_PHYS_PAGES);
        if (ret == -1) {
#if defined(DEBUG)
            errorBelch("warning: getPhysicalMemorySize: cannot get "
                       "physical memory size");
#endif
            /* Failure is not cached: a later call will retry sysconf. */
            return 0;
        }
        physMemSize = ret * pageSize;
#endif /* darwin_HOST_OS */
    }
    return physMemSize;
}
361
362 void setExecutable (void *p, W_ len, rtsBool exec)
363 {
364 StgWord pageSize = getPageSize();
365
366 /* malloced memory isn't executable by default on OpenBSD */
367 StgWord mask = ~(pageSize - 1);
368 StgWord startOfFirstPage = ((StgWord)p ) & mask;
369 StgWord startOfLastPage = ((StgWord)p + len - 1) & mask;
370 StgWord size = startOfLastPage - startOfFirstPage + pageSize;
371 if (mprotect((void*)startOfFirstPage, (size_t)size,
372 (exec ? PROT_EXEC : 0) | PROT_READ | PROT_WRITE) != 0) {
373 barf("setExecutable: failed to protect 0x%p\n", p);
374 }
375 }
376
377 #ifdef USE_LARGE_ADDRESS_SPACE
378
379 static void *
380 osTryReserveHeapMemory (W_ len, void *hint)
381 {
382 void *base, *top;
383 void *start, *end;
384
385 /* We try to allocate len + MBLOCK_SIZE,
386 because we need memory which is MBLOCK_SIZE aligned,
387 and then we discard what we don't need */
388
389 base = my_mmap(hint, len + MBLOCK_SIZE, MEM_RESERVE);
390 top = (void*)((W_)base + len + MBLOCK_SIZE);
391
392 if (((W_)base & MBLOCK_MASK) != 0) {
393 start = MBLOCK_ROUND_UP(base);
394 end = MBLOCK_ROUND_DOWN(top);
395 ASSERT(((W_)end - (W_)start) == len);
396
397 if (munmap(base, (W_)start-(W_)base) < 0) {
398 sysErrorBelch("unable to release slop before heap");
399 }
400 if (munmap(end, (W_)top-(W_)end) < 0) {
401 sysErrorBelch("unable to release slop after heap");
402 }
403 } else {
404 start = base;
405 }
406
407 return start;
408 }
409
410 void *osReserveHeapMemory(W_ len)
411 {
412 int attempt;
413 void *at;
414
415 /* We want to ensure the heap starts at least 8 GB inside the address space,
416 to make sure that any dynamically loaded code will be close enough to the
417 original code so that short relocations will work. This is in particular
418 important on Darwin/Mach-O, because object files not compiled as shared
419 libraries are position independent but cannot be loaded about 4GB.
420
421 We do so with a hint to the mmap, and we verify the OS satisfied our
422 hint. We loop a few times in case there is already something allocated
423 there, but we bail if we cannot allocate at all.
424 */
425
426 attempt = 0;
427 do {
428 void *hint = (void*)((W_)8 * (1 << 30) + attempt * BLOCK_SIZE);
429 at = osTryReserveHeapMemory(len, hint);
430 } while ((W_)at < ((W_)8 * (1 << 30)));
431
432 return at;
433 }
434
// Commit (attach real read/write memory to) a range that was previously
// reserved with osReserveHeapMemory().  No error check is needed here:
// my_mmap() terminates the program itself on failure.
void osCommitMemory(void *at, W_ size)
{
    my_mmap(at, size, MEM_COMMIT);
}
439
440 void osDecommitMemory(void *at, W_ size)
441 {
442 int r;
443
444 // First make the memory unaccessible (so that we get a segfault
445 // at the next attempt to touch it)
446 // We only do this in DEBUG because it forces the OS to remove
447 // all MMU entries for this page range, and there is no reason
448 // to do so unless there is memory pressure
449 #ifdef DEBUG
450 r = mprotect(at, size, PROT_NONE);
451 if(r < 0)
452 sysErrorBelch("unable to make released memory unaccessible");
453 #endif
454
455 #ifdef MADV_FREE
456 // Try MADV_FREE first, FreeBSD has both and MADV_DONTNEED
457 // just swaps memory out
458 r = madvise(at, size, MADV_FREE);
459 #else
460 r = madvise(at, size, MADV_DONTNEED);
461 #endif
462 if(r < 0)
463 sysErrorBelch("unable to decommit memory");
464 }
465
466 void osReleaseHeapMemory(void)
467 {
468 int r;
469
470 r = munmap((void*)mblock_address_space.begin,
471 mblock_address_space.end - mblock_address_space.begin);
472 if(r < 0)
473 sysErrorBelch("unable to release address space");
474 }
475
476 #endif