/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2000-2012
 *
 * RTS Object Linker
 *
 * ---------------------------------------------------------------------------*/

#include "Rts.h"
#include "sm/OSMem.h"
#include "linker/M32Alloc.h"

#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/*

Note [Compile Time Trickery]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This file implements two versions of each of the `m32_*` functions. At the top
of the file there is the real implementation (compiled in when
`RTS_LINKER_USE_MMAP` is true) and a dummy implementation that exists only to
satisfy the compiler and which should never be called. If any of these dummy
implementations is called, the program will abort.

The rationale for this is to allow the calling code to be written without
resorting to C pre-processor (CPP) `#if` hackery. Since the value of
`RTS_LINKER_USE_MMAP` is known at compile time, code like:

    if (RTS_LINKER_USE_MMAP)
        m32_allocator_init();

will be compiled to a call to `m32_allocator_init` if `RTS_LINKER_USE_MMAP` is
true and will be optimised away to nothing if `RTS_LINKER_USE_MMAP` is false.
However, regardless of the value of `RTS_LINKER_USE_MMAP`, the compiler will
still check the call for syntax and correct function parameter types.

*/

#if RTS_LINKER_USE_MMAP == 1

/*

Note [M32 Allocator]
~~~~~~~~~~~~~~~~~~~~

A memory allocator that allocates only pages in the 32-bit range (lower 2GB).
This is useful on 64-bit platforms to ensure that addresses of allocated
objects can be referenced with a 32-bit relative offset.

Initially, the linker used `mmap` to allocate one page per object, which
wasted a lot of space for small objects (see #9314). With this allocator, we
try to fill pages as much as we can for small objects.

How does it work?
-----------------

For small objects, a Word64 counter is added at the beginning of the page they
are stored in. It indicates the number of objects that are still alive in the
page. When the counter drops to zero, the page is freed. The counter is
decremented atomically, hence deallocation is thread-safe.
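
For example, the layout of a small-object page looks roughly like this (the
padding after the counter depends on the alignment requested for the first
object in the page):

    +----------------+---------+----------+----------+-----
    | Word64 counter | padding | object 1 | object 2 | ...
    +----------------+---------+----------+----------+-----
    ^ page base (page-aligned)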

During the allocation phase, the allocator keeps track of some pages that are
not totally filled: the number of pages in this "filling" list is configurable
with M32_MAX_PAGES. Allocation consists of finding some place in one of these
pages or starting a new one, then increasing the page counter. If none of the
pages in the "filling" list has enough free space, the most filled one is
flushed (see below) and a new one is allocated.

The allocator holds a reference on pages in the "filling" list: the counter in
these pages is 1+n where n is the current number of objects allocated in the
page. Hence allocated objects can be freed while the allocator is using
(filling) the page. Flushing a page consists of decrementing its counter and
removing it from the "filling" list. By extension, flushing the allocator
consists of flushing all the pages in the "filling" list. Don't forget to
flush the allocator at the end of the allocation phase in order to avoid space
leaks!
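
For example, an allocation phase looks roughly like this (a sketch of
hypothetical calling code, not the actual linker code):

    m32_allocator_init();
    void * obj = m32_alloc(64, 16);  // small object: placed in a shared page
    // ... fill `obj` ...
    m32_allocator_flush();           // drop the allocator's page references
    // ... later, when the object is no longer needed ...
    m32_free(obj, 64);               // page unmapped once its counter drops to 0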

Large objects are objects that are larger than a page (minus the bytes required
for the counter and the optional padding). These objects are allocated into
their own set of pages. We can differentiate large and small objects from
their address: large objects are aligned on page size while small objects never
are (because of the space reserved for the page's object counter).

For large objects, the remaining space at the end of the last page is left
unused by the allocator. It can be used with care as it will be freed with the
associated large object. The GHC linker uses this feature/hack, hence changing
the implementation of the M32 allocator must be done with care (i.e. do not try
to improve the allocator to avoid wasting this space without modifying the
linker code accordingly).

Object allocation is *not* thread-safe (however it could be made so easily with
a lock in the allocator structure). Object deallocation is thread-safe.

*/

// Round x up/down to a multiple of `size`, which must be a power of two.
#define ROUND_UP(x,size) (((x) + (size) - 1) & ~((size) - 1))
#define ROUND_DOWN(x,size) ((x) & ~((size) - 1))
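// For example: ROUND_UP(10, 8) == 16 and ROUND_DOWN(10, 8) == 8, but
// ROUND_UP(10, 6) is *not* 12, since 6 is not a power of two.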

/****************************************************************************
 * M32 ALLOCATOR (see Note [M32 Allocator])
 ***************************************************************************/

// Maximum number of pages in the "filling" list
#define M32_MAX_PAGES 32
// Size of the per-page object counter (a Word64)
#define M32_REFCOUNT_BYTES 8


/**
 * An allocated page being filled by the allocator
 */
struct m32_alloc_t {
   void * base_addr;     // Page address
   size_t current_size;  // Number of bytes already reserved
};

/**
 * Allocator
 *
 * Currently an allocator is just a set of pages being filled. The maximum
 * number of pages can be configured with M32_MAX_PAGES.
 */
typedef struct m32_allocator_t {
   struct m32_alloc_t pages[M32_MAX_PAGES];
} m32_allocator;

// We use a global memory allocator
static struct m32_allocator_t alloc;

/**
 * Wrapper for `munmap` that handles error cases.
 * This is the real implementation. There is another dummy implementation below.
 * See the note titled "Compile Time Trickery" at the top of this file.
 */
static void
munmapForLinker (void * addr, size_t size)
{
   int r = munmap(addr,size);
   if (r == -1) {
      // Should we abort here?
      sysErrorBelch("munmap");
   }
}

/**
 * Initialize the allocator structure
 * This is the real implementation. There is another dummy implementation below.
 * See the note titled "Compile Time Trickery" at the top of this file.
 */
void
m32_allocator_init(void)
{
   memset(&alloc, 0, sizeof(struct m32_allocator_t));
   // Preallocate the initial M32_MAX_PAGES to ensure that they don't
   // fragment the memory.
   size_t pgsz = getPageSize();
   char* bigchunk = mmapForLinker(pgsz * M32_MAX_PAGES,MAP_ANONYMOUS,-1,0);
   // mmapForLinker can return NULL (m32_alloc below checks for this too)
   if (bigchunk == NULL) {
      barf("m32_allocator_init: failed to map initial pages");
   }
   int i;
   for (i=0; i<M32_MAX_PAGES; i++) {
      alloc.pages[i].base_addr = bigchunk + i*pgsz;
      // Counter is 1: the allocator itself holds a reference to the page
      // (see Note [M32 Allocator]).
      *((uintptr_t*)alloc.pages[i].base_addr) = 1;
      alloc.pages[i].current_size = M32_REFCOUNT_BYTES;
   }
}

/**
 * Atomically decrement the object counter on the given page and release the
 * page if necessary. The given address must be the *base address* of the page.
 *
 * You shouldn't have to use this method. Use `m32_free` instead.
 */
static void
m32_free_internal(void * addr) {
   uintptr_t c = __sync_sub_and_fetch((uintptr_t*)addr, 1);
   if (c == 0) {
      munmapForLinker(addr, getPageSize());
   }
}

/**
 * Release the allocator's reference to pages on the "filling" list. This
 * should be called when it is believed that no more allocations will be needed
 * from the allocator to ensure that empty pages waiting to be filled aren't
 * unnecessarily held.
 *
 * This is the real implementation. There is another dummy implementation below.
 * See the note titled "Compile Time Trickery" at the top of this file.
 */
void
m32_allocator_flush(void) {
   int i;
   for (i=0; i<M32_MAX_PAGES; i++) {
      // Atomically clear the page slot and fetch its previous address
      void * addr =  __sync_fetch_and_and(&alloc.pages[i].base_addr, 0x0);
      if (addr != 0) {
         m32_free_internal(addr);
      }
   }
}

// Return true if the object has its own dedicated set of pages
#define m32_is_large_object(size,alignment) \
   (size >= getPageSize() - ROUND_UP(M32_REFCOUNT_BYTES,alignment))
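// For example (a sketch assuming a 4K page size and 16-byte alignment):
// objects of size >= 4096 - ROUND_UP(8,16) = 4080 bytes are "large" and get
// their own dedicated pages.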

// Return true if the address belongs to a large object (large objects are
// page-aligned; small objects never are, because of the space reserved for
// the page's object counter)
#define m32_is_large_object_addr(addr) \
   ((uintptr_t) addr % getPageSize() == 0)
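// For example (again assuming 4K pages): 0x42000 % 4096 == 0, so an object at
// that address is large, while a small object at 0x42010 can never have a
// page-aligned address.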

/**
 * Free the memory associated with an object.
 *
 * If the object is "small", the object counter of the page it is allocated in
 * is decremented and the page is not freed until all of its objects are freed.
 *
 * This is the real implementation. There is another dummy implementation below.
 * See the note titled "Compile Time Trickery" at the top of this file.
 */
void
m32_free(void *addr, size_t size)
{
   uintptr_t m = (uintptr_t) addr % getPageSize();

   if (m == 0) {
      // large object (see m32_is_large_object_addr)
      munmapForLinker(addr,roundUpToPage(size));
   }
   else {
      // small object: compute the page base address and drop one reference
      void * page_addr = (void*)((uintptr_t)addr - m);
      m32_free_internal(page_addr);
   }
}

/**
 * Allocate `size` bytes of memory with the given alignment.
 *
 * This is the real implementation. There is another dummy implementation below.
 * See the note titled "Compile Time Trickery" at the top of this file.
 */
void *
m32_alloc(size_t size, size_t alignment)
{
   size_t pgsz = getPageSize();

   if (m32_is_large_object(size,alignment)) {
      // large object
      return mmapForLinker(size,MAP_ANONYMOUS,-1,0);
   }

   // small object
   // Try to find a page that can contain it
   int empty = -1;
   int most_filled = -1;
   int i;
   for (i=0; i<M32_MAX_PAGES; i++) {
      // empty page
      if (alloc.pages[i].base_addr == 0) {
         empty = empty == -1 ? i : empty;
         continue;
      }
      // If the page is referenced only by the allocator, we can reuse it.
      // If we don't then we'll be left with a bunch of pages that have a
      // few bytes left to allocate and we don't get to use or free them
      // until we use up all the "filling" pages. This will unnecessarily
      // allocate new pages and fragment the address space.
      if (*((uintptr_t*)(alloc.pages[i].base_addr)) == 1) {
         alloc.pages[i].current_size = M32_REFCOUNT_BYTES;
      }
      // page can contain the buffer?
      size_t alsize = ROUND_UP(alloc.pages[i].current_size, alignment);
      if (size <= pgsz - alsize) {
         void * addr = (char*)alloc.pages[i].base_addr + alsize;
         alloc.pages[i].current_size = alsize + size;
         // increment the counter atomically
         __sync_fetch_and_add((uintptr_t*)alloc.pages[i].base_addr, 1);
         return addr;
      }
      // most filled?
      if (most_filled == -1
       || alloc.pages[most_filled].current_size < alloc.pages[i].current_size)
      {
         most_filled = i;
      }
   }

   // If we haven't found an empty page, flush the most filled one
   if (empty == -1) {
      m32_free_internal(alloc.pages[most_filled].base_addr);
      alloc.pages[most_filled].base_addr    = 0;
      alloc.pages[most_filled].current_size = 0;
      empty = most_filled;
   }

   // Allocate a new page
   void * addr = mmapForLinker(pgsz,MAP_ANONYMOUS,-1,0);
   if (addr == NULL) {
      return NULL;
   }
   alloc.pages[empty].base_addr    = addr;
   // Add M32_REFCOUNT_BYTES bytes for the counter + padding
   alloc.pages[empty].current_size =
      size+ROUND_UP(M32_REFCOUNT_BYTES,alignment);
   // Initialize the counter:
   // 1 for the allocator + 1 for the returned allocated memory
   *((uintptr_t*)addr) = 2;
   return (char*)addr + ROUND_UP(M32_REFCOUNT_BYTES,alignment);
}

#elif RTS_LINKER_USE_MMAP == 0

// The following implementations of these functions should never be called. If
// they are, there is a bug at the call site.
// See the note titled "Compile Time Trickery" at the top of this file.

void
m32_allocator_init(void)
{
    barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
}

void
m32_allocator_flush(void)
{
    barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
}

void
m32_free(void *addr STG_UNUSED, size_t size STG_UNUSED)
{
    barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
}

void *
m32_alloc(size_t size STG_UNUSED, size_t alignment STG_UNUSED)
{
    barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
    return NULL;
}

#else

#error RTS_LINKER_USE_MMAP should be either `0` or `1`.

#endif