/* -----------------------------------------------------------------------------
 * (c) The GHC Team, 2000-2012
 * ---------------------------------------------------------------------------*/

#include "Rts.h"

#include "RtsUtils.h"
#include "sm/OSMem.h"
#include "linker/M32Alloc.h"
#include "LinkerInternals.h"

#include <inttypes.h>
#include <string.h>

#if RTS_LINKER_USE_MMAP
#include <sys/mman.h>
#endif

/*
Note [Compile Time Trickery]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This file implements two versions of each of the `m32_*` functions. At the top
of the file there is the real implementation (compiled in when
`RTS_LINKER_USE_MMAP` is true) and a dummy implementation that exists only to
satisfy the compiler and which should never be called. If any of these dummy
implementations are called the program will abort.

The rationale for this is to allow the calling code to be written without using
the C pre-processor (CPP) `#if` hackery. Since the value of `RTS_LINKER_USE_MMAP`
is known at compile time, code like:

    if (RTS_LINKER_USE_MMAP)
        m32_allocator_init();

will be compiled into a call to `m32_allocator_init` if `RTS_LINKER_USE_MMAP`
is true and will be optimised away to nothing if `RTS_LINKER_USE_MMAP` is
false. However, regardless of the value of `RTS_LINKER_USE_MMAP`, the compiler
will still check the call for syntax and correct function parameter types.
*/
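
/* For illustration (an editorial sketch, not code from this file): the
 * alternative being avoided is a CPP guard such as
 *
 *     #if RTS_LINKER_USE_MMAP
 *     m32_allocator_init();
 *     #endif
 *
 * where the call is invisible to the compiler whenever RTS_LINKER_USE_MMAP is
 * 0, so a wrong argument list would only be caught on mmap-using platforms.
 * The plain `if (RTS_LINKER_USE_MMAP)` form keeps the call type-checked
 * everywhere while the dead branch is still optimised away.
 */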

#if RTS_LINKER_USE_MMAP == 1

/*
Note [M32 Allocator]
~~~~~~~~~~~~~~~~~~~~

A memory allocator that allocates only pages in the 32-bit range (lower 2GB).
This is useful on 64-bit platforms to ensure that addresses of allocated
objects can be referenced with a 32-bit relative offset.

Initially, the linker used `mmap` to allocate a page per object. Hence it
wasted a lot of space for small objects (see #9314). With this allocator, we
try to fill pages as much as we can for small objects.

For small objects, a Word64 counter is added at the beginning of the page they
are stored in. It indicates the number of objects that are still alive in the
page. When the counter drops to zero, the page is freed. The counter is
decremented atomically, hence deallocation is thread-safe.
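
As an illustration, a small-object page looks roughly like this (a sketch; the
exact padding depends on the requested alignments):

    +----------------+----------------------+-------+-------+-----+
    | Word64 counter | padding to alignment | obj 1 | obj 2 | ... |
    +----------------+----------------------+-------+-------+-----+
    ^ page base (page-aligned)

Because the counter occupies the first M32_REFCOUNT_BYTES of the page, the
addresses handed out for small objects are never page-aligned.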

During the allocation phase, the allocator keeps track of some pages that are
not totally filled: the number of pages in the "filling" list is configurable
with M32_MAX_PAGES. Allocation consists of finding space in one of these pages
or starting a new one, then increasing the page counter. If none of the pages
in the "filling" list has enough free space, the most filled one is flushed
(see below) and a new one is allocated.
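
As a worked example (a sketch assuming 4096-byte pages, 16-byte alignment and
M32_REFCOUNT_BYTES = 8): the first small allocation in a fresh page is placed
at offset ROUND_UP(8,16) = 16; after a 100-byte object the page's current_size
is 116, so the next 16-byte-aligned allocation starts at offset
ROUND_UP(116,16) = 128. Requests of 4096 - 16 = 4080 bytes or more bypass this
machinery and are treated as large objects.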

The allocator holds a reference to each page in the "filling" list: the
counter in these pages is 1+n, where n is the current number of objects
allocated in the page. Hence allocated objects can be freed while the
allocator is still using (filling) the page. Flushing a page consists of
decrementing its counter and removing it from the "filling" list. By
extension, flushing the allocator consists of flushing all the pages in the
"filling" list. Don't forget to flush the allocator at the end of the
allocation phase in order to avoid space leaks!
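
For example (a sketch of the counter's lifecycle): a page on the "filling"
list holding 3 live objects has a counter of 4 (1 for the allocator + 3
objects). Flushing drops the counter to 3, and once those 3 objects are freed
it reaches 0 and the page is unmapped. Without the flush, the allocator's own
reference would keep the page mapped even after all of its objects are freed.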

Large objects are objects that are larger than a page (minus the bytes
required for the counter and the optional padding). These objects are
allocated into their own set of pages. We can differentiate large and small
objects by their address: large objects are aligned on the page size while
small objects never are (because of the space reserved for the page's object
counter).
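
Concretely (assuming 4096-byte pages): a large object sits at an address such
as 0x42000, for which addr % 4096 == 0, whereas a small object is returned at
page base + ROUND_UP(M32_REFCOUNT_BYTES, alignment) or later, so its address
modulo the page size is never 0. This is the test performed by the
m32_is_large_object_addr macro below.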

For large objects, the remaining space at the end of the last page is left
unused by the allocator. It can be used with care as it will be freed with the
associated large object. The GHC linker uses this feature/hack, hence changing
the implementation of the M32 allocator must be done with care (i.e. do not
try to improve the allocator to avoid wasting this space without modifying the
linker code accordingly).

Object allocation is *not* thread-safe (however it could be done easily with a
lock in the allocator structure). Object deallocation is thread-safe.
*/
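
/* Typical usage from the calling side (an illustrative sketch; the linker's
 * actual call sites are not in this file, and `obj_size` and the 16-byte
 * alignment below are placeholders):
 *
 *     m32_allocator_init();                  // once, before loading objects
 *     void *p = m32_alloc(obj_size, 16);     // small objects share pages
 *     ...
 *     m32_allocator_flush();                 // at the end of the loading phase
 *     ...
 *     m32_free(p, obj_size);                 // thread-safe, may unmap the page
 */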

#define ROUND_UP(x,size) ((x + size - 1) & ~(size - 1))
#define ROUND_DOWN(x,size) (x & ~(size - 1))
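// Both macros above assume that `size` is a power of two (true for the page
// sizes and alignments used here), since they round by bit masking.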

/****************************************************************************
 * M32 ALLOCATOR (see Note [M32 Allocator])
 ***************************************************************************/

#define M32_MAX_PAGES 32
#define M32_REFCOUNT_BYTES 8

/**
 * An allocated page being filled by the allocator
 */
struct m32_alloc_t {
   void * base_addr;       // Page address
   size_t current_size;    // Number of bytes already reserved
};

/**
 * Currently an allocator is just a set of pages being filled. The maximum
 * number of pages can be configured with M32_MAX_PAGES.
 */
typedef struct m32_allocator_t {
   struct m32_alloc_t pages[M32_MAX_PAGES];
} m32_allocator;

// We use a global memory allocator
static struct m32_allocator_t alloc;

/**
 * Wrapper for `munmap` that handles error cases.
 * This is the real implementation. There is another dummy implementation below.
 * See the note titled "Compile Time Trickery" at the top of this file.
 */
static void
munmapForLinker (void * addr, size_t size)
{
   int r = munmap(addr, size);
   if (r == -1) {
      // Should we abort here?
      sysErrorBelch("munmap");
   }
}

/**
 * Initialize the allocator structure
 * This is the real implementation. There is another dummy implementation below.
 * See the note titled "Compile Time Trickery" at the top of this file.
 */
void
m32_allocator_init(void)
{
   memset(&alloc, 0, sizeof(struct m32_allocator_t));
   // Preallocate the initial M32_MAX_PAGES to ensure that they don't
   // fragment the memory.
   size_t pgsz = getPageSize();
   char* bigchunk = mmapForLinker(pgsz * M32_MAX_PAGES, MAP_ANONYMOUS, -1, 0);
   int i;
   for (i=0; i<M32_MAX_PAGES; i++) {
      alloc.pages[i].base_addr    = bigchunk + i*pgsz;
      *((uintptr_t*)alloc.pages[i].base_addr) = 1;
      alloc.pages[i].current_size = M32_REFCOUNT_BYTES;
   }
}

/**
 * Atomically decrement the object counter on the given page and release the
 * page if necessary. The given address must be the *base address* of the page.
 *
 * You shouldn't have to use this function. Use `m32_free` instead.
 */
static void
m32_free_internal(void * addr) {
   uintptr_t c = __sync_sub_and_fetch((uintptr_t*)addr, 1);
   if (c == 0) {
      munmapForLinker(addr, getPageSize());
   }
}

/**
 * Release the allocator's reference to pages on the "filling" list. This
 * should be called when it is believed that no more allocations will be needed
 * from the allocator to ensure that empty pages waiting to be filled aren't
 * unnecessarily held.
 *
 * This is the real implementation. There is another dummy implementation below.
 * See the note titled "Compile Time Trickery" at the top of this file.
 */
void
m32_allocator_flush(void) {
   int i;
   for (i=0; i<M32_MAX_PAGES; i++) {
      void * addr = __sync_fetch_and_and(&alloc.pages[i].base_addr, 0x0);
      if (addr != 0) {
         m32_free_internal(addr);
      }
   }
}

// Return true if the object has its own dedicated set of pages
#define m32_is_large_object(size,alignment) \
   (size >= getPageSize() - ROUND_UP(M32_REFCOUNT_BYTES,alignment))

// Return true if the object at the given address has its own dedicated set of pages
#define m32_is_large_object_addr(addr) \
   ((uintptr_t) addr % getPageSize() == 0)

/**
 * Free the memory associated with an object.
 *
 * If the object is "small", the object counter of the page it is allocated in
 * is decremented and the page is not freed until all of its objects are freed.
 *
 * This is the real implementation. There is another dummy implementation below.
 * See the note titled "Compile Time Trickery" at the top of this file.
 */
void
m32_free(void *addr, size_t size)
{
   uintptr_t m = (uintptr_t) addr % getPageSize();

   if (m == 0) {
      // large object
      munmapForLinker(addr, roundUpToPage(size));
   }
   else {
      // small object
      void * page_addr = (void*)((uintptr_t)addr - m);
      m32_free_internal(page_addr);
   }
}

/**
 * Allocate `size` bytes of memory with the given alignment.
 *
 * This is the real implementation. There is another dummy implementation below.
 * See the note titled "Compile Time Trickery" at the top of this file.
 */
void *
m32_alloc(size_t size, size_t alignment)
{
   size_t pgsz = getPageSize();

   if (m32_is_large_object(size,alignment)) {
      // large object
      return mmapForLinker(size, MAP_ANONYMOUS, -1, 0);
   }

   // small object
   // Try to find a page that can contain it
   int empty       = -1;
   int most_filled = -1;
   int i;
   for (i=0; i<M32_MAX_PAGES; i++) {
      // empty page slot?
      if (alloc.pages[i].base_addr == 0) {
         empty = empty == -1 ? i : empty;
         continue;
      }

      // If the page is referenced only by the allocator, we can reuse it.
      // If we don't then we'll be left with a bunch of pages that have a
      // few bytes left to allocate and we don't get to use or free them
      // until we use up all the "filling" pages. This will unnecessarily
      // allocate new pages and fragment the address space.
      if (*((uintptr_t*)(alloc.pages[i].base_addr)) == 1) {
         alloc.pages[i].current_size = M32_REFCOUNT_BYTES;
      }

      // page can contain the buffer?
      size_t alsize = ROUND_UP(alloc.pages[i].current_size, alignment);
      if (size <= pgsz - alsize) {
         void * addr = (char*)alloc.pages[i].base_addr + alsize;
         alloc.pages[i].current_size = alsize + size;
         // increment the counter atomically
         __sync_fetch_and_add((uintptr_t*)alloc.pages[i].base_addr, 1);
         return addr;
      }

      // is this the most filled page seen so far?
      if (most_filled == -1
       || alloc.pages[most_filled].current_size < alloc.pages[i].current_size)
      {
         most_filled = i;
      }
   }

   // If we haven't found an empty page, flush the most filled one
   if (empty == -1) {
      m32_free_internal(alloc.pages[most_filled].base_addr);
      alloc.pages[most_filled].base_addr    = 0;
      alloc.pages[most_filled].current_size = 0;
      empty = most_filled;
   }

   // Allocate a new page
   void * addr = mmapForLinker(pgsz, MAP_ANONYMOUS, -1, 0);
   if (addr == NULL) {
      return NULL;
   }
   alloc.pages[empty].base_addr    = addr;
   // Add M32_REFCOUNT_BYTES bytes for the counter + padding
   alloc.pages[empty].current_size =
      size + ROUND_UP(M32_REFCOUNT_BYTES,alignment);
   // Initialize the counter:
   // 1 for the allocator + 1 for the returned allocated memory
   *((uintptr_t*)addr)             = 2;
   return (char*)addr + ROUND_UP(M32_REFCOUNT_BYTES,alignment);
}

#elif RTS_LINKER_USE_MMAP == 0

// The following implementations of these functions should never be called. If
// they are, there is a bug at the call site.
// See the note titled "Compile Time Trickery" at the top of this file.

void
m32_allocator_init(void)
{
    barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
}

void
m32_allocator_flush(void)
{
    barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
}

void
m32_free(void *addr STG_UNUSED, size_t size STG_UNUSED)
{
    barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
}

void *
m32_alloc(size_t size STG_UNUSED, size_t alignment STG_UNUSED)
{
    barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
}

#else

#error RTS_LINKER_USE_MMAP should be either `0` or `1`.

#endif