Document MIN_PAYLOAD_SIZE and mark-compact GC mark bits
[ghc.git] / rts / LinkerInternals.h
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #pragma once
10
11 #include "Rts.h"
12 #include "Hash.h"
13
14 #if RTS_LINKER_USE_MMAP
15 #include <sys/mman.h>
16 #endif
17
18 #include "BeginPrivate.h"
19
/* Opaque address type and character type used for symbol addresses/names. */
typedef void SymbolAddr;
typedef char SymbolName;

/* Hold extended information about a symbol in case we need to resolve it at a
   late stage. */
typedef struct _Symbol
{
    SymbolName *name;   /* name of the symbol */
    SymbolAddr *addr;   /* address of the symbol */
} Symbol_t;
30
/* Indication of section kinds for loaded objects.  Needed by
   the GC for deciding whether or not a pointer on the stack
   is a code pointer.
   See Note [BFD import library].
*/
typedef enum {
    /* Section is code or readonly. e.g. .text or .r(o)data. */
    SECTIONKIND_CODE_OR_RODATA,
    /* Section contains read/write data. e.g. .data. */
    SECTIONKIND_RWDATA,
    /* Static initializer section. e.g. .ctors. */
    SECTIONKIND_INIT_ARRAY,
    /* Static finalizer section. e.g. .dtors. */
    SECTIONKIND_FINIT_ARRAY,
    /* We don't know what the section is and don't care. */
    SECTIONKIND_OTHER,
    /* Section contains debug information. e.g. .debug$. */
    SECTIONKIND_DEBUG,
    /* Section belongs to an import section group. e.g. .idata$. */
    SECTIONKIND_IMPORT,
    /* Section defines an import library entry, e.g. idata$7. */
    SECTIONKIND_IMPORT_LIBRARY,
    /* NOTE(review): undocumented; presumably "no section information
       available" -- confirm against the users of this value. */
    SECTIONKIND_NOINFOAVAIL
} SectionKind;
56
/* Allocation tag stored in a section's `alloc` field.
   NOTE(review): the names suggest how the section's memory was obtained
   (none / m32 allocator / mmap() / malloc()) -- confirm against the users. */
typedef enum {
    SECTION_NOMEM,
    SECTION_M32,
    SECTION_MMAP,
    SECTION_MALLOC
} SectionAlloc;
64
/* Indicates a desired memory protection for pages within a segment. Defined as
 * an enum since it's more explicit and looks nicer in a debugger.
 *
 * Can be used directly as a substitution for a combination of PROT_X flags on
 * POSIX systems.
 *
 * When RTS_LINKER_USE_MMAP is off the PROT_* constants are not used and the
 * enumerators simply take consecutive values.
 */
typedef enum {
#if RTS_LINKER_USE_MMAP
    SEGMENT_PROT_RO  = PROT_READ,
    SEGMENT_PROT_RX  = PROT_READ | PROT_EXEC,
    SEGMENT_PROT_RWO = PROT_READ | PROT_WRITE,
    SEGMENT_PROT_RWX = PROT_READ | PROT_WRITE | PROT_EXEC
#else
    SEGMENT_PROT_RO,
    SEGMENT_PROT_RX,
    SEGMENT_PROT_RWO,
    SEGMENT_PROT_RWX
#endif
} SegmentProt;
84
85 /*
86 * Note [No typedefs for customizable types]
87 * Some pointer-to-struct types are defined opaquely
88 * first, and customized later to architecture/ABI-specific
89 * instantiations. Having the usual
90 * typedef struct _Foo {...} Foo;
91 * wrappers is hard to get right with older versions of GCC,
92 * so just have a
93 * struct Foo {...};
94 * and always refer to it with the 'struct' qualifier.
95 */
96
97 typedef
98 struct _Section {
99 void* start; /* actual start of section in memory */
100 StgWord size; /* actual size of section in memory */
101 SectionKind kind;
102 SectionAlloc alloc;
103
104 /*
105 * The following fields are relevant for SECTION_MMAP sections only
106 */
107 StgWord mapped_offset; /* offset from the image of mapped_start */
108 void* mapped_start; /* start of mmap() block */
109 StgWord mapped_size; /* size of mmap() block */
110
111 /* A customizable type to augment the Section type.
112 * See Note [No typedefs for customizable types]
113 */
114 struct SectionFormatInfo* info;
115 }
116 Section;
117
/* One node of a singly linked list (chained through `next`) describing a
   memory region by its start address and size. */
typedef struct _ProddableBlock {
    void                   *start;  /* first byte of the region */
    int                     size;   /* size of the region */
    struct _ProddableBlock *next;   /* following node */
} ProddableBlock;
125
126 typedef struct _Segment {
127 void *start; /* page aligned start address of a segment */
128 size_t size; /* page rounded size of a segment */
129 SegmentProt prot; /* mem protection to set after all symbols were
130 * resolved */
131
132 int *sections_idx; /* an array of section indexes assigned to this segment */
133 int n_sections;
134 } Segment;
135
136 /*
137 * We must keep track of the StablePtrs that are created for foreign
138 * exports by constructor functions when the module is loaded, so that
139 * we can free them again when the module is unloaded. If we don't do
140 * this, then the StablePtr will keep the module alive indefinitely.
141 */
142 typedef struct ForeignExportStablePtr_ {
143 StgStablePtr stable_ptr;
144 struct ForeignExportStablePtr_ *next;
145 } ForeignExportStablePtr;
146
/* NOTE(review): ARM is mentioned in the SymbolExtra comment below and has its
   own SymbolExtra layout, yet it is not listed here -- confirm this is
   intentional. */
#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
#define NEED_SYMBOL_EXTRAS 1
#endif

/* Jump Islands are snippets of machine code required for relative
 * address relocations on the PowerPC, x86_64 and ARM.
 *
 * The layout of SymbolExtra depends on the host architecture; on any other
 * architecture the struct has no members.
 */
typedef struct {
#if defined(powerpc_HOST_ARCH)
    struct {
        short lis_r12, hi_addr;
        short ori_r12_r12, lo_addr;
        long  mtctr_r12;
        long  bctr;
    } jumpIsland;
#elif defined(x86_64_HOST_ARCH)
    uint64_t addr;
    uint8_t  jumpIsland[6];
#elif defined(arm_HOST_ARCH)
    uint8_t  jumpIsland[16];
#endif
} SymbolExtra;
169
170
171 /* Top-level structure for an object module. One of these is allocated
172 * for each object file in use.
173 */
174 typedef struct _ObjectCode {
175 OStatus status;
176 pathchar *fileName;
177 int fileSize; /* also mapped image size when using mmap() */
178 char* formatName; /* eg "ELF32", "DLL", "COFF", etc. */
179
180 /* If this object is a member of an archive, archiveMemberName is
181 * like "libarchive.a(object.o)". Otherwise it's NULL.
182 */
183 char* archiveMemberName;
184
185 /* An array containing ptrs to all the symbol names copied from
186 this object into the global symbol hash table. This is so that
187 we know which parts of the latter mapping to nuke when this
188 object is removed from the system. */
189 Symbol_t *symbols;
190 int n_symbols;
191
192 /* ptr to mem containing the object file image */
193 char* image;
194
195 /* A customizable type, that formats can use to augment ObjectCode
196 * See Note [No typedefs for customizable types]
197 */
198 struct ObjectCodeFormatInfo* info;
199
200 /* non-zero if the object file was mmap'd, otherwise malloc'd */
201 int imageMapped;
202
203 /* flag used when deciding whether to unload an object file */
204 int referenced;
205
206 /* record by how much image has been deliberately misaligned
207 after allocation, so that we can use realloc */
208 int misalignment;
209
210 /* The section-kind entries for this object module. An array. */
211 int n_sections;
212 Section* sections;
213
214 int n_segments;
215 Segment *segments;
216
217 /* Allow a chain of these things */
218 struct _ObjectCode * next;
219
220 /* SANITY CHECK ONLY: a list of the only memory regions which may
221 safely be prodded during relocation. Any attempt to prod
222 outside one of these is an error in the linker. */
223 ProddableBlock* proddables;
224
225 #if defined(ia64_HOST_ARCH)
226 /* Procedure Linkage Table for this object */
227 void *plt;
228 unsigned int pltIndex;
229 #endif
230
231 #if defined(NEED_SYMBOL_EXTRAS)
232 SymbolExtra *symbol_extras;
233 unsigned long first_symbol_extra;
234 unsigned long n_symbol_extras;
235 #endif
236 /* Additional memory that is preallocated and contiguous with image
237 which can be used used to relocate bss sections. */
238 char* bssBegin;
239 char* bssEnd;
240
241 ForeignExportStablePtr *stable_ptrs;
242
243 /* Holds the list of symbols in the .o file which
244 require extra information.*/
245 HashTable *extraInfos;
246
247 } ObjectCode;
248
/* Yields the most descriptive name for an object: its archiveMemberName when
   that is non-NULL, otherwise its fileName.  Note that OC is evaluated more
   than once. */
#define OC_INFORMATIVE_FILENAME(OC)      \
    ( (OC)->archiveMemberName ?          \
        (OC)->archiveMemberName :        \
        (OC)->fileName                   \
    )
254
255 extern ObjectCode *objects;
256 extern ObjectCode *unloaded_objects;
257
258 #if defined(THREADED_RTS)
259 extern Mutex linker_mutex;
260 extern Mutex linker_unloaded_mutex;
261 #endif
262
/* Type of the initializer: a function receiving argc, argv and the
   environment, returning nothing. */
typedef void (*init_t)(int argc, char **argv, char **env);
265
266 /* SymbolInfo tracks a symbol's address, the object code from which
267 it originated, and whether or not it's weak.
268
269 RtsSymbolInfo is used to track the state of the symbols currently
270 loaded or to be loaded by the Linker.
271
272 Where the information in the `ObjectCode` is used to track the
273 original status of the symbol inside the `ObjectCode`.
274
275 A weak symbol that has been used will still be marked as weak
276 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
277 */
278 typedef struct _RtsSymbolInfo {
279 SymbolAddr* value;
280 ObjectCode *owner;
281 HsBool weak;
282 } RtsSymbolInfo;
283
284 void exitLinker( void );
285
286 void freeObjectCode (ObjectCode *oc);
287 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo);
288
289 void *mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset);
290
291 void addProddableBlock ( ObjectCode* oc, void* start, int size );
292 void checkProddableBlock (ObjectCode *oc, void *addr, size_t size );
293 void freeProddableBlocks (ObjectCode *oc);
294
295 void addSection (Section *s, SectionKind kind, SectionAlloc alloc,
296 void* start, StgWord size, StgWord mapped_offset,
297 void* mapped_start, StgWord mapped_size);
298
299 HsBool ghciLookupSymbolInfo(HashTable *table,
300 const SymbolName* key, RtsSymbolInfo **result);
301
302 int ghciInsertSymbolTable(
303 pathchar* obj_name,
304 HashTable *table,
305 const SymbolName* key,
306 SymbolAddr* data,
307 HsBool weak,
308 ObjectCode *owner);
309
310 /* lock-free version of lookupSymbol */
311 SymbolAddr* lookupSymbol_ (SymbolName* lbl);
312
313 extern /*Str*/HashTable *symhash;
314
315 pathchar*
316 resolveSymbolAddr (pathchar* buffer, int size,
317 SymbolAddr* symbol, uintptr_t* top);
318
/*************************************************
 * Various bits of configuration
 *************************************************/

/* PowerPC and ARM have relative branch instructions with only 24 bit
 * displacements and therefore need jump islands contiguous with each object
 * code module.
 */
#if defined(powerpc_HOST_ARCH) || defined(arm_HOST_ARCH)
#define SHORT_REL_BRANCH 1
#endif

/* USE_CONTIGUOUS_MMAP is always defined (to 0 or 1), so it can be tested
 * with a plain #if or a C-level if.
 */
#if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
#define USE_CONTIGUOUS_MMAP 1
#else
#define USE_CONTIGUOUS_MMAP 0
#endif
339
340 HsInt isAlreadyLoaded( pathchar *path );
341 HsInt loadOc( ObjectCode* oc );
342 ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
343 bool mapped, char *archiveMemberName,
344 int misalignment
345 );
346
347 void initSegment(Segment *s, void *start, size_t size, SegmentProt prot, int n_sections);
348 void freeSegments(ObjectCode *oc);
349
/* MAP_ANONYMOUS is MAP_ANON on some systems,
   e.g. OS X (before Sierra), OpenBSD etc.  Provide the former in terms of
   the latter when only MAP_ANON exists. */
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS MAP_ANON
#endif
355
356 /* Which object file format are we targetting? */
357 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) \
358 || defined(linux_android_HOST_OS) \
359 || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) \
360 || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) \
361 || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
362 # define OBJFORMAT_ELF
363 # include "linker/ElfTypes.h"
364 #elif defined(mingw32_HOST_OS)
365 # define OBJFORMAT_PEi386
366 # include "linker/PEi386Types.h"
367 #elif defined(darwin_HOST_OS) || defined(ios_HOST_OS)
368 # define OBJFORMAT_MACHO
369 # include "linker/MachOTypes.h"
370 #else
371 #error "Unknown OBJECT_FORMAT for HOST_OS"
372 #endif
373
/* In order to simplify control flow a bit, some references to mmap-related
   definitions are blocked off by a C-level if statement rather than a CPP-level
   #if statement. Since those are dead branches when !RTS_LINKER_USE_MMAP, we
   just stub out the relevant symbols here.

   MAP_ANONYMOUS is #undef'd first so that an earlier definition (for example
   the MAP_ANON fallback) is not redefined with a different value.
*/
#if !RTS_LINKER_USE_MMAP
#define munmap(x,y) /* nothing */
#undef MAP_ANONYMOUS
#define MAP_ANONYMOUS 0
#endif
383
384 #include "EndPrivate.h"