a83c13086455e0b683f8d0745217835cff35d725
[ghc.git] / rts / Linker.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2000-2004
4 *
5 * RTS Object Linker
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #if 0
10 #include "PosixSource.h"
11 #endif
12
13 /* Linux needs _GNU_SOURCE to get RTLD_DEFAULT from <dlfcn.h> and
14 MREMAP_MAYMOVE from <sys/mman.h>.
15 */
16 #if defined(__linux__) || defined(__GLIBC__)
17 #define _GNU_SOURCE 1
18 #endif
19
20 #include "Rts.h"
21 #include "HsFFI.h"
22
23 #include "sm/Storage.h"
24 #include "Stats.h"
25 #include "Hash.h"
26 #include "LinkerInternals.h"
27 #include "RtsUtils.h"
28 #include "Trace.h"
29 #include "StgPrimFloat.h" // for __int_encodeFloat etc.
30 #include "Stable.h"
31
32 #if !defined(mingw32_HOST_OS)
33 #include "posix/Signals.h"
34 #endif
35
36 // get protos for is*()
37 #include <ctype.h>
38
39 #ifdef HAVE_SYS_TYPES_H
40 #include <sys/types.h>
41 #endif
42
43 #include <inttypes.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <stdio.h>
47 #include <assert.h>
48
49 #ifdef HAVE_SYS_STAT_H
50 #include <sys/stat.h>
51 #endif
52
53 #if defined(HAVE_DLFCN_H)
54 #include <dlfcn.h>
55 #endif
56
57 #if defined(cygwin32_HOST_OS)
58 #ifdef HAVE_DIRENT_H
59 #include <dirent.h>
60 #endif
61
62 #ifdef HAVE_SYS_TIME_H
63 #include <sys/time.h>
64 #endif
65 #include <regex.h>
66 #include <sys/fcntl.h>
67 #include <sys/termios.h>
68 #include <sys/utime.h>
69 #include <sys/utsname.h>
70 #include <sys/wait.h>
71 #endif
72
73 #if !defined(powerpc_HOST_ARCH) && \
74 ( defined(linux_HOST_OS ) || defined(freebsd_HOST_OS) || \
75 defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS ) || \
76 defined(openbsd_HOST_OS ) || defined(darwin_HOST_OS ) || \
77 defined(kfreebsdgnu_HOST_OS) )
78 /* Don't use mmap on powerpc_HOST_ARCH as mmap doesn't support
79 * reallocating but we need to allocate jump islands just after each
80 * object images. Otherwise relative branches to jump islands can fail
81 * due to 24-bits displacement overflow.
82 */
83 #define USE_MMAP
84 #include <fcntl.h>
85 #include <sys/mman.h>
86
87 #ifdef HAVE_UNISTD_H
88 #include <unistd.h>
89 #endif
90
91 #endif
92
93 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS)
94 # define OBJFORMAT_ELF
95 # include <regex.h> // regex is already used by dlopen() so this is OK
96 // to use here without requiring an additional lib
97 #elif defined(cygwin32_HOST_OS) || defined (mingw32_HOST_OS)
98 # define OBJFORMAT_PEi386
99 # include <windows.h>
100 # include <math.h>
101 #elif defined(darwin_HOST_OS)
102 # define OBJFORMAT_MACHO
103 # include <regex.h>
104 # include <mach/machine.h>
105 # include <mach-o/fat.h>
106 # include <mach-o/loader.h>
107 # include <mach-o/nlist.h>
108 # include <mach-o/reloc.h>
109 #if !defined(HAVE_DLFCN_H)
110 # include <mach-o/dyld.h>
111 #endif
112 #if defined(powerpc_HOST_ARCH)
113 # include <mach-o/ppc/reloc.h>
114 #endif
115 #if defined(x86_64_HOST_ARCH)
116 # include <mach-o/x86_64/reloc.h>
117 #endif
118 #endif
119
120 #if defined(x86_64_HOST_ARCH) && defined(darwin_HOST_OS)
121 #define ALWAYS_PIC
122 #endif
123
124 /* Hash table mapping symbol names to Symbol */
125 static /*Str*/HashTable *symhash;
126
127 /* Hash table mapping symbol names to StgStablePtr */
128 static /*Str*/HashTable *stablehash;
129
130 /* List of currently loaded objects */
131 ObjectCode *objects = NULL; /* initially empty */
132
133 static HsInt loadOc( ObjectCode* oc );
134 static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
135 char *archiveMemberName
136 #ifndef USE_MMAP
137 #ifdef darwin_HOST_OS
138 , int misalignment
139 #endif
140 #endif
141 );
142
143 // Use wchar_t for pathnames on Windows (#5697)
144 #if defined(mingw32_HOST_OS)
145 #define pathcmp wcscmp
146 #define pathlen wcslen
147 #define pathopen _wfopen
148 #define pathstat _wstat
149 #define struct_stat struct _stat
150 #define open wopen
151 #define WSTR(s) L##s
152 #define PATH_FMT "S"
153 #else
154 #define pathcmp strcmp
155 #define pathlen strlen
156 #define pathopen fopen
157 #define pathstat stat
158 #define struct_stat struct stat
159 #define WSTR(s) s
160 #define PATH_FMT "s"
161 #endif
162
163 static pathchar* pathdup(pathchar *path)
164 {
165 pathchar *ret;
166 #if defined(mingw32_HOST_OS)
167 ret = wcsdup(path);
168 #else
169 /* sigh, strdup() isn't a POSIX function, so do it the long way */
170 ret = stgMallocBytes( strlen(path)+1, "loadObj" );
171 strcpy(ret, path);
172 #endif
173 return ret;
174 }
175
176
177 #if defined(OBJFORMAT_ELF)
178 static int ocVerifyImage_ELF ( ObjectCode* oc );
179 static int ocGetNames_ELF ( ObjectCode* oc );
180 static int ocResolve_ELF ( ObjectCode* oc );
181 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH)
182 static int ocAllocateSymbolExtras_ELF ( ObjectCode* oc );
183 #endif
184 #elif defined(OBJFORMAT_PEi386)
185 static int ocVerifyImage_PEi386 ( ObjectCode* oc );
186 static int ocGetNames_PEi386 ( ObjectCode* oc );
187 static int ocResolve_PEi386 ( ObjectCode* oc );
188 static void *lookupSymbolInDLLs ( unsigned char *lbl );
189 static void zapTrailingAtSign ( unsigned char *sym );
190 #elif defined(OBJFORMAT_MACHO)
191 static int ocVerifyImage_MachO ( ObjectCode* oc );
192 static int ocGetNames_MachO ( ObjectCode* oc );
193 static int ocResolve_MachO ( ObjectCode* oc );
194
195 #ifndef USE_MMAP
196 static int machoGetMisalignment( FILE * );
197 #endif
198 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
199 static int ocAllocateSymbolExtras_MachO ( ObjectCode* oc );
200 #endif
201 #ifdef powerpc_HOST_ARCH
202 static void machoInitSymbolsWithoutUnderscore( void );
203 #endif
204 #endif
205
206 /* on x86_64 we have a problem with relocating symbol references in
207 * code that was compiled without -fPIC. By default, the small memory
208 * model is used, which assumes that symbol references can fit in a
209 * 32-bit slot. The system dynamic linker makes this work for
210 * references to shared libraries by either (a) allocating a jump
211 * table slot for code references, or (b) moving the symbol at load
212 * time (and copying its contents, if necessary) for data references.
213 *
214 * We unfortunately can't tell whether symbol references are to code
215 * or data. So for now we assume they are code (the vast majority
216 * are), and allocate jump-table slots. Unfortunately this will
217 * SILENTLY generate crashing code for data references. This hack is
218 * enabled by X86_64_ELF_NONPIC_HACK.
219 *
220 * One workaround is to use shared Haskell libraries. This is
221 * coming. Another workaround is to keep the static libraries but
222 * compile them with -fPIC, because that will generate PIC references
223 * to data which can be relocated. The PIC code is still too green to
224 * do this systematically, though.
225 *
226 * See bug #781
227 * See thread http://www.haskell.org/pipermail/cvs-ghc/2007-September/038458.html
228 *
229 * Naming Scheme for Symbol Macros
230 *
231 * SymI_*: symbol is internal to the RTS. It resides in an object
232 * file/library that is statically.
233 * SymE_*: symbol is external to the RTS library. It might be linked
234 * dynamically.
235 *
236 * Sym*_HasProto : the symbol prototype is imported in an include file
237 * or defined explicitly
238 * Sym*_NeedsProto: the symbol is undefined and we add a dummy
239 * default proto extern void sym(void);
240 */
241 #define X86_64_ELF_NONPIC_HACK 1
242
243 /* Link objects into the lower 2Gb on x86_64. GHC assumes the
244 * small memory model on this architecture (see gcc docs,
245 * -mcmodel=small).
246 *
247 * MAP_32BIT not available on OpenBSD/amd64
248 */
249 #if defined(x86_64_HOST_ARCH) && defined(MAP_32BIT)
250 #define TRY_MAP_32BIT MAP_32BIT
251 #else
252 #define TRY_MAP_32BIT 0
253 #endif
254
255 /*
256 * Due to the small memory model (see above), on x86_64 we have to map
257 * all our non-PIC object files into the low 2Gb of the address space
258 * (why 2Gb and not 4Gb? Because all addresses must be reachable
259 * using a 32-bit signed PC-relative offset). On Linux we can do this
260 * using the MAP_32BIT flag to mmap(), however on other OSs
261 * (e.g. *BSD, see #2063, and also on Linux inside Xen, see #2512), we
262 * can't do this. So on these systems, we have to pick a base address
263 * in the low 2Gb of the address space and try to allocate memory from
264 * there.
265 *
266 * We pick a default address based on the OS, but also make this
267 * configurable via an RTS flag (+RTS -xm)
268 */
269 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
270
271 #if defined(MAP_32BIT)
272 // Try to use MAP_32BIT
273 #define MMAP_32BIT_BASE_DEFAULT 0
274 #else
275 // A guess: 1Gb.
276 #define MMAP_32BIT_BASE_DEFAULT 0x40000000
277 #endif
278
279 static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
280 #endif
281
282 /* MAP_ANONYMOUS is MAP_ANON on some systems, e.g. OpenBSD */
283 #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
284 #define MAP_ANONYMOUS MAP_ANON
285 #endif
286
287 /* -----------------------------------------------------------------------------
288 * Built-in symbols from the RTS
289 */
290
291 typedef struct _RtsSymbolVal {
292 char *lbl;
293 void *addr;
294 } RtsSymbolVal;
295
296 #define Maybe_Stable_Names SymI_HasProto(stg_mkWeakzh) \
297 SymI_HasProto(stg_mkWeakNoFinalizzerzh) \
298 SymI_HasProto(stg_mkWeakForeignEnvzh) \
299 SymI_HasProto(stg_makeStableNamezh) \
300 SymI_HasProto(stg_finalizzeWeakzh)
301
302 #if !defined (mingw32_HOST_OS)
303 #define RTS_POSIX_ONLY_SYMBOLS \
304 SymI_HasProto(__hscore_get_saved_termios) \
305 SymI_HasProto(__hscore_set_saved_termios) \
306 SymI_HasProto(shutdownHaskellAndSignal) \
307 SymI_HasProto(lockFile) \
308 SymI_HasProto(unlockFile) \
309 SymI_HasProto(signal_handlers) \
310 SymI_HasProto(stg_sig_install) \
311 SymI_HasProto(rtsTimerSignal) \
312 SymI_HasProto(atexit) \
313 SymI_NeedsProto(nocldstop)
314 #endif
315
316 #if defined (cygwin32_HOST_OS)
317 #define RTS_MINGW_ONLY_SYMBOLS /**/
318 /* Don't have the ability to read import libs / archives, so
319 * we have to stupidly list a lot of what libcygwin.a
320 * exports; sigh.
321 */
322 #define RTS_CYGWIN_ONLY_SYMBOLS \
323 SymI_HasProto(regfree) \
324 SymI_HasProto(regexec) \
325 SymI_HasProto(regerror) \
326 SymI_HasProto(regcomp) \
327 SymI_HasProto(__errno) \
328 SymI_HasProto(access) \
329 SymI_HasProto(chmod) \
330 SymI_HasProto(chdir) \
331 SymI_HasProto(close) \
332 SymI_HasProto(creat) \
333 SymI_HasProto(dup) \
334 SymI_HasProto(dup2) \
335 SymI_HasProto(fstat) \
336 SymI_HasProto(fcntl) \
337 SymI_HasProto(getcwd) \
338 SymI_HasProto(getenv) \
339 SymI_HasProto(lseek) \
340 SymI_HasProto(open) \
341 SymI_HasProto(fpathconf) \
342 SymI_HasProto(pathconf) \
343 SymI_HasProto(stat) \
344 SymI_HasProto(pow) \
345 SymI_HasProto(tanh) \
346 SymI_HasProto(cosh) \
347 SymI_HasProto(sinh) \
348 SymI_HasProto(atan) \
349 SymI_HasProto(acos) \
350 SymI_HasProto(asin) \
351 SymI_HasProto(tan) \
352 SymI_HasProto(cos) \
353 SymI_HasProto(sin) \
354 SymI_HasProto(exp) \
355 SymI_HasProto(log) \
356 SymI_HasProto(sqrt) \
357 SymI_HasProto(localtime_r) \
358 SymI_HasProto(gmtime_r) \
359 SymI_HasProto(mktime) \
360 SymI_NeedsProto(_imp___tzname) \
361 SymI_HasProto(gettimeofday) \
362 SymI_HasProto(timezone) \
363 SymI_HasProto(tcgetattr) \
364 SymI_HasProto(tcsetattr) \
365 SymI_HasProto(memcpy) \
366 SymI_HasProto(memmove) \
367 SymI_HasProto(realloc) \
368 SymI_HasProto(malloc) \
369 SymI_HasProto(free) \
370 SymI_HasProto(fork) \
371 SymI_HasProto(lstat) \
372 SymI_HasProto(isatty) \
373 SymI_HasProto(mkdir) \
374 SymI_HasProto(opendir) \
375 SymI_HasProto(readdir) \
376 SymI_HasProto(rewinddir) \
377 SymI_HasProto(closedir) \
378 SymI_HasProto(link) \
379 SymI_HasProto(mkfifo) \
380 SymI_HasProto(pipe) \
381 SymI_HasProto(read) \
382 SymI_HasProto(rename) \
383 SymI_HasProto(rmdir) \
384 SymI_HasProto(select) \
385 SymI_HasProto(system) \
386 SymI_HasProto(write) \
387 SymI_HasProto(strcmp) \
388 SymI_HasProto(strcpy) \
389 SymI_HasProto(strncpy) \
390 SymI_HasProto(strerror) \
391 SymI_HasProto(sigaddset) \
392 SymI_HasProto(sigemptyset) \
393 SymI_HasProto(sigprocmask) \
394 SymI_HasProto(umask) \
395 SymI_HasProto(uname) \
396 SymI_HasProto(unlink) \
397 SymI_HasProto(utime) \
398 SymI_HasProto(waitpid)
399
400 #elif defined(mingw32_HOST_OS)
401 #define RTS_POSIX_ONLY_SYMBOLS /**/
402 #define RTS_CYGWIN_ONLY_SYMBOLS /**/
403
404 #if HAVE_GETTIMEOFDAY
405 #define RTS_MINGW_GETTIMEOFDAY_SYM SymI_NeedsProto(gettimeofday)
406 #else
407 #define RTS_MINGW_GETTIMEOFDAY_SYM /**/
408 #endif
409
410 #if HAVE___MINGW_VFPRINTF
411 #define RTS___MINGW_VFPRINTF_SYM SymI_HasProto(__mingw_vfprintf)
412 #else
413 #define RTS___MINGW_VFPRINTF_SYM /**/
414 #endif
415
416 #if defined(i386_HOST_ARCH)
417 #define RTS_MINGW32_ONLY(X) X
418 #else
419 #define RTS_MINGW32_ONLY(X) /**/
420 #endif
421
422 /* These are statically linked from the mingw libraries into the ghc
423 executable, so we have to employ this hack. */
424 #define RTS_MINGW_ONLY_SYMBOLS \
425 SymI_HasProto(stg_asyncReadzh) \
426 SymI_HasProto(stg_asyncWritezh) \
427 SymI_HasProto(stg_asyncDoProczh) \
428 SymI_HasProto(getWin32ProgArgv) \
429 SymI_HasProto(setWin32ProgArgv) \
430 SymI_HasProto(memset) \
431 SymI_HasProto(inet_ntoa) \
432 SymI_HasProto(inet_addr) \
433 SymI_HasProto(htonl) \
434 SymI_HasProto(recvfrom) \
435 SymI_HasProto(listen) \
436 SymI_HasProto(bind) \
437 SymI_HasProto(shutdown) \
438 SymI_HasProto(connect) \
439 SymI_HasProto(htons) \
440 SymI_HasProto(ntohs) \
441 SymI_HasProto(getservbyname) \
442 SymI_HasProto(getservbyport) \
443 SymI_HasProto(getprotobynumber) \
444 SymI_HasProto(getprotobyname) \
445 SymI_HasProto(gethostbyname) \
446 SymI_HasProto(gethostbyaddr) \
447 SymI_HasProto(gethostname) \
448 SymI_HasProto(strcpy) \
449 SymI_HasProto(strncpy) \
450 SymI_HasProto(abort) \
451 RTS_MINGW32_ONLY(SymI_NeedsProto(_alloca)) \
452 SymI_HasProto(isxdigit) \
453 SymI_HasProto(isupper) \
454 SymI_HasProto(ispunct) \
455 SymI_HasProto(islower) \
456 SymI_HasProto(isspace) \
457 SymI_HasProto(isprint) \
458 SymI_HasProto(isdigit) \
459 SymI_HasProto(iscntrl) \
460 SymI_HasProto(isalpha) \
461 SymI_HasProto(isalnum) \
462 SymI_HasProto(isascii) \
463 RTS___MINGW_VFPRINTF_SYM \
464 SymI_HasProto(strcmp) \
465 SymI_HasProto(memmove) \
466 SymI_HasProto(realloc) \
467 SymI_HasProto(malloc) \
468 SymI_HasProto(pow) \
469 SymI_HasProto(tanh) \
470 SymI_HasProto(cosh) \
471 SymI_HasProto(sinh) \
472 SymI_HasProto(atan) \
473 SymI_HasProto(acos) \
474 SymI_HasProto(asin) \
475 SymI_HasProto(tan) \
476 SymI_HasProto(cos) \
477 SymI_HasProto(sin) \
478 SymI_HasProto(exp) \
479 SymI_HasProto(log) \
480 SymI_HasProto(sqrt) \
481 SymI_HasProto(powf) \
482 SymI_HasProto(tanhf) \
483 SymI_HasProto(coshf) \
484 SymI_HasProto(sinhf) \
485 SymI_HasProto(atanf) \
486 SymI_HasProto(acosf) \
487 SymI_HasProto(asinf) \
488 SymI_HasProto(tanf) \
489 SymI_HasProto(cosf) \
490 SymI_HasProto(sinf) \
491 SymI_HasProto(expf) \
492 SymI_HasProto(logf) \
493 SymI_HasProto(sqrtf) \
494 SymI_HasProto(erf) \
495 SymI_HasProto(erfc) \
496 SymI_HasProto(erff) \
497 SymI_HasProto(erfcf) \
498 SymI_HasProto(memcpy) \
499 SymI_HasProto(rts_InstallConsoleEvent) \
500 SymI_HasProto(rts_ConsoleHandlerDone) \
501 SymI_NeedsProto(mktime) \
502 RTS_MINGW32_ONLY(SymI_NeedsProto(_imp___timezone)) \
503 RTS_MINGW32_ONLY(SymI_NeedsProto(_imp___tzname)) \
504 RTS_MINGW32_ONLY(SymI_NeedsProto(_imp__tzname)) \
505 RTS_MINGW32_ONLY(SymI_NeedsProto(_imp___iob)) \
506 RTS_MINGW32_ONLY(SymI_NeedsProto(_imp___osver)) \
507 SymI_NeedsProto(localtime) \
508 SymI_NeedsProto(gmtime) \
509 SymI_NeedsProto(opendir) \
510 SymI_NeedsProto(readdir) \
511 SymI_NeedsProto(rewinddir) \
512 RTS_MINGW32_ONLY(SymI_NeedsProto(_imp____mb_cur_max)) \
513 RTS_MINGW32_ONLY(SymI_NeedsProto(_imp___pctype)) \
514 RTS_MINGW32_ONLY(SymI_NeedsProto(__chkstk)) \
515 SymI_NeedsProto(__imp___iob_func) \
516 SymI_NeedsProto(___chkstk_ms) \
517 SymI_NeedsProto(__imp_localeconv) \
518 SymI_NeedsProto(__imp_islower) \
519 SymI_NeedsProto(__imp_isspace) \
520 SymI_NeedsProto(__imp_isxdigit) \
521 SymI_HasProto(close) \
522 SymI_HasProto(read) \
523 SymI_HasProto(dup) \
524 SymI_HasProto(dup2) \
525 SymI_HasProto(write) \
526 SymI_NeedsProto(getpid) \
527 SymI_HasProto(access) \
528 SymI_HasProto(chmod) \
529 SymI_HasProto(creat) \
530 SymI_HasProto(umask) \
531 SymI_HasProto(unlink) \
532 SymI_NeedsProto(__imp__errno) \
533 SymI_NeedsProto(ftruncate64) \
534 SymI_HasProto(setmode) \
535 SymI_NeedsProto(__imp__wstat64) \
536 SymI_NeedsProto(__imp__fstat64) \
537 SymI_NeedsProto(__imp__wsopen) \
538 SymI_HasProto(__imp__environ) \
539 SymI_NeedsProto(__imp_GetFileType) \
540 SymI_NeedsProto(__imp_GetLastError) \
541 SymI_NeedsProto(__imp_QueryPerformanceFrequency) \
542 SymI_NeedsProto(__imp_QueryPerformanceCounter) \
543 SymI_NeedsProto(__imp_GetTickCount) \
544 SymI_NeedsProto(__imp_WaitForSingleObject) \
545 SymI_NeedsProto(__imp_PeekConsoleInputA) \
546 SymI_NeedsProto(__imp_ReadConsoleInputA) \
547 SymI_NeedsProto(__imp_PeekNamedPipe) \
548 SymI_NeedsProto(__imp__isatty) \
549 SymI_NeedsProto(__imp_select) \
550 SymI_HasProto(isatty) \
551 SymI_NeedsProto(__imp__get_osfhandle) \
552 SymI_NeedsProto(__imp_GetConsoleMode) \
553 SymI_NeedsProto(__imp_SetConsoleMode) \
554 SymI_NeedsProto(__imp_FlushConsoleInputBuffer) \
555 RTS_MINGW_GETTIMEOFDAY_SYM \
556 SymI_NeedsProto(closedir)
557
558 #else
559 #define RTS_MINGW_ONLY_SYMBOLS /**/
560 #define RTS_CYGWIN_ONLY_SYMBOLS /**/
561 #endif
562
563
564 #if defined(darwin_HOST_OS) && HAVE_PRINTF_LDBLSTUB
565 #define RTS_DARWIN_ONLY_SYMBOLS \
566 SymI_NeedsProto(asprintf$LDBLStub) \
567 SymI_NeedsProto(err$LDBLStub) \
568 SymI_NeedsProto(errc$LDBLStub) \
569 SymI_NeedsProto(errx$LDBLStub) \
570 SymI_NeedsProto(fprintf$LDBLStub) \
571 SymI_NeedsProto(fscanf$LDBLStub) \
572 SymI_NeedsProto(fwprintf$LDBLStub) \
573 SymI_NeedsProto(fwscanf$LDBLStub) \
574 SymI_NeedsProto(printf$LDBLStub) \
575 SymI_NeedsProto(scanf$LDBLStub) \
576 SymI_NeedsProto(snprintf$LDBLStub) \
577 SymI_NeedsProto(sprintf$LDBLStub) \
578 SymI_NeedsProto(sscanf$LDBLStub) \
579 SymI_NeedsProto(strtold$LDBLStub) \
580 SymI_NeedsProto(swprintf$LDBLStub) \
581 SymI_NeedsProto(swscanf$LDBLStub) \
582 SymI_NeedsProto(syslog$LDBLStub) \
583 SymI_NeedsProto(vasprintf$LDBLStub) \
584 SymI_NeedsProto(verr$LDBLStub) \
585 SymI_NeedsProto(verrc$LDBLStub) \
586 SymI_NeedsProto(verrx$LDBLStub) \
587 SymI_NeedsProto(vfprintf$LDBLStub) \
588 SymI_NeedsProto(vfscanf$LDBLStub) \
589 SymI_NeedsProto(vfwprintf$LDBLStub) \
590 SymI_NeedsProto(vfwscanf$LDBLStub) \
591 SymI_NeedsProto(vprintf$LDBLStub) \
592 SymI_NeedsProto(vscanf$LDBLStub) \
593 SymI_NeedsProto(vsnprintf$LDBLStub) \
594 SymI_NeedsProto(vsprintf$LDBLStub) \
595 SymI_NeedsProto(vsscanf$LDBLStub) \
596 SymI_NeedsProto(vswprintf$LDBLStub) \
597 SymI_NeedsProto(vswscanf$LDBLStub) \
598 SymI_NeedsProto(vsyslog$LDBLStub) \
599 SymI_NeedsProto(vwarn$LDBLStub) \
600 SymI_NeedsProto(vwarnc$LDBLStub) \
601 SymI_NeedsProto(vwarnx$LDBLStub) \
602 SymI_NeedsProto(vwprintf$LDBLStub) \
603 SymI_NeedsProto(vwscanf$LDBLStub) \
604 SymI_NeedsProto(warn$LDBLStub) \
605 SymI_NeedsProto(warnc$LDBLStub) \
606 SymI_NeedsProto(warnx$LDBLStub) \
607 SymI_NeedsProto(wcstold$LDBLStub) \
608 SymI_NeedsProto(wprintf$LDBLStub) \
609 SymI_NeedsProto(wscanf$LDBLStub)
610 #else
611 #define RTS_DARWIN_ONLY_SYMBOLS
612 #endif
613
614 #ifndef SMP
615 # define MAIN_CAP_SYM SymI_HasProto(MainCapability)
616 #else
617 # define MAIN_CAP_SYM
618 #endif
619
620 #if !defined(mingw32_HOST_OS)
621 #define RTS_USER_SIGNALS_SYMBOLS \
622 SymI_HasProto(setIOManagerControlFd) \
623 SymI_HasProto(setIOManagerWakeupFd) \
624 SymI_HasProto(ioManagerWakeup) \
625 SymI_HasProto(blockUserSignals) \
626 SymI_HasProto(unblockUserSignals)
627 #else
628 #define RTS_USER_SIGNALS_SYMBOLS \
629 SymI_HasProto(ioManagerWakeup) \
630 SymI_HasProto(sendIOManagerEvent) \
631 SymI_HasProto(readIOManagerEvent) \
632 SymI_HasProto(getIOManagerEvent) \
633 SymI_HasProto(console_handler)
634 #endif
635
636 #define RTS_LIBFFI_SYMBOLS \
637 SymE_NeedsProto(ffi_prep_cif) \
638 SymE_NeedsProto(ffi_call) \
639 SymE_NeedsProto(ffi_type_void) \
640 SymE_NeedsProto(ffi_type_float) \
641 SymE_NeedsProto(ffi_type_double) \
642 SymE_NeedsProto(ffi_type_sint64) \
643 SymE_NeedsProto(ffi_type_uint64) \
644 SymE_NeedsProto(ffi_type_sint32) \
645 SymE_NeedsProto(ffi_type_uint32) \
646 SymE_NeedsProto(ffi_type_sint16) \
647 SymE_NeedsProto(ffi_type_uint16) \
648 SymE_NeedsProto(ffi_type_sint8) \
649 SymE_NeedsProto(ffi_type_uint8) \
650 SymE_NeedsProto(ffi_type_pointer)
651
652 #ifdef TABLES_NEXT_TO_CODE
653 #define RTS_RET_SYMBOLS /* nothing */
654 #else
655 #define RTS_RET_SYMBOLS \
656 SymI_HasProto(stg_enter_ret) \
657 SymI_HasProto(stg_gc_fun_ret) \
658 SymI_HasProto(stg_ap_v_ret) \
659 SymI_HasProto(stg_ap_f_ret) \
660 SymI_HasProto(stg_ap_d_ret) \
661 SymI_HasProto(stg_ap_l_ret) \
662 SymI_HasProto(stg_ap_n_ret) \
663 SymI_HasProto(stg_ap_p_ret) \
664 SymI_HasProto(stg_ap_pv_ret) \
665 SymI_HasProto(stg_ap_pp_ret) \
666 SymI_HasProto(stg_ap_ppv_ret) \
667 SymI_HasProto(stg_ap_ppp_ret) \
668 SymI_HasProto(stg_ap_pppv_ret) \
669 SymI_HasProto(stg_ap_pppp_ret) \
670 SymI_HasProto(stg_ap_ppppp_ret) \
671 SymI_HasProto(stg_ap_pppppp_ret)
672 #endif
673
674 /* Modules compiled with -ticky may mention ticky counters */
675 /* This list should marry up with the one in $(TOP)/includes/stg/Ticky.h */
676 #define RTS_TICKY_SYMBOLS \
677 SymI_NeedsProto(ticky_entry_ctrs) \
678 SymI_NeedsProto(top_ct) \
679 \
680 SymI_HasProto(ENT_VIA_NODE_ctr) \
681 SymI_HasProto(ENT_STATIC_THK_ctr) \
682 SymI_HasProto(ENT_DYN_THK_ctr) \
683 SymI_HasProto(ENT_STATIC_FUN_DIRECT_ctr) \
684 SymI_HasProto(ENT_DYN_FUN_DIRECT_ctr) \
685 SymI_HasProto(ENT_STATIC_CON_ctr) \
686 SymI_HasProto(ENT_DYN_CON_ctr) \
687 SymI_HasProto(ENT_STATIC_IND_ctr) \
688 SymI_HasProto(ENT_DYN_IND_ctr) \
689 SymI_HasProto(ENT_PERM_IND_ctr) \
690 SymI_HasProto(ENT_PAP_ctr) \
691 SymI_HasProto(ENT_AP_ctr) \
692 SymI_HasProto(ENT_AP_STACK_ctr) \
693 SymI_HasProto(ENT_BH_ctr) \
694 SymI_HasProto(UNKNOWN_CALL_ctr) \
695 SymI_HasProto(SLOW_CALL_v_ctr) \
696 SymI_HasProto(SLOW_CALL_f_ctr) \
697 SymI_HasProto(SLOW_CALL_d_ctr) \
698 SymI_HasProto(SLOW_CALL_l_ctr) \
699 SymI_HasProto(SLOW_CALL_n_ctr) \
700 SymI_HasProto(SLOW_CALL_p_ctr) \
701 SymI_HasProto(SLOW_CALL_pv_ctr) \
702 SymI_HasProto(SLOW_CALL_pp_ctr) \
703 SymI_HasProto(SLOW_CALL_ppv_ctr) \
704 SymI_HasProto(SLOW_CALL_ppp_ctr) \
705 SymI_HasProto(SLOW_CALL_pppv_ctr) \
706 SymI_HasProto(SLOW_CALL_pppp_ctr) \
707 SymI_HasProto(SLOW_CALL_ppppp_ctr) \
708 SymI_HasProto(SLOW_CALL_pppppp_ctr) \
709 SymI_HasProto(SLOW_CALL_OTHER_ctr) \
710 SymI_HasProto(ticky_slow_call_unevald) \
711 SymI_HasProto(SLOW_CALL_ctr) \
712 SymI_HasProto(MULTI_CHUNK_SLOW_CALL_ctr) \
713 SymI_HasProto(MULTI_CHUNK_SLOW_CALL_CHUNKS_ctr) \
714 SymI_HasProto(KNOWN_CALL_ctr) \
715 SymI_HasProto(KNOWN_CALL_TOO_FEW_ARGS_ctr) \
716 SymI_HasProto(KNOWN_CALL_EXTRA_ARGS_ctr) \
717 SymI_HasProto(SLOW_CALL_FUN_TOO_FEW_ctr) \
718 SymI_HasProto(SLOW_CALL_FUN_CORRECT_ctr) \
719 SymI_HasProto(SLOW_CALL_FUN_TOO_MANY_ctr) \
720 SymI_HasProto(SLOW_CALL_PAP_TOO_FEW_ctr) \
721 SymI_HasProto(SLOW_CALL_PAP_CORRECT_ctr) \
722 SymI_HasProto(SLOW_CALL_PAP_TOO_MANY_ctr) \
723 SymI_HasProto(SLOW_CALL_UNEVALD_ctr) \
724 SymI_HasProto(UPDF_OMITTED_ctr) \
725 SymI_HasProto(UPDF_PUSHED_ctr) \
726 SymI_HasProto(CATCHF_PUSHED_ctr) \
727 SymI_HasProto(UPDF_RCC_PUSHED_ctr) \
728 SymI_HasProto(UPDF_RCC_OMITTED_ctr) \
729 SymI_HasProto(UPD_SQUEEZED_ctr) \
730 SymI_HasProto(UPD_CON_IN_NEW_ctr) \
731 SymI_HasProto(UPD_CON_IN_PLACE_ctr) \
732 SymI_HasProto(UPD_PAP_IN_NEW_ctr) \
733 SymI_HasProto(UPD_PAP_IN_PLACE_ctr) \
734 SymI_HasProto(ALLOC_HEAP_ctr) \
735 SymI_HasProto(ALLOC_HEAP_tot) \
736 SymI_HasProto(ALLOC_FUN_ctr) \
737 SymI_HasProto(ALLOC_FUN_adm) \
738 SymI_HasProto(ALLOC_FUN_gds) \
739 SymI_HasProto(ALLOC_FUN_slp) \
740 SymI_HasProto(UPD_NEW_IND_ctr) \
741 SymI_HasProto(UPD_NEW_PERM_IND_ctr) \
742 SymI_HasProto(UPD_OLD_IND_ctr) \
743 SymI_HasProto(UPD_OLD_PERM_IND_ctr) \
744 SymI_HasProto(UPD_BH_UPDATABLE_ctr) \
745 SymI_HasProto(UPD_BH_SINGLE_ENTRY_ctr) \
746 SymI_HasProto(UPD_CAF_BH_UPDATABLE_ctr) \
747 SymI_HasProto(UPD_CAF_BH_SINGLE_ENTRY_ctr) \
748 SymI_HasProto(GC_SEL_ABANDONED_ctr) \
749 SymI_HasProto(GC_SEL_MINOR_ctr) \
750 SymI_HasProto(GC_SEL_MAJOR_ctr) \
751 SymI_HasProto(GC_FAILED_PROMOTION_ctr) \
752 SymI_HasProto(ALLOC_UP_THK_ctr) \
753 SymI_HasProto(ALLOC_SE_THK_ctr) \
754 SymI_HasProto(ALLOC_THK_adm) \
755 SymI_HasProto(ALLOC_THK_gds) \
756 SymI_HasProto(ALLOC_THK_slp) \
757 SymI_HasProto(ALLOC_CON_ctr) \
758 SymI_HasProto(ALLOC_CON_adm) \
759 SymI_HasProto(ALLOC_CON_gds) \
760 SymI_HasProto(ALLOC_CON_slp) \
761 SymI_HasProto(ALLOC_TUP_ctr) \
762 SymI_HasProto(ALLOC_TUP_adm) \
763 SymI_HasProto(ALLOC_TUP_gds) \
764 SymI_HasProto(ALLOC_TUP_slp) \
765 SymI_HasProto(ALLOC_BH_ctr) \
766 SymI_HasProto(ALLOC_BH_adm) \
767 SymI_HasProto(ALLOC_BH_gds) \
768 SymI_HasProto(ALLOC_BH_slp) \
769 SymI_HasProto(ALLOC_PRIM_ctr) \
770 SymI_HasProto(ALLOC_PRIM_adm) \
771 SymI_HasProto(ALLOC_PRIM_gds) \
772 SymI_HasProto(ALLOC_PRIM_slp) \
773 SymI_HasProto(ALLOC_PAP_ctr) \
774 SymI_HasProto(ALLOC_PAP_adm) \
775 SymI_HasProto(ALLOC_PAP_gds) \
776 SymI_HasProto(ALLOC_PAP_slp) \
777 SymI_HasProto(ALLOC_TSO_ctr) \
778 SymI_HasProto(ALLOC_TSO_adm) \
779 SymI_HasProto(ALLOC_TSO_gds) \
780 SymI_HasProto(ALLOC_TSO_slp) \
781 SymI_HasProto(RET_NEW_ctr) \
782 SymI_HasProto(RET_OLD_ctr) \
783 SymI_HasProto(RET_UNBOXED_TUP_ctr) \
784 SymI_HasProto(RET_SEMI_loads_avoided)
785
786
787 // On most platforms, the garbage collector rewrites references
788 // to small integer and char objects to a set of common, shared ones.
789 //
790 // We don't do this when compiling to Windows DLLs at the moment because
791 // it doesn't support cross package data references well.
792 //
793 #if defined(__PIC__) && defined(mingw32_HOST_OS) && defined(i386_HOST_ARCH)
794 #define RTS_INTCHAR_SYMBOLS
795 #else
796 #define RTS_INTCHAR_SYMBOLS \
797 SymI_HasProto(stg_CHARLIKE_closure) \
798 SymI_HasProto(stg_INTLIKE_closure)
799 #endif
800
801
802 #define RTS_SYMBOLS \
803 Maybe_Stable_Names \
804 RTS_TICKY_SYMBOLS \
805 SymI_HasProto(StgReturn) \
806 SymI_HasProto(stg_enter_info) \
807 SymI_HasProto(stg_gc_void_info) \
808 SymI_HasProto(__stg_gc_enter_1) \
809 SymI_HasProto(stg_gc_noregs) \
810 SymI_HasProto(stg_gc_unpt_r1_info) \
811 SymI_HasProto(stg_gc_unpt_r1) \
812 SymI_HasProto(stg_gc_unbx_r1_info) \
813 SymI_HasProto(stg_gc_unbx_r1) \
814 SymI_HasProto(stg_gc_f1_info) \
815 SymI_HasProto(stg_gc_f1) \
816 SymI_HasProto(stg_gc_d1_info) \
817 SymI_HasProto(stg_gc_d1) \
818 SymI_HasProto(stg_gc_l1_info) \
819 SymI_HasProto(stg_gc_l1) \
820 SymI_HasProto(__stg_gc_fun) \
821 SymI_HasProto(stg_gc_fun_info) \
822 SymI_HasProto(stg_gc_gen) \
823 SymI_HasProto(stg_gc_gen_info) \
824 SymI_HasProto(stg_gc_gen_hp) \
825 SymI_HasProto(stg_gc_ut) \
826 SymI_HasProto(stg_gen_yield) \
827 SymI_HasProto(stg_yield_noregs) \
828 SymI_HasProto(stg_yield_to_interpreter) \
829 SymI_HasProto(stg_gen_block) \
830 SymI_HasProto(stg_block_noregs) \
831 SymI_HasProto(stg_block_1) \
832 SymI_HasProto(stg_block_takemvar) \
833 SymI_HasProto(stg_block_putmvar) \
834 MAIN_CAP_SYM \
835 SymI_HasProto(MallocFailHook) \
836 SymI_HasProto(OnExitHook) \
837 SymI_HasProto(OutOfHeapHook) \
838 SymI_HasProto(StackOverflowHook) \
839 SymI_HasProto(addDLL) \
840 SymI_HasProto(__int_encodeDouble) \
841 SymI_HasProto(__word_encodeDouble) \
842 SymI_HasProto(__2Int_encodeDouble) \
843 SymI_HasProto(__int_encodeFloat) \
844 SymI_HasProto(__word_encodeFloat) \
845 SymI_HasProto(stg_atomicallyzh) \
846 SymI_HasProto(barf) \
847 SymI_HasProto(debugBelch) \
848 SymI_HasProto(errorBelch) \
849 SymI_HasProto(sysErrorBelch) \
850 SymI_HasProto(stg_getMaskingStatezh) \
851 SymI_HasProto(stg_maskAsyncExceptionszh) \
852 SymI_HasProto(stg_maskUninterruptiblezh) \
853 SymI_HasProto(stg_catchzh) \
854 SymI_HasProto(stg_catchRetryzh) \
855 SymI_HasProto(stg_catchSTMzh) \
856 SymI_HasProto(stg_checkzh) \
857 SymI_HasProto(closure_flags) \
858 SymI_HasProto(cmp_thread) \
859 SymI_HasProto(createAdjustor) \
860 SymI_HasProto(stg_decodeDoublezu2Intzh) \
861 SymI_HasProto(stg_decodeFloatzuIntzh) \
862 SymI_HasProto(defaultsHook) \
863 SymI_HasProto(stg_delayzh) \
864 SymI_HasProto(stg_deRefWeakzh) \
865 SymI_HasProto(stg_deRefStablePtrzh) \
866 SymI_HasProto(dirty_MUT_VAR) \
867 SymI_HasProto(stg_forkzh) \
868 SymI_HasProto(stg_forkOnzh) \
869 SymI_HasProto(forkProcess) \
870 SymI_HasProto(forkOS_createThread) \
871 SymI_HasProto(freeHaskellFunctionPtr) \
872 SymI_HasProto(getOrSetGHCConcSignalSignalHandlerStore) \
873 SymI_HasProto(getOrSetGHCConcWindowsPendingDelaysStore) \
874 SymI_HasProto(getOrSetGHCConcWindowsIOManagerThreadStore) \
875 SymI_HasProto(getOrSetGHCConcWindowsProddingStore) \
876 SymI_HasProto(getOrSetSystemEventThreadEventManagerStore) \
877 SymI_HasProto(getOrSetSystemEventThreadIOManagerThreadStore) \
878 SymI_HasProto(getGCStats) \
879 SymI_HasProto(genSymZh) \
880 SymI_HasProto(genericRaise) \
881 SymI_HasProto(getProgArgv) \
882 SymI_HasProto(getFullProgArgv) \
883 SymI_HasProto(getStablePtr) \
884 SymI_HasProto(hs_init) \
885 SymI_HasProto(hs_exit) \
886 SymI_HasProto(hs_set_argv) \
887 SymI_HasProto(hs_add_root) \
888 SymI_HasProto(hs_perform_gc) \
889 SymI_HasProto(hs_free_stable_ptr) \
890 SymI_HasProto(hs_free_fun_ptr) \
891 SymI_HasProto(hs_hpc_rootModule) \
892 SymI_HasProto(hs_hpc_module) \
893 SymI_HasProto(initLinker) \
894 SymI_HasProto(stg_unpackClosurezh) \
895 SymI_HasProto(stg_getApStackValzh) \
896 SymI_HasProto(stg_getSparkzh) \
897 SymI_HasProto(stg_numSparkszh) \
898 SymI_HasProto(stg_isCurrentThreadBoundzh) \
899 SymI_HasProto(stg_isEmptyMVarzh) \
900 SymI_HasProto(stg_killThreadzh) \
901 SymI_HasProto(loadArchive) \
902 SymI_HasProto(loadObj) \
903 SymI_HasProto(insertStableSymbol) \
904 SymI_HasProto(insertSymbol) \
905 SymI_HasProto(lookupSymbol) \
906 SymI_HasProto(stg_makeStablePtrzh) \
907 SymI_HasProto(stg_mkApUpd0zh) \
908 SymI_HasProto(stg_myThreadIdzh) \
909 SymI_HasProto(stg_labelThreadzh) \
910 SymI_HasProto(stg_newArrayzh) \
911 SymI_HasProto(stg_newArrayArrayzh) \
912 SymI_HasProto(stg_newBCOzh) \
913 SymI_HasProto(stg_newByteArrayzh) \
914 SymI_HasProto_redirect(newCAF, newDynCAF) \
915 SymI_HasProto(stg_newMVarzh) \
916 SymI_HasProto(stg_newMutVarzh) \
917 SymI_HasProto(stg_newTVarzh) \
918 SymI_HasProto(stg_noDuplicatezh) \
919 SymI_HasProto(stg_atomicModifyMutVarzh) \
920 SymI_HasProto(stg_casMutVarzh) \
921 SymI_HasProto(stg_newPinnedByteArrayzh) \
922 SymI_HasProto(stg_newAlignedPinnedByteArrayzh) \
923 SymI_HasProto(newSpark) \
924 SymI_HasProto(performGC) \
925 SymI_HasProto(performMajorGC) \
926 SymI_HasProto(prog_argc) \
927 SymI_HasProto(prog_argv) \
928 SymI_HasProto(stg_putMVarzh) \
929 SymI_HasProto(stg_raisezh) \
930 SymI_HasProto(stg_raiseIOzh) \
931 SymI_HasProto(stg_readTVarzh) \
932 SymI_HasProto(stg_readTVarIOzh) \
933 SymI_HasProto(resumeThread) \
934 SymI_HasProto(setNumCapabilities) \
935 SymI_HasProto(getNumberOfProcessors) \
936 SymI_HasProto(resolveObjs) \
937 SymI_HasProto(stg_retryzh) \
938 SymI_HasProto(rts_apply) \
939 SymI_HasProto(rts_checkSchedStatus) \
940 SymI_HasProto(rts_eval) \
941 SymI_HasProto(rts_evalIO) \
942 SymI_HasProto(rts_evalLazyIO) \
943 SymI_HasProto(rts_evalStableIO) \
944 SymI_HasProto(rts_eval_) \
945 SymI_HasProto(rts_getBool) \
946 SymI_HasProto(rts_getChar) \
947 SymI_HasProto(rts_getDouble) \
948 SymI_HasProto(rts_getFloat) \
949 SymI_HasProto(rts_getInt) \
950 SymI_HasProto(rts_getInt8) \
951 SymI_HasProto(rts_getInt16) \
952 SymI_HasProto(rts_getInt32) \
953 SymI_HasProto(rts_getInt64) \
954 SymI_HasProto(rts_getPtr) \
955 SymI_HasProto(rts_getFunPtr) \
956 SymI_HasProto(rts_getStablePtr) \
957 SymI_HasProto(rts_getThreadId) \
958 SymI_HasProto(rts_getWord) \
959 SymI_HasProto(rts_getWord8) \
960 SymI_HasProto(rts_getWord16) \
961 SymI_HasProto(rts_getWord32) \
962 SymI_HasProto(rts_getWord64) \
963 SymI_HasProto(rts_lock) \
964 SymI_HasProto(rts_mkBool) \
965 SymI_HasProto(rts_mkChar) \
966 SymI_HasProto(rts_mkDouble) \
967 SymI_HasProto(rts_mkFloat) \
968 SymI_HasProto(rts_mkInt) \
969 SymI_HasProto(rts_mkInt8) \
970 SymI_HasProto(rts_mkInt16) \
971 SymI_HasProto(rts_mkInt32) \
972 SymI_HasProto(rts_mkInt64) \
973 SymI_HasProto(rts_mkPtr) \
974 SymI_HasProto(rts_mkFunPtr) \
975 SymI_HasProto(rts_mkStablePtr) \
976 SymI_HasProto(rts_mkString) \
977 SymI_HasProto(rts_mkWord) \
978 SymI_HasProto(rts_mkWord8) \
979 SymI_HasProto(rts_mkWord16) \
980 SymI_HasProto(rts_mkWord32) \
981 SymI_HasProto(rts_mkWord64) \
982 SymI_HasProto(rts_unlock) \
983 SymI_HasProto(rts_unsafeGetMyCapability) \
984 SymI_HasProto(rtsSupportsBoundThreads) \
985 SymI_HasProto(rts_isProfiled) \
986 SymI_HasProto(setProgArgv) \
987 SymI_HasProto(startupHaskell) \
988 SymI_HasProto(shutdownHaskell) \
989 SymI_HasProto(shutdownHaskellAndExit) \
990 SymI_HasProto(stable_ptr_table) \
991 SymI_HasProto(stackOverflow) \
992 SymI_HasProto(stg_CAF_BLACKHOLE_info) \
993 SymI_HasProto(stg_BLACKHOLE_info) \
994 SymI_HasProto(__stg_EAGER_BLACKHOLE_info) \
995 SymI_HasProto(stg_BLOCKING_QUEUE_CLEAN_info) \
996 SymI_HasProto(stg_BLOCKING_QUEUE_DIRTY_info) \
997 SymI_HasProto(startTimer) \
998 SymI_HasProto(stg_MVAR_CLEAN_info) \
999 SymI_HasProto(stg_MVAR_DIRTY_info) \
1000 SymI_HasProto(stg_IND_STATIC_info) \
1001 SymI_HasProto(stg_ARR_WORDS_info) \
1002 SymI_HasProto(stg_MUT_ARR_PTRS_DIRTY_info) \
1003 SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN_info) \
1004 SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN0_info) \
1005 SymI_HasProto(stg_WEAK_info) \
1006 SymI_HasProto(stg_ap_v_info) \
1007 SymI_HasProto(stg_ap_f_info) \
1008 SymI_HasProto(stg_ap_d_info) \
1009 SymI_HasProto(stg_ap_l_info) \
1010 SymI_HasProto(stg_ap_n_info) \
1011 SymI_HasProto(stg_ap_p_info) \
1012 SymI_HasProto(stg_ap_pv_info) \
1013 SymI_HasProto(stg_ap_pp_info) \
1014 SymI_HasProto(stg_ap_ppv_info) \
1015 SymI_HasProto(stg_ap_ppp_info) \
1016 SymI_HasProto(stg_ap_pppv_info) \
1017 SymI_HasProto(stg_ap_pppp_info) \
1018 SymI_HasProto(stg_ap_ppppp_info) \
1019 SymI_HasProto(stg_ap_pppppp_info) \
1020 SymI_HasProto(stg_ap_0_fast) \
1021 SymI_HasProto(stg_ap_v_fast) \
1022 SymI_HasProto(stg_ap_f_fast) \
1023 SymI_HasProto(stg_ap_d_fast) \
1024 SymI_HasProto(stg_ap_l_fast) \
1025 SymI_HasProto(stg_ap_n_fast) \
1026 SymI_HasProto(stg_ap_p_fast) \
1027 SymI_HasProto(stg_ap_pv_fast) \
1028 SymI_HasProto(stg_ap_pp_fast) \
1029 SymI_HasProto(stg_ap_ppv_fast) \
1030 SymI_HasProto(stg_ap_ppp_fast) \
1031 SymI_HasProto(stg_ap_pppv_fast) \
1032 SymI_HasProto(stg_ap_pppp_fast) \
1033 SymI_HasProto(stg_ap_ppppp_fast) \
1034 SymI_HasProto(stg_ap_pppppp_fast) \
1035 SymI_HasProto(stg_ap_1_upd_info) \
1036 SymI_HasProto(stg_ap_2_upd_info) \
1037 SymI_HasProto(stg_ap_3_upd_info) \
1038 SymI_HasProto(stg_ap_4_upd_info) \
1039 SymI_HasProto(stg_ap_5_upd_info) \
1040 SymI_HasProto(stg_ap_6_upd_info) \
1041 SymI_HasProto(stg_ap_7_upd_info) \
1042 SymI_HasProto(stg_exit) \
1043 SymI_HasProto(stg_sel_0_upd_info) \
1044 SymI_HasProto(stg_sel_10_upd_info) \
1045 SymI_HasProto(stg_sel_11_upd_info) \
1046 SymI_HasProto(stg_sel_12_upd_info) \
1047 SymI_HasProto(stg_sel_13_upd_info) \
1048 SymI_HasProto(stg_sel_14_upd_info) \
1049 SymI_HasProto(stg_sel_15_upd_info) \
1050 SymI_HasProto(stg_sel_1_upd_info) \
1051 SymI_HasProto(stg_sel_2_upd_info) \
1052 SymI_HasProto(stg_sel_3_upd_info) \
1053 SymI_HasProto(stg_sel_4_upd_info) \
1054 SymI_HasProto(stg_sel_5_upd_info) \
1055 SymI_HasProto(stg_sel_6_upd_info) \
1056 SymI_HasProto(stg_sel_7_upd_info) \
1057 SymI_HasProto(stg_sel_8_upd_info) \
1058 SymI_HasProto(stg_sel_9_upd_info) \
1059 SymI_HasProto(stg_upd_frame_info) \
1060 SymI_HasProto(stg_bh_upd_frame_info) \
1061 SymI_HasProto(suspendThread) \
1062 SymI_HasProto(stg_takeMVarzh) \
1063 SymI_HasProto(stg_threadStatuszh) \
1064 SymI_HasProto(stg_tryPutMVarzh) \
1065 SymI_HasProto(stg_tryTakeMVarzh) \
1066 SymI_HasProto(stg_unmaskAsyncExceptionszh) \
1067 SymI_HasProto(unloadObj) \
1068 SymI_HasProto(stg_unsafeThawArrayzh) \
1069 SymI_HasProto(stg_waitReadzh) \
1070 SymI_HasProto(stg_waitWritezh) \
1071 SymI_HasProto(stg_writeTVarzh) \
1072 SymI_HasProto(stg_yieldzh) \
1073 SymI_NeedsProto(stg_interp_constr_entry) \
1074 SymI_HasProto(stg_arg_bitmaps) \
1075 SymI_HasProto(large_alloc_lim) \
1076 SymI_HasProto(g0) \
1077 SymI_HasProto(allocate) \
1078 SymI_HasProto(allocateExec) \
1079 SymI_HasProto(freeExec) \
1080 SymI_HasProto(getAllocations) \
1081 SymI_HasProto(revertCAFs) \
1082 SymI_HasProto(RtsFlags) \
1083 SymI_NeedsProto(rts_breakpoint_io_action) \
1084 SymI_NeedsProto(rts_stop_next_breakpoint) \
1085 SymI_NeedsProto(rts_stop_on_exception) \
1086 SymI_HasProto(stopTimer) \
1087 SymI_HasProto(n_capabilities) \
1088 SymI_HasProto(stg_traceCcszh) \
1089 SymI_HasProto(stg_traceEventzh) \
1090 RTS_USER_SIGNALS_SYMBOLS \
1091 RTS_INTCHAR_SYMBOLS
1092
1093
1094 // 64-bit support functions in libgcc.a
1095 #if defined(__GNUC__) && SIZEOF_VOID_P <= 4
1096 #define RTS_LIBGCC_SYMBOLS \
1097 SymI_NeedsProto(__divdi3) \
1098 SymI_NeedsProto(__udivdi3) \
1099 SymI_NeedsProto(__moddi3) \
1100 SymI_NeedsProto(__umoddi3) \
1101 SymI_NeedsProto(__muldi3) \
1102 SymI_NeedsProto(__ashldi3) \
1103 SymI_NeedsProto(__ashrdi3) \
1104 SymI_NeedsProto(__lshrdi3) \
1105 SymI_NeedsProto(__fixunsdfdi)
1106 #else
1107 #define RTS_LIBGCC_SYMBOLS
1108 #endif
1109
1110 #if defined(darwin_HOST_OS) && defined(powerpc_HOST_ARCH)
1111 // Symbols that don't have a leading underscore
1112 // on Mac OS X. They have to receive special treatment,
1113 // see machoInitSymbolsWithoutUnderscore()
1114 #define RTS_MACHO_NOUNDERLINE_SYMBOLS \
1115 SymI_NeedsProto(saveFP) \
1116 SymI_NeedsProto(restFP)
1117 #endif
1118
1119 /* entirely bogus claims about types of these symbols */
1120 #define SymI_NeedsProto(vvv) extern void vvv(void);
1121 #if defined(__PIC__) && defined(mingw32_HOST_OS) && defined(i386_HOST_ARCH)
1122 #define SymE_HasProto(vvv) SymE_HasProto(vvv);
1123 #define SymE_NeedsProto(vvv) extern void _imp__ ## vvv (void);
1124 #else
1125 #define SymE_NeedsProto(vvv) SymI_NeedsProto(vvv);
1126 #define SymE_HasProto(vvv) SymI_HasProto(vvv)
1127 #endif
1128 #define SymI_HasProto(vvv) /**/
1129 #define SymI_HasProto_redirect(vvv,xxx) /**/
1130 RTS_SYMBOLS
1131 RTS_RET_SYMBOLS
1132 RTS_POSIX_ONLY_SYMBOLS
1133 RTS_MINGW_ONLY_SYMBOLS
1134 RTS_CYGWIN_ONLY_SYMBOLS
1135 RTS_DARWIN_ONLY_SYMBOLS
1136 RTS_LIBGCC_SYMBOLS
1137 RTS_LIBFFI_SYMBOLS
1138 #undef SymI_NeedsProto
1139 #undef SymI_HasProto
1140 #undef SymI_HasProto_redirect
1141 #undef SymE_HasProto
1142 #undef SymE_NeedsProto
1143
1144 #ifdef LEADING_UNDERSCORE
1145 #define MAYBE_LEADING_UNDERSCORE_STR(s) ("_" s)
1146 #else
1147 #define MAYBE_LEADING_UNDERSCORE_STR(s) (s)
1148 #endif
1149
1150 #define SymI_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
1151 (void*)(&(vvv)) },
1152 #define SymE_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
1153 (void*)DLL_IMPORT_DATA_REF(vvv) },
1154
1155 #define SymI_NeedsProto(vvv) SymI_HasProto(vvv)
1156 #define SymE_NeedsProto(vvv) SymE_HasProto(vvv)
1157
1158 // SymI_HasProto_redirect allows us to redirect references to one symbol to
1159 // another symbol. See newCAF/newDynCAF for an example.
1160 #define SymI_HasProto_redirect(vvv,xxx) \
1161 { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
1162 (void*)(&(xxx)) },
1163
1164 static RtsSymbolVal rtsSyms[] = {
1165 RTS_SYMBOLS
1166 RTS_RET_SYMBOLS
1167 RTS_POSIX_ONLY_SYMBOLS
1168 RTS_MINGW_ONLY_SYMBOLS
1169 RTS_CYGWIN_ONLY_SYMBOLS
1170 RTS_DARWIN_ONLY_SYMBOLS
1171 RTS_LIBGCC_SYMBOLS
1172 RTS_LIBFFI_SYMBOLS
1173 #if defined(darwin_HOST_OS) && defined(i386_HOST_ARCH)
1174 // dyld stub code contains references to this,
1175 // but it should never be called because we treat
1176 // lazy pointers as nonlazy.
1177 { "dyld_stub_binding_helper", (void*)0xDEADBEEF },
1178 #endif
1179 { 0, 0 } /* sentinel */
1180 };
1181
1182
1183 /* -----------------------------------------------------------------------------
1184 * Insert symbols into hash tables, checking for duplicates.
1185 */
1186
1187 static void ghciInsertStrHashTable ( pathchar* obj_name,
1188 HashTable *table,
1189 char* key,
1190 void *data
1191 )
1192 {
1193 if (lookupHashTable(table, (StgWord)key) == NULL)
1194 {
1195 insertStrHashTable(table, (StgWord)key, data);
1196 return;
1197 }
1198 debugBelch(
1199 "\n\n"
1200 "GHCi runtime linker: fatal error: I found a duplicate definition for symbol\n"
1201 " %s\n"
1202 "whilst processing object file\n"
1203 " %" PATH_FMT "\n"
1204 "This could be caused by:\n"
1205 " * Loading two different object files which export the same symbol\n"
1206 " * Specifying the same object file twice on the GHCi command line\n"
1207 " * An incorrect `package.conf' entry, causing some object to be\n"
1208 " loaded twice.\n"
1209 "GHCi cannot safely continue in this situation. Exiting now. Sorry.\n"
1210 "\n",
1211 (char*)key,
1212 obj_name
1213 );
1214 stg_exit(1);
1215 }
1216 /* -----------------------------------------------------------------------------
1217 * initialize the object linker
1218 */
1219
1220
1221 static int linker_init_done = 0 ;
1222
1223 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1224 static void *dl_prog_handle;
1225 static regex_t re_invalid;
1226 static regex_t re_realso;
1227 #ifdef THREADED_RTS
1228 static Mutex dl_mutex; // mutex to protect dlopen/dlerror critical section
1229 #endif
1230 #endif
1231
1232 void
1233 initLinker( void )
1234 {
1235 RtsSymbolVal *sym;
1236 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1237 int compileResult;
1238 #endif
1239
1240 IF_DEBUG(linker, debugBelch("initLinker: start\n"));
1241
1242 /* Make initLinker idempotent, so we can call it
1243 before evey relevant operation; that means we
1244 don't need to initialise the linker separately */
1245 if (linker_init_done == 1) {
1246 IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
1247 return;
1248 } else {
1249 linker_init_done = 1;
1250 }
1251
1252 #if defined(THREADED_RTS) && (defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO))
1253 initMutex(&dl_mutex);
1254 #endif
1255 stablehash = allocStrHashTable();
1256 symhash = allocStrHashTable();
1257
1258 /* populate the symbol table with stuff from the RTS */
1259 for (sym = rtsSyms; sym->lbl != NULL; sym++) {
1260 ghciInsertStrHashTable(WSTR("(GHCi built-in symbols)"),
1261 symhash, sym->lbl, sym->addr);
1262 IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
1263 }
1264 # if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
1265 machoInitSymbolsWithoutUnderscore();
1266 # endif
1267
1268 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1269 # if defined(RTLD_DEFAULT)
1270 dl_prog_handle = RTLD_DEFAULT;
1271 # else
1272 dl_prog_handle = dlopen(NULL, RTLD_LAZY);
1273 # endif /* RTLD_DEFAULT */
1274
1275 compileResult = regcomp(&re_invalid,
1276 "(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short)",
1277 REG_EXTENDED);
1278 if (compileResult != 0) {
1279 barf("Compiling re_invalid failed");
1280 }
1281 compileResult = regcomp(&re_realso,
1282 "(GROUP|INPUT) *\\( *([^ )]+)",
1283 REG_EXTENDED);
1284 if (compileResult != 0) {
1285 barf("Compiling re_realso failed");
1286 }
1287 # endif
1288
1289 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1290 if (RtsFlags.MiscFlags.linkerMemBase != 0) {
1291 // User-override for mmap_32bit_base
1292 mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
1293 }
1294 #endif
1295
1296 #if defined(mingw32_HOST_OS)
1297 /*
1298 * These two libraries cause problems when added to the static link,
1299 * but are necessary for resolving symbols in GHCi, hence we load
1300 * them manually here.
1301 */
1302 addDLL(WSTR("msvcrt"));
1303 addDLL(WSTR("kernel32"));
1304 #endif
1305
1306 IF_DEBUG(linker, debugBelch("initLinker: done\n"));
1307 return;
1308 }
1309
1310 void
1311 exitLinker( void ) {
1312 #if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1313 if (linker_init_done == 1) {
1314 regfree(&re_invalid);
1315 regfree(&re_realso);
1316 #ifdef THREADED_RTS
1317 closeMutex(&dl_mutex);
1318 #endif
1319 }
1320 #endif
1321 }
1322
1323 /* -----------------------------------------------------------------------------
1324 * Loading DLL or .so dynamic libraries
1325 * -----------------------------------------------------------------------------
1326 *
1327 * Add a DLL from which symbols may be found. In the ELF case, just
1328 * do RTLD_GLOBAL-style add, so no further messing around needs to
1329 * happen in order that symbols in the loaded .so are findable --
1330 * lookupSymbol() will subsequently see them by dlsym on the program's
1331 * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
1332 *
1333 * In the PEi386 case, open the DLLs and put handles to them in a
1334 * linked list. When looking for a symbol, try all handles in the
1335 * list. This means that we need to load even DLLs that are guaranteed
1336 * to be in the ghc.exe image already, just so we can get a handle
1337 * to give to loadSymbol, so that we can find the symbols. For such
1338 * libraries, the LoadLibrary call should be a no-op except for returning
1339 * the handle.
1340 *
1341 */
1342
1343 #if defined(OBJFORMAT_PEi386)
1344 /* A record for storing handles into DLLs. */
1345
1346 typedef
1347 struct _OpenedDLL {
1348 pathchar* name;
1349 struct _OpenedDLL* next;
1350 HINSTANCE instance;
1351 }
1352 OpenedDLL;
1353
1354 /* A list thereof. */
1355 static OpenedDLL* opened_dlls = NULL;
1356 #endif
1357
1358 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1359
1360 static const char *
1361 internal_dlopen(const char *dll_name)
1362 {
1363 void *hdl;
1364 const char *errmsg;
1365 char *errmsg_copy;
1366
1367 // omitted: RTLD_NOW
1368 // see http://www.haskell.org/pipermail/cvs-ghc/2007-September/038570.html
1369 IF_DEBUG(linker,
1370 debugBelch("internal_dlopen: dll_name = '%s'\n", dll_name));
1371
1372 //-------------- Begin critical section ------------------
1373 // This critical section is necessary because dlerror() is not
1374 // required to be reentrant (see POSIX -- IEEE Std 1003.1-2008)
1375 // Also, the error message returned must be copied to preserve it
1376 // (see POSIX also)
1377
1378 ACQUIRE_LOCK(&dl_mutex);
1379 hdl = dlopen(dll_name, RTLD_LAZY | RTLD_GLOBAL);
1380
1381 errmsg = NULL;
1382 if (hdl == NULL) {
1383 /* dlopen failed; return a ptr to the error msg. */
1384 errmsg = dlerror();
1385 if (errmsg == NULL) errmsg = "addDLL: unknown error";
1386 errmsg_copy = stgMallocBytes(strlen(errmsg)+1, "addDLL");
1387 strcpy(errmsg_copy, errmsg);
1388 errmsg = errmsg_copy;
1389 }
1390 RELEASE_LOCK(&dl_mutex);
1391 //--------------- End critical section -------------------
1392
1393 return errmsg;
1394 }
1395 # endif
1396
1397 const char *
1398 addDLL( pathchar *dll_name )
1399 {
1400 # if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
1401 /* ------------------- ELF DLL loader ------------------- */
1402
1403 #define NMATCH 5
1404 regmatch_t match[NMATCH];
1405 const char *errmsg;
1406 FILE* fp;
1407 size_t match_length;
1408 #define MAXLINE 1000
1409 char line[MAXLINE];
1410 int result;
1411
1412 initLinker();
1413
1414 IF_DEBUG(linker, debugBelch("addDLL: dll_name = '%s'\n", dll_name));
1415 errmsg = internal_dlopen(dll_name);
1416
1417 if (errmsg == NULL) {
1418 return NULL;
1419 }
1420
1421 // GHC Trac ticket #2615
1422 // On some systems (e.g., Gentoo Linux) dynamic files (e.g. libc.so)
1423 // contain linker scripts rather than ELF-format object code. This
1424 // code handles the situation by recognizing the real object code
1425 // file name given in the linker script.
1426 //
1427 // If an "invalid ELF header" error occurs, it is assumed that the
1428 // .so file contains a linker script instead of ELF object code.
1429 // In this case, the code looks for the GROUP ( ... ) linker
1430 // directive. If one is found, the first file name inside the
1431 // parentheses is treated as the name of a dynamic library and the
1432 // code attempts to dlopen that file. If this is also unsuccessful,
1433 // an error message is returned.
1434
1435 // see if the error message is due to an invalid ELF header
1436 IF_DEBUG(linker, debugBelch("errmsg = '%s'\n", errmsg));
1437 result = regexec(&re_invalid, errmsg, (size_t) NMATCH, match, 0);
1438 IF_DEBUG(linker, debugBelch("result = %i\n", result));
1439 if (result == 0) {
1440 // success -- try to read the named file as a linker script
1441 match_length = (size_t) stg_min((match[1].rm_eo - match[1].rm_so),
1442 MAXLINE-1);
1443 strncpy(line, (errmsg+(match[1].rm_so)),match_length);
1444 line[match_length] = '\0'; // make sure string is null-terminated
1445 IF_DEBUG(linker, debugBelch ("file name = '%s'\n", line));
1446 if ((fp = fopen(line, "r")) == NULL) {
1447 return errmsg; // return original error if open fails
1448 }
1449 // try to find a GROUP or INPUT ( ... ) command
1450 while (fgets(line, MAXLINE, fp) != NULL) {
1451 IF_DEBUG(linker, debugBelch("input line = %s", line));
1452 if (regexec(&re_realso, line, (size_t) NMATCH, match, 0) == 0) {
1453 // success -- try to dlopen the first named file
1454 IF_DEBUG(linker, debugBelch("match%s\n",""));
1455 line[match[2].rm_eo] = '\0';
1456 errmsg = internal_dlopen(line+match[2].rm_so);
1457 break;
1458 }
1459 // if control reaches here, no GROUP or INPUT ( ... ) directive
1460 // was found and the original error message is returned to the
1461 // caller
1462 }
1463 fclose(fp);
1464 }
1465 return errmsg;
1466
1467 # elif defined(OBJFORMAT_PEi386)
1468 /* ------------------- Win32 DLL loader ------------------- */
1469
1470 pathchar* buf;
1471 OpenedDLL* o_dll;
1472 HINSTANCE instance;
1473
1474 initLinker();
1475
1476 /* debugBelch("\naddDLL; dll_name = `%s'\n", dll_name); */
1477
1478 /* See if we've already got it, and ignore if so. */
1479 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
1480 if (0 == pathcmp(o_dll->name, dll_name))
1481 return NULL;
1482 }
1483
1484 /* The file name has no suffix (yet) so that we can try
1485 both foo.dll and foo.drv
1486
1487 The documentation for LoadLibrary says:
1488 If no file name extension is specified in the lpFileName
1489 parameter, the default library extension .dll is
1490 appended. However, the file name string can include a trailing
1491 point character (.) to indicate that the module name has no
1492 extension. */
1493
1494 buf = stgMallocBytes((pathlen(dll_name) + 10) * sizeof(wchar_t), "addDLL");
1495 swprintf(buf, L"%s.DLL", dll_name);
1496 instance = LoadLibraryW(buf);
1497 if (instance == NULL) {
1498 if (GetLastError() != ERROR_MOD_NOT_FOUND) goto error;
1499 // KAA: allow loading of drivers (like winspool.drv)
1500 swprintf(buf, L"%s.DRV", dll_name);
1501 instance = LoadLibraryW(buf);
1502 if (instance == NULL) {
1503 if (GetLastError() != ERROR_MOD_NOT_FOUND) goto error;
1504 // #1883: allow loading of unix-style libfoo.dll DLLs
1505 swprintf(buf, L"lib%s.DLL", dll_name);
1506 instance = LoadLibraryW(buf);
1507 if (instance == NULL) {
1508 goto error;
1509 }
1510 }
1511 }
1512 stgFree(buf);
1513
1514 /* Add this DLL to the list of DLLs in which to search for symbols. */
1515 o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLL" );
1516 o_dll->name = pathdup(dll_name);
1517 o_dll->instance = instance;
1518 o_dll->next = opened_dlls;
1519 opened_dlls = o_dll;
1520
1521 return NULL;
1522
1523 error:
1524 stgFree(buf);
1525 sysErrorBelch("%" PATH_FMT, dll_name);
1526
1527 /* LoadLibrary failed; return a ptr to the error msg. */
1528 return "addDLL: could not load DLL";
1529
1530 # else
1531 barf("addDLL: not implemented on this platform");
1532 # endif
1533 }
1534
1535 /* -----------------------------------------------------------------------------
1536 * insert a stable symbol in the hash table
1537 */
1538
1539 void
1540 insertStableSymbol(pathchar* obj_name, char* key, StgPtr p)
1541 {
1542 ghciInsertStrHashTable(obj_name, stablehash, key, getStablePtr(p));
1543 }
1544
1545
1546 /* -----------------------------------------------------------------------------
1547 * insert a symbol in the hash table
1548 */
1549 void
1550 insertSymbol(pathchar* obj_name, char* key, void* data)
1551 {
1552 ghciInsertStrHashTable(obj_name, symhash, key, data);
1553 }
1554
1555 /* -----------------------------------------------------------------------------
1556 * lookup a symbol in the hash table
1557 */
1558 void *
1559 lookupSymbol( char *lbl )
1560 {
1561 void *val;
1562 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s\n", lbl));
1563 initLinker() ;
1564 ASSERT(symhash != NULL);
1565 val = lookupStrHashTable(symhash, lbl);
1566
1567 if (val == NULL) {
1568 IF_DEBUG(linker, debugBelch("lookupSymbol: symbol not found\n"));
1569 # if defined(OBJFORMAT_ELF)
1570 return dlsym(dl_prog_handle, lbl);
1571 # elif defined(OBJFORMAT_MACHO)
1572 # if HAVE_DLFCN_H
1573 /* On OS X 10.3 and later, we use dlsym instead of the old legacy
1574 interface.
1575
1576 HACK: On OS X, all symbols are prefixed with an underscore.
1577 However, dlsym wants us to omit the leading underscore from the
1578 symbol name -- the dlsym routine puts it back on before searching
1579 for the symbol. For now, we simply strip it off here (and ONLY
1580 here).
1581 */
1582 IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n", lbl));
1583 ASSERT(lbl[0] == '_');
1584 return dlsym(dl_prog_handle, lbl + 1);
1585 # else
1586 if (NSIsSymbolNameDefined(lbl)) {
1587 NSSymbol symbol = NSLookupAndBindSymbol(lbl);
1588 return NSAddressOfSymbol(symbol);
1589 } else {
1590 return NULL;
1591 }
1592 # endif /* HAVE_DLFCN_H */
1593 # elif defined(OBJFORMAT_PEi386)
1594 void* sym;
1595
1596 sym = lookupSymbolInDLLs((unsigned char*)lbl);
1597 if (sym != NULL) { return sym; };
1598
1599 // Also try looking up the symbol without the @N suffix. Some
1600 // DLLs have the suffixes on their symbols, some don't.
1601 zapTrailingAtSign ( (unsigned char*)lbl );
1602 sym = lookupSymbolInDLLs((unsigned char*)lbl);
1603 if (sym != NULL) { return sym; };
1604 return NULL;
1605
1606 # else
1607 ASSERT(2+2 == 5);
1608 return NULL;
1609 # endif
1610 } else {
1611 IF_DEBUG(linker, debugBelch("lookupSymbol: value of %s is %p\n", lbl, val));
1612 return val;
1613 }
1614 }
1615
1616 /* -----------------------------------------------------------------------------
1617 * Debugging aid: look in GHCi's object symbol tables for symbols
1618 * within DELTA bytes of the specified address, and show their names.
1619 */
1620 #ifdef DEBUG
1621 void ghci_enquire ( char* addr );
1622
1623 void ghci_enquire ( char* addr )
1624 {
1625 int i;
1626 char* sym;
1627 char* a;
1628 const int DELTA = 64;
1629 ObjectCode* oc;
1630
1631 initLinker();
1632
1633 for (oc = objects; oc; oc = oc->next) {
1634 for (i = 0; i < oc->n_symbols; i++) {
1635 sym = oc->symbols[i];
1636 if (sym == NULL) continue;
1637 a = NULL;
1638 if (a == NULL) {
1639 a = lookupStrHashTable(symhash, sym);
1640 }
1641 if (a == NULL) {
1642 // debugBelch("ghci_enquire: can't find %s\n", sym);
1643 }
1644 else if (addr-DELTA <= a && a <= addr+DELTA) {
1645 debugBelch("%p + %3d == `%s'\n", addr, (int)(a - addr), sym);
1646 }
1647 }
1648 }
1649 }
1650 #endif
1651
1652 #ifdef USE_MMAP
1653 #define ROUND_UP(x,size) ((x + size - 1) & ~(size - 1))
1654
1655 static void *
1656 mmapForLinker (size_t bytes, nat flags, int fd)
1657 {
1658 void *map_addr = NULL;
1659 void *result;
1660 int pagesize, size;
1661 static nat fixed = 0;
1662
1663 IF_DEBUG(linker, debugBelch("mmapForLinker: start\n"));
1664 pagesize = getpagesize();
1665 size = ROUND_UP(bytes, pagesize);
1666
1667 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1668 mmap_again:
1669
1670 if (mmap_32bit_base != 0) {
1671 map_addr = mmap_32bit_base;
1672 }
1673 #endif
1674
1675 IF_DEBUG(linker, debugBelch("mmapForLinker: \tprotection %#0x\n", PROT_EXEC | PROT_READ | PROT_WRITE));
1676 IF_DEBUG(linker, debugBelch("mmapForLinker: \tflags %#0x\n", MAP_PRIVATE | TRY_MAP_32BIT | fixed | flags));
1677 result = mmap(map_addr, size, PROT_EXEC|PROT_READ|PROT_WRITE,
1678 MAP_PRIVATE|TRY_MAP_32BIT|fixed|flags, fd, 0);
1679
1680 if (result == MAP_FAILED) {
1681 sysErrorBelch("mmap %lu bytes at %p",(lnat)size,map_addr);
1682 errorBelch("Try specifying an address with +RTS -xm<addr> -RTS");
1683 stg_exit(EXIT_FAILURE);
1684 }
1685
1686 #if !defined(ALWAYS_PIC) && defined(x86_64_HOST_ARCH)
1687 if (mmap_32bit_base != 0) {
1688 if (result == map_addr) {
1689 mmap_32bit_base = (StgWord8*)map_addr + size;
1690 } else {
1691 if ((W_)result > 0x80000000) {
1692 // oops, we were given memory over 2Gb
1693 #if defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) || defined(dragonfly_HOST_OS)
1694 // Some platforms require MAP_FIXED. This is normally
1695 // a bad idea, because MAP_FIXED will overwrite
1696 // existing mappings.
1697 munmap(result,size);
1698 fixed = MAP_FIXED;
1699 goto mmap_again;
1700 #else
1701 barf("loadObj: failed to mmap() memory below 2Gb; asked for %lu bytes at %p. Try specifying an address with +RTS -xm<addr> -RTS", size, map_addr, result);
1702 #endif
1703 } else {
1704 // hmm, we were given memory somewhere else, but it's
1705 // still under 2Gb so we can use it. Next time, ask
1706 // for memory right after the place we just got some
1707 mmap_32bit_base = (StgWord8*)result + size;
1708 }
1709 }
1710 } else {
1711 if ((W_)result > 0x80000000) {
1712 // oops, we were given memory over 2Gb
1713 // ... try allocating memory somewhere else?;
1714 debugTrace(DEBUG_linker,"MAP_32BIT didn't work; gave us %lu bytes at 0x%p", bytes, result);
1715 munmap(result, size);
1716
1717 // Set a base address and try again... (guess: 1Gb)
1718 mmap_32bit_base = (void*)0x40000000;
1719 goto mmap_again;
1720 }
1721 }
1722 #endif
1723
1724 IF_DEBUG(linker, debugBelch("mmapForLinker: mapped %lu bytes starting at %p\n", (lnat)size, result));
1725 IF_DEBUG(linker, debugBelch("mmapForLinker: done\n"));
1726 return result;
1727 }
1728 #endif // USE_MMAP
1729
1730 static ObjectCode*
1731 mkOc( pathchar *path, char *image, int imageSize,
1732 char *archiveMemberName
1733 #ifndef USE_MMAP
1734 #ifdef darwin_HOST_OS
1735 , int misalignment
1736 #endif
1737 #endif
1738 ) {
1739 ObjectCode* oc;
1740
1741 IF_DEBUG(linker, debugBelch("mkOc: start\n"));
1742 oc = stgMallocBytes(sizeof(ObjectCode), "loadArchive(oc)");
1743
1744 # if defined(OBJFORMAT_ELF)
1745 oc->formatName = "ELF";
1746 # elif defined(OBJFORMAT_PEi386)
1747 oc->formatName = "PEi386";
1748 # elif defined(OBJFORMAT_MACHO)
1749 oc->formatName = "Mach-O";
1750 # else
1751 stgFree(oc);
1752 barf("loadObj: not implemented on this platform");
1753 # endif
1754
1755 oc->image = image;
1756 oc->fileName = pathdup(path);
1757
1758 if (archiveMemberName) {
1759 oc->archiveMemberName = stgMallocBytes( strlen(archiveMemberName)+1, "loadObj" );
1760 strcpy(oc->archiveMemberName, archiveMemberName);
1761 }
1762 else {
1763 oc->archiveMemberName = NULL;
1764 }
1765
1766 oc->fileSize = imageSize;
1767 oc->symbols = NULL;
1768 oc->sections = NULL;
1769 oc->proddables = NULL;
1770
1771 #ifndef USE_MMAP
1772 #ifdef darwin_HOST_OS
1773 oc->misalignment = misalignment;
1774 #endif
1775 #endif
1776
1777 /* chain it onto the list of objects */
1778 oc->next = objects;
1779 objects = oc;
1780
1781 IF_DEBUG(linker, debugBelch("mkOc: done\n"));
1782 return oc;
1783 }
1784
1785 HsInt
1786 loadArchive( pathchar *path )
1787 {
1788 ObjectCode* oc;
1789 char *image;
1790 int memberSize;
1791 FILE *f;
1792 int n;
1793 size_t thisFileNameSize;
1794 char *fileName;
1795 size_t fileNameSize;
1796 int isObject, isGnuIndex;
1797 char tmp[20];
1798 char *gnuFileIndex;
1799 int gnuFileIndexSize;
1800 #if defined(darwin_HOST_OS)
1801 int i;
1802 uint32_t nfat_arch, nfat_offset, cputype, cpusubtype;
1803 #if defined(i386_HOST_ARCH)
1804 const uint32_t mycputype = CPU_TYPE_X86;
1805 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_ALL;
1806 #elif defined(x86_64_HOST_ARCH)
1807 const uint32_t mycputype = CPU_TYPE_X86_64;
1808 const uint32_t mycpusubtype = CPU_SUBTYPE_X86_64_ALL;
1809 #elif defined(powerpc_HOST_ARCH)
1810 const uint32_t mycputype = CPU_TYPE_POWERPC;
1811 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1812 #elif defined(powerpc64_HOST_ARCH)
1813 const uint32_t mycputype = CPU_TYPE_POWERPC64;
1814 const uint32_t mycpusubtype = CPU_SUBTYPE_POWERPC_ALL;
1815 #else
1816 #error Unknown Darwin architecture
1817 #endif
1818 #if !defined(USE_MMAP)
1819 int misalignment;
1820 #endif
1821 #endif
1822
1823 IF_DEBUG(linker, debugBelch("loadArchive: start\n"));
1824 IF_DEBUG(linker, debugBelch("loadArchive: Loading archive `%" PATH_FMT" '\n", path));
1825
1826 gnuFileIndex = NULL;
1827 gnuFileIndexSize = 0;
1828
1829 fileNameSize = 32;
1830 fileName = stgMallocBytes(fileNameSize, "loadArchive(fileName)");
1831
1832 f = pathopen(path, WSTR("rb"));
1833 if (!f)
1834 barf("loadObj: can't read `%s'", path);
1835
1836 /* Check if this is an archive by looking for the magic "!<arch>\n"
1837 * string. Usually, if this fails, we barf and quit. On Darwin however,
1838 * we may have a fat archive, which contains archives for more than
1839 * one architecture. Fat archives start with the magic number 0xcafebabe,
1840 * always stored big endian. If we find a fat_header, we scan through
1841 * the fat_arch structs, searching through for one for our host
1842 * architecture. If a matching struct is found, we read the offset
1843 * of our archive data (nfat_offset) and seek forward nfat_offset bytes
1844 * from the start of the file.
1845 *
1846 * A subtlety is that all of the members of the fat_header and fat_arch
1847 * structs are stored big endian, so we need to call byte order
1848 * conversion functions.
1849 *
1850 * If we find the appropriate architecture in a fat archive, we gobble
1851 * its magic "!<arch>\n" string and continue processing just as if
1852 * we had a single architecture archive.
1853 */
1854
1855 n = fread ( tmp, 1, 8, f );
1856 if (n != 8)
1857 barf("loadArchive: Failed reading header from `%s'", path);
1858 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1859
1860 #if defined(darwin_HOST_OS)
1861 /* Not a standard archive, look for a fat archive magic number: */
1862 if (ntohl(*(uint32_t *)tmp) == FAT_MAGIC) {
1863 nfat_arch = ntohl(*(uint32_t *)(tmp + 4));
1864 IF_DEBUG(linker, debugBelch("loadArchive: found a fat archive containing %d architectures\n", nfat_arch));
1865 nfat_offset = 0;
1866
1867 for (i = 0; i < (int)nfat_arch; i++) {
1868 /* search for the right arch */
1869 n = fread( tmp, 1, 20, f );
1870 if (n != 8)
1871 barf("loadArchive: Failed reading arch from `%s'", path);
1872 cputype = ntohl(*(uint32_t *)tmp);
1873 cpusubtype = ntohl(*(uint32_t *)(tmp + 4));
1874
1875 if (cputype == mycputype && cpusubtype == mycpusubtype) {
1876 IF_DEBUG(linker, debugBelch("loadArchive: found my archive in a fat archive\n"));
1877 nfat_offset = ntohl(*(uint32_t *)(tmp + 8));
1878 break;
1879 }
1880 }
1881
1882 if (nfat_offset == 0) {
1883 barf ("loadArchive: searched %d architectures, but no host arch found", (int)nfat_arch);
1884 }
1885 else {
1886 n = fseek( f, nfat_offset, SEEK_SET );
1887 if (n != 0)
1888 barf("loadArchive: Failed to seek to arch in `%s'", path);
1889 n = fread ( tmp, 1, 8, f );
1890 if (n != 8)
1891 barf("loadArchive: Failed reading header from `%s'", path);
1892 if (strncmp(tmp, "!<arch>\n", 8) != 0) {
1893 barf("loadArchive: couldn't find archive in `%s' at offset %d", path, nfat_offset);
1894 }
1895 }
1896 }
1897 else {
1898 barf("loadArchive: Neither an archive, nor a fat archive: `%s'", path);
1899 }
1900
1901 #else
1902 barf("loadArchive: Not an archive: `%s'", path);
1903 #endif
1904 }
1905
1906 IF_DEBUG(linker, debugBelch("loadArchive: loading archive contents\n"));
1907
1908 while(1) {
1909 n = fread ( fileName, 1, 16, f );
1910 if (n != 16) {
1911 if (feof(f)) {
1912 IF_DEBUG(linker, debugBelch("loadArchive: EOF while reading from '%" PATH_FMT "'\n", path));
1913 break;
1914 }
1915 else {
1916 barf("loadArchive: Failed reading file name from `%s'", path);
1917 }
1918 }
1919
1920 #if defined(darwin_HOST_OS)
1921 if (strncmp(fileName, "!<arch>\n", 8) == 0) {
1922 IF_DEBUG(linker, debugBelch("loadArchive: found the start of another archive, breaking\n"));
1923 break;
1924 }
1925 #endif
1926
1927 n = fread ( tmp, 1, 12, f );
1928 if (n != 12)
1929 barf("loadArchive: Failed reading mod time from `%s'", path);
1930 n = fread ( tmp, 1, 6, f );
1931 if (n != 6)
1932 barf("loadArchive: Failed reading owner from `%s'", path);
1933 n = fread ( tmp, 1, 6, f );
1934 if (n != 6)
1935 barf("loadArchive: Failed reading group from `%s'", path);
1936 n = fread ( tmp, 1, 8, f );
1937 if (n != 8)
1938 barf("loadArchive: Failed reading mode from `%s'", path);
1939 n = fread ( tmp, 1, 10, f );
1940 if (n != 10)
1941 barf("loadArchive: Failed reading size from `%s'", path);
1942 tmp[10] = '\0';
1943 for (n = 0; isdigit(tmp[n]); n++);
1944 tmp[n] = '\0';
1945 memberSize = atoi(tmp);
1946
1947 IF_DEBUG(linker, debugBelch("loadArchive: size of this archive member is %d\n", memberSize));
1948 n = fread ( tmp, 1, 2, f );
1949 if (n != 2)
1950 barf("loadArchive: Failed reading magic from `%s'", path);
1951 if (strncmp(tmp, "\x60\x0A", 2) != 0)
1952 barf("loadArchive: Failed reading magic from `%s' at %ld. Got %c%c",
1953 path, ftell(f), tmp[0], tmp[1]);
1954
1955 isGnuIndex = 0;
1956 /* Check for BSD-variant large filenames */
1957 if (0 == strncmp(fileName, "#1/", 3)) {
1958 fileName[16] = '\0';
1959 if (isdigit(fileName[3])) {
1960 for (n = 4; isdigit(fileName[n]); n++);
1961 fileName[n] = '\0';
1962 thisFileNameSize = atoi(fileName + 3);
1963 memberSize -= thisFileNameSize;
1964 if (thisFileNameSize >= fileNameSize) {
1965 /* Double it to avoid potentially continually
1966 increasing it by 1 */
1967 fileNameSize = thisFileNameSize * 2;
1968 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
1969 }
1970 n = fread ( fileName, 1, thisFileNameSize, f );
1971 if (n != (int)thisFileNameSize) {
1972 barf("loadArchive: Failed reading filename from `%s'",
1973 path);
1974 }
1975 fileName[thisFileNameSize] = 0;
1976
1977 /* On OS X at least, thisFileNameSize is the size of the
1978 fileName field, not the length of the fileName
1979 itself. */
1980 thisFileNameSize = strlen(fileName);
1981 }
1982 else {
1983 barf("loadArchive: BSD-variant filename size not found while reading filename from `%s'", path);
1984 }
1985 }
1986 /* Check for GNU file index file */
1987 else if (0 == strncmp(fileName, "//", 2)) {
1988 fileName[0] = '\0';
1989 thisFileNameSize = 0;
1990 isGnuIndex = 1;
1991 }
1992 /* Check for a file in the GNU file index */
1993 else if (fileName[0] == '/') {
1994 if (isdigit(fileName[1])) {
1995 int i;
1996
1997 for (n = 2; isdigit(fileName[n]); n++);
1998 fileName[n] = '\0';
1999 n = atoi(fileName + 1);
2000
2001 if (gnuFileIndex == NULL) {
2002 barf("loadArchive: GNU-variant filename without an index while reading from `%s'", path);
2003 }
2004 if (n < 0 || n > gnuFileIndexSize) {
2005 barf("loadArchive: GNU-variant filename offset %d out of range [0..%d] while reading filename from `%s'", n, gnuFileIndexSize, path);
2006 }
2007 if (n != 0 && gnuFileIndex[n - 1] != '\n') {
2008 barf("loadArchive: GNU-variant filename offset %d invalid (range [0..%d]) while reading filename from `%s'", n, gnuFileIndexSize, path);
2009 }
2010 for (i = n; gnuFileIndex[i] != '/'; i++);
2011 thisFileNameSize = i - n;
2012 if (thisFileNameSize >= fileNameSize) {
2013 /* Double it to avoid potentially continually
2014 increasing it by 1 */
2015 fileNameSize = thisFileNameSize * 2;
2016 fileName = stgReallocBytes(fileName, fileNameSize, "loadArchive(fileName)");
2017 }
2018 memcpy(fileName, gnuFileIndex + n, thisFileNameSize);
2019 fileName[thisFileNameSize] = '\0';
2020 }
2021 else if (fileName[1] == ' ') {
2022 fileName[0] = '\0';
2023 thisFileNameSize = 0;
2024 }
2025 else {
2026 barf("loadArchive: GNU-variant filename offset not found while reading filename from `%s'", path);
2027 }
2028 }
2029 /* Finally, the case where the filename field actually contains
2030 the filename */
2031 else {
2032 /* GNU ar terminates filenames with a '/', this allowing
2033 spaces in filenames. So first look to see if there is a
2034 terminating '/'. */
2035 for (thisFileNameSize = 0;
2036 thisFileNameSize < 16;
2037 thisFileNameSize++) {
2038 if (fileName[thisFileNameSize] == '/') {
2039 fileName[thisFileNameSize] = '\0';
2040 break;
2041 }
2042 }
2043 /* If we didn't find a '/', then a space teminates the
2044 filename. Note that if we don't find one, then
2045 thisFileNameSize ends up as 16, and we already have the
2046 '\0' at the end. */
2047 if (thisFileNameSize == 16) {
2048 for (thisFileNameSize = 0;
2049 thisFileNameSize < 16;
2050 thisFileNameSize++) {
2051 if (fileName[thisFileNameSize] == ' ') {
2052 fileName[thisFileNameSize] = '\0';
2053 break;
2054 }
2055 }
2056 }
2057 }
2058
2059 IF_DEBUG(linker,
2060 debugBelch("loadArchive: Found member file `%s'\n", fileName));
2061
2062 isObject = thisFileNameSize >= 2
2063 && fileName[thisFileNameSize - 2] == '.'
2064 && fileName[thisFileNameSize - 1] == 'o';
2065
2066 IF_DEBUG(linker, debugBelch("loadArchive: \tthisFileNameSize = %d\n", (int)thisFileNameSize));
2067 IF_DEBUG(linker, debugBelch("loadArchive: \tisObject = %d\n", isObject));
2068
2069 if (isObject) {
2070 char *archiveMemberName;
2071
2072 IF_DEBUG(linker, debugBelch("loadArchive: Member is an object file...loading...\n"));
2073
2074 /* We can't mmap from the archive directly, as object
2075 files need to be 8-byte aligned but files in .ar
2076 archives are 2-byte aligned. When possible we use mmap
2077 to get some anonymous memory, as on 64-bit platforms if
2078 we use malloc then we can be given memory above 2^32.
2079 In the mmap case we're probably wasting lots of space;
2080 we could do better. */
2081 #if defined(USE_MMAP)
2082 image = mmapForLinker(memberSize, MAP_ANONYMOUS, -1);
2083 #elif defined(mingw32_HOST_OS)
2084 // TODO: We would like to use allocateExec here, but allocateExec
2085 // cannot currently allocate blocks large enough.
2086 image = VirtualAlloc(NULL, memberSize, MEM_RESERVE | MEM_COMMIT,
2087 PAGE_EXECUTE_READWRITE);
2088 #elif defined(darwin_HOST_OS)
2089 /* See loadObj() */
2090 misalignment = machoGetMisalignment(f);
2091 image = stgMallocBytes(memberSize + misalignment, "loadArchive(image)");
2092 image += misalignment;
2093 #else
2094 image = stgMallocBytes(memberSize, "loadArchive(image)");
2095 #endif
2096 n = fread ( image, 1, memberSize, f );
2097 if (n != memberSize) {
2098 barf("loadArchive: error whilst reading `%s'", path);
2099 }
2100
2101 archiveMemberName = stgMallocBytes(pathlen(path) + thisFileNameSize + 3,
2102 "loadArchive(file)");
2103 sprintf(archiveMemberName, "%" PATH_FMT "(%.*s)",
2104 path, (int)thisFileNameSize, fileName);
2105
2106 oc = mkOc(path, image, memberSize, archiveMemberName
2107 #ifndef USE_MMAP
2108 #ifdef darwin_HOST_OS
2109 , misalignment
2110 #endif
2111 #endif
2112 );
2113
2114 stgFree(archiveMemberName);
2115
2116 if (0 == loadOc(oc)) {
2117 stgFree(fileName);
2118 return 0;
2119 }
2120 }
2121 else if (isGnuIndex) {
2122 if (gnuFileIndex != NULL) {
2123 barf("loadArchive: GNU-variant index found, but already have an index, while reading filename from `%s'", path);
2124 }
2125 IF_DEBUG(linker, debugBelch("loadArchive: Found GNU-variant file index\n"));
2126 #ifdef USE_MMAP
2127 gnuFileIndex = mmapForLinker(memberSize + 1, MAP_ANONYMOUS, -1);
2128 #else
2129 gnuFileIndex = stgMallocBytes(memberSize + 1, "loadArchive(image)");
2130 #endif
2131 n = fread ( gnuFileIndex, 1, memberSize, f );
2132 if (n != memberSize) {
2133 barf("loadArchive: error whilst reading `%s'", path);
2134 }
2135 gnuFileIndex[memberSize] = '/';
2136 gnuFileIndexSize = memberSize;
2137 }
2138 else {
2139 IF_DEBUG(linker, debugBelch("loadArchive: '%s' does not appear to be an object file\n", fileName));
2140 n = fseek(f, memberSize, SEEK_CUR);
2141 if (n != 0)
2142 barf("loadArchive: error whilst seeking by %d in `%s'",
2143 memberSize, path);
2144 }
2145
2146 /* .ar files are 2-byte aligned */
2147 if (memberSize % 2) {
2148 IF_DEBUG(linker, debugBelch("loadArchive: trying to read one pad byte\n"));
2149 n = fread ( tmp, 1, 1, f );
2150 if (n != 1) {
2151 if (feof(f)) {
2152 IF_DEBUG(linker, debugBelch("loadArchive: found EOF while reading one pad byte\n"));
2153 break;
2154 }
2155 else {
2156 barf("loadArchive: Failed reading padding from `%s'", path);
2157 }
2158 }
2159 IF_DEBUG(linker, debugBelch("loadArchive: successfully read one pad byte\n"));
2160 }
2161 IF_DEBUG(linker, debugBelch("loadArchive: reached end of archive loading while loop\n"));
2162 }
2163
2164 fclose(f);
2165
2166 stgFree(fileName);
2167 if (gnuFileIndex != NULL) {
2168 #ifdef USE_MMAP
2169 munmap(gnuFileIndex, gnuFileIndexSize + 1);
2170 #else
2171 stgFree(gnuFileIndex);
2172 #endif
2173 }
2174
2175 IF_DEBUG(linker, debugBelch("loadArchive: done\n"));
2176 return 1;
2177 }
2178
2179 /* -----------------------------------------------------------------------------
2180 * Load an obj (populate the global symbol table, but don't resolve yet)
2181 *
2182 * Returns: 1 if ok, 0 on error.
2183 */
2184 HsInt
2185 loadObj( pathchar *path )
2186 {
2187 ObjectCode* oc;
2188 char *image;
2189 int fileSize;
2190 struct_stat st;
2191 int r;
2192 #ifdef USE_MMAP
2193 int fd;
2194 #else
2195 FILE *f;
2196 # if defined(darwin_HOST_OS)
2197 int misalignment;
2198 # endif
2199 #endif
2200 IF_DEBUG(linker, debugBelch("loadObj %" PATH_FMT "\n", path));
2201
2202 initLinker();
2203
2204 /* debugBelch("loadObj %s\n", path ); */
2205
2206 /* Check that we haven't already loaded this object.
2207 Ignore requests to load multiple times */
2208 {
2209 ObjectCode *o;
2210 int is_dup = 0;
2211 for (o = objects; o; o = o->next) {
2212 if (0 == pathcmp(o->fileName, path)) {
2213 is_dup = 1;
2214 break; /* don't need to search further */
2215 }
2216 }
2217 if (is_dup) {
2218 IF_DEBUG(linker, debugBelch(
2219 "GHCi runtime linker: warning: looks like you're trying to load the\n"
2220 "same object file twice:\n"
2221 " %" PATH_FMT "\n"
2222 "GHCi will ignore this, but be warned.\n"
2223 , path));
2224 return 1; /* success */
2225 }
2226 }
2227
2228 r = pathstat(path, &st);
2229 if (r == -1) {
2230 IF_DEBUG(linker, debugBelch("File doesn't exist\n"));
2231 return 0;
2232 }
2233
2234 fileSize = st.st_size;
2235
2236 #ifdef USE_MMAP
2237 /* On many architectures malloc'd memory isn't executable, so we need to use mmap. */
2238
2239 #if defined(openbsd_HOST_OS)
2240 fd = open(path, O_RDONLY, S_IRUSR);
2241 #else
2242 fd = open(path, O_RDONLY);
2243 #endif
2244 if (fd == -1)
2245 barf("loadObj: can't open `%s'", path);
2246
2247 image = mmapForLinker(fileSize, 0, fd);
2248
2249 close(fd);
2250
2251 #else /* !USE_MMAP */
2252 /* load the image into memory */
2253 f = pathopen(path, WSTR("rb"));
2254 if (!f)
2255 barf("loadObj: can't read `%" PATH_FMT "'", path);
2256
2257 # if defined(mingw32_HOST_OS)
2258 // TODO: We would like to use allocateExec here, but allocateExec
2259 // cannot currently allocate blocks large enough.
2260 {
2261 int offset;
2262 #if defined(x86_64_HOST_ARCH)
2263 /* We get back 8-byte aligned memory (is that guaranteed?), but
2264 the offsets to the sections within the file are all 4 mod 8
2265 (is that guaranteed?). We therefore need to offset the image
2266 by 4, so that all the pointers are 8-byte aligned, so that
2267 pointer tagging works. */
2268 offset = 4;
2269 #else
2270 offset = 0;
2271 #endif
2272 image = VirtualAlloc(NULL, fileSize + offset, MEM_RESERVE | MEM_COMMIT,
2273 PAGE_EXECUTE_READWRITE);
2274 image += offset;
2275 }
2276 # elif defined(darwin_HOST_OS)
2277 // In a Mach-O .o file, all sections can and will be misaligned
2278 // if the total size of the headers is not a multiple of the
2279 // desired alignment. This is fine for .o files that only serve
2280 // as input for the static linker, but it's not fine for us,
2281 // as SSE (used by gcc for floating point) and Altivec require
2282 // 16-byte alignment.
2283 // We calculate the correct alignment from the header before
2284 // reading the file, and then we misalign image on purpose so
2285 // that the actual sections end up aligned again.
2286 misalignment = machoGetMisalignment(f);
2287 image = stgMallocBytes(fileSize + misalignment, "loadObj(image)");
2288 image += misalignment;
2289 # else
2290 image = stgMallocBytes(fileSize, "loadObj(image)");
2291 # endif
2292
2293 {
2294 int n;
2295 n = fread ( image, 1, fileSize, f );
2296 if (n != fileSize)
2297 barf("loadObj: error whilst reading `%s'", path);
2298 }
2299 fclose(f);
2300 #endif /* USE_MMAP */
2301
2302 oc = mkOc(path, image, fileSize, NULL
2303 #ifndef USE_MMAP
2304 #ifdef darwin_HOST_OS
2305 , misalignment
2306 #endif
2307 #endif
2308 );
2309
2310 return loadOc(oc);
2311 }
2312
2313 static HsInt
2314 loadOc( ObjectCode* oc ) {
2315 int r;
2316
2317 IF_DEBUG(linker, debugBelch("loadOc: start\n"));
2318
2319 # if defined(OBJFORMAT_MACHO) && (defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH))
2320 r = ocAllocateSymbolExtras_MachO ( oc );
2321 if (!r) {
2322 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_MachO failed\n"));
2323 return r;
2324 }
2325 # elif defined(OBJFORMAT_ELF) && (defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH))
2326 r = ocAllocateSymbolExtras_ELF ( oc );
2327 if (!r) {
2328 IF_DEBUG(linker, debugBelch("loadOc: ocAllocateSymbolExtras_ELF failed\n"));
2329 return r;
2330 }
2331 #endif
2332
2333 /* verify the in-memory image */
2334 # if defined(OBJFORMAT_ELF)
2335 r = ocVerifyImage_ELF ( oc );
2336 # elif defined(OBJFORMAT_PEi386)
2337 r = ocVerifyImage_PEi386 ( oc );
2338 # elif defined(OBJFORMAT_MACHO)
2339 r = ocVerifyImage_MachO ( oc );
2340 # else
2341 barf("loadObj: no verify method");
2342 # endif
2343 if (!r) {
2344 IF_DEBUG(linker, debugBelch("loadOc: ocVerifyImage_* failed\n"));
2345 return r;
2346 }
2347
2348 /* build the symbol list for this image */
2349 # if defined(OBJFORMAT_ELF)
2350 r = ocGetNames_ELF ( oc );
2351 # elif defined(OBJFORMAT_PEi386)
2352 r = ocGetNames_PEi386 ( oc );
2353 # elif defined(OBJFORMAT_MACHO)
2354 r = ocGetNames_MachO ( oc );
2355 # else
2356 barf("loadObj: no getNames method");
2357 # endif
2358 if (!r) {
2359 IF_DEBUG(linker, debugBelch("loadOc: ocGetNames_* failed\n"));
2360 return r;
2361 }
2362
2363 /* loaded, but not resolved yet */
2364 oc->status = OBJECT_LOADED;
2365 IF_DEBUG(linker, debugBelch("loadOc: done.\n"));
2366
2367 return 1;
2368 }
2369
2370 /* -----------------------------------------------------------------------------
2371 * resolve all the currently unlinked objects in memory
2372 *
2373 * Returns: 1 if ok, 0 on error.
2374 */
2375 HsInt
2376 resolveObjs( void )
2377 {
2378 ObjectCode *oc;
2379 int r;
2380
2381 IF_DEBUG(linker, debugBelch("resolveObjs: start\n"));
2382 initLinker();
2383
2384 for (oc = objects; oc; oc = oc->next) {
2385 if (oc->status != OBJECT_RESOLVED) {
2386 # if defined(OBJFORMAT_ELF)
2387 r = ocResolve_ELF ( oc );
2388 # elif defined(OBJFORMAT_PEi386)
2389 r = ocResolve_PEi386 ( oc );
2390 # elif defined(OBJFORMAT_MACHO)
2391 r = ocResolve_MachO ( oc );
2392 # else
2393 barf("resolveObjs: not implemented on this platform");
2394 # endif
2395 if (!r) { return r; }
2396 oc->status = OBJECT_RESOLVED;
2397 }
2398 }
2399 IF_DEBUG(linker, debugBelch("resolveObjs: done\n"));
2400 return 1;
2401 }
2402
2403 /* -----------------------------------------------------------------------------
2404 * delete an object from the pool
2405 */
2406 HsInt
2407 unloadObj( pathchar *path )
2408 {
2409 ObjectCode *oc, *prev;
2410 HsBool unloadedAnyObj = HS_BOOL_FALSE;
2411
2412 ASSERT(symhash != NULL);
2413 ASSERT(objects != NULL);
2414
2415 initLinker();
2416
2417 prev = NULL;
2418 for (oc = objects; oc; prev = oc, oc = oc->next) {
2419 if (!pathcmp(oc->fileName,path)) {
2420
2421 /* Remove all the mappings for the symbols within this
2422 * object..
2423 */
2424 {
2425 int i;
2426 for (i = 0; i < oc->n_symbols; i++) {
2427 if (oc->symbols[i] != NULL) {
2428 removeStrHashTable(symhash, oc->symbols[i], NULL);
2429 }
2430 }
2431 }
2432
2433 if (prev == NULL) {
2434 objects = oc->next;
2435 } else {
2436 prev->next = oc->next;
2437 }
2438
2439 // We're going to leave this in place, in case there are
2440 // any pointers from the heap into it:
2441 // #ifdef mingw32_HOST_OS
2442 // If uncommenting, note that currently oc->image is
2443 // not the right address to free on Win64, as we added
2444 // 4 bytes of padding at the start
2445 // VirtualFree(oc->image);
2446 // #else
2447 // stgFree(oc->image);
2448 // #endif
2449 stgFree(oc->fileName);
2450 stgFree(oc->archiveMemberName);
2451 stgFree(oc->symbols);
2452 stgFree(oc->sections);
2453 stgFree(oc);
2454
2455 /* This could be a member of an archive so continue
2456 * unloading other members. */
2457 unloadedAnyObj = HS_BOOL_TRUE;
2458 }
2459 }
2460
2461 if (unloadedAnyObj) {
2462 return 1;
2463 }
2464 else {
2465 errorBelch("unloadObj: can't find `%" PATH_FMT "' to unload", path);
2466 return 0;
2467 }
2468 }
2469
2470 /* -----------------------------------------------------------------------------
2471 * Sanity checking. For each ObjectCode, maintain a list of address ranges
2472 * which may be prodded during relocation, and abort if we try and write
2473 * outside any of these.
2474 */
2475 static void
2476 addProddableBlock ( ObjectCode* oc, void* start, int size )
2477 {
2478 ProddableBlock* pb
2479 = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
2480
2481 IF_DEBUG(linker, debugBelch("addProddableBlock: %p %p %d\n", oc, start, size));
2482 ASSERT(size > 0);
2483 pb->start = start;
2484 pb->size = size;
2485 pb->next = oc->proddables;
2486 oc->proddables = pb;
2487 }
2488
2489 static void
2490 checkProddableBlock (ObjectCode *oc, void *addr )
2491 {
2492 ProddableBlock* pb;
2493
2494 for (pb = oc->proddables; pb != NULL; pb = pb->next) {
2495 char* s = (char*)(pb->start);
2496 char* e = s + pb->size - 1;
2497 char* a = (char*)addr;
2498 #if WORD_SIZE_IN_BITS == 32
2499 /* Assumes that the biggest fixup involves a 4-byte write */
2500 if (a >= s && (a+3) <= e) return;
2501 #elif WORD_SIZE_IN_BITS == 64
2502 /* Assumes that the biggest fixup involves a 4-byte write */
2503 if (a >= s && (a+7) <= e) return;
2504 #else
2505 #error
2506 #endif
2507 }
2508 barf("checkProddableBlock: invalid fixup in runtime linker: %p", addr);
2509 }
2510
2511 /* -----------------------------------------------------------------------------
2512 * Section management.
2513 */
2514 static void
2515 addSection ( ObjectCode* oc, SectionKind kind,
2516 void* start, void* end )
2517 {
2518 Section* s = stgMallocBytes(sizeof(Section), "addSection");
2519 s->start = start;
2520 s->end = end;
2521 s->kind = kind;
2522 s->next = oc->sections;
2523 oc->sections = s;
2524
2525 IF_DEBUG(linker, debugBelch("addSection: %p-%p (size %lld), kind %d\n",
2526 start, ((char*)end)-1, ((long long)(size_t)end) - ((long long)(size_t)start) + 1, kind ));
2527 }
2528
2529
2530 /* --------------------------------------------------------------------------
2531 * Symbol Extras.
2532 * This is about allocating a small chunk of memory for every symbol in the
2533 * object file. We make sure that the SymboLExtras are always "in range" of
2534 * limited-range PC-relative instructions on various platforms by allocating
2535 * them right next to the object code itself.
2536 */
2537
2538 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH)
2539 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2540
2541 /*
2542 ocAllocateSymbolExtras
2543
2544 Allocate additional space at the end of the object file image to make room
2545 for jump islands (powerpc, x86_64, arm) and GOT entries (x86_64).
2546
2547 PowerPC relative branch instructions have a 24 bit displacement field.
2548 As PPC code is always 4-byte-aligned, this yields a +-32MB range.
2549 If a particular imported symbol is outside this range, we have to redirect
2550 the jump to a short piece of new code that just loads the 32bit absolute
2551 address and jumps there.
2552 On x86_64, PC-relative jumps and PC-relative accesses to the GOT are limited
2553 to 32 bits (+-2GB).
2554
2555 This function just allocates space for one SymbolExtra for every
2556 undefined symbol in the object file. The code for the jump islands is
2557 filled in by makeSymbolExtra below.
2558 */
2559
2560 static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first )
2561 {
2562 #ifdef USE_MMAP
2563 int pagesize, n, m;
2564 #endif
2565 int aligned;
2566 #ifndef USE_MMAP
2567 int misalignment = 0;
2568 #ifdef darwin_HOST_OS
2569 misalignment = oc->misalignment;
2570 #endif
2571 #endif
2572
2573 if( count > 0 )
2574 {
2575 // round up to the nearest 4
2576 aligned = (oc->fileSize + 3) & ~3;
2577
2578 #ifdef USE_MMAP
2579 pagesize = getpagesize();
2580 n = ROUND_UP( oc->fileSize, pagesize );
2581 m = ROUND_UP( aligned + sizeof (SymbolExtra) * count, pagesize );
2582
2583 /* we try to use spare space at the end of the last page of the
2584 * image for the jump islands, but if there isn't enough space
2585 * then we have to map some (anonymously, remembering MAP_32BIT).
2586 */
2587 if( m > n ) // we need to allocate more pages
2588 {
2589 oc->symbol_extras = mmapForLinker(sizeof(SymbolExtra) * count,
2590 MAP_ANONYMOUS, -1);
2591 }
2592 else
2593 {
2594 oc->symbol_extras = (SymbolExtra *) (oc->image + aligned);
2595 }
2596 #else
2597 oc->image -= misalignment;
2598 oc->image = stgReallocBytes( oc->image,
2599 misalignment +
2600 aligned + sizeof (SymbolExtra) * count,
2601 "ocAllocateSymbolExtras" );
2602 oc->image += misalignment;
2603
2604 oc->symbol_extras = (SymbolExtra *) (oc->image + aligned);
2605 #endif /* USE_MMAP */
2606
2607 memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
2608 }
2609 else
2610 oc->symbol_extras = NULL;
2611
2612 oc->first_symbol_extra = first;
2613 oc->n_symbol_extras = count;
2614
2615 return 1;
2616 }
2617
2618 #endif
2619 #endif // defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH)
2620
2621 #if defined(arm_HOST_ARCH)
2622
2623 static void
2624 ocFlushInstructionCache( ObjectCode *oc )
2625 {
2626 // Object code
2627 __clear_cache(oc->image, oc->image + oc->fileSize);
2628 // Jump islands
2629 __clear_cache(oc->symbol_extras, &oc->symbol_extras[oc->n_symbol_extras]);
2630 }
2631
2632 #endif
2633
2634 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2635 #if !defined(x86_64_HOST_ARCH) || !defined(mingw32_HOST_OS)
2636
2637 static SymbolExtra* makeSymbolExtra( ObjectCode* oc,
2638 unsigned long symbolNumber,
2639 unsigned long target )
2640 {
2641 SymbolExtra *extra;
2642
2643 ASSERT( symbolNumber >= oc->first_symbol_extra
2644 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2645
2646 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2647
2648 #ifdef powerpc_HOST_ARCH
2649 // lis r12, hi16(target)
2650 extra->jumpIsland.lis_r12 = 0x3d80;
2651 extra->jumpIsland.hi_addr = target >> 16;
2652
2653 // ori r12, r12, lo16(target)
2654 extra->jumpIsland.ori_r12_r12 = 0x618c;
2655 extra->jumpIsland.lo_addr = target & 0xffff;
2656
2657 // mtctr r12
2658 extra->jumpIsland.mtctr_r12 = 0x7d8903a6;
2659
2660 // bctr
2661 extra->jumpIsland.bctr = 0x4e800420;
2662 #endif
2663 #ifdef x86_64_HOST_ARCH
2664 // jmp *-14(%rip)
2665 static uint8_t jmp[] = { 0xFF, 0x25, 0xF2, 0xFF, 0xFF, 0xFF };
2666 extra->addr = target;
2667 memcpy(extra->jumpIsland, jmp, 6);
2668 #endif
2669
2670 return extra;
2671 }
2672
2673 #endif
2674 #endif // defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
2675
2676 #ifdef arm_HOST_ARCH
2677 static SymbolExtra* makeArmSymbolExtra( ObjectCode* oc,
2678 unsigned long symbolNumber,
2679 unsigned long target,
2680 int fromThumb,
2681 int toThumb )
2682 {
2683 SymbolExtra *extra;
2684
2685 ASSERT( symbolNumber >= oc->first_symbol_extra
2686 && symbolNumber - oc->first_symbol_extra < oc->n_symbol_extras);
2687
2688 extra = &oc->symbol_extras[symbolNumber - oc->first_symbol_extra];
2689
2690 // Make sure instruction mode bit is set properly
2691 if (toThumb)
2692 target |= 1;
2693 else
2694 target &= ~1;
2695
2696 if (!fromThumb) {
2697 // In ARM encoding:
2698 // movw r12, #0
2699 // movt r12, #0
2700 // bx r12
2701 uint32_t code[] = { 0xe300c000, 0xe340c000, 0xe12fff1c };
2702
2703 // Patch lower half-word into movw
2704 code[0] |= ((target>>12) & 0xf) << 16;
2705 code[0] |= target & 0xfff;
2706 // Patch upper half-word into movt
2707 target >>= 16;
2708 code[1] |= ((target>>12) & 0xf) << 16;
2709 code[1] |= target & 0xfff;
2710
2711 memcpy(extra->jumpIsland, code, 12);
2712
2713 } else {
2714 // In Thumb encoding:
2715 // movw r12, #0
2716 // movt r12, #0
2717 // bx r12
2718 uint16_t code[] = { 0xf240, 0x0c00,
2719 0xf2c0, 0x0c00,
2720 0x4760 };
2721
2722 // Patch lower half-word into movw
2723 code[0] |= (target>>12) & 0xf;
2724 code[0] |= ((target>>11) & 0x1) << 10;
2725 code[1] |= ((target>>8) & 0x7) << 12;
2726 code[1] |= target & 0xff;
2727 // Patch upper half-word into movt
2728 target >>= 16;
2729 code[2] |= (target>>12) & 0xf;
2730 code[2] |= ((target>>11) & 0x1) << 10;
2731 code[3] |= ((target>>8) & 0x7) << 12;
2732 code[3] |= target & 0xff;
2733
2734 memcpy(extra->jumpIsland, code, 10);
2735 }
2736
2737 return extra;
2738 }
2739 #endif // arm_HOST_ARCH
2740
2741 /* --------------------------------------------------------------------------
2742 * PowerPC specifics (instruction cache flushing)
2743 * ------------------------------------------------------------------------*/
2744
2745 #ifdef powerpc_HOST_ARCH
2746 /*
2747 ocFlushInstructionCache
2748
2749 Flush the data & instruction caches.
2750 Because the PPC has split data/instruction caches, we have to
2751 do that whenever we modify code at runtime.
2752 */
2753
2754 static void
2755 ocFlushInstructionCacheFrom(void* begin, size_t length)
2756 {
2757 size_t n = (length + 3) / 4;
2758 unsigned long* p = begin;
2759
2760 while (n--)
2761 {
2762 __asm__ volatile ( "dcbf 0,%0\n\t"
2763 "sync\n\t"
2764 "icbi 0,%0"
2765 :
2766 : "r" (p)
2767 );
2768 p++;
2769 }
2770 __asm__ volatile ( "sync\n\t"
2771 "isync"
2772 );
2773 }
2774
2775 static void
2776 ocFlushInstructionCache( ObjectCode *oc )
2777 {
2778 /* The main object code */
2779 ocFlushInstructionCacheFrom(oc->image
2780 #ifdef darwin_HOST_OS
2781 + oc->misalignment
2782 #endif
2783 , oc->fileSize);
2784
2785 /* Jump Islands */
2786 ocFlushInstructionCacheFrom(oc->symbol_extras, sizeof(SymbolExtra) * oc->n_symbol_extras);
2787 }
2788 #endif /* powerpc_HOST_ARCH */
2789
2790
2791 /* --------------------------------------------------------------------------
2792 * PEi386 specifics (Win32 targets)
2793 * ------------------------------------------------------------------------*/
2794
2795 /* The information for this linker comes from
2796 Microsoft Portable Executable
2797 and Common Object File Format Specification
2798 revision 5.1 January 1998
2799 which SimonM says comes from the MS Developer Network CDs.
2800
2801 It can be found there (on older CDs), but can also be found
2802 online at:
2803
2804 http://www.microsoft.com/hwdev/hardware/PECOFF.asp
2805
2806 (this is Rev 6.0 from February 1999).
2807
2808 Things move, so if that fails, try searching for it via
2809
2810 http://www.google.com/search?q=PE+COFF+specification
2811
2812 The ultimate reference for the PE format is the Winnt.h
2813 header file that comes with the Platform SDKs; as always,
2814 implementations will drift wrt their documentation.
2815
2816 A good background article on the PE format is Matt Pietrek's
2817 March 1994 article in Microsoft System Journal (MSJ)
2818 (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
2819 Win32 Portable Executable File Format." The info in there
2820 has recently been updated in a two part article in
2821 MSDN magazine, issues Feb and March 2002,
2822 "Inside Windows: An In-Depth Look into the Win32 Portable
2823 Executable File Format"
2824
2825 John Levine's book "Linkers and Loaders" contains useful
2826 info on PE too.
2827 */
2828
2829
2830 #if defined(OBJFORMAT_PEi386)
2831
2832
2833
2834 typedef unsigned char UChar;
2835 typedef unsigned short UInt16;
2836 typedef unsigned int UInt32;
2837 typedef int Int32;
2838 typedef unsigned long long int UInt64;
2839
2840
2841 typedef
2842 struct {
2843 UInt16 Machine;
2844 UInt16 NumberOfSections;
2845 UInt32 TimeDateStamp;
2846 UInt32 PointerToSymbolTable;
2847 UInt32 NumberOfSymbols;
2848 UInt16 SizeOfOptionalHeader;
2849 UInt16 Characteristics;
2850 }
2851 COFF_header;
2852
2853 #define sizeof_COFF_header 20
2854
2855
2856 typedef
2857 struct {
2858 UChar Name[8];
2859 UInt32 VirtualSize;
2860 UInt32 VirtualAddress;
2861 UInt32 SizeOfRawData;
2862 UInt32 PointerToRawData;
2863 UInt32 PointerToRelocations;
2864 UInt32 PointerToLinenumbers;
2865 UInt16 NumberOfRelocations;
2866 UInt16 NumberOfLineNumbers;
2867 UInt32 Characteristics;
2868 }
2869 COFF_section;
2870
2871 #define sizeof_COFF_section 40
2872
2873
2874 typedef
2875 struct {
2876 UChar Name[8];
2877 UInt32 Value;
2878 UInt16 SectionNumber;
2879 UInt16 Type;
2880 UChar StorageClass;
2881 UChar NumberOfAuxSymbols;
2882 }
2883 COFF_symbol;
2884
2885 #define sizeof_COFF_symbol 18
2886
2887
2888 typedef
2889 struct {
2890 UInt32 VirtualAddress;
2891 UInt32 SymbolTableIndex;
2892 UInt16 Type;
2893 }
2894 COFF_reloc;
2895
2896 #define sizeof_COFF_reloc 10
2897
2898
2899 /* From PE spec doc, section 3.3.2 */
2900 /* Note use of MYIMAGE_* since IMAGE_* are already defined in
2901 windows.h -- for the same purpose, but I want to know what I'm
2902 getting, here. */
2903 #define MYIMAGE_FILE_RELOCS_STRIPPED 0x0001
2904 #define MYIMAGE_FILE_EXECUTABLE_IMAGE 0x0002
2905 #define MYIMAGE_FILE_DLL 0x2000
2906 #define MYIMAGE_FILE_SYSTEM 0x1000
2907 #define MYIMAGE_FILE_BYTES_REVERSED_HI 0x8000
2908 #define MYIMAGE_FILE_BYTES_REVERSED_LO 0x0080
2909 #define MYIMAGE_FILE_32BIT_MACHINE 0x0100
2910
2911 /* From PE spec doc, section 5.4.2 and 5.4.4 */
2912 #define MYIMAGE_SYM_CLASS_EXTERNAL 2
2913 #define MYIMAGE_SYM_CLASS_STATIC 3
2914 #define MYIMAGE_SYM_UNDEFINED 0
2915
2916 /* From PE spec doc, section 4.1 */
2917 #define MYIMAGE_SCN_CNT_CODE 0x00000020
2918 #define MYIMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040
2919 #define MYIMAGE_SCN_LNK_NRELOC_OVFL 0x01000000
2920
2921 /* From PE spec doc, section 5.2.1 */
2922 #define MYIMAGE_REL_I386_DIR32 0x0006
2923 #define MYIMAGE_REL_I386_REL32 0x0014
2924
2925
2926 /* We use myindex to calculate array addresses, rather than
2927 simply doing the normal subscript thing. That's because
2928 some of the above structs have sizes which are not
2929 a whole number of words. GCC rounds their sizes up to a
2930 whole number of words, which means that the address calcs
2931 arising from using normal C indexing or pointer arithmetic
2932 are just plain wrong. Sigh.
2933 */
2934 static UChar *
2935 myindex ( int scale, void* base, int index )
2936 {
2937 return
2938 ((UChar*)base) + scale * index;
2939 }
2940
2941
2942 static void
2943 printName ( UChar* name, UChar* strtab )
2944 {
2945 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
2946 UInt32 strtab_offset = * (UInt32*)(name+4);
2947 debugBelch("%s", strtab + strtab_offset );
2948 } else {
2949 int i;
2950 for (i = 0; i < 8; i++) {
2951 if (name[i] == 0) break;
2952 debugBelch("%c", name[i] );
2953 }
2954 }
2955 }
2956
2957
2958 static void
2959 copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
2960 {
2961 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
2962 UInt32 strtab_offset = * (UInt32*)(name+4);
2963 strncpy ( (char*)dst, (char*)strtab+strtab_offset, dstSize );
2964 dst[dstSize-1] = 0;
2965 } else {
2966 int i = 0;
2967 while (1) {
2968 if (i >= 8) break;
2969 if (name[i] == 0) break;
2970 dst[i] = name[i];
2971 i++;
2972 }
2973 dst[i] = 0;
2974 }
2975 }
2976
2977
2978 static UChar *
2979 cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
2980 {
2981 UChar* newstr;
2982 /* If the string is longer than 8 bytes, look in the
2983 string table for it -- this will be correctly zero terminated.
2984 */
2985 if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
2986 UInt32 strtab_offset = * (UInt32*)(name+4);
2987 return ((UChar*)strtab) + strtab_offset;
2988 }
2989 /* Otherwise, if shorter than 8 bytes, return the original,
2990 which by defn is correctly terminated.
2991 */
2992 if (name[7]==0) return name;
2993 /* The annoying case: 8 bytes. Copy into a temporary
2994 (XXX which is never freed ...)
2995 */
2996 newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
2997 ASSERT(newstr);
2998 strncpy((char*)newstr,(char*)name,8);
2999 newstr[8] = 0;
3000 return newstr;
3001 }
3002
3003 /* Getting the name of a section is mildly tricky, so we make a
3004 function for it. Sadly, in one case we have to copy the string
3005 (when it is exactly 8 bytes long there's no trailing '\0'), so for
3006 consistency we *always* copy the string; the caller must free it
3007 */
3008 static char *
3009 cstring_from_section_name (UChar* name, UChar* strtab)
3010 {
3011 char *newstr;
3012
3013 if (name[0]=='/') {
3014 int strtab_offset = strtol((char*)name+1,NULL,10);
3015 int len = strlen(((char*)strtab) + strtab_offset);
3016
3017 newstr = stgMallocBytes(len+1, "cstring_from_section_symbol_name");
3018 strcpy((char*)newstr, (char*)((UChar*)strtab) + strtab_offset);
3019 return newstr;
3020 }
3021 else
3022 {
3023 newstr = stgMallocBytes(9, "cstring_from_section_symbol_name");
3024 ASSERT(newstr);
3025 strncpy((char*)newstr,(char*)name,8);
3026 newstr[8] = 0;
3027 return newstr;
3028 }
3029 }
3030
3031 /* Just compares the short names (first 8 chars) */
3032 static COFF_section *
3033 findPEi386SectionCalled ( ObjectCode* oc, UChar* name )
3034 {
3035 int i;
3036 COFF_header* hdr
3037 = (COFF_header*)(oc->image);
3038 COFF_section* sectab
3039 = (COFF_section*) (
3040 ((UChar*)(oc->image))
3041 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3042 );
3043 for (i = 0; i < hdr->NumberOfSections; i++) {
3044 UChar* n1;
3045 UChar* n2;
3046 COFF_section* section_i
3047 = (COFF_section*)
3048 myindex ( sizeof_COFF_section, sectab, i );
3049 n1 = (UChar*) &(section_i->Name);
3050 n2 = name;
3051 if (n1[0]==n2[0] && n1[1]==n2[1] && n1[2]==n2[2] &&
3052 n1[3]==n2[3] && n1[4]==n2[4] && n1[5]==n2[5] &&
3053 n1[6]==n2[6] && n1[7]==n2[7])
3054 return section_i;
3055 }
3056
3057 return NULL;
3058 }
3059
3060 static void
3061 zapTrailingAtSign ( UChar* sym )
3062 {
3063 # define my_isdigit(c) ((c) >= '0' && (c) <= '9')
3064 int i, j;
3065 if (sym[0] == 0) return;
3066 i = 0;
3067 while (sym[i] != 0) i++;
3068 i--;
3069 j = i;
3070 while (j > 0 && my_isdigit(sym[j])) j--;
3071 if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
3072 # undef my_isdigit
3073 }
3074
3075 static void *
3076 lookupSymbolInDLLs ( UChar *lbl )
3077 {
3078 OpenedDLL* o_dll;
3079 void *sym;
3080
3081 for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
3082 /* debugBelch("look in %s for %s\n", o_dll->name, lbl); */
3083
3084 if (lbl[0] == '_') {
3085 /* HACK: if the name has an initial underscore, try stripping
3086 it off & look that up first. I've yet to verify whether there's
3087 a Rule that governs whether an initial '_' *should always* be
3088 stripped off when mapping from import lib name to the DLL name.
3089 */
3090 sym = GetProcAddress(o_dll->instance, (char*)(lbl+1));
3091 if (sym != NULL) {
3092 /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
3093 return sym;
3094 }
3095 }
3096 sym = GetProcAddress(o_dll->instance, (char*)lbl);
3097 if (sym != NULL) {
3098 /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
3099 return sym;
3100 }
3101 }
3102 return NULL;
3103 }
3104
3105
3106 static int
3107 ocVerifyImage_PEi386 ( ObjectCode* oc )
3108 {
3109 int i;
3110 UInt32 j, noRelocs;
3111 COFF_header* hdr;
3112 COFF_section* sectab;
3113 COFF_symbol* symtab;
3114 UChar* strtab;
3115 /* debugBelch("\nLOADING %s\n", oc->fileName); */
3116 hdr = (COFF_header*)(oc->image);
3117 sectab = (COFF_section*) (
3118 ((UChar*)(oc->image))
3119 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3120 );
3121 symtab = (COFF_symbol*) (
3122 ((UChar*)(oc->image))
3123 + hdr->PointerToSymbolTable
3124 );
3125 strtab = ((UChar*)symtab)
3126 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3127
3128 #if defined(i386_HOST_ARCH)
3129 if (hdr->Machine != 0x14c) {
3130 errorBelch("%" PATH_FMT ": Not x86 PEi386", oc->fileName);
3131 return 0;
3132 }
3133 #elif defined(x86_64_HOST_ARCH)
3134 if (hdr->Machine != 0x8664) {
3135 errorBelch("%" PATH_FMT ": Not x86_64 PEi386", oc->fileName);
3136 return 0;
3137 }
3138 #else
3139 errorBelch("PEi386 not supported on this arch");
3140 #endif
3141
3142 if (hdr->SizeOfOptionalHeader != 0) {
3143 errorBelch("%" PATH_FMT ": PEi386 with nonempty optional header", oc->fileName);
3144 return 0;
3145 }
3146 if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
3147 (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
3148 (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
3149 (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
3150 errorBelch("%" PATH_FMT ": Not a PEi386 object file", oc->fileName);
3151 return 0;
3152 }
3153 if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
3154 /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
3155 errorBelch("%" PATH_FMT ": Invalid PEi386 word size or endiannness: %d",
3156 oc->fileName,
3157 (int)(hdr->Characteristics));
3158 return 0;
3159 }
3160 /* If the string table size is way crazy, this might indicate that
3161 there are more than 64k relocations, despite claims to the
3162 contrary. Hence this test. */
3163 /* debugBelch("strtab size %d\n", * (UInt32*)strtab); */
3164 #if 0
3165 if ( (*(UInt32*)strtab) > 600000 ) {
3166 /* Note that 600k has no special significance other than being
3167 big enough to handle the almost-2MB-sized lumps that
3168 constitute HSwin32*.o. */
3169 debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
3170 return 0;
3171 }
3172 #endif
3173
3174 /* No further verification after this point; only debug printing. */
3175 i = 0;
3176 IF_DEBUG(linker, i=1);
3177 if (i == 0) return 1;
3178
3179 debugBelch( "sectab offset = %" FMT_Int "\n", ((UChar*)sectab) - ((UChar*)hdr) );
3180 debugBelch( "symtab offset = %" FMT_Int "\n", ((UChar*)symtab) - ((UChar*)hdr) );
3181 debugBelch( "strtab offset = %" FMT_Int "\n", ((UChar*)strtab) - ((UChar*)hdr) );
3182
3183 debugBelch("\n" );
3184 debugBelch( "Machine: 0x%x\n", (UInt32)(hdr->Machine) );
3185 debugBelch( "# sections: %d\n", (UInt32)(hdr->NumberOfSections) );
3186 debugBelch( "time/date: 0x%x\n", (UInt32)(hdr->TimeDateStamp) );
3187 debugBelch( "symtab offset: %d\n", (UInt32)(hdr->PointerToSymbolTable) );
3188 debugBelch( "# symbols: %d\n", (UInt32)(hdr->NumberOfSymbols) );
3189 debugBelch( "sz of opt hdr: %d\n", (UInt32)(hdr->SizeOfOptionalHeader) );
3190 debugBelch( "characteristics: 0x%x\n", (UInt32)(hdr->Characteristics) );
3191
3192 /* Print the section table. */
3193 debugBelch("\n" );
3194 for (i = 0; i < hdr->NumberOfSections; i++) {
3195 COFF_reloc* reltab;
3196 COFF_section* sectab_i
3197 = (COFF_section*)
3198 myindex ( sizeof_COFF_section, sectab, i );
3199 debugBelch(
3200 "\n"
3201 "section %d\n"
3202 " name `",
3203 i
3204 );
3205 printName ( sectab_i->Name, strtab );
3206 debugBelch(
3207 "'\n"
3208 " vsize %d\n"
3209 " vaddr %d\n"
3210 " data sz %d\n"
3211 " data off %d\n"
3212 " num rel %d\n"
3213 " off rel %d\n"
3214 " ptr raw 0x%x\n",
3215 sectab_i->VirtualSize,
3216 sectab_i->VirtualAddress,
3217 sectab_i->SizeOfRawData,
3218 sectab_i->PointerToRawData,
3219 sectab_i->NumberOfRelocations,
3220 sectab_i->PointerToRelocations,
3221 sectab_i->PointerToRawData
3222 );
3223 reltab = (COFF_reloc*) (
3224 ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
3225 );
3226
3227 if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
3228 /* If the relocation field (a short) has overflowed, the
3229 * real count can be found in the first reloc entry.
3230 *
3231 * See Section 4.1 (last para) of the PE spec (rev6.0).
3232 */
3233 COFF_reloc* rel = (COFF_reloc*)
3234 myindex ( sizeof_COFF_reloc, reltab, 0 );
3235 noRelocs = rel->VirtualAddress;
3236 j = 1;
3237 } else {
3238 noRelocs = sectab_i->NumberOfRelocations;
3239 j = 0;
3240 }
3241
3242 for (; j < noRelocs; j++) {
3243 COFF_symbol* sym;
3244 COFF_reloc* rel = (COFF_reloc*)
3245 myindex ( sizeof_COFF_reloc, reltab, j );
3246 debugBelch(
3247 " type 0x%-4x vaddr 0x%-8x name `",
3248 (UInt32)rel->Type,
3249 rel->VirtualAddress );
3250 sym = (COFF_symbol*)
3251 myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
3252 /* Hmm..mysterious looking offset - what's it for? SOF */
3253 printName ( sym->Name, strtab -10 );
3254 debugBelch("'\n" );
3255 }
3256
3257 debugBelch("\n" );
3258 }
3259 debugBelch("\n" );
3260 debugBelch("string table has size 0x%x\n", * (UInt32*)strtab );
3261 debugBelch("---START of string table---\n");
3262 for (i = 4; i < *(Int32*)strtab; i++) {
3263 if (strtab[i] == 0)
3264 debugBelch("\n"); else
3265 debugBelch("%c", strtab[i] );
3266 }
3267 debugBelch("--- END of string table---\n");
3268
3269 debugBelch("\n" );
3270 i = 0;
3271 while (1) {
3272 COFF_symbol* symtab_i;
3273 if (i >= (Int32)(hdr->NumberOfSymbols)) break;
3274 symtab_i = (COFF_symbol*)
3275 myindex ( sizeof_COFF_symbol, symtab, i );
3276 debugBelch(
3277 "symbol %d\n"
3278 " name `",
3279 i
3280 );
3281 printName ( symtab_i->Name, strtab );
3282 debugBelch(
3283 "'\n"
3284 " value 0x%x\n"
3285 " 1+sec# %d\n"
3286 " type 0x%x\n"
3287 " sclass 0x%x\n"
3288 " nAux %d\n",
3289 symtab_i->Value,
3290 (Int32)(symtab_i->SectionNumber),
3291 (UInt32)symtab_i->Type,
3292 (UInt32)symtab_i->StorageClass,
3293 (UInt32)symtab_i->NumberOfAuxSymbols
3294 );
3295 i += symtab_i->NumberOfAuxSymbols;
3296 i++;
3297 }
3298
3299 debugBelch("\n" );
3300 return 1;
3301 }
3302
3303
3304 static int
3305 ocGetNames_PEi386 ( ObjectCode* oc )
3306 {
3307 COFF_header* hdr;
3308 COFF_section* sectab;
3309 COFF_symbol* symtab;
3310 UChar* strtab;
3311
3312 UChar* sname;
3313 void* addr;
3314 int i;
3315
3316 hdr = (COFF_header*)(oc->image);
3317 sectab = (COFF_section*) (
3318 ((UChar*)(oc->image))
3319 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3320 );
3321 symtab = (COFF_symbol*) (
3322 ((UChar*)(oc->image))
3323 + hdr->PointerToSymbolTable
3324 );
3325 strtab = ((UChar*)(oc->image))
3326 + hdr->PointerToSymbolTable
3327 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3328
3329 /* Allocate space for any (local, anonymous) .bss sections. */
3330
3331 for (i = 0; i < hdr->NumberOfSections; i++) {
3332 UInt32 bss_sz;
3333 UChar* zspace;
3334 COFF_section* sectab_i
3335 = (COFF_section*)
3336 myindex ( sizeof_COFF_section, sectab, i );
3337
3338 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3339
3340 if (0 != strcmp(secname, ".bss")) {
3341 stgFree(secname);
3342 continue;
3343 }
3344
3345 stgFree(secname);
3346
3347 /* sof 10/05: the PE spec text isn't too clear regarding what
3348 * the SizeOfRawData field is supposed to hold for object
3349 * file sections containing just uninitialized data -- for executables,
3350 * it is supposed to be zero; unclear what it's supposed to be
3351 * for object files. However, VirtualSize is guaranteed to be
3352 * zero for object files, which definitely suggests that SizeOfRawData
3353 * will be non-zero (where else would the size of this .bss section be
3354 * stored?) Looking at the COFF_section info for incoming object files,
3355 * this certainly appears to be the case.
3356 *
3357 * => I suspect we've been incorrectly handling .bss sections in (relocatable)
3358 * object files up until now. This turned out to bite us with ghc-6.4.1's use
3359 * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
3360 * variable decls into to the .bss section. (The specific function in Q which
3361 * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
3362 */
3363 if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
3364 /* This is a non-empty .bss section. Allocate zeroed space for
3365 it, and set its PointerToRawData field such that oc->image +
3366 PointerToRawData == addr_of_zeroed_space. */
3367 bss_sz = sectab_i->VirtualSize;
3368 if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
3369 zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
3370 sectab_i->PointerToRawData = ((UChar*)zspace) - ((UChar*)(oc->image));
3371 addProddableBlock(oc, zspace, bss_sz);
3372 /* debugBelch("BSS anon section at 0x%x\n", zspace); */
3373 }
3374
3375 /* Copy section information into the ObjectCode. */
3376
3377 for (i = 0; i < hdr->NumberOfSections; i++) {
3378 UChar* start;
3379 UChar* end;
3380 UInt32 sz;
3381
3382 SectionKind kind
3383 = SECTIONKIND_OTHER;
3384 COFF_section* sectab_i
3385 = (COFF_section*)
3386 myindex ( sizeof_COFF_section, sectab, i );
3387
3388 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3389
3390 IF_DEBUG(linker, debugBelch("section name = %s\n", secname ));
3391
3392 # if 0
3393 /* I'm sure this is the Right Way to do it. However, the
3394 alternative of testing the sectab_i->Name field seems to
3395 work ok with Cygwin.
3396 */
3397 if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE ||
3398 sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
3399 kind = SECTIONKIND_CODE_OR_RODATA;
3400 # endif
3401
3402 if (0==strcmp(".text",(char*)secname) ||
3403 0==strcmp(".rdata",(char*)secname)||
3404 0==strcmp(".rodata",(char*)secname))
3405 kind = SECTIONKIND_CODE_OR_RODATA;
3406 if (0==strcmp(".data",(char*)secname) ||
3407 0==strcmp(".bss",(char*)secname))
3408 kind = SECTIONKIND_RWDATA;
3409
3410 ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->VirtualSize == 0);
3411 sz = sectab_i->SizeOfRawData;
3412 if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;
3413
3414 start = ((UChar*)(oc->image)) + sectab_i->PointerToRawData;
3415 end = start + sz - 1;
3416
3417 if (kind == SECTIONKIND_OTHER
3418 /* Ignore sections called which contain stabs debugging
3419 information. */
3420 && 0 != strcmp(".stab", (char*)secname)
3421 && 0 != strcmp(".stabstr", (char*)secname)
3422 /* Ignore sections called which contain exception information. */
3423 && 0 != strcmp(".pdata", (char*)secname)
3424 && 0 != strcmp(".xdata", (char*)secname)
3425 /* ignore constructor section for now */
3426 && 0 != strcmp(".ctors", (char*)secname)
3427 /* ignore section generated from .ident */
3428 && 0!= strncmp(".debug", (char*)secname, 6)
3429 /* ignore unknown section that appeared in gcc 3.4.5(?) */
3430 && 0!= strcmp(".reloc", (char*)secname)
3431 && 0 != strcmp(".rdata$zzz", (char*)secname)
3432 ) {
3433 errorBelch("Unknown PEi386 section name `%s' (while processing: %" PATH_FMT")", secname, oc->fileName);
3434 stgFree(secname);
3435 return 0;
3436 }
3437
3438 if (kind != SECTIONKIND_OTHER && end >= start) {
3439 addSection(oc, kind, start, end);
3440 addProddableBlock(oc, start, end - start + 1);
3441 }
3442
3443 stgFree(secname);
3444 }
3445
3446 /* Copy exported symbols into the ObjectCode. */
3447
3448 oc->n_symbols = hdr->NumberOfSymbols;
3449 oc->symbols = stgMallocBytes(oc->n_symbols * sizeof(char*),
3450 "ocGetNames_PEi386(oc->symbols)");
3451 /* Call me paranoid; I don't care. */
3452 for (i = 0; i < oc->n_symbols; i++)
3453 oc->symbols[i] = NULL;
3454
3455 i = 0;
3456 while (1) {
3457 COFF_symbol* symtab_i;
3458 if (i >= (Int32)(hdr->NumberOfSymbols)) break;
3459 symtab_i = (COFF_symbol*)
3460 myindex ( sizeof_COFF_symbol, symtab, i );
3461
3462 addr = NULL;
3463
3464 if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL
3465 && symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED) {
3466 /* This symbol is global and defined, viz, exported */
3467 /* for MYIMAGE_SYMCLASS_EXTERNAL
3468 && !MYIMAGE_SYM_UNDEFINED,
3469 the address of the symbol is:
3470 address of relevant section + offset in section
3471 */
3472 COFF_section* sectabent
3473 = (COFF_section*) myindex ( sizeof_COFF_section,
3474 sectab,
3475 symtab_i->SectionNumber-1 );
3476 addr = ((UChar*)(oc->image))
3477 + (sectabent->PointerToRawData
3478 + symtab_i->Value);
3479 }
3480 else
3481 if (symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
3482 && symtab_i->Value > 0) {
3483 /* This symbol isn't in any section at all, ie, global bss.
3484 Allocate zeroed space for it. */
3485 addr = stgCallocBytes(1, symtab_i->Value,
3486 "ocGetNames_PEi386(non-anonymous bss)");
3487 addSection(oc, SECTIONKIND_RWDATA, addr,
3488 ((UChar*)addr) + symtab_i->Value - 1);
3489 addProddableBlock(oc, addr, symtab_i->Value);
3490 /* debugBelch("BSS section at 0x%x\n", addr); */
3491 }
3492
3493 if (addr != NULL ) {
3494 sname = cstring_from_COFF_symbol_name ( symtab_i->Name, strtab );
3495 /* debugBelch("addSymbol %p `%s \n", addr,sname); */
3496 IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname);)
3497 ASSERT(i >= 0 && i < oc->n_symbols);
3498 /* cstring_from_COFF_symbol_name always succeeds. */
3499 oc->symbols[i] = (char*)sname;
3500 ghciInsertStrHashTable(oc->fileName, symhash, (char*)sname, addr);
3501 } else {
3502 # if 0
3503 debugBelch(
3504 "IGNORING symbol %d\n"
3505 " name `",
3506 i
3507 );
3508 printName ( symtab_i->Name, strtab );
3509 debugBelch(
3510 "'\n"
3511 " value 0x%x\n"
3512 " 1+sec# %d\n"
3513 " type 0x%x\n"
3514 " sclass 0x%x\n"
3515 " nAux %d\n",
3516 symtab_i->Value,
3517 (Int32)(symtab_i->SectionNumber),
3518 (UInt32)symtab_i->Type,
3519 (UInt32)symtab_i->StorageClass,
3520 (UInt32)symtab_i->NumberOfAuxSymbols
3521 );
3522 # endif
3523 }
3524
3525 i += symtab_i->NumberOfAuxSymbols;
3526 i++;
3527 }
3528
3529 return 1;
3530 }
3531
3532
3533 static int
3534 ocResolve_PEi386 ( ObjectCode* oc )
3535 {
3536 COFF_header* hdr;
3537 COFF_section* sectab;
3538 COFF_symbol* symtab;
3539 UChar* strtab;
3540
3541 UInt32 A;
3542 size_t S;
3543 void * pP;
3544
3545 int i;
3546 UInt32 j, noRelocs;
3547
3548 /* ToDo: should be variable-sized? But is at least safe in the
3549 sense of buffer-overrun-proof. */
3550 UChar symbol[1000];
3551 /* debugBelch("resolving for %s\n", oc->fileName); */
3552
3553 hdr = (COFF_header*)(oc->image);
3554 sectab = (COFF_section*) (
3555 ((UChar*)(oc->image))
3556 + sizeof_COFF_header + hdr->SizeOfOptionalHeader
3557 );
3558 symtab = (COFF_symbol*) (
3559 ((UChar*)(oc->image))
3560 + hdr->PointerToSymbolTable
3561 );
3562 strtab = ((UChar*)(oc->image))
3563 + hdr->PointerToSymbolTable
3564 + hdr->NumberOfSymbols * sizeof_COFF_symbol;
3565
3566 for (i = 0; i < hdr->NumberOfSections; i++) {
3567 COFF_section* sectab_i
3568 = (COFF_section*)
3569 myindex ( sizeof_COFF_section, sectab, i );
3570 COFF_reloc* reltab
3571 = (COFF_reloc*) (
3572 ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
3573 );
3574
3575 char *secname = cstring_from_section_name(sectab_i->Name, strtab);
3576
3577 /* Ignore sections called which contain stabs debugging
3578 information. */
3579 if (0 == strcmp(".stab", (char*)secname)
3580 || 0 == strcmp(".stabstr", (char*)secname)
3581 || 0 == strcmp(".pdata", (char*)secname)
3582 || 0 == strcmp(".xdata", (char*)secname)
3583 || 0 == strcmp(".ctors", (char*)secname)
3584 || 0 == strncmp(".debug", (char*)secname, 6)
3585 || 0 == strcmp(".rdata$zzz", (char*)secname)) {
3586 stgFree(secname);
3587 continue;
3588 }
3589
3590 stgFree(secname);
3591
3592 if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
3593 /* If the relocation field (a short) has overflowed, the
3594 * real count can be found in the first reloc entry.
3595 *
3596 * See Section 4.1 (last para) of the PE spec (rev6.0).
3597 *
3598 * Nov2003 update: the GNU linker still doesn't correctly
3599 * handle the generation of relocatable object files with
3600 * overflown relocations. Hence the output to warn of potential
3601 * troubles.
3602 */
3603 COFF_reloc* rel = (COFF_reloc*)
3604 myindex ( sizeof_COFF_reloc, reltab, 0 );
3605 noRelocs = rel->VirtualAddress;
3606
3607 /* 10/05: we now assume (and check for) a GNU ld that is capable
3608 * of handling object files with (>2^16) of relocs.
3609 */
3610 #if 0
3611 debugBelch("WARNING: Overflown relocation field (# relocs found: %u)\n",
3612 noRelocs);
3613 #endif
3614 j = 1;
3615 } else {
3616 noRelocs = sectab_i->NumberOfRelocations;
3617 j = 0;
3618 }
3619
3620 for (; j < noRelocs; j++) {
3621 COFF_symbol* sym;
3622 COFF_reloc* reltab_j
3623 = (COFF_reloc*)
3624 myindex ( sizeof_COFF_reloc, reltab, j );
3625
3626 /* the location to patch */
3627 pP = (
3628 ((UChar*)(oc->image))
3629 + (sectab_i->PointerToRawData
3630 + reltab_j->VirtualAddress
3631 - sectab_i->VirtualAddress )
3632 );
3633 /* the existing contents of pP */
3634 A = *(UInt32*)pP;
3635 /* the symbol to connect to */
3636 sym = (COFF_symbol*)
3637 myindex ( sizeof_COFF_symbol,
3638 symtab, reltab_j->SymbolTableIndex );
3639 IF_DEBUG(linker,
3640 debugBelch(
3641 "reloc sec %2d num %3d: type 0x%-4x "
3642 "vaddr 0x%-8x name `",
3643 i, j,
3644 (UInt32)reltab_j->Type,
3645 reltab_j->VirtualAddress );
3646 printName ( sym->Name, strtab );
3647 debugBelch("'\n" ));
3648
3649 if (sym->StorageClass == MYIMAGE_SYM_CLASS_STATIC) {
3650 COFF_section* section_sym
3651 = findPEi386SectionCalled ( oc, sym->Name );
3652 if (!section_sym) {
3653 errorBelch("%" PATH_FMT ": can't find section `%s'", oc->fileName, sym->Name);
3654 return 0;
3655 }
3656 S = ((size_t)(oc->image))
3657 + ((size_t)(section_sym->PointerToRawData))
3658 + ((size_t)(sym->Value));
3659 } else {
3660 copyName ( sym->Name, strtab, symbol, 1000-1 );
3661 S = (size_t) lookupSymbol( (char*)symbol );
3662 if ((void*)S != NULL) goto foundit;
3663 errorBelch("%" PATH_FMT ": unknown symbol `%s'", oc->fileName, symbol);
3664 return 0;
3665 foundit:;
3666 }
3667 checkProddableBlock(oc, pP);
3668 switch (reltab_j->Type) {
3669 #if defined(i386_HOST_ARCH)
3670 case MYIMAGE_REL_I386_DIR32:
3671 *(UInt32 *)pP = ((UInt32)S) + A;
3672 break;
3673 case MYIMAGE_REL_I386_REL32:
3674 /* Tricky. We have to insert a displacement at
3675 pP which, when added to the PC for the _next_
3676 insn, gives the address of the target (S).
3677 Problem is to know the address of the next insn
3678 when we only know pP. We assume that this
3679 literal field is always the last in the insn,
3680 so that the address of the next insn is pP+4
3681 -- hence the constant 4.
3682 Also I don't know if A should be added, but so
3683 far it has always been zero.
3684
3685 SOF 05/2005: 'A' (old contents of *pP) have been observed
3686 to contain values other than zero (the 'wx' object file
3687 that came with wxhaskell-0.9.4; dunno how it was compiled..).
3688 So, add displacement to old value instead of asserting
3689 A to be zero. Fixes wxhaskell-related crashes, and no other
3690 ill effects have been observed.
3691
3692 Update: the reason why we're seeing these more elaborate
3693 relocations is due to a switch in how the NCG compiles SRTs
3694 and offsets to them from info tables. SRTs live in .(ro)data,
3695 while info tables live in .text, causing GAS to emit REL32/DISP32
3696 relocations with non-zero values. Adding the displacement is
3697 the right thing to do.
3698 */
3699 *(UInt32 *)pP = ((UInt32)S) + A - ((UInt32)(size_t)pP) - 4;
3700 break;
3701 #elif defined(x86_64_HOST_ARCH)
3702 case 2: /* R_X86_64_32 */
3703 case 17: /* R_X86_64_32S */
3704 *(UInt32 *)pP = ((UInt32)S) + A;
3705 break;
3706 case 4: /* R_X86_64_PC32 */
3707 *(UInt32 *)pP = ((UInt32)S) + A - ((UInt32)(size_t)pP) - 4;
3708 break;
3709 case 1: /* R_X86_64_64 */
3710 {
3711 UInt64 A;
3712 A = *(UInt64*)pP;
3713 *(UInt64 *)pP = ((UInt64)S) + ((UInt64)A);
3714 break;
3715 }
3716 #endif
3717 default:
3718 debugBelch("%" PATH_FMT ": unhandled PEi386 relocation type %d",
3719 oc->fileName, reltab_j->Type);
3720 return 0;
3721 }
3722
3723 }
3724 }
3725
3726 IF_DEBUG(linker, debugBelch("completed %" PATH_FMT, oc->fileName));
3727 return 1;
3728 }
3729
3730 #endif /* defined(OBJFORMAT_PEi386) */
3731
3732
3733 /* --------------------------------------------------------------------------
3734 * ELF specifics
3735 * ------------------------------------------------------------------------*/
3736
3737 #if defined(OBJFORMAT_ELF)
3738
3739 #define FALSE 0
3740 #define TRUE 1
3741
3742 #if defined(sparc_HOST_ARCH)
3743 # define ELF_TARGET_SPARC /* Used inside <elf.h> */
3744 #elif defined(i386_HOST_ARCH)
3745 # define ELF_TARGET_386 /* Used inside <elf.h> */
3746 #elif defined(x86_64_HOST_ARCH)
3747 # define ELF_TARGET_X64_64
3748 # define ELF_64BIT
3749 #endif
3750
3751 #if !defined(openbsd_HOST_OS)
3752 # include <elf.h>
3753 #else
3754 /* openbsd elf has things in different places, with diff names */
3755 # include <elf_abi.h>
3756 # include <machine/reloc.h>
3757 # define R_386_32 RELOC_32
3758 # define R_386_PC32 RELOC_PC32
3759 #endif
3760
3761 /* If elf.h doesn't define it */
3762 # ifndef R_X86_64_PC64
3763 # define R_X86_64_PC64 24
3764 # endif
3765
3766 /*
3767 * Workaround for libc implementations (e.g. eglibc) with incomplete
3768 * relocation lists
3769 */
3770 #ifndef R_ARM_THM_CALL
3771 # define R_ARM_THM_CALL 10
3772 #endif
3773 #ifndef R_ARM_CALL
3774 # define R_ARM_CALL 28
3775 #endif
3776 #ifndef R_ARM_JUMP24
3777 # define R_ARM_JUMP24 29
3778 #endif
3779 #ifndef R_ARM_THM_JUMP24
3780 # define R_ARM_THM_JUMP24 30
3781 #endif
3782 #ifndef R_ARM_TARGET1
3783 # define R_ARM_TARGET1 38
3784 #endif
3785 #ifndef R_ARM_MOVW_ABS_NC
3786 # define R_ARM_MOVW_ABS_NC 43
3787 #endif
3788 #ifndef R_ARM_MOVT_ABS
3789 # define R_ARM_MOVT_ABS 44
3790 #endif
3791 #ifndef R_ARM_THM_MOVW_ABS_NC
3792 # define R_ARM_THM_MOVW_ABS_NC 47
3793 #endif
3794 #ifndef R_ARM_THM_MOVT_ABS
3795 # define R_ARM_THM_MOVT_ABS 48
3796 #endif
3797 #ifndef R_ARM_THM_JUMP11
3798 # define R_ARM_THM_JUMP11 102
3799 #endif
3800 #ifndef R_ARM_THM_JUMP8
3801 # define R_ARM_THM_JUMP8 103
3802 #endif
3803
3804 /*
3805 * Define a set of types which can be used for both ELF32 and ELF64
3806 */
3807
3808 #ifdef ELF_64BIT
3809 #define ELFCLASS ELFCLASS64
3810 #define Elf_Addr Elf64_Addr
3811 #define Elf_Word Elf64_Word
3812 #define Elf_Sword Elf64_Sword
3813 #define Elf_Ehdr Elf64_Ehdr
3814 #define Elf_Phdr Elf64_Phdr
3815 #define Elf_Shdr Elf64_Shdr
3816 #define Elf_Sym Elf64_Sym
3817 #define Elf_Rel Elf64_Rel
3818 #define Elf_Rela Elf64_Rela
3819 #ifndef ELF_ST_TYPE
3820 #define ELF_ST_TYPE ELF64_ST_TYPE
3821 #endif
3822 #ifndef ELF_ST_BIND
3823 #define ELF_ST_BIND ELF64_ST_BIND
3824 #endif
3825 #ifndef ELF_R_TYPE
3826 #define ELF_R_TYPE ELF64_R_TYPE
3827 #endif
3828 #ifndef ELF_R_SYM
3829 #define ELF_R_SYM ELF64_R_SYM
3830 #endif
3831 #else
3832 #define ELFCLASS ELFCLASS32
3833 #define Elf_Addr Elf32_Addr
3834 #define Elf_Word Elf32_Word
3835 #define Elf_Sword Elf32_Sword
3836 #define Elf_Ehdr Elf32_Ehdr
3837 #define Elf_Phdr Elf32_Phdr
3838 #define Elf_Shdr Elf32_Shdr
3839 #define Elf_Sym Elf32_Sym
3840 #define Elf_Rel Elf32_Rel
3841 #define Elf_Rela Elf32_Rela
3842 #ifndef ELF_ST_TYPE
3843 #define ELF_ST_TYPE ELF32_ST_TYPE
3844 #endif
3845 #ifndef ELF_ST_BIND
3846 #define ELF_ST_BIND ELF32_ST_BIND
3847 #endif
3848 #ifndef ELF_R_TYPE
3849 #define ELF_R_TYPE ELF32_R_TYPE
3850 #endif
3851 #ifndef ELF_R_SYM
3852 #define ELF_R_SYM ELF32_R_SYM
3853 #endif
3854 #endif
3855
3856
3857 /*
3858 * Functions to allocate entries in dynamic sections. Currently we simply
3859 * preallocate a large number, and we don't check if a entry for the given
3860 * target already exists (a linear search is too slow). Ideally these
3861 * entries would be associated with symbols.
3862 */
3863
3864 /* These sizes sufficient to load HSbase + HShaskell98 + a few modules */
3865 #define GOT_SIZE 0x20000
3866 #define FUNCTION_TABLE_SIZE 0x10000
3867 #define PLT_SIZE 0x08000
3868
3869 #ifdef ELF_NEED_GOT
3870 static Elf_Addr got[GOT_SIZE];
3871 static unsigned int gotIndex;
3872 static Elf_Addr gp_val = (Elf_Addr)got;
3873
3874 static Elf_Addr
3875 allocateGOTEntry(Elf_Addr target)
3876 {
3877 Elf_Addr *entry;
3878
3879 if (gotIndex >= GOT_SIZE)
3880 barf("Global offset table overflow");
3881
3882 entry = &got[gotIndex++];
3883 *entry = target;
3884 return (Elf_Addr)entry;
3885 }
3886 #endif
3887
3888 #ifdef ELF_FUNCTION_DESC
3889 typedef struct {
3890 Elf_Addr ip;
3891 Elf_Addr gp;
3892 } FunctionDesc;
3893
3894 static FunctionDesc functionTable[FUNCTION_TABLE_SIZE];
3895 static unsigned int functionTableIndex;
3896
3897 static Elf_Addr
3898 allocateFunctionDesc(Elf_Addr target)
3899 {
3900 FunctionDesc *entry;
3901
3902 if (functionTableIndex >= FUNCTION_TABLE_SIZE)
3903 barf("Function table overflow");
3904
3905 entry = &functionTable[functionTableIndex++];
3906 entry->ip = target;
3907 entry->gp = (Elf_Addr)gp_val;
3908 return (Elf_Addr)entry;
3909 }
3910
3911 static Elf_Addr
3912 copyFunctionDesc(Elf_Addr target)
3913 {
3914 FunctionDesc *olddesc = (FunctionDesc *)target;
3915 FunctionDesc *newdesc;
3916
3917 newdesc = (FunctionDesc *)allocateFunctionDesc(olddesc->ip);
3918 newdesc->gp = olddesc->gp;
3919 return (Elf_Addr)newdesc;
3920 }
3921 #endif
3922
3923 #ifdef ELF_NEED_PLT
3924
3925 typedef struct {
3926 unsigned char code[sizeof(plt_code)];
3927 } PLTEntry;
3928
3929 static Elf_Addr
3930 allocatePLTEntry(Elf_Addr target, ObjectCode *oc)
3931 {
3932 PLTEntry *plt = (PLTEntry *)oc->plt;
3933 PLTEntry *entry;
3934
3935 if (oc->pltIndex >= PLT_SIZE)
3936 barf("Procedure table overflow");
3937
3938 entry = &plt[oc->pltIndex++];
3939 memcpy(entry->code, plt_code, sizeof(entry->code));
3940 PLT_RELOC(entry->code, target);
3941 return (Elf_Addr)entry;
3942 }
3943
3944 static unsigned int
3945 PLTSize(void)
3946 {
3947 return (PLT_SIZE * sizeof(PLTEntry));
3948 }
3949 #endif
3950
3951
3952 /*
3953 * Generic ELF functions
3954 */
3955
3956 static int
3957 ocVerifyImage_ELF ( ObjectCode* oc )
3958 {
3959 Elf_Shdr* shdr;
3960 Elf_Sym* stab;
3961 int i, j, nent, nstrtab, nsymtabs;
3962 char* sh_strtab;
3963
3964 char* ehdrC = (char*)(oc->image);
3965 Elf_Ehdr* ehdr = (Elf_Ehdr*)ehdrC;
3966
3967 if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
3968 ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
3969 ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
3970 ehdr->e_ident[EI_MAG3] != ELFMAG3) {
3971 errorBelch("%s: not an ELF object", oc->fileName);
3972 return 0;
3973 }
3974
3975 if (ehdr->e_ident[EI_CLASS] != ELFCLASS) {
3976 errorBelch("%s: unsupported ELF format", oc->fileName);
3977 return 0;
3978 }
3979
3980 if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) {
3981 IF_DEBUG(linker,debugBelch( "Is little-endian\n" ));
3982 } else
3983 if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) {
3984 IF_DEBUG(linker,debugBelch( "Is big-endian\n" ));
3985 } else {
3986 errorBelch("%s: unknown endiannness", oc->fileName);
3987 return 0;
3988 }
3989
3990 if (ehdr->e_type != ET_REL) {
3991 errorBelch("%s: not a relocatable object (.o) file", oc->fileName);
3992 return 0;
3993 }
3994 IF_DEBUG(linker, debugBelch( "Is a relocatable object (.o) file\n" ));
3995
3996 IF_DEBUG(linker,debugBelch( "Architecture is " ));
3997 switch (ehdr->e_machine) {
3998 #ifdef EM_ARM
3999 case EM_ARM: IF_DEBUG(linker,debugBelch( "arm" )); break;
4000 #endif
4001 case EM_386: IF_DEBUG(linker,debugBelch( "x86" )); break;
4002 #ifdef EM_SPARC32PLUS
4003 case EM_SPARC32PLUS:
4004 #endif
4005 case EM_SPARC: IF_DEBUG(linker,debugBelch( "sparc" )); break;
4006 #ifdef EM_IA_64
4007 case EM_IA_64: IF_DEBUG(linker,debugBelch( "ia64" )); break;
4008 #endif
4009 case EM_PPC: IF_DEBUG(linker,debugBelch( "powerpc32" )); break;
4010 #ifdef EM_X86_64
4011 case EM_X86_64: IF_DEBUG(linker,debugBelch( "x86_64" )); break;
4012 #elif defined(EM_AMD64)
4013 case EM_AMD64: IF_DEBUG(linker,debugBelch( "amd64" )); break;
4014 #endif
4015 default: IF_DEBUG(linker,debugBelch( "unknown" ));
4016 errorBelch("%s: unknown architecture (e_machine == %d)"
4017 , oc->fileName, ehdr->e_machine);
4018 return 0;
4019 }
4020
4021 IF_DEBUG(linker,debugBelch(