move the "meat" into the wiki, this file just contains pointers now
[ghc.git] / includes / TailCalls.h
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-1999
4 *
5 * Stuff for implementing proper tail jumps.
6 *
7 * ---------------------------------------------------------------------------*/
8
9 #ifndef TAILCALLS_H
10 #define TAILCALLS_H
11
12 /* -----------------------------------------------------------------------------
13 Unmangled tail-jumping: use the mini-interpreter.
14 -------------------------------------------------------------------------- */
15
16 #ifdef USE_MINIINTERPRETER
17 /* Mini-interpreter build: JMP_ performs no jump itself; it returns cont, cast to StgFunPtr, from the enclosing function. Presumably the caller is a loop that then calls the returned pointer -- confirm. */
18 #define JMP_(cont) return((StgFunPtr)(cont))
19 #define FB_ /* empty here; being defined, it also suppresses the default FB_ marker at the end of this file */
20 #define FE_ /* empty here; being defined, it also suppresses the default FE_ marker at the end of this file */
21
22 #else
23
24 extern void __DISCARD__(void); /* Dummy function defined elsewhere; JMP_ and FB_ call it only as a barrier / GCC workaround (see the per-architecture notes below). The IA64 section replaces it with an empty macro. */
25
26 /* -----------------------------------------------------------------------------
27 Tail calling on x86
28 -------------------------------------------------------------------------- */
29
30 #if i386_HOST_ARCH
31
32 /* Note about discard: possibly there to fool GCC into clearing up
33 before we do the jump eg. if there are some arguments left on the C
34 stack that GCC hasn't popped yet. Also possibly to fool any
35 optimisations (a function call often acts as a barrier). Not sure
36 if any of this is necessary now -- SDM
37
38 Comment to above note: I don't think the __DISCARD__() in JMP_ is
39 necessary. Arguments should be popped from the C stack immediately
40 after returning from a function, as long as we pass -fno-defer-pop
41 to gcc. Moreover, a goto to a first-class label acts as a barrier
42 for optimisations in the same way a function call does.
43 -= chak
44 */
45
46 /* The goto here seems to cause gcc -O2 to delete all the code after
47 it - including the FE_ marker and the epilogue code - exactly what
48 we want! -- SDM
49 */
50 /* JMP_(cont), i386: call __DISCARD__(), then jump to the address cont with a GCC computed goto. Note that cont is evaluated only after the __DISCARD__() call. NOTE(review): this expands to a bare { } block rather than do { } while(0), so "if (c) JMP_(x); else ..." will not parse. */
51 #define JMP_(cont) \
52 { \
53 void *__target; \
54 __DISCARD__(); \
55 __target = (void *)(cont); \
56 goto *__target; /* control never falls out of the block */ \
57 }
58
59 #endif /* i386_HOST_ARCH */
60
61 /* -----------------------------------------------------------------------------
62 Tail calling on x86_64
63 -------------------------------------------------------------------------- */
64
65 #if x86_64_HOST_ARCH
66
67 /*
68 NOTE about __DISCARD__():
69
70 On x86_64 this is necessary to work around bugs in the register
71 variable support in gcc. Without the __DISCARD__() call, gcc will
72 silently throw away assignments to global register variables that
73 happen before the jump.
74
75 Here's the example:
76
77 extern void g(void);
78 static void f(void) {
79 R1 = g;
80 __DISCARD__();
81 goto *R1;
82 }
83
84 without the dummy function call, gcc throws away the assignment to R1
85 (gcc 3.4.3) gcc bug #20359.
86 */
87 /* JMP_(cont), x86_64: call __DISCARD__(), then jump straight to cont with a GCC computed goto (no temporary is used). NOTE(review): expands to a bare { } block, so it cannot be followed by "else" in an unbraced if. */
88 #define JMP_(cont) \
89 { \
90 __DISCARD__(); /* per the note above: keeps gcc from discarding earlier assignments to global register variables */ \
91 goto *(void *)(cont); \
92 }
93
94 #endif /* x86_64_HOST_ARCH */
95
96 /* -----------------------------------------------------------------------------
97 Tail calling on Sparc
98 -------------------------------------------------------------------------- */
99
100 #ifdef sparc_HOST_ARCH
101 /* JMP_(cont), SPARC: cast cont to F_ (declared elsewhere; it is called, so it must be a function-pointer type) and call it with no arguments. The caller supplies the trailing semicolon. */
102 #define JMP_(cont) ((F_) (cont))()
103 /* Oh so happily, the above turns into a "call" instruction,
104 which, on a SPARC, is nothing but a "jmpl" with the
105 return address in %o7 [which we don't care about].
106 */
107
108 /* Don't need these for sparc mangling: both markers are defined empty, which also suppresses the default definitions at the end of this file. */
109 #define FB_
110 #define FE_
111
112 #endif /* sparc_HOST_ARCH */
113
114 /* -----------------------------------------------------------------------------
115 Tail calling on Alpha
116 -------------------------------------------------------------------------- */
117
118 #ifdef alpha_HOST_ARCH
119 /* Alpha: _procedure is a global register variable bound to register $27 (GCC extension). It is declared only when IN_STG_CODE is non-zero. */
120 #if IN_STG_CODE
121 register void *_procedure __asm__("$27");
122 #endif
123 /* JMP_(cont): store cont in _procedure, call __DISCARD__(), then computed-goto through _procedure. NOTE(review): JMP_ references _procedure unconditionally, so presumably it is only expanded where IN_STG_CODE is set -- confirm. */
124 #define JMP_(cont) \
125 do { _procedure = (void *)(cont); \
126 __DISCARD__(); \
127 goto *_procedure; \
128 } while(0)
129
130 /* Don't need these for alpha mangling: both markers are defined empty, which also suppresses the default definitions at the end of this file. */
131 #define FB_
132 #define FE_
133
134 #endif /* alpha_HOST_ARCH */
135
136 /* -----------------------------------------------------------------------------
137 Tail calling on HP
138
139 Description of HP's weird procedure linkage, many thanks to Andy Bennet
140 <andy_bennett@hp.com>:
141
142 I've been digging a little further into the problem of how HP-UX does
143 dynamic procedure calls. My solution in the last e-mail inserting an extra
144 'if' statement into the JMP_ I think is probably the best general solution I
145 can come up with. There are still a few problems with it however: It won't
146 work if JMP_ ever has to call anything in a shared library; if this is
147 likely to be required it'll need something more elaborate. It also won't work
148 with PA-RISC 2.0 wide mode (64-bit) which uses a different format PLT.
149
150 I had some feedback from someone in HP's compiler lab and the problem
151 relates to the linker on HP-UX, not gcc as I first suspected. The reason the
152 'hsc' executable works is most likely due to a change in 'ld's behaviour for
153 performance reasons between your revision and mine.
154
155 The major issue relating to this is shared libraries and how they are
156 implemented under HP-UX. The whole point of the Procedure Label Table (PLT) is
157 to allow a function pointer to hold the address of the function and a
158 pointer to the library's global data lookup table (DLT) used by position
159 independent code (PIC). This makes the PLT absolutely essential for shared
160 library calls. HP has two linker introduced assembly functions for dealing
161 with dynamic calls, $$dyncall and $$dyncall_external. The former does a
162 check to see if the address is a PLT pointer and dereferences if necessary
163 or just calls the address otherwise; the latter skips the check and just
164 does the indirect jump no matter what.
165
166 Since $$dyncall_external runs faster due to its not having the test, the
167 linker nowadays prefers to generate calls to that, rather than $$dyncall. It
168 makes this decision based on the presence of any shared library. If it even
169 smells an sl's existence at link time, it rigs the runtime system to
170 generate PLT references for everything on the assumption that the result
171 will be slightly more efficient. This is what is crashing GHC since the
172 calls it is generating have no understanding of the procedure label proper.
173 The only way to get real addresses is to link everything archive, including
174 system libraries, at which point it assumes you probably are going to be
175 using calls similar to GHC's (it's rigged for HP's +ESfic compiler option)
176 but uses $$dyncall if necessary to cope, just in case you aren't.
177
178 -------------------------------------------------------------------------- */
179
180 #ifdef hppa1_1_hp_hpux_TARGET
181 /* JMP_(cont), HP-UX PA-RISC 1.1: if bit 1 of the target address is set, replace the target with the word stored at (address - 2), then computed-goto to it. Presumably the set bit marks a PLT pointer as described above -- confirm. Relies on pointers fitting in int and on GCC's void* arithmetic. */
182 #define JMP_(cont) \
183 do { void *_procedure = (void *)(cont); \
184 if (((int) _procedure) & 2) /* test bit 1 of the address */ \
185 _procedure = (void *)(*((int *) (_procedure - 2))); /* clear the offset and load the real target from that slot */ \
186 goto *_procedure; \
187 } while(0)
188
189 #endif /* hppa1_1_hp_hpux_TARGET */
190
191 /* -----------------------------------------------------------------------------
192 Tail calling on PowerPC
193 -------------------------------------------------------------------------- */
194
195 #ifdef powerpc_HOST_ARCH
196 /* JMP_(cont), PowerPC: evaluate cont into a local first, then call __DISCARD__(), then computed-goto to the saved address (the note below explains why the call sits in between). NOTE(review): expands to a bare { } block, so it cannot be followed by "else" in an unbraced if. */
197 #define JMP_(cont) \
198 { \
199 void *target; \
200 target = (void *)(cont); /* cont, including any side effects such as --Sp, is evaluated here, before the barrier */ \
201 __DISCARD__(); \
202 goto *target; \
203 }
204
205 /*
206 The __DISCARD__ is there because Apple's April 2002 Beta of GCC 3.1
207 sometimes generates incorrect code otherwise.
208 It tends to "forget" to update global register variables in the presence
209 of decrement/increment operators:
210 JMP_(*(--Sp)) is wrongly compiled as JMP_(Sp[-1]).
211 Calling __DISCARD__ in between works around this problem.
212 */
213
214 /*
215 I would _love_ to use the following instead,
216 but some versions of Apple's GCC fail to generate code for it
217 if it is called for a casted data pointer - which is exactly what
218 we are going to do...
219
220 #define JMP_(cont) ((F_) (cont))()
221 */
222
223 #endif /* powerpc_HOST_ARCH */
224
225 #ifdef powerpc64_HOST_ARCH
226 #define JMP_(cont) ((F_) (cont))() /* cast cont to F_ and call it with no arguments; the caller supplies the trailing semicolon */
227 #endif /* powerpc64_HOST_ARCH */
228
229 /* -----------------------------------------------------------------------------
230 Tail calling on IA64
231 -------------------------------------------------------------------------- */
232
233 #ifdef ia64_HOST_ARCH
234
235 /* The compiler can more intelligently decide how to do this. We therefore
236 * implement it as a call and optimise to a jump at mangle time. */
237 #define JMP_(cont) ((F_) (cont))(); __asm__ volatile ("--- TAILCALL ---");
238
239 /* Don't emit calls to __DISCARD__ as this causes hassles */
240 #define __DISCARD__()
241
242 #endif
243
244 /* -----------------------------------------------------------------------------
245 Tail calling on MIPS
246 -------------------------------------------------------------------------- */
247
248 #ifdef mips_HOST_ARCH
249 /* MIPS: _procedure is a global register variable bound to register $25 (GCC extension). It is declared only when IN_STG_CODE is non-zero. */
250 #if IN_STG_CODE
251 register void *_procedure __asm__("$25");
252 #endif
253 /* JMP_(cont): store cont in _procedure, call __DISCARD__(), then computed-goto through _procedure. NOTE(review): JMP_ references _procedure unconditionally, so presumably it is only expanded where IN_STG_CODE is set -- confirm. Expands to a bare { } block. */
254 #define JMP_(cont) \
255 { \
256 _procedure = (void *)(cont); \
257 __DISCARD__(); \
258 goto *_procedure; \
259 }
260
261 /* Don't need these for MIPS mangling: both markers are defined empty, which also suppresses the default definitions at the end of this file. */
262 #define FB_
263 #define FE_
264
265 #endif /* mips_HOST_ARCH */
266
267 /* -----------------------------------------------------------------------------
268 FUNBEGIN and FUNEND.
269
270 These are markers indicating the start and end of Real Code in a
271 function. All instructions between the actual start and end of the
272 function and these markers are shredded by the mangler.
273 -------------------------------------------------------------------------- */
274
275 /* The following __DISCARD__() has become necessary with gcc 2.96 on x86.
276 * It prevents gcc from moving stack manipulation code from the function
277 * body (aka the Real Code) into the function prologue, ie, from moving it
278 * over the --- BEGIN --- marker. It should be noted that (like some
279 * other black magic in GHC's code), there is no essential reason why gcc
280 * could not move some stack manipulation code across the __DISCARD__() -
281 * it just doesn't choose to do it at the moment.
282 * -= chak
283 */
284 /* Default markers, used only when no section above already defined FB_ / FE_. Each emits its marker string through inline asm; FB_ additionally calls __DISCARD__() for the reason given above. Both expansions already end in ';'. */
285 #ifndef FB_
286 #define FB_ __asm__ volatile ("--- BEGIN ---"); __DISCARD__ ();
287 #endif
288
289 #ifndef FE_
290 #define FE_ __asm__ volatile ("--- END ---");
291 #endif
292
293 #endif /* !USE_MINIINTERPRETER */
294
295 #endif /* TAILCALLS_H */