Pass 512-bit-wide vectors in registers.
[ghc.git] / includes / stg / Regs.h
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2012
4 *
5 * Registers in the STG machine.
6 *
7 * Do not #include this file directly: #include "Rts.h" instead.
8 *
9 * To understand the structure of the RTS headers, see the wiki:
10 * http://hackage.haskell.org/trac/ghc/wiki/Commentary/SourceTree/Includes
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #ifndef REGS_H
15 #define REGS_H
16
17 /*
18 * The STG machine has a collection of "registers", each one of which
19 * may or may not correspond to an actual machine register when
20 * running code.
21 *
22 * The register set is backed by a table in memory (struct
23 * StgRegTable). If a particular STG register is not mapped to a
24 * machine register, then the appropriate slot in this table is used
25 * instead.
26 *
27 * This table is itself pointed to by another register, BaseReg. If
28 * BaseReg is not in a machine register, then the register table is
29 * used from an absolute location (MainCapability).
30 *
31 */
32
33 typedef struct { /* table of entries reached through FunReg; it is member f of struct PartCapability_, placed before the register table */
34 StgWord stgEagerBlackholeInfo; /* read in STG code via the stg_EAGER_BLACKHOLE_info macro */
35 StgFunPtr stgGCEnter1; /* read in STG code via the stg_gc_enter_1 macro */
36 StgFunPtr stgGCFun; /* read in STG code via the stg_gc_fun macro */
37 } StgFunTable;
38
39 /*
40 * Vanilla registers are given this union type, which is purely so
41 * that we can cast the vanilla reg to a variety of types with the
42 * minimum of syntax. eg. R1.w instead of (StgWord)R1.
43 */
44 typedef union { /* one vanilla-register slot, viewable as any of the member types below */
45 StgWord w; /* view as StgWord, e.g. R1.w */
46 StgAddr a; /* view as StgAddr */
47 StgChar c; /* view as StgChar */
48 StgFloat f; /* view as StgFloat */
49 StgInt i; /* view as StgInt */
50 StgPtr p; /* view as StgPtr */
51 } StgUnion;
52
53 /*
54 * This is the table that holds shadow-locations for all the STG
55 * registers. The shadow locations are used when:
56 *
57 * 1) the particular register isn't mapped to a real machine
58 * register, probably because there's a shortage of real registers.
59 * 2) caller-saves registers are saved across a CCall
60 */
61 typedef struct { /* BaseReg points at an object of this type; the rXxx members back the Xxx register macros */
62 StgUnion rR1; /* rR1..rR10: slots for the vanilla registers R1..R10 */
63 StgUnion rR2;
64 StgUnion rR3;
65 StgUnion rR4;
66 StgUnion rR5;
67 StgUnion rR6;
68 StgUnion rR7;
69 StgUnion rR8;
70 StgUnion rR9; /* used occasionally by heap/stack checks */
71 StgUnion rR10; /* used occasionally by heap/stack checks */
72 StgFloat rF1; /* rF1..rF6: StgFloat slots for F1..F6 */
73 StgFloat rF2;
74 StgFloat rF3;
75 StgFloat rF4;
76 StgFloat rF5;
77 StgFloat rF6;
78 StgDouble rD1; /* rD1..rD6: StgDouble slots for D1..D6 */
79 StgDouble rD2;
80 StgDouble rD3;
81 StgDouble rD4;
82 StgDouble rD5;
83 StgDouble rD6;
84 StgWord128 rXMM1; /* rXMM1..rXMM6: StgWord128 slots for XMM1..XMM6 */
85 StgWord128 rXMM2;
86 StgWord128 rXMM3;
87 StgWord128 rXMM4;
88 StgWord128 rXMM5;
89 StgWord128 rXMM6;
90 StgWord256 rYMM1; /* rYMM1..rYMM6: StgWord256 slots for YMM1..YMM6 */
91 StgWord256 rYMM2;
92 StgWord256 rYMM3;
93 StgWord256 rYMM4;
94 StgWord256 rYMM5;
95 StgWord256 rYMM6;
96 StgWord512 rZMM1; /* rZMM1..rZMM6: StgWord512 slots for ZMM1..ZMM6 */
97 StgWord512 rZMM2;
98 StgWord512 rZMM3;
99 StgWord512 rZMM4;
100 StgWord512 rZMM5;
101 StgWord512 rZMM6;
102 StgWord64 rL1; /* StgWord64 slot for L1 */
103 StgPtr rSp; /* slot for Sp */
104 StgPtr rSpLim; /* slot for SpLim */
105 StgPtr rHp; /* slot for Hp */
106 StgPtr rHpLim; /* slot for HpLim; HpLim is always accessed through this slot (declaring REG_HpLim is an #error) */
107 struct CostCentreStack_ * rCCCS; /* current cost-centre-stack */
108 struct StgTSO_ * rCurrentTSO; /* slot for CurrentTSO */
109 struct nursery_ * rNursery;
110 struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */
111 struct bdescr_ * rCurrentAlloc; /* for allocation using allocate() */
112 StgWord rHpAlloc; /* number of *bytes* being allocated in heap */
113 StgWord rRet; /* holds the return code of the thread */
114 } StgRegTable;
115
116 #if IN_STG_CODE
117
118 /*
119 * Registers Hp and HpLim are global across the entire system, and are
120 * copied into the RegTable or registers before executing a thread.
121 *
122 * Registers Sp and SpLim are saved in the TSO for the thread, but are
123 * copied into the RegTable or registers before executing a thread.
124 *
125 * All other registers are "general purpose", and are used for passing
126 * arguments to functions, and returning values. The code generator
127 * knows how many of these are in real registers, and avoids
128 * generating code that uses non-real registers. General purpose
129 * registers are never saved when returning to the scheduler, instead
130 * we save whatever is live at the time on the stack, and restore it
131 * later. This should reduce the context switch time, amongst other
132 * things.
133 *
134 * For argument passing, the stack will be used in preference to
135 * pseudo-registers if the architecture has too few general purpose
136 * registers.
137 *
138 * Some special RTS functions like newArray and the Integer primitives
139 * expect their arguments to be in registers R1-Rn, so we use these
140 * (pseudo-)registers in those cases.
141 */
142
143 /* -----------------------------------------------------------------------------
144 * Emit the GCC-specific register declarations for each machine
145 * register being used. If any STG register isn't mapped to a machine
146 * register, then map it to an offset from BaseReg.
147 *
148 * First, the general purpose registers. The idea is, if a particular
149 * general-purpose STG register can't be mapped to a real machine
150 * register, it won't be used at all. Instead, we'll use the stack.
151 */
152
153 /* Define NO_GLOBAL_REG_DECLS to omit the register declarations below - used in RTS C code
154 * that needs all the STG definitions but not the global register
155 * settings. (NO_REGS is not tested in this file; presumably it keeps the REG_xxx macros from being defined at all - confirm.)
156 * GLOBAL_REG_DECL(type,name,reg) expands to a `register type name` declaration followed by REG(reg); REG is defined elsewhere. */
157 #define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg);
158
159 #if defined(REG_R1) && !defined(NO_GLOBAL_REG_DECLS)
160 GLOBAL_REG_DECL(StgUnion,R1,REG_R1)
161 #else /* not declared as a register variable: Rn names the rRn slot reached through BaseReg (same pattern for R2..R10) */
162 # define R1 (BaseReg->rR1)
163 #endif /* R1 */
164
165 #if defined(REG_R2) && !defined(NO_GLOBAL_REG_DECLS)
166 GLOBAL_REG_DECL(StgUnion,R2,REG_R2)
167 #else
168 # define R2 (BaseReg->rR2)
169 #endif /* R2 */
170
171 #if defined(REG_R3) && !defined(NO_GLOBAL_REG_DECLS)
172 GLOBAL_REG_DECL(StgUnion,R3,REG_R3)
173 #else
174 # define R3 (BaseReg->rR3)
175 #endif /* R3 */
176
177 #if defined(REG_R4) && !defined(NO_GLOBAL_REG_DECLS)
178 GLOBAL_REG_DECL(StgUnion,R4,REG_R4)
179 #else
180 # define R4 (BaseReg->rR4)
181 #endif /* R4 */
182
183 #if defined(REG_R5) && !defined(NO_GLOBAL_REG_DECLS)
184 GLOBAL_REG_DECL(StgUnion,R5,REG_R5)
185 #else
186 # define R5 (BaseReg->rR5)
187 #endif /* R5 */
188
189 #if defined(REG_R6) && !defined(NO_GLOBAL_REG_DECLS)
190 GLOBAL_REG_DECL(StgUnion,R6,REG_R6)
191 #else
192 # define R6 (BaseReg->rR6)
193 #endif /* R6 */
194
195 #if defined(REG_R7) && !defined(NO_GLOBAL_REG_DECLS)
196 GLOBAL_REG_DECL(StgUnion,R7,REG_R7)
197 #else
198 # define R7 (BaseReg->rR7)
199 #endif /* R7 */
200
201 #if defined(REG_R8) && !defined(NO_GLOBAL_REG_DECLS)
202 GLOBAL_REG_DECL(StgUnion,R8,REG_R8)
203 #else
204 # define R8 (BaseReg->rR8)
205 #endif /* R8 */
206
207 #if defined(REG_R9) && !defined(NO_GLOBAL_REG_DECLS)
208 GLOBAL_REG_DECL(StgUnion,R9,REG_R9)
209 #else
210 # define R9 (BaseReg->rR9)
211 #endif /* R9 */
212
213 #if defined(REG_R10) && !defined(NO_GLOBAL_REG_DECLS)
214 GLOBAL_REG_DECL(StgUnion,R10,REG_R10)
215 #else
216 # define R10 (BaseReg->rR10)
217 #endif /* R10 */
218
219 #if defined(REG_F1) && !defined(NO_GLOBAL_REG_DECLS)
220 GLOBAL_REG_DECL(StgFloat,F1,REG_F1)
221 #else /* not declared as a register variable: Fn names the rFn slot reached through BaseReg (same pattern for F2..F6) */
222 #define F1 (BaseReg->rF1)
223 #endif /* F1 */
224
225 #if defined(REG_F2) && !defined(NO_GLOBAL_REG_DECLS)
226 GLOBAL_REG_DECL(StgFloat,F2,REG_F2)
227 #else
228 #define F2 (BaseReg->rF2)
229 #endif /* F2 */
230
231 #if defined(REG_F3) && !defined(NO_GLOBAL_REG_DECLS)
232 GLOBAL_REG_DECL(StgFloat,F3,REG_F3)
233 #else
234 #define F3 (BaseReg->rF3)
235 #endif /* F3 */
236
237 #if defined(REG_F4) && !defined(NO_GLOBAL_REG_DECLS)
238 GLOBAL_REG_DECL(StgFloat,F4,REG_F4)
239 #else
240 #define F4 (BaseReg->rF4)
241 #endif /* F4 */
242
243 #if defined(REG_F5) && !defined(NO_GLOBAL_REG_DECLS)
244 GLOBAL_REG_DECL(StgFloat,F5,REG_F5)
245 #else
246 #define F5 (BaseReg->rF5)
247 #endif /* F5 */
248
249 #if defined(REG_F6) && !defined(NO_GLOBAL_REG_DECLS)
250 GLOBAL_REG_DECL(StgFloat,F6,REG_F6)
251 #else
252 #define F6 (BaseReg->rF6)
253 #endif /* F6 */
254
255 #if defined(REG_D1) && !defined(NO_GLOBAL_REG_DECLS)
256 GLOBAL_REG_DECL(StgDouble,D1,REG_D1)
257 #else /* not declared as a register variable: Dn names the rDn slot reached through BaseReg (same pattern for D2..D6) */
258 #define D1 (BaseReg->rD1)
259 #endif /* D1 */
260
261 #if defined(REG_D2) && !defined(NO_GLOBAL_REG_DECLS)
262 GLOBAL_REG_DECL(StgDouble,D2,REG_D2)
263 #else
264 #define D2 (BaseReg->rD2)
265 #endif /* D2 */
266
267 #if defined(REG_D3) && !defined(NO_GLOBAL_REG_DECLS)
268 GLOBAL_REG_DECL(StgDouble,D3,REG_D3)
269 #else
270 #define D3 (BaseReg->rD3)
271 #endif /* D3 */
272
273 #if defined(REG_D4) && !defined(NO_GLOBAL_REG_DECLS)
274 GLOBAL_REG_DECL(StgDouble,D4,REG_D4)
275 #else
276 #define D4 (BaseReg->rD4)
277 #endif /* D4 */
278
279 #if defined(REG_D5) && !defined(NO_GLOBAL_REG_DECLS)
280 GLOBAL_REG_DECL(StgDouble,D5,REG_D5)
281 #else
282 #define D5 (BaseReg->rD5)
283 #endif /* D5 */
284
285 #if defined(REG_D6) && !defined(NO_GLOBAL_REG_DECLS)
286 GLOBAL_REG_DECL(StgDouble,D6,REG_D6)
287 #else
288 #define D6 (BaseReg->rD6)
289 #endif /* D6 */
290
291 #if defined(REG_XMM1) && !defined(NO_GLOBAL_REG_DECLS)
292 GLOBAL_REG_DECL(StgWord128,XMM1,REG_XMM1)
293 #else /* not declared as a register variable: XMMn names the rXMMn slot reached through BaseReg (same pattern for XMM2..XMM6) */
294 #define XMM1 (BaseReg->rXMM1)
295 #endif /* XMM1 */
296
297 #if defined(REG_XMM2) && !defined(NO_GLOBAL_REG_DECLS)
298 GLOBAL_REG_DECL(StgWord128,XMM2,REG_XMM2)
299 #else
300 #define XMM2 (BaseReg->rXMM2)
301 #endif /* XMM2 */
302
303 #if defined(REG_XMM3) && !defined(NO_GLOBAL_REG_DECLS)
304 GLOBAL_REG_DECL(StgWord128,XMM3,REG_XMM3)
305 #else
306 #define XMM3 (BaseReg->rXMM3)
307 #endif /* XMM3 */
308
309 #if defined(REG_XMM4) && !defined(NO_GLOBAL_REG_DECLS)
310 GLOBAL_REG_DECL(StgWord128,XMM4,REG_XMM4)
311 #else
312 #define XMM4 (BaseReg->rXMM4)
313 #endif /* XMM4 */
314
315 #if defined(REG_XMM5) && !defined(NO_GLOBAL_REG_DECLS)
316 GLOBAL_REG_DECL(StgWord128,XMM5,REG_XMM5)
317 #else
318 #define XMM5 (BaseReg->rXMM5)
319 #endif /* XMM5 */
320
321 #if defined(REG_XMM6) && !defined(NO_GLOBAL_REG_DECLS)
322 GLOBAL_REG_DECL(StgWord128,XMM6,REG_XMM6)
323 #else
324 #define XMM6 (BaseReg->rXMM6)
325 #endif /* XMM6 */
326
327 #if defined(REG_YMM1) && !defined(NO_GLOBAL_REG_DECLS)
328 GLOBAL_REG_DECL(StgWord256,YMM1,REG_YMM1)
329 #else /* not declared as a register variable: YMMn names the rYMMn slot reached through BaseReg (same pattern for YMM2..YMM6) */
330 #define YMM1 (BaseReg->rYMM1)
331 #endif /* YMM1 */
332
333 #if defined(REG_YMM2) && !defined(NO_GLOBAL_REG_DECLS)
334 GLOBAL_REG_DECL(StgWord256,YMM2,REG_YMM2)
335 #else
336 #define YMM2 (BaseReg->rYMM2)
337 #endif /* YMM2 */
338
339 #if defined(REG_YMM3) && !defined(NO_GLOBAL_REG_DECLS)
340 GLOBAL_REG_DECL(StgWord256,YMM3,REG_YMM3)
341 #else
342 #define YMM3 (BaseReg->rYMM3)
343 #endif /* YMM3 */
344
345 #if defined(REG_YMM4) && !defined(NO_GLOBAL_REG_DECLS)
346 GLOBAL_REG_DECL(StgWord256,YMM4,REG_YMM4)
347 #else
348 #define YMM4 (BaseReg->rYMM4)
349 #endif /* YMM4 */
350
351 #if defined(REG_YMM5) && !defined(NO_GLOBAL_REG_DECLS)
352 GLOBAL_REG_DECL(StgWord256,YMM5,REG_YMM5)
353 #else
354 #define YMM5 (BaseReg->rYMM5)
355 #endif /* YMM5 */
356
357 #if defined(REG_YMM6) && !defined(NO_GLOBAL_REG_DECLS)
358 GLOBAL_REG_DECL(StgWord256,YMM6,REG_YMM6)
359 #else
360 #define YMM6 (BaseReg->rYMM6)
361 #endif /* YMM6 */
362
363 #if defined(REG_ZMM1) && !defined(NO_GLOBAL_REG_DECLS)
364 GLOBAL_REG_DECL(StgWord512,ZMM1,REG_ZMM1)
365 #else /* not declared as a register variable: ZMMn names the rZMMn slot reached through BaseReg (same pattern for ZMM2..ZMM6) */
366 #define ZMM1 (BaseReg->rZMM1)
367 #endif /* ZMM1 */
368
369 #if defined(REG_ZMM2) && !defined(NO_GLOBAL_REG_DECLS)
370 GLOBAL_REG_DECL(StgWord512,ZMM2,REG_ZMM2)
371 #else
372 #define ZMM2 (BaseReg->rZMM2)
373 #endif /* ZMM2 */
374
375 #if defined(REG_ZMM3) && !defined(NO_GLOBAL_REG_DECLS)
376 GLOBAL_REG_DECL(StgWord512,ZMM3,REG_ZMM3)
377 #else
378 #define ZMM3 (BaseReg->rZMM3)
379 #endif /* ZMM3 */
380
381 #if defined(REG_ZMM4) && !defined(NO_GLOBAL_REG_DECLS)
382 GLOBAL_REG_DECL(StgWord512,ZMM4,REG_ZMM4)
383 #else
384 #define ZMM4 (BaseReg->rZMM4)
385 #endif /* ZMM4 */
386
387 #if defined(REG_ZMM5) && !defined(NO_GLOBAL_REG_DECLS)
388 GLOBAL_REG_DECL(StgWord512,ZMM5,REG_ZMM5)
389 #else
390 #define ZMM5 (BaseReg->rZMM5)
391 #endif /* ZMM5 */
392
393 #if defined(REG_ZMM6) && !defined(NO_GLOBAL_REG_DECLS)
394 GLOBAL_REG_DECL(StgWord512,ZMM6,REG_ZMM6)
395 #else
396 #define ZMM6 (BaseReg->rZMM6)
397 #endif /* ZMM6 */
398
399 #if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS)
400 GLOBAL_REG_DECL(StgWord64,L1,REG_L1)
401 #else /* not declared as a register variable: L1 names the rL1 slot reached through BaseReg */
402 #define L1 (BaseReg->rL1)
403 #endif /* L1 */
404
405 /*
406 * If BaseReg isn't mapped to a machine register, just use the global
407 * address of the register table, i.e. the StgRegTable embedded in
408 * MainCapability (see the definition of BaseReg below).
409 */
410
411 /* A capability is a combination of a FunTable and a RegTable. In STG
412 * code, BaseReg normally points to the RegTable portion of this
413 * structure, so that we can index both forwards and backwards to take
414 * advantage of shorter instruction forms on some archs (eg. x86).
415 * This is a cut-down version of the Capability structure; the full
416 * version is defined in Capability.h.
417 */
418 struct PartCapability_ { /* member order matters: FunReg is computed by stepping back from r by its offset in this struct */
419 StgFunTable f; /* function table, located before the register table */
420 StgRegTable r; /* register table; BaseReg refers to this member */
421 };
422
423 /* No such thing as a MainCapability under THREADED_RTS - each thread must have
424 * its own Capability.
425 */
426 #if IN_STG_CODE && (!defined(THREADED_RTS) || defined(NOSMP))
427 extern W_ MainCapability[]; /* declared as a word array here; cast to struct PartCapability_ * where BaseReg is defined */
428 #endif /* IN_STG_CODE, and not (THREADED_RTS without NOSMP) */
429
430 /*
431 * Assigning to BaseReg (the ASSIGN_BaseReg macro): this happens on
432 * return from a "safe" foreign call, when the thread might be running
433 * on a new Capability. Obviously if BaseReg is not a register, then
434 * we are restricted to a single Capability (this invariant is enforced
435 * in Capability.c:initCapabilities), and assigning to BaseReg can be omitted.
436 */
437
438 #if defined(REG_Base) && !defined(NO_GLOBAL_REG_DECLS)
439 GLOBAL_REG_DECL(StgRegTable *,BaseReg,REG_Base)
440 #define ASSIGN_BaseReg(e) (BaseReg = (e)) /* BaseReg is a real variable here, so the assignment is performed */
441 #else /* BaseReg is not declared as a register variable */
442 #if defined(THREADED_RTS) && !defined(NOSMP)
443 #error BaseReg must be in a register for THREADED_RTS
444 #endif /* THREADED_RTS && !NOSMP */
445 #define BaseReg (&((struct PartCapability_ *)MainCapability)->r) /* fixed address: the r member of MainCapability */
446 #define ASSIGN_BaseReg(e) (e) /* nothing is assigned; the argument expression is still evaluated */
447 #endif /* REG_Base && !NO_GLOBAL_REG_DECLS */
448
449 #if defined(REG_Sp) && !defined(NO_GLOBAL_REG_DECLS)
450 GLOBAL_REG_DECL(P_,Sp,REG_Sp)
451 #else /* not declared as a register variable: use the rSp slot reached through BaseReg (same pattern for SpLim, Hp, CCCS) */
452 #define Sp (BaseReg->rSp)
453 #endif /* Sp */
454
455 #if defined(REG_SpLim) && !defined(NO_GLOBAL_REG_DECLS)
456 GLOBAL_REG_DECL(P_,SpLim,REG_SpLim)
457 #else
458 #define SpLim (BaseReg->rSpLim)
459 #endif /* SpLim */
460
461 #if defined(REG_Hp) && !defined(NO_GLOBAL_REG_DECLS)
462 GLOBAL_REG_DECL(P_,Hp,REG_Hp)
463 #else
464 #define Hp (BaseReg->rHp)
465 #endif /* Hp */
466
467 #if defined(REG_HpLim) && !defined(NO_GLOBAL_REG_DECLS)
468 #error HpLim cannot be in a register
469 #else /* HpLim is only ever accessed through the rHpLim slot */
470 #define HpLim (BaseReg->rHpLim)
471 #endif /* HpLim */
472
473 #if defined(REG_CCCS) && !defined(NO_GLOBAL_REG_DECLS)
474 GLOBAL_REG_DECL(struct CostCentreStack_ *,CCCS,REG_CCCS)
475 #else
476 #define CCCS (BaseReg->rCCCS)
477 #endif /* CCCS */
478
479 #if defined(REG_CurrentTSO) && !defined(NO_GLOBAL_REG_DECLS)
480 GLOBAL_REG_DECL(struct _StgTSO *,CurrentTSO,REG_CurrentTSO)
481 #else
482 #define CurrentTSO (BaseReg->rCurrentTSO)
483 #endif
484
485 #if defined(REG_CurrentNursery) && !defined(NO_GLOBAL_REG_DECLS)
486 GLOBAL_REG_DECL(bdescr *,CurrentNursery,REG_CurrentNursery)
487 #else
488 #define CurrentNursery (BaseReg->rCurrentNursery)
489 #endif
490
491 #if defined(REG_HpAlloc) && !defined(NO_GLOBAL_REG_DECLS)
492 GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc)
493 #else
494 #define HpAlloc (BaseReg->rHpAlloc)
495 #endif
496
497 /* -----------------------------------------------------------------------------
498 Get absolute function pointers from the register table, to save
499 code space. On x86,
500
501 jmp *-12(%ebx)
502
503 is shorter than
504
505 jmp absolute_address
506
507 as long as the offset is within the range of a signed byte
508 (-128..+127). So we pick some common absolute_addresses and put
509 them in the register table. As a bonus, linking time should also
510 be reduced.
511
512 Other possible candidates in order of importance:
513
514 stg_upd_frame_info
515 stg_CAF_BLACKHOLE_info
516 stg_IND_STATIC_info
517
518 anything else probably isn't worth the effort.
519
520 -------------------------------------------------------------------------- */
521 /* FunReg: address of the StgFunTable belonging to the PartCapability_ whose r member BaseReg points at. */
522 /* The offset is subtracted on a char * (arithmetic on void * is a GNU extension); the result goes through void * before the final cast. */
523 #define FunReg ((StgFunTable *)(void *)((char *)BaseReg - STG_FIELD_OFFSET(struct PartCapability_, r)))
524 /* The macros below read the three StgFunTable entries through FunReg. */
525 #define stg_EAGER_BLACKHOLE_info (FunReg->stgEagerBlackholeInfo)
526 #define stg_gc_enter_1 (FunReg->stgGCEnter1)
527 #define stg_gc_fun (FunReg->stgGCFun)
528
529 #endif /* IN_STG_CODE */
530
531 #endif /* REGS_H */