Update Hadrian
[ghc.git] / includes / Cmm.h
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The University of Glasgow 2004-2013
4 *
5 * This file is included at the top of all .cmm source files (and
6 * *only* .cmm files). It defines a collection of useful macros for
7 * making .cmm code a bit less error-prone to write, and a bit easier
8 * on the eye for the reader.
9 *
10 * For the syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 *
12 * Accessing fields of structures defined in the RTS header files is
13 * done via automatically-generated macros in DerivedConstants.h. For
14 * example, where previously we used
15 *
16 * CurrentTSO->what_next = x
17 *
18 * in C-- we now use
19 *
20 * StgTSO_what_next(CurrentTSO) = x
21 *
22 * where the StgTSO_what_next() macro is automatically generated by
23 * mkDerivedConstants.c. If you need to access a field that doesn't
24 * already have a macro, edit that file (it's pretty self-explanatory).
25 *
26 * -------------------------------------------------------------------------- */
27
28 #pragma once
29
30 /*
31 * In files that are included into both C and C-- (and perhaps
32 * Haskell) sources, we sometimes need to conditionally compile bits
33 * depending on the language. CMINUSMINUS==1 in .cmm sources:
34 */
35 #define CMINUSMINUS 1
36
37 #include "ghcconfig.h"
38
39 /* -----------------------------------------------------------------------------
40 Types
41
42 The following synonyms for C-- types are declared here:
43
44 I8, I16, I32, I64 MachRep-style names for convenience
45
46 W_ is shorthand for the word type (== StgWord)
47 F_ shorthand for float (F_ == StgFloat == C's float)
48 D_ shorthand for double (D_ == StgDouble == C's double)
49
50 CInt has the same size as an int in C on this platform
51 CLong has the same size as a long in C on this platform
52 CBool has the same size as a bool in C on this platform
53
54 --------------------------------------------------------------------------- */
55
56 #define I8 bits8
57 #define I16 bits16
58 #define I32 bits32
59 #define I64 bits64
60 #define P_ gcptr
61
62 #if SIZEOF_VOID_P == 4
63 #define W_ bits32
64 /* Maybe it's better to include MachDeps.h */
65 #define TAG_BITS 2
66 #elif SIZEOF_VOID_P == 8
67 #define W_ bits64
68 /* Maybe it's better to include MachDeps.h */
69 #define TAG_BITS 3
70 #else
71 #error Unknown word size
72 #endif
73
74 /*
75 * The RTS must sometimes UNTAG a pointer before dereferencing it.
76 * See the wiki page Commentary/Rts/HaskellExecution/PointerTagging
77 */
78 #define TAG_MASK ((1 << TAG_BITS) - 1)
79 #define UNTAG(p) (p & ~TAG_MASK)
80 #define GETTAG(p) (p & TAG_MASK)
81
82 #if SIZEOF_INT == 4
83 #define CInt bits32
84 #elif SIZEOF_INT == 8
85 #define CInt bits64
86 #else
87 #error Unknown int size
88 #endif
89
90 #if SIZEOF_LONG == 4
91 #define CLong bits32
92 #elif SIZEOF_LONG == 8
93 #define CLong bits64
94 #else
95 #error Unknown long size
96 #endif
97
98 #define CBool bits8
99
100 #define F_ float32
101 #define D_ float64
102 #define L_ bits64
103 #define V16_ bits128
104 #define V32_ bits256
105 #define V64_ bits512
106
107 #define SIZEOF_StgDouble 8
108 #define SIZEOF_StgWord64 8
109
110 /* -----------------------------------------------------------------------------
111 Misc useful stuff
112 -------------------------------------------------------------------------- */
113
114 #define ccall foreign "C"
115
116 #define NULL (0::W_)
117
118 #define STRING(name,str) \
119 section "rodata" { \
120 name : bits8[] str; \
121 } \
122
123 #if defined(TABLES_NEXT_TO_CODE)
124 #define RET_LBL(f) f##_info
125 #else
126 #define RET_LBL(f) f##_ret
127 #endif
128
129 #if defined(TABLES_NEXT_TO_CODE)
130 #define ENTRY_LBL(f) f##_info
131 #else
132 #define ENTRY_LBL(f) f##_entry
133 #endif
134
135 /* -----------------------------------------------------------------------------
136 Byte/word macros
137
138 Everything in C-- is in byte offsets (well, most things). We use
139 some macros to allow us to express offsets in words and to try to
140 avoid byte/word confusion.
141 -------------------------------------------------------------------------- */
142
143 #define SIZEOF_W SIZEOF_VOID_P
144 #define W_MASK (SIZEOF_W-1)
145
146 #if SIZEOF_W == 4
147 #define W_SHIFT 2
148 #elif SIZEOF_W == 8
149 #define W_SHIFT 3
150 #endif
151
152 /* Converting quantities of words to bytes */
153 #define WDS(n) ((n)*SIZEOF_W)
154
155 /*
156 * Converting quantities of bytes to words
157 * NB. these work on *unsigned* values only
158 */
159 #define BYTES_TO_WDS(n) ((n) / SIZEOF_W)
160 #define ROUNDUP_BYTES_TO_WDS(n) (((n) + SIZEOF_W - 1) / SIZEOF_W)
161
162 /* TO_W_(n) converts n to W_ type from a smaller type */
163 #if SIZEOF_W == 4
164 #define TO_W_(x) %sx32(x)
165 #define HALF_W_(x) %lobits16(x)
166 #elif SIZEOF_W == 8
167 #define TO_W_(x) %sx64(x)
168 #define HALF_W_(x) %lobits32(x)
169 #endif
170
171 #if SIZEOF_INT == 4 && SIZEOF_W == 8
172 #define W_TO_INT(x) %lobits32(x)
173 #elif SIZEOF_INT == SIZEOF_W
174 #define W_TO_INT(x) (x)
175 #endif
176
177 #if SIZEOF_LONG == 4 && SIZEOF_W == 8
178 #define W_TO_LONG(x) %lobits32(x)
179 #elif SIZEOF_LONG == SIZEOF_W
180 #define W_TO_LONG(x) (x)
181 #endif
182
183 /* -----------------------------------------------------------------------------
184 Atomic memory operations.
185 -------------------------------------------------------------------------- */
186
187 #if SIZEOF_W == 4
188 #define cmpxchgW cmpxchg32
189 #elif SIZEOF_W == 8
190 #define cmpxchgW cmpxchg64
191 #endif
192
193 /* -----------------------------------------------------------------------------
194 Heap/stack access, and adjusting the heap/stack pointers.
195 -------------------------------------------------------------------------- */
196
197 #define Sp(n) W_[Sp + WDS(n)]
198 #define Hp(n) W_[Hp + WDS(n)]
199
200 #define Sp_adj(n) Sp = Sp + WDS(n) /* pronounced "spadge" */
201 #define Hp_adj(n) Hp = Hp + WDS(n)
202
203 /* -----------------------------------------------------------------------------
204 Assertions and Debuggery
205 -------------------------------------------------------------------------- */
206
207 #if defined(DEBUG)
208 #define ASSERT(predicate) \
209 if (predicate) { \
210 /*null*/; \
211 } else { \
212 foreign "C" _assertFail(NULL, __LINE__) never returns; \
213 }
214 #else
215 #define ASSERT(p) /* nothing */
216 #endif
217
218 #if defined(DEBUG)
219 #define DEBUG_ONLY(s) s
220 #else
221 #define DEBUG_ONLY(s) /* nothing */
222 #endif
223
224 /*
225 * The IF_DEBUG macro is useful for debug messages that depend on one
226 * of the RTS debug options. For example:
227 *
228 * IF_DEBUG(RtsFlags_DebugFlags_apply,
229 * foreign "C" fprintf(stderr, stg_ap_0_ret_str));
230 *
231 * Note the syntax is slightly different to the C version of this macro.
232 */
233 #if defined(DEBUG)
234 #define IF_DEBUG(c,s) if (RtsFlags_DebugFlags_##c(RtsFlags) != 0::CBool) { s; }
235 #else
236 #define IF_DEBUG(c,s) /* nothing */
237 #endif
238
239 /* -----------------------------------------------------------------------------
240 Entering
241
242 It isn't safe to "enter" every closure. Functions in particular
243 have no entry code as such; their entry point contains the code to
244 apply the function.
245
246 ToDo: range should end in N_CLOSURE_TYPES-1, not N_CLOSURE_TYPES,
247 but switch doesn't allow us to use exprs there yet.
248
249 If R1 points to a tagged object it points either to
250 * A constructor.
251 * A function with arity <= TAG_MASK.
252 In both cases the right thing to do is to return.
253 Note: it is rather lucky that we can use the tag bits to do this
254 for both objects. Maybe it points to a brittle design?
255
256 Indirections can contain tagged pointers, so their tag is checked.
257 -------------------------------------------------------------------------- */
258
259 #if defined(PROFILING)
260
261 // When profiling, we cannot shortcut ENTER() by checking the tag,
262 // because LDV profiling relies on entering closures to mark them as
263 // "used".
264
265 #define LOAD_INFO(ret,x) \
266 info = %INFO_PTR(UNTAG(x));
267
268 #define UNTAG_IF_PROF(x) UNTAG(x)
269
270 #else
271
272 #define LOAD_INFO(ret,x) \
273 if (GETTAG(x) != 0) { \
274 ret(x); \
275 } \
276 info = %INFO_PTR(x);
277
278 #define UNTAG_IF_PROF(x) (x) /* already untagged */
279
280 #endif
281
282 // We need two versions of ENTER():
283 // - ENTER(x) takes the closure as an argument and uses return(),
284 // for use in civilized code where the stack is handled by GHC
285 //
286 // - ENTER_NOSTACK() where the closure is in R1, and returns are
287 // explicit jumps, for use when we are doing the stack management
288 // ourselves.
289
290 #if defined(PROFILING)
291 // See Note [Evaluating functions with profiling] in rts/Apply.cmm
292 #define ENTER(x) jump stg_ap_0_fast(x);
293 #else
294 #define ENTER(x) ENTER_(return,x)
295 #endif
296
297 #define ENTER_R1() ENTER_(RET_R1,R1)
298
299 #define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1]
300
301 #define ENTER_(ret,x) \
302 again: \
303 W_ info; \
304 LOAD_INFO(ret,x) \
305 switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \
306 (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \
307 case \
308 IND, \
309 IND_STATIC: \
310 { \
311 x = StgInd_indirectee(x); \
312 goto again; \
313 } \
314 case \
315 FUN, \
316 FUN_1_0, \
317 FUN_0_1, \
318 FUN_2_0, \
319 FUN_1_1, \
320 FUN_0_2, \
321 FUN_STATIC, \
322 BCO, \
323 PAP: \
324 { \
325 ret(x); \
326 } \
327 default: \
328 { \
329 x = UNTAG_IF_PROF(x); \
330 jump %ENTRY_CODE(info) (x); \
331 } \
332 }
333
334 // The FUN cases almost never happen: a pointer to a non-static FUN
335 // should always be tagged. This unfortunately isn't true for the
336 // interpreter right now, which leaves untagged FUNs on the stack.
337
338 /* -----------------------------------------------------------------------------
339 Constants.
340 -------------------------------------------------------------------------- */
341
342 #include "rts/Constants.h"
343 #include "DerivedConstants.h"
344 #include "rts/storage/ClosureTypes.h"
345 #include "rts/storage/FunTypes.h"
346 #include "rts/OSThreads.h"
347
348 /*
349 * Need MachRegs, because some of the RTS code is conditionally
350 * compiled based on REG_R1, REG_R2, etc.
351 */
352 #include "stg/RtsMachRegs.h"
353
354 #include "rts/prof/LDV.h"
355
356 #undef BLOCK_SIZE
357 #undef MBLOCK_SIZE
358 #include "rts/storage/Block.h" /* For Bdescr() */
359
360
361 #define MyCapability() (BaseReg - OFFSET_Capability_r)
362
363 /* -------------------------------------------------------------------------
364 Info tables
365 ------------------------------------------------------------------------- */
366
367 #if defined(PROFILING)
368 #define PROF_HDR_FIELDS(w_,hdr1,hdr2) \
369 w_ hdr1, \
370 w_ hdr2,
371 #else
372 #define PROF_HDR_FIELDS(w_,hdr1,hdr2) /* nothing */
373 #endif
374
375 /* -------------------------------------------------------------------------
376 Allocation and garbage collection
377 ------------------------------------------------------------------------- */
378
379 /*
380 * ALLOC_PRIM is for allocating memory on the heap for a primitive
381 * object. It is used all over PrimOps.cmm.
382 *
383 * We make the simplifying assumption that the "admin" part of a
384 * primitive closure is just the header when calculating sizes for
385 * ticky-ticky. It's not clear whether eg. the size field of an array
386 * should be counted as "admin", or the various fields of a BCO.
387 */
388 #define ALLOC_PRIM(bytes) \
389 HP_CHK_GEN_TICKY(bytes); \
390 TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
391 CCCS_ALLOC(bytes);
392
393 #define HEAP_CHECK(bytes,failure) \
394 TICK_BUMP(HEAP_CHK_ctr); \
395 Hp = Hp + (bytes); \
396 if (Hp > HpLim) { HpAlloc = (bytes); failure; } \
397 TICK_ALLOC_HEAP_NOCTR(bytes);
398
399 #define ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,failure) \
400 HEAP_CHECK(bytes,failure) \
401 TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
402 CCCS_ALLOC(bytes);
403
404 #define ALLOC_PRIM_(bytes,fun) \
405 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM(fun));
406
407 #define ALLOC_PRIM_P(bytes,fun,arg) \
408 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_P(fun,arg));
409
410 #define ALLOC_PRIM_N(bytes,fun,arg) \
411 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_N(fun,arg));
412
413 /* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */
414 #define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS)
415
416 #define HP_CHK_GEN_TICKY(bytes) \
417 HP_CHK_GEN(bytes); \
418 TICK_ALLOC_HEAP_NOCTR(bytes);
419
420 #define HP_CHK_P(bytes, fun, arg) \
421 HEAP_CHECK(bytes, GC_PRIM_P(fun,arg))
422
423 // TODO I'm not seeing where ALLOC_P_TICKY is used; can it be removed?
424 // -NSF March 2013
425 #define ALLOC_P_TICKY(bytes, fun, arg) \
426 HP_CHK_P(bytes); \
427 TICK_ALLOC_HEAP_NOCTR(bytes);
428
429 #define CHECK_GC() \
430 (bdescr_link(CurrentNursery) == NULL || \
431 generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim]))
432
433 // allocate() allocates from the nursery, so we check to see
434 // whether the nursery is nearly empty in any function that uses
435 // allocate() - this includes many of the primops.
436 //
437 // HACK alert: the __L__ stuff is here to coax the common-block
438 // eliminator into commoning up the call stg_gc_noregs() with the same
439 // code that gets generated by a STK_CHK_GEN() in the same proc. We
440 // also need an if (0) { goto __L__; } so that the __L__ label isn't
441 // optimised away by the control-flow optimiser prior to common-block
442 // elimination (it will be optimised away later).
443 //
444 // This saves some code in gmp-wrappers.cmm where we have lots of
445 // MAYBE_GC() in the same proc as STK_CHK_GEN().
446 //
447 #define MAYBE_GC(retry) \
448 if (CHECK_GC()) { \
449 HpAlloc = 0; \
450 goto __L__; \
451 __L__: \
452 call stg_gc_noregs(); \
453 goto retry; \
454 } \
455 if (0) { goto __L__; }
456
457 #define GC_PRIM(fun) \
458 jump stg_gc_prim(fun);
459
460 // Version of GC_PRIM for use in low-level Cmm. We can call
461 // stg_gc_prim, because it takes one argument and therefore has a
462 // platform-independent calling convention (Note [Syntax of .cmm
463 // files] in CmmParse.y).
464 #define GC_PRIM_LL(fun) \
465 R1 = fun; \
466 jump stg_gc_prim [R1];
467
468 // We pass the fun as the second argument, because the arg is
469 // usually already in the first argument position (R1), so this
470 // avoids moving it to a different register / stack slot.
471 #define GC_PRIM_N(fun,arg) \
472 jump stg_gc_prim_n(arg,fun);
473
474 #define GC_PRIM_P(fun,arg) \
475 jump stg_gc_prim_p(arg,fun);
476
477 #define GC_PRIM_P_LL(fun,arg) \
478 R1 = arg; \
479 R2 = fun; \
480 jump stg_gc_prim_p_ll [R1,R2];
481
482 #define GC_PRIM_PP(fun,arg1,arg2) \
483 jump stg_gc_prim_pp(arg1,arg2,fun);
484
485 #define MAYBE_GC_(fun) \
486 if (CHECK_GC()) { \
487 HpAlloc = 0; \
488 GC_PRIM(fun) \
489 }
490
491 #define MAYBE_GC_N(fun,arg) \
492 if (CHECK_GC()) { \
493 HpAlloc = 0; \
494 GC_PRIM_N(fun,arg) \
495 }
496
497 #define MAYBE_GC_P(fun,arg) \
498 if (CHECK_GC()) { \
499 HpAlloc = 0; \
500 GC_PRIM_P(fun,arg) \
501 }
502
503 #define MAYBE_GC_PP(fun,arg1,arg2) \
504 if (CHECK_GC()) { \
505 HpAlloc = 0; \
506 GC_PRIM_PP(fun,arg1,arg2) \
507 }
508
509 #define STK_CHK_LL(n, fun) \
510 TICK_BUMP(STK_CHK_ctr); \
511 if (Sp - (n) < SpLim) { \
512 GC_PRIM_LL(fun) \
513 }
514
515 #define STK_CHK_P_LL(n, fun, arg) \
516 TICK_BUMP(STK_CHK_ctr); \
517 if (Sp - (n) < SpLim) { \
518 GC_PRIM_P_LL(fun,arg) \
519 }
520
521 #define STK_CHK_PP(n, fun, arg1, arg2) \
522 TICK_BUMP(STK_CHK_ctr); \
523 if (Sp - (n) < SpLim) { \
524 GC_PRIM_PP(fun,arg1,arg2) \
525 }
526
527 #define STK_CHK_ENTER(n, closure) \
528 TICK_BUMP(STK_CHK_ctr); \
529 if (Sp - (n) < SpLim) { \
530 jump __stg_gc_enter_1(closure); \
531 }
532
533 // A funky heap check used by AutoApply.cmm
534
535 #define HP_CHK_NP_ASSIGN_SP0(size,f) \
536 HEAP_CHECK(size, Sp(0) = f; jump __stg_gc_enter_1 [R1];)
537
538 /* -----------------------------------------------------------------------------
539 Closure headers
540 -------------------------------------------------------------------------- */
541
542 /*
543 * This is really ugly, since we don't do the rest of StgHeader this
544 * way. The problem is that values from DerivedConstants.h cannot be
545 * dependent on the way (SMP, PROF etc.). For SIZEOF_StgHeader we get
546 * the value from GHC, but it seems like too much trouble to do that
547 * for StgThunkHeader.
548 */
549 #define SIZEOF_StgThunkHeader SIZEOF_StgHeader+SIZEOF_StgSMPThunkHeader
550
551 #define StgThunk_payload(__ptr__,__ix__) \
552 W_[__ptr__+SIZEOF_StgThunkHeader+ WDS(__ix__)]
553
554 /* -----------------------------------------------------------------------------
555 Closures
556 -------------------------------------------------------------------------- */
557
558 /* The offset of the payload of an array */
559 #define BYTE_ARR_CTS(arr) ((arr) + SIZEOF_StgArrBytes)
560
561 /* The number of words allocated in an array payload */
562 #define BYTE_ARR_WDS(arr) ROUNDUP_BYTES_TO_WDS(StgArrBytes_bytes(arr))
563
564 /* Getting/setting the info pointer of a closure */
565 #define SET_INFO(p,info) StgHeader_info(p) = info
566 #define GET_INFO(p) StgHeader_info(p)
567
568 /* Determine the size of an ordinary closure from its info table */
569 #define sizeW_fromITBL(itbl) \
570 SIZEOF_StgHeader + WDS(%INFO_PTRS(itbl)) + WDS(%INFO_NPTRS(itbl))
571
572 /* NB. duplicated from InfoTables.h! */
573 #define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK)
574 #define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT)
575
576 /* Debugging macros */
577 #define LOOKS_LIKE_INFO_PTR(p) \
578 ((p) != NULL && \
579 LOOKS_LIKE_INFO_PTR_NOT_NULL(p))
580
581 #define LOOKS_LIKE_INFO_PTR_NOT_NULL(p) \
582 ( (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \
583 (TO_W_(%INFO_TYPE(%STD_INFO(p))) < N_CLOSURE_TYPES))
584
585 #define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p))))
586
587 /*
588 * The layout of the StgFunInfoExtra part of an info table changes
589 * depending on TABLES_NEXT_TO_CODE. So we define field access
590 * macros which use the appropriate version here:
591 */
592 #if defined(TABLES_NEXT_TO_CODE)
593 /*
594 * when TABLES_NEXT_TO_CODE, slow_apply is stored as an offset
595 * instead of the normal pointer.
596 */
597
598 #define StgFunInfoExtra_slow_apply(fun_info) \
599 (TO_W_(StgFunInfoExtraRev_slow_apply_offset(fun_info)) \
600 + (fun_info) + SIZEOF_StgFunInfoExtraRev + SIZEOF_StgInfoTable)
601
602 #define StgFunInfoExtra_fun_type(i) StgFunInfoExtraRev_fun_type(i)
603 #define StgFunInfoExtra_arity(i) StgFunInfoExtraRev_arity(i)
604 #define StgFunInfoExtra_bitmap(i) StgFunInfoExtraRev_bitmap(i)
605 #else
606 #define StgFunInfoExtra_slow_apply(i) StgFunInfoExtraFwd_slow_apply(i)
607 #define StgFunInfoExtra_fun_type(i) StgFunInfoExtraFwd_fun_type(i)
608 #define StgFunInfoExtra_arity(i) StgFunInfoExtraFwd_arity(i)
609 #define StgFunInfoExtra_bitmap(i) StgFunInfoExtraFwd_bitmap(i)
610 #endif
611
612 #define mutArrCardMask ((1 << MUT_ARR_PTRS_CARD_BITS) - 1)
613 #define mutArrPtrCardDown(i) ((i) >> MUT_ARR_PTRS_CARD_BITS)
614 #define mutArrPtrCardUp(i) (((i) + mutArrCardMask) >> MUT_ARR_PTRS_CARD_BITS)
615 #define mutArrPtrsCardWords(n) ROUNDUP_BYTES_TO_WDS(mutArrPtrCardUp(n))
616
617 #if defined(PROFILING) || (!defined(THREADED_RTS) && defined(DEBUG))
618 #define OVERWRITING_CLOSURE(c) foreign "C" overwritingClosure(c "ptr")
619 #define OVERWRITING_CLOSURE_OFS(c,n) \
620 foreign "C" overwritingClosureOfs(c "ptr", n)
621 #else
622 #define OVERWRITING_CLOSURE(c) /* nothing */
623 #define OVERWRITING_CLOSURE_OFS(c,n) /* nothing */
624 #endif
625
626 #if defined(THREADED_RTS)
627 #define prim_write_barrier prim %write_barrier()
628 #else
629 #define prim_write_barrier /* nothing */
630 #endif
631
632 /* -----------------------------------------------------------------------------
633 Ticky macros
634 -------------------------------------------------------------------------- */
635
636 #if defined(TICKY_TICKY)
637 #define TICK_BUMP_BY(ctr,n) CLong[ctr] = CLong[ctr] + n
638 #else
639 #define TICK_BUMP_BY(ctr,n) /* nothing */
640 #endif
641
642 #define TICK_BUMP(ctr) TICK_BUMP_BY(ctr,1)
643
644 #define TICK_ENT_DYN_IND() TICK_BUMP(ENT_DYN_IND_ctr)
645 #define TICK_ENT_DYN_THK() TICK_BUMP(ENT_DYN_THK_ctr)
646 #define TICK_ENT_VIA_NODE() TICK_BUMP(ENT_VIA_NODE_ctr)
647 #define TICK_ENT_STATIC_IND() TICK_BUMP(ENT_STATIC_IND_ctr)
648 #define TICK_ENT_PERM_IND() TICK_BUMP(ENT_PERM_IND_ctr)
649 #define TICK_ENT_PAP() TICK_BUMP(ENT_PAP_ctr)
650 #define TICK_ENT_AP() TICK_BUMP(ENT_AP_ctr)
651 #define TICK_ENT_AP_STACK() TICK_BUMP(ENT_AP_STACK_ctr)
652 #define TICK_ENT_BH() TICK_BUMP(ENT_BH_ctr)
653 #define TICK_ENT_LNE() TICK_BUMP(ENT_LNE_ctr)
654 #define TICK_UNKNOWN_CALL() TICK_BUMP(UNKNOWN_CALL_ctr)
655 #define TICK_UPDF_PUSHED() TICK_BUMP(UPDF_PUSHED_ctr)
656 #define TICK_CATCHF_PUSHED() TICK_BUMP(CATCHF_PUSHED_ctr)
657 #define TICK_UPDF_OMITTED() TICK_BUMP(UPDF_OMITTED_ctr)
658 #define TICK_UPD_NEW_IND() TICK_BUMP(UPD_NEW_IND_ctr)
659 #define TICK_UPD_NEW_PERM_IND() TICK_BUMP(UPD_NEW_PERM_IND_ctr)
660 #define TICK_UPD_OLD_IND() TICK_BUMP(UPD_OLD_IND_ctr)
661 #define TICK_UPD_OLD_PERM_IND() TICK_BUMP(UPD_OLD_PERM_IND_ctr)
662
663 #define TICK_SLOW_CALL_FUN_TOO_FEW() TICK_BUMP(SLOW_CALL_FUN_TOO_FEW_ctr)
664 #define TICK_SLOW_CALL_FUN_CORRECT() TICK_BUMP(SLOW_CALL_FUN_CORRECT_ctr)
665 #define TICK_SLOW_CALL_FUN_TOO_MANY() TICK_BUMP(SLOW_CALL_FUN_TOO_MANY_ctr)
666 #define TICK_SLOW_CALL_PAP_TOO_FEW() TICK_BUMP(SLOW_CALL_PAP_TOO_FEW_ctr)
667 #define TICK_SLOW_CALL_PAP_CORRECT() TICK_BUMP(SLOW_CALL_PAP_CORRECT_ctr)
668 #define TICK_SLOW_CALL_PAP_TOO_MANY() TICK_BUMP(SLOW_CALL_PAP_TOO_MANY_ctr)
669
670 #define TICK_SLOW_CALL_fast_v16() TICK_BUMP(SLOW_CALL_fast_v16_ctr)
671 #define TICK_SLOW_CALL_fast_v() TICK_BUMP(SLOW_CALL_fast_v_ctr)
672 #define TICK_SLOW_CALL_fast_p() TICK_BUMP(SLOW_CALL_fast_p_ctr)
673 #define TICK_SLOW_CALL_fast_pv() TICK_BUMP(SLOW_CALL_fast_pv_ctr)
674 #define TICK_SLOW_CALL_fast_pp() TICK_BUMP(SLOW_CALL_fast_pp_ctr)
675 #define TICK_SLOW_CALL_fast_ppv() TICK_BUMP(SLOW_CALL_fast_ppv_ctr)
676 #define TICK_SLOW_CALL_fast_ppp() TICK_BUMP(SLOW_CALL_fast_ppp_ctr)
677 #define TICK_SLOW_CALL_fast_pppv() TICK_BUMP(SLOW_CALL_fast_pppv_ctr)
678 #define TICK_SLOW_CALL_fast_pppp() TICK_BUMP(SLOW_CALL_fast_pppp_ctr)
679 #define TICK_SLOW_CALL_fast_ppppp() TICK_BUMP(SLOW_CALL_fast_ppppp_ctr)
680 #define TICK_SLOW_CALL_fast_pppppp() TICK_BUMP(SLOW_CALL_fast_pppppp_ctr)
681 #define TICK_VERY_SLOW_CALL() TICK_BUMP(VERY_SLOW_CALL_ctr)
682
683 /* NOTE: TICK_HISTO_BY and TICK_HISTO
684 currently have no effect.
685 The old code for it didn't typecheck and I
686 just commented it out to get ticky to work.
687 - krc 1/2007 */
688
689 #define TICK_HISTO_BY(histo,n,i) /* nothing */
690
691 #define TICK_HISTO(histo,n) TICK_HISTO_BY(histo,n,1)
692
693 /* An unboxed tuple with n components. */
694 #define TICK_RET_UNBOXED_TUP(n) \
695 TICK_BUMP(RET_UNBOXED_TUP_ctr++); \
696 TICK_HISTO(RET_UNBOXED_TUP,n)
697
698 /*
699 * A slow call with n arguments. In the unevald case, this call has
700 * already been counted once, so don't count it again.
701 */
702 #define TICK_SLOW_CALL(n) \
703 TICK_BUMP(SLOW_CALL_ctr); \
704 TICK_HISTO(SLOW_CALL,n)
705
706 /*
707 * This slow call was found to be to an unevaluated function; undo the
708 * ticks we did in TICK_SLOW_CALL.
709 */
710 #define TICK_SLOW_CALL_UNEVALD(n) \
711 TICK_BUMP(SLOW_CALL_UNEVALD_ctr); \
712 TICK_BUMP_BY(SLOW_CALL_ctr,-1); \
713 TICK_HISTO_BY(SLOW_CALL,n,-1);
714
715 /* Updating a closure with a new CON */
716 #define TICK_UPD_CON_IN_NEW(n) \
717 TICK_BUMP(UPD_CON_IN_NEW_ctr); \
718 TICK_HISTO(UPD_CON_IN_NEW,n)
719
720 #define TICK_ALLOC_HEAP_NOCTR(bytes) \
721 TICK_BUMP(ALLOC_RTS_ctr); \
722 TICK_BUMP_BY(ALLOC_RTS_tot,bytes)
723
724 /* -----------------------------------------------------------------------------
725 Saving and restoring STG registers
726
727 STG registers must be saved around a C call, just in case the STG
728 register is mapped to a caller-saves machine register. Normally we
729 don't need to worry about this the code generator has already
730 loaded any live STG registers into variables for us, but in
731 hand-written low-level Cmm code where we don't know which registers
732 are live, we might have to save them all.
733 -------------------------------------------------------------------------- */
734
735 #define SAVE_STGREGS \
736 W_ r1, r2, r3, r4, r5, r6, r7, r8; \
737 F_ f1, f2, f3, f4, f5, f6; \
738 D_ d1, d2, d3, d4, d5, d6; \
739 L_ l1; \
740 \
741 r1 = R1; \
742 r2 = R2; \
743 r3 = R3; \
744 r4 = R4; \
745 r5 = R5; \
746 r6 = R6; \
747 r7 = R7; \
748 r8 = R8; \
749 \
750 f1 = F1; \
751 f2 = F2; \
752 f3 = F3; \
753 f4 = F4; \
754 f5 = F5; \
755 f6 = F6; \
756 \
757 d1 = D1; \
758 d2 = D2; \
759 d3 = D3; \
760 d4 = D4; \
761 d5 = D5; \
762 d6 = D6; \
763 \
764 l1 = L1;
765
766
767 #define RESTORE_STGREGS \
768 R1 = r1; \
769 R2 = r2; \
770 R3 = r3; \
771 R4 = r4; \
772 R5 = r5; \
773 R6 = r6; \
774 R7 = r7; \
775 R8 = r8; \
776 \
777 F1 = f1; \
778 F2 = f2; \
779 F3 = f3; \
780 F4 = f4; \
781 F5 = f5; \
782 F6 = f6; \
783 \
784 D1 = d1; \
785 D2 = d2; \
786 D3 = d3; \
787 D4 = d4; \
788 D5 = d5; \
789 D6 = d6; \
790 \
791 L1 = l1;
792
793 /* -----------------------------------------------------------------------------
794 Misc junk
795 -------------------------------------------------------------------------- */
796
797 #define NO_TREC stg_NO_TREC_closure
798 #define END_TSO_QUEUE stg_END_TSO_QUEUE_closure
799 #define STM_AWOKEN stg_STM_AWOKEN_closure
800 #define END_INVARIANT_CHECK_QUEUE stg_END_INVARIANT_CHECK_QUEUE_closure
801
802 #define recordMutableCap(p, gen) \
803 W_ __bd; \
804 W_ mut_list; \
805 mut_list = Capability_mut_lists(MyCapability()) + WDS(gen); \
806 __bd = W_[mut_list]; \
807 if (bdescr_free(__bd) >= bdescr_start(__bd) + BLOCK_SIZE) { \
808 W_ __new_bd; \
809 ("ptr" __new_bd) = foreign "C" allocBlock_lock(); \
810 bdescr_link(__new_bd) = __bd; \
811 __bd = __new_bd; \
812 W_[mut_list] = __bd; \
813 } \
814 W_ free; \
815 free = bdescr_free(__bd); \
816 W_[free] = p; \
817 bdescr_free(__bd) = free + WDS(1);
818
819 #define recordMutable(p) \
820 P_ __p; \
821 W_ __bd; \
822 W_ __gen; \
823 __p = p; \
824 __bd = Bdescr(__p); \
825 __gen = TO_W_(bdescr_gen_no(__bd)); \
826 if (__gen > 0) { recordMutableCap(__p, __gen); }
827
828 /* -----------------------------------------------------------------------------
829 Arrays
830 -------------------------------------------------------------------------- */
831
832 /* Complete function body for the clone family of (mutable) array ops.
833 Defined as a macro to avoid function call overhead or code
834 duplication. */
835 #define cloneArray(info, src, offset, n) \
836 W_ words, size; \
837 gcptr dst, dst_p, src_p; \
838 \
839 again: MAYBE_GC(again); \
840 \
841 size = n + mutArrPtrsCardWords(n); \
842 words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; \
843 ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
844 TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0); \
845 \
846 SET_HDR(dst, info, CCCS); \
847 StgMutArrPtrs_ptrs(dst) = n; \
848 StgMutArrPtrs_size(dst) = size; \
849 \
850 dst_p = dst + SIZEOF_StgMutArrPtrs; \
851 src_p = src + SIZEOF_StgMutArrPtrs + WDS(offset); \
852 prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
853 \
854 return (dst);
855
856 #define copyArray(src, src_off, dst, dst_off, n) \
857 W_ dst_elems_p, dst_p, src_p, dst_cards_p, bytes; \
858 \
859 if ((n) != 0) { \
860 SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
861 \
862 dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
863 dst_p = dst_elems_p + WDS(dst_off); \
864 src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
865 bytes = WDS(n); \
866 \
867 prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
868 \
869 dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
870 setCards(dst_cards_p, dst_off, n); \
871 } \
872 \
873 return ();
874
875 #define copyMutableArray(src, src_off, dst, dst_off, n) \
876 W_ dst_elems_p, dst_p, src_p, dst_cards_p, bytes; \
877 \
878 if ((n) != 0) { \
879 SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
880 \
881 dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
882 dst_p = dst_elems_p + WDS(dst_off); \
883 src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
884 bytes = WDS(n); \
885 \
886 if ((src) == (dst)) { \
887 prim %memmove(dst_p, src_p, bytes, SIZEOF_W); \
888 } else { \
889 prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
890 } \
891 \
892 dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
893 setCards(dst_cards_p, dst_off, n); \
894 } \
895 \
896 return ();
897
898 /*
899 * Set the cards in the cards table pointed to by dst_cards_p for an
900 * update to n elements, starting at element dst_off.
901 */
902 #define setCards(dst_cards_p, dst_off, n) \
903 W_ __start_card, __end_card, __cards; \
904 __start_card = mutArrPtrCardDown(dst_off); \
905 __end_card = mutArrPtrCardDown((dst_off) + (n) - 1); \
906 __cards = __end_card - __start_card + 1; \
907 prim %memset((dst_cards_p) + __start_card, 1, __cards, 1);
908
909 /* Complete function body for the clone family of small (mutable)
910 array ops. Defined as a macro to avoid function call overhead or
911 code duplication. */
912 #define cloneSmallArray(info, src, offset, n) \
913 W_ words, size; \
914 gcptr dst, dst_p, src_p; \
915 \
916 again: MAYBE_GC(again); \
917 \
918 words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n; \
919 ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
920 TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0); \
921 \
922 SET_HDR(dst, info, CCCS); \
923 StgSmallMutArrPtrs_ptrs(dst) = n; \
924 \
925 dst_p = dst + SIZEOF_StgSmallMutArrPtrs; \
926 src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(offset); \
927 prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
928 \
929 return (dst);