Spelling fixes in comments [ci skip]
[ghc.git] / includes / Cmm.h
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The University of Glasgow 2004-2013
4 *
5 * This file is included at the top of all .cmm source files (and
6 * *only* .cmm files). It defines a collection of useful macros for
7 * making .cmm code a bit less error-prone to write, and a bit easier
8 * on the eye for the reader.
9 *
10 * For the syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 *
12 * Accessing fields of structures defined in the RTS header files is
13 * done via automatically-generated macros in DerivedConstants.h. For
14 * example, where previously we used
15 *
16 * CurrentTSO->what_next = x
17 *
18 * in C-- we now use
19 *
20 * StgTSO_what_next(CurrentTSO) = x
21 *
22 * where the StgTSO_what_next() macro is automatically generated by
23 * mkDerivedConstants.c. If you need to access a field that doesn't
24 * already have a macro, edit that file (it's pretty self-explanatory).
25 *
26 * -------------------------------------------------------------------------- */
27
28 #ifndef CMM_H
29 #define CMM_H
30
31 /*
32 * In files that are included into both C and C-- (and perhaps
33 * Haskell) sources, we sometimes need to conditionally compile bits
34 * depending on the language. CMINUSMINUS==1 in .cmm sources:
35 */
36 #define CMINUSMINUS 1
37
38 #include "ghcconfig.h"
39
40 /* -----------------------------------------------------------------------------
41 Types
42
43 The following synonyms for C-- types are declared here:
44
45 I8, I16, I32, I64 MachRep-style names for convenience
46
47 W_ is shorthand for the word type (== StgWord)
48 F_ shorthand for float (F_ == StgFloat == C's float)
49 D_ shorthand for double (D_ == StgDouble == C's double)
50
51 CInt has the same size as an int in C on this platform
52 CLong has the same size as a long in C on this platform
53 CBool has the same size as a bool in C on this platform
54
55 --------------------------------------------------------------------------- */
56
57 #define I8 bits8
58 #define I16 bits16
59 #define I32 bits32
60 #define I64 bits64
61 #define P_ gcptr
62
63 #if SIZEOF_VOID_P == 4
64 #define W_ bits32
65 /* Maybe it's better to include MachDeps.h */
66 #define TAG_BITS 2
67 #elif SIZEOF_VOID_P == 8
68 #define W_ bits64
69 /* Maybe it's better to include MachDeps.h */
70 #define TAG_BITS 3
71 #else
72 #error Unknown word size
73 #endif
74
75 /*
76 * The RTS must sometimes UNTAG a pointer before dereferencing it.
77 * See the wiki page Commentary/Rts/HaskellExecution/PointerTagging
78 */
79 #define TAG_MASK ((1 << TAG_BITS) - 1)
80 #define UNTAG(p) (p & ~TAG_MASK)
81 #define GETTAG(p) (p & TAG_MASK)
82
83 #if SIZEOF_INT == 4
84 #define CInt bits32
85 #elif SIZEOF_INT == 8
86 #define CInt bits64
87 #else
88 #error Unknown int size
89 #endif
90
91 #if SIZEOF_LONG == 4
92 #define CLong bits32
93 #elif SIZEOF_LONG == 8
94 #define CLong bits64
95 #else
96 #error Unknown long size
97 #endif
98
99 #define CBool bits8
100
101 #define F_ float32
102 #define D_ float64
103 #define L_ bits64
104 #define V16_ bits128
105 #define V32_ bits256
106 #define V64_ bits512
107
108 #define SIZEOF_StgDouble 8
109 #define SIZEOF_StgWord64 8
110
111 /* -----------------------------------------------------------------------------
112 Misc useful stuff
113 -------------------------------------------------------------------------- */
114
115 #define ccall foreign "C"
116
117 #define NULL (0::W_)
118
119 #define STRING(name,str) \
120 section "rodata" { \
121 name : bits8[] str; \
122 } \
123
124 #ifdef TABLES_NEXT_TO_CODE
125 #define RET_LBL(f) f##_info
126 #else
127 #define RET_LBL(f) f##_ret
128 #endif
129
130 #ifdef TABLES_NEXT_TO_CODE
131 #define ENTRY_LBL(f) f##_info
132 #else
133 #define ENTRY_LBL(f) f##_entry
134 #endif
135
136 /* -----------------------------------------------------------------------------
137 Byte/word macros
138
139 Everything in C-- is in byte offsets (well, most things). We use
140 some macros to allow us to express offsets in words and to try to
141 avoid byte/word confusion.
142 -------------------------------------------------------------------------- */
143
144 #define SIZEOF_W SIZEOF_VOID_P
145 #define W_MASK (SIZEOF_W-1)
146
147 #if SIZEOF_W == 4
148 #define W_SHIFT 2
149 #elif SIZEOF_W == 8
150 #define W_SHIFT 3
151 #endif
152
153 /* Converting quantities of words to bytes */
154 #define WDS(n) ((n)*SIZEOF_W)
155
156 /*
157 * Converting quantities of bytes to words
158 * NB. these work on *unsigned* values only
159 */
160 #define BYTES_TO_WDS(n) ((n) / SIZEOF_W)
161 #define ROUNDUP_BYTES_TO_WDS(n) (((n) + SIZEOF_W - 1) / SIZEOF_W)
162
163 /* TO_W_(n) converts n to W_ type from a smaller type */
164 #if SIZEOF_W == 4
165 #define TO_W_(x) %sx32(x)
166 #define HALF_W_(x) %lobits16(x)
167 #elif SIZEOF_W == 8
168 #define TO_W_(x) %sx64(x)
169 #define HALF_W_(x) %lobits32(x)
170 #endif
171
172 #if SIZEOF_INT == 4 && SIZEOF_W == 8
173 #define W_TO_INT(x) %lobits32(x)
174 #elif SIZEOF_INT == SIZEOF_W
175 #define W_TO_INT(x) (x)
176 #endif
177
178 #if SIZEOF_LONG == 4 && SIZEOF_W == 8
179 #define W_TO_LONG(x) %lobits32(x)
180 #elif SIZEOF_LONG == SIZEOF_W
181 #define W_TO_LONG(x) (x)
182 #endif
183
184 /* -----------------------------------------------------------------------------
185 Atomic memory operations.
186 -------------------------------------------------------------------------- */
187
188 #if SIZEOF_W == 4
189 #define cmpxchgW cmpxchg32
190 #elif SIZEOF_W == 8
191 #define cmpxchgW cmpxchg64
192 #endif
193
194 /* -----------------------------------------------------------------------------
195 Heap/stack access, and adjusting the heap/stack pointers.
196 -------------------------------------------------------------------------- */
197
198 #define Sp(n) W_[Sp + WDS(n)]
199 #define Hp(n) W_[Hp + WDS(n)]
200
201 #define Sp_adj(n) Sp = Sp + WDS(n) /* pronounced "spadge" */
202 #define Hp_adj(n) Hp = Hp + WDS(n)
203
204 /* -----------------------------------------------------------------------------
205 Assertions and Debuggery
206 -------------------------------------------------------------------------- */
207
208 #ifdef DEBUG
209 #define ASSERT(predicate) \
210 if (predicate) { \
211 /*null*/; \
212 } else { \
213 foreign "C" _assertFail(NULL, __LINE__) never returns; \
214 }
215 #else
216 #define ASSERT(p) /* nothing */
217 #endif
218
219 #ifdef DEBUG
220 #define DEBUG_ONLY(s) s
221 #else
222 #define DEBUG_ONLY(s) /* nothing */
223 #endif
224
225 /*
226 * The IF_DEBUG macro is useful for debug messages that depend on one
227 * of the RTS debug options. For example:
228 *
229 * IF_DEBUG(RtsFlags_DebugFlags_apply,
230 * foreign "C" fprintf(stderr, stg_ap_0_ret_str));
231 *
232 * Note the syntax is slightly different to the C version of this macro.
233 */
234 #ifdef DEBUG
235 #define IF_DEBUG(c,s) if (RtsFlags_DebugFlags_##c(RtsFlags) != 0::CBool) { s; }
236 #else
237 #define IF_DEBUG(c,s) /* nothing */
238 #endif
239
240 /* -----------------------------------------------------------------------------
241 Entering
242
243 It isn't safe to "enter" every closure. Functions in particular
244 have no entry code as such; their entry point contains the code to
245 apply the function.
246
247 ToDo: range should end in N_CLOSURE_TYPES-1, not N_CLOSURE_TYPES,
248 but switch doesn't allow us to use exprs there yet.
249
250 If R1 points to a tagged object it points either to
251 * A constructor.
252 * A function with arity <= TAG_MASK.
253 In both cases the right thing to do is to return.
254 Note: it is rather lucky that we can use the tag bits to do this
255 for both objects. Maybe it points to a brittle design?
256
257 Indirections can contain tagged pointers, so their tag is checked.
258 -------------------------------------------------------------------------- */
259
260 #ifdef PROFILING
261
262 // When profiling, we cannot shortcut ENTER() by checking the tag,
263 // because LDV profiling relies on entering closures to mark them as
264 // "used".
265
266 #define LOAD_INFO(ret,x) \
267 info = %INFO_PTR(UNTAG(x));
268
269 #define UNTAG_IF_PROF(x) UNTAG(x)
270
271 #else
272
273 #define LOAD_INFO(ret,x) \
274 if (GETTAG(x) != 0) { \
275 ret(x); \
276 } \
277 info = %INFO_PTR(x);
278
279 #define UNTAG_IF_PROF(x) (x) /* already untagged */
280
281 #endif
282
283 // We need two versions of ENTER():
284 // - ENTER(x) takes the closure as an argument and uses return(),
285 // for use in civilized code where the stack is handled by GHC
286 //
287 // - ENTER_NOSTACK() where the closure is in R1, and returns are
288 // explicit jumps, for use when we are doing the stack management
289 // ourselves.
290
291 #if defined(PROFILING)
292 // See Note [Evaluating functions with profiling] in rts/Apply.cmm
293 #define ENTER(x) jump stg_ap_0_fast(x);
294 #else
295 #define ENTER(x) ENTER_(return,x)
296 #endif
297
298 #define ENTER_R1() ENTER_(RET_R1,R1)
299
300 #define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1]
301
302 #define ENTER_(ret,x) \
303 again: \
304 W_ info; \
305 LOAD_INFO(ret,x) \
306 switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \
307 (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \
308 case \
309 IND, \
310 IND_STATIC: \
311 { \
312 x = StgInd_indirectee(x); \
313 goto again; \
314 } \
315 case \
316 FUN, \
317 FUN_1_0, \
318 FUN_0_1, \
319 FUN_2_0, \
320 FUN_1_1, \
321 FUN_0_2, \
322 FUN_STATIC, \
323 BCO, \
324 PAP: \
325 { \
326 ret(x); \
327 } \
328 default: \
329 { \
330 x = UNTAG_IF_PROF(x); \
331 jump %ENTRY_CODE(info) (x); \
332 } \
333 }
334
335 // The FUN cases almost never happen: a pointer to a non-static FUN
336 // should always be tagged. This unfortunately isn't true for the
337 // interpreter right now, which leaves untagged FUNs on the stack.
338
339 /* -----------------------------------------------------------------------------
340 Constants.
341 -------------------------------------------------------------------------- */
342
343 #include "rts/Constants.h"
344 #include "DerivedConstants.h"
345 #include "rts/storage/ClosureTypes.h"
346 #include "rts/storage/FunTypes.h"
347 #include "rts/OSThreads.h"
348
349 /*
350 * Need MachRegs, because some of the RTS code is conditionally
351 * compiled based on REG_R1, REG_R2, etc.
352 */
353 #include "stg/RtsMachRegs.h"
354
355 #include "rts/prof/LDV.h"
356
357 #undef BLOCK_SIZE
358 #undef MBLOCK_SIZE
359 #include "rts/storage/Block.h" /* For Bdescr() */
360
361
362 #define MyCapability() (BaseReg - OFFSET_Capability_r)
363
364 /* -------------------------------------------------------------------------
365 Info tables
366 ------------------------------------------------------------------------- */
367
368 #if defined(PROFILING)
369 #define PROF_HDR_FIELDS(w_,hdr1,hdr2) \
370 w_ hdr1, \
371 w_ hdr2,
372 #else
373 #define PROF_HDR_FIELDS(w_,hdr1,hdr2) /* nothing */
374 #endif
375
376 /* -------------------------------------------------------------------------
377 Allocation and garbage collection
378 ------------------------------------------------------------------------- */
379
380 /*
381 * ALLOC_PRIM is for allocating memory on the heap for a primitive
382 * object. It is used all over PrimOps.cmm.
383 *
384 * We make the simplifying assumption that the "admin" part of a
385 * primitive closure is just the header when calculating sizes for
386 * ticky-ticky. It's not clear whether eg. the size field of an array
387 * should be counted as "admin", or the various fields of a BCO.
388 */
389 #define ALLOC_PRIM(bytes) \
390 HP_CHK_GEN_TICKY(bytes); \
391 TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
392 CCCS_ALLOC(bytes);
393
394 #define HEAP_CHECK(bytes,failure) \
395 TICK_BUMP(HEAP_CHK_ctr); \
396 Hp = Hp + (bytes); \
397 if (Hp > HpLim) { HpAlloc = (bytes); failure; } \
398 TICK_ALLOC_HEAP_NOCTR(bytes);
399
400 #define ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,failure) \
401 HEAP_CHECK(bytes,failure) \
402 TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
403 CCCS_ALLOC(bytes);
404
405 #define ALLOC_PRIM_(bytes,fun) \
406 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM(fun));
407
408 #define ALLOC_PRIM_P(bytes,fun,arg) \
409 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_P(fun,arg));
410
411 #define ALLOC_PRIM_N(bytes,fun,arg) \
412 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_N(fun,arg));
413
414 /* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */
415 #define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS)
416
417 #define HP_CHK_GEN_TICKY(bytes) \
418 HP_CHK_GEN(bytes); \
419 TICK_ALLOC_HEAP_NOCTR(bytes);
420
421 #define HP_CHK_P(bytes, fun, arg) \
422 HEAP_CHECK(bytes, GC_PRIM_P(fun,arg))
423
424 // TODO I'm not seeing where ALLOC_P_TICKY is used; can it be removed?
425 // -NSF March 2013
426 #define ALLOC_P_TICKY(bytes, fun, arg) \
427 HP_CHK_P(bytes); \
428 TICK_ALLOC_HEAP_NOCTR(bytes);
429
430 #define CHECK_GC() \
431 (bdescr_link(CurrentNursery) == NULL || \
432 generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim]))
433
434 // allocate() allocates from the nursery, so we check to see
435 // whether the nursery is nearly empty in any function that uses
436 // allocate() - this includes many of the primops.
437 //
438 // HACK alert: the __L__ stuff is here to coax the common-block
439 // eliminator into commoning up the call stg_gc_noregs() with the same
440 // code that gets generated by a STK_CHK_GEN() in the same proc. We
441 // also need an if (0) { goto __L__; } so that the __L__ label isn't
442 // optimised away by the control-flow optimiser prior to common-block
443 // elimination (it will be optimised away later).
444 //
445 // This saves some code in gmp-wrappers.cmm where we have lots of
446 // MAYBE_GC() in the same proc as STK_CHK_GEN().
447 //
448 #define MAYBE_GC(retry) \
449 if (CHECK_GC()) { \
450 HpAlloc = 0; \
451 goto __L__; \
452 __L__: \
453 call stg_gc_noregs(); \
454 goto retry; \
455 } \
456 if (0) { goto __L__; }
457
458 #define GC_PRIM(fun) \
459 jump stg_gc_prim(fun);
460
461 // Version of GC_PRIM for use in low-level Cmm. We can call
462 // stg_gc_prim, because it takes one argument and therefore has a
463 // platform-independent calling convention (Note [Syntax of .cmm
464 // files] in CmmParse.y).
465 #define GC_PRIM_LL(fun) \
466 R1 = fun; \
467 jump stg_gc_prim [R1];
468
469 // We pass the fun as the second argument, because the arg is
470 // usually already in the first argument position (R1), so this
471 // avoids moving it to a different register / stack slot.
472 #define GC_PRIM_N(fun,arg) \
473 jump stg_gc_prim_n(arg,fun);
474
475 #define GC_PRIM_P(fun,arg) \
476 jump stg_gc_prim_p(arg,fun);
477
478 #define GC_PRIM_P_LL(fun,arg) \
479 R1 = arg; \
480 R2 = fun; \
481 jump stg_gc_prim_p_ll [R1,R2];
482
483 #define GC_PRIM_PP(fun,arg1,arg2) \
484 jump stg_gc_prim_pp(arg1,arg2,fun);
485
486 #define MAYBE_GC_(fun) \
487 if (CHECK_GC()) { \
488 HpAlloc = 0; \
489 GC_PRIM(fun) \
490 }
491
492 #define MAYBE_GC_N(fun,arg) \
493 if (CHECK_GC()) { \
494 HpAlloc = 0; \
495 GC_PRIM_N(fun,arg) \
496 }
497
498 #define MAYBE_GC_P(fun,arg) \
499 if (CHECK_GC()) { \
500 HpAlloc = 0; \
501 GC_PRIM_P(fun,arg) \
502 }
503
504 #define MAYBE_GC_PP(fun,arg1,arg2) \
505 if (CHECK_GC()) { \
506 HpAlloc = 0; \
507 GC_PRIM_PP(fun,arg1,arg2) \
508 }
509
510 #define STK_CHK_LL(n, fun) \
511 TICK_BUMP(STK_CHK_ctr); \
512 if (Sp - (n) < SpLim) { \
513 GC_PRIM_LL(fun) \
514 }
515
516 #define STK_CHK_P_LL(n, fun, arg) \
517 TICK_BUMP(STK_CHK_ctr); \
518 if (Sp - (n) < SpLim) { \
519 GC_PRIM_P_LL(fun,arg) \
520 }
521
522 #define STK_CHK_PP(n, fun, arg1, arg2) \
523 TICK_BUMP(STK_CHK_ctr); \
524 if (Sp - (n) < SpLim) { \
525 GC_PRIM_PP(fun,arg1,arg2) \
526 }
527
528 #define STK_CHK_ENTER(n, closure) \
529 TICK_BUMP(STK_CHK_ctr); \
530 if (Sp - (n) < SpLim) { \
531 jump __stg_gc_enter_1(closure); \
532 }
533
534 // A funky heap check used by AutoApply.cmm
535
536 #define HP_CHK_NP_ASSIGN_SP0(size,f) \
537 HEAP_CHECK(size, Sp(0) = f; jump __stg_gc_enter_1 [R1];)
538
539 /* -----------------------------------------------------------------------------
540 Closure headers
541 -------------------------------------------------------------------------- */
542
543 /*
544 * This is really ugly, since we don't do the rest of StgHeader this
545 * way. The problem is that values from DerivedConstants.h cannot be
546 * dependent on the way (SMP, PROF etc.). For SIZEOF_StgHeader we get
547 * the value from GHC, but it seems like too much trouble to do that
548 * for StgThunkHeader.
549 */
550 #define SIZEOF_StgThunkHeader SIZEOF_StgHeader+SIZEOF_StgSMPThunkHeader
551
552 #define StgThunk_payload(__ptr__,__ix__) \
553 W_[__ptr__+SIZEOF_StgThunkHeader+ WDS(__ix__)]
554
555 /* -----------------------------------------------------------------------------
556 Closures
557 -------------------------------------------------------------------------- */
558
559 /* The offset of the payload of an array */
560 #define BYTE_ARR_CTS(arr) ((arr) + SIZEOF_StgArrBytes)
561
562 /* The number of words allocated in an array payload */
563 #define BYTE_ARR_WDS(arr) ROUNDUP_BYTES_TO_WDS(StgArrBytes_bytes(arr))
564
565 /* Getting/setting the info pointer of a closure */
566 #define SET_INFO(p,info) StgHeader_info(p) = info
567 #define GET_INFO(p) StgHeader_info(p)
568
569 /* Determine the size of an ordinary closure from its info table */
570 #define sizeW_fromITBL(itbl) \
571 SIZEOF_StgHeader + WDS(%INFO_PTRS(itbl)) + WDS(%INFO_NPTRS(itbl))
572
573 /* NB. duplicated from InfoTables.h! */
574 #define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK)
575 #define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT)
576
577 /* Debugging macros */
578 #define LOOKS_LIKE_INFO_PTR(p) \
579 ((p) != NULL && \
580 LOOKS_LIKE_INFO_PTR_NOT_NULL(p))
581
582 #define LOOKS_LIKE_INFO_PTR_NOT_NULL(p) \
583 ( (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \
584 (TO_W_(%INFO_TYPE(%STD_INFO(p))) < N_CLOSURE_TYPES))
585
586 #define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p))))
587
588 /*
589 * The layout of the StgFunInfoExtra part of an info table changes
590 * depending on TABLES_NEXT_TO_CODE. So we define field access
591 * macros which use the appropriate version here:
592 */
593 #ifdef TABLES_NEXT_TO_CODE
594 /*
595 * when TABLES_NEXT_TO_CODE, slow_apply is stored as an offset
596 * instead of the normal pointer.
597 */
598
599 #define StgFunInfoExtra_slow_apply(fun_info) \
600 (TO_W_(StgFunInfoExtraRev_slow_apply_offset(fun_info)) \
601 + (fun_info) + SIZEOF_StgFunInfoExtraRev + SIZEOF_StgInfoTable)
602
603 #define StgFunInfoExtra_fun_type(i) StgFunInfoExtraRev_fun_type(i)
604 #define StgFunInfoExtra_arity(i) StgFunInfoExtraRev_arity(i)
605 #define StgFunInfoExtra_bitmap(i) StgFunInfoExtraRev_bitmap(i)
606 #else
607 #define StgFunInfoExtra_slow_apply(i) StgFunInfoExtraFwd_slow_apply(i)
608 #define StgFunInfoExtra_fun_type(i) StgFunInfoExtraFwd_fun_type(i)
609 #define StgFunInfoExtra_arity(i) StgFunInfoExtraFwd_arity(i)
610 #define StgFunInfoExtra_bitmap(i) StgFunInfoExtraFwd_bitmap(i)
611 #endif
612
613 #define mutArrCardMask ((1 << MUT_ARR_PTRS_CARD_BITS) - 1)
614 #define mutArrPtrCardDown(i) ((i) >> MUT_ARR_PTRS_CARD_BITS)
615 #define mutArrPtrCardUp(i) (((i) + mutArrCardMask) >> MUT_ARR_PTRS_CARD_BITS)
616 #define mutArrPtrsCardWords(n) ROUNDUP_BYTES_TO_WDS(mutArrPtrCardUp(n))
617
618 #if defined(PROFILING) || (!defined(THREADED_RTS) && defined(DEBUG))
619 #define OVERWRITING_CLOSURE(c) foreign "C" overwritingClosure(c "ptr")
620 #define OVERWRITING_CLOSURE_OFS(c,n) \
621 foreign "C" overwritingClosureOfs(c "ptr", n)
622 #else
623 #define OVERWRITING_CLOSURE(c) /* nothing */
624 #define OVERWRITING_CLOSURE_OFS(c,n) /* nothing */
625 #endif
626
627 #ifdef THREADED_RTS
628 #define prim_write_barrier prim %write_barrier()
629 #else
630 #define prim_write_barrier /* nothing */
631 #endif
632
633 /* -----------------------------------------------------------------------------
634 Ticky macros
635 -------------------------------------------------------------------------- */
636
637 #ifdef TICKY_TICKY
638 #define TICK_BUMP_BY(ctr,n) CLong[ctr] = CLong[ctr] + n
639 #else
640 #define TICK_BUMP_BY(ctr,n) /* nothing */
641 #endif
642
643 #define TICK_BUMP(ctr) TICK_BUMP_BY(ctr,1)
644
645 #define TICK_ENT_DYN_IND() TICK_BUMP(ENT_DYN_IND_ctr)
646 #define TICK_ENT_DYN_THK() TICK_BUMP(ENT_DYN_THK_ctr)
647 #define TICK_ENT_VIA_NODE() TICK_BUMP(ENT_VIA_NODE_ctr)
648 #define TICK_ENT_STATIC_IND() TICK_BUMP(ENT_STATIC_IND_ctr)
649 #define TICK_ENT_PERM_IND() TICK_BUMP(ENT_PERM_IND_ctr)
650 #define TICK_ENT_PAP() TICK_BUMP(ENT_PAP_ctr)
651 #define TICK_ENT_AP() TICK_BUMP(ENT_AP_ctr)
652 #define TICK_ENT_AP_STACK() TICK_BUMP(ENT_AP_STACK_ctr)
653 #define TICK_ENT_BH() TICK_BUMP(ENT_BH_ctr)
654 #define TICK_ENT_LNE() TICK_BUMP(ENT_LNE_ctr)
655 #define TICK_UNKNOWN_CALL() TICK_BUMP(UNKNOWN_CALL_ctr)
656 #define TICK_UPDF_PUSHED() TICK_BUMP(UPDF_PUSHED_ctr)
657 #define TICK_CATCHF_PUSHED() TICK_BUMP(CATCHF_PUSHED_ctr)
658 #define TICK_UPDF_OMITTED() TICK_BUMP(UPDF_OMITTED_ctr)
659 #define TICK_UPD_NEW_IND() TICK_BUMP(UPD_NEW_IND_ctr)
660 #define TICK_UPD_NEW_PERM_IND() TICK_BUMP(UPD_NEW_PERM_IND_ctr)
661 #define TICK_UPD_OLD_IND() TICK_BUMP(UPD_OLD_IND_ctr)
662 #define TICK_UPD_OLD_PERM_IND() TICK_BUMP(UPD_OLD_PERM_IND_ctr)
663
664 #define TICK_SLOW_CALL_FUN_TOO_FEW() TICK_BUMP(SLOW_CALL_FUN_TOO_FEW_ctr)
665 #define TICK_SLOW_CALL_FUN_CORRECT() TICK_BUMP(SLOW_CALL_FUN_CORRECT_ctr)
666 #define TICK_SLOW_CALL_FUN_TOO_MANY() TICK_BUMP(SLOW_CALL_FUN_TOO_MANY_ctr)
667 #define TICK_SLOW_CALL_PAP_TOO_FEW() TICK_BUMP(SLOW_CALL_PAP_TOO_FEW_ctr)
668 #define TICK_SLOW_CALL_PAP_CORRECT() TICK_BUMP(SLOW_CALL_PAP_CORRECT_ctr)
669 #define TICK_SLOW_CALL_PAP_TOO_MANY() TICK_BUMP(SLOW_CALL_PAP_TOO_MANY_ctr)
670
671 #define TICK_SLOW_CALL_fast_v16() TICK_BUMP(SLOW_CALL_fast_v16_ctr)
672 #define TICK_SLOW_CALL_fast_v() TICK_BUMP(SLOW_CALL_fast_v_ctr)
673 #define TICK_SLOW_CALL_fast_p() TICK_BUMP(SLOW_CALL_fast_p_ctr)
674 #define TICK_SLOW_CALL_fast_pv() TICK_BUMP(SLOW_CALL_fast_pv_ctr)
675 #define TICK_SLOW_CALL_fast_pp() TICK_BUMP(SLOW_CALL_fast_pp_ctr)
676 #define TICK_SLOW_CALL_fast_ppv() TICK_BUMP(SLOW_CALL_fast_ppv_ctr)
677 #define TICK_SLOW_CALL_fast_ppp() TICK_BUMP(SLOW_CALL_fast_ppp_ctr)
678 #define TICK_SLOW_CALL_fast_pppv() TICK_BUMP(SLOW_CALL_fast_pppv_ctr)
679 #define TICK_SLOW_CALL_fast_pppp() TICK_BUMP(SLOW_CALL_fast_pppp_ctr)
680 #define TICK_SLOW_CALL_fast_ppppp() TICK_BUMP(SLOW_CALL_fast_ppppp_ctr)
681 #define TICK_SLOW_CALL_fast_pppppp() TICK_BUMP(SLOW_CALL_fast_pppppp_ctr)
682 #define TICK_VERY_SLOW_CALL() TICK_BUMP(VERY_SLOW_CALL_ctr)
683
684 /* NOTE: TICK_HISTO_BY and TICK_HISTO
685 currently have no effect.
686 The old code for it didn't typecheck and I
687 just commented it out to get ticky to work.
688 - krc 1/2007 */
689
690 #define TICK_HISTO_BY(histo,n,i) /* nothing */
691
692 #define TICK_HISTO(histo,n) TICK_HISTO_BY(histo,n,1)
693
694 /* An unboxed tuple with n components. */
695 #define TICK_RET_UNBOXED_TUP(n) \
696 TICK_BUMP(RET_UNBOXED_TUP_ctr++); \
697 TICK_HISTO(RET_UNBOXED_TUP,n)
698
699 /*
700 * A slow call with n arguments. In the unevald case, this call has
701 * already been counted once, so don't count it again.
702 */
703 #define TICK_SLOW_CALL(n) \
704 TICK_BUMP(SLOW_CALL_ctr); \
705 TICK_HISTO(SLOW_CALL,n)
706
707 /*
708 * This slow call was found to be to an unevaluated function; undo the
709 * ticks we did in TICK_SLOW_CALL.
710 */
711 #define TICK_SLOW_CALL_UNEVALD(n) \
712 TICK_BUMP(SLOW_CALL_UNEVALD_ctr); \
713 TICK_BUMP_BY(SLOW_CALL_ctr,-1); \
714 TICK_HISTO_BY(SLOW_CALL,n,-1);
715
716 /* Updating a closure with a new CON */
717 #define TICK_UPD_CON_IN_NEW(n) \
718 TICK_BUMP(UPD_CON_IN_NEW_ctr); \
719 TICK_HISTO(UPD_CON_IN_NEW,n)
720
721 #define TICK_ALLOC_HEAP_NOCTR(bytes) \
722 TICK_BUMP(ALLOC_RTS_ctr); \
723 TICK_BUMP_BY(ALLOC_RTS_tot,bytes)
724
725 /* -----------------------------------------------------------------------------
726 Saving and restoring STG registers
727
728 STG registers must be saved around a C call, just in case the STG
729 register is mapped to a caller-saves machine register. Normally we
730 don't need to worry about this the code generator has already
731 loaded any live STG registers into variables for us, but in
732 hand-written low-level Cmm code where we don't know which registers
733 are live, we might have to save them all.
734 -------------------------------------------------------------------------- */
735
736 #define SAVE_STGREGS \
737 W_ r1, r2, r3, r4, r5, r6, r7, r8; \
738 F_ f1, f2, f3, f4, f5, f6; \
739 D_ d1, d2, d3, d4, d5, d6; \
740 L_ l1; \
741 \
742 r1 = R1; \
743 r2 = R2; \
744 r3 = R3; \
745 r4 = R4; \
746 r5 = R5; \
747 r6 = R6; \
748 r7 = R7; \
749 r8 = R8; \
750 \
751 f1 = F1; \
752 f2 = F2; \
753 f3 = F3; \
754 f4 = F4; \
755 f5 = F5; \
756 f6 = F6; \
757 \
758 d1 = D1; \
759 d2 = D2; \
760 d3 = D3; \
761 d4 = D4; \
762 d5 = D5; \
763 d6 = D6; \
764 \
765 l1 = L1;
766
767
768 #define RESTORE_STGREGS \
769 R1 = r1; \
770 R2 = r2; \
771 R3 = r3; \
772 R4 = r4; \
773 R5 = r5; \
774 R6 = r6; \
775 R7 = r7; \
776 R8 = r8; \
777 \
778 F1 = f1; \
779 F2 = f2; \
780 F3 = f3; \
781 F4 = f4; \
782 F5 = f5; \
783 F6 = f6; \
784 \
785 D1 = d1; \
786 D2 = d2; \
787 D3 = d3; \
788 D4 = d4; \
789 D5 = d5; \
790 D6 = d6; \
791 \
792 L1 = l1;
793
794 /* -----------------------------------------------------------------------------
795 Misc junk
796 -------------------------------------------------------------------------- */
797
798 #define NO_TREC stg_NO_TREC_closure
799 #define END_TSO_QUEUE stg_END_TSO_QUEUE_closure
800 #define STM_AWOKEN stg_STM_AWOKEN_closure
801 #define END_INVARIANT_CHECK_QUEUE stg_END_INVARIANT_CHECK_QUEUE_closure
802
803 #define recordMutableCap(p, gen) \
804 W_ __bd; \
805 W_ mut_list; \
806 mut_list = Capability_mut_lists(MyCapability()) + WDS(gen); \
807 __bd = W_[mut_list]; \
808 if (bdescr_free(__bd) >= bdescr_start(__bd) + BLOCK_SIZE) { \
809 W_ __new_bd; \
810 ("ptr" __new_bd) = foreign "C" allocBlock_lock(); \
811 bdescr_link(__new_bd) = __bd; \
812 __bd = __new_bd; \
813 W_[mut_list] = __bd; \
814 } \
815 W_ free; \
816 free = bdescr_free(__bd); \
817 W_[free] = p; \
818 bdescr_free(__bd) = free + WDS(1);
819
820 #define recordMutable(p) \
821 P_ __p; \
822 W_ __bd; \
823 W_ __gen; \
824 __p = p; \
825 __bd = Bdescr(__p); \
826 __gen = TO_W_(bdescr_gen_no(__bd)); \
827 if (__gen > 0) { recordMutableCap(__p, __gen); }
828
829 /* -----------------------------------------------------------------------------
830 Arrays
831 -------------------------------------------------------------------------- */
832
833 /* Complete function body for the clone family of (mutable) array ops.
834 Defined as a macro to avoid function call overhead or code
835 duplication. */
836 #define cloneArray(info, src, offset, n) \
837 W_ words, size; \
838 gcptr dst, dst_p, src_p; \
839 \
840 again: MAYBE_GC(again); \
841 \
842 size = n + mutArrPtrsCardWords(n); \
843 words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; \
844 ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
845 TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0); \
846 \
847 SET_HDR(dst, info, CCCS); \
848 StgMutArrPtrs_ptrs(dst) = n; \
849 StgMutArrPtrs_size(dst) = size; \
850 \
851 dst_p = dst + SIZEOF_StgMutArrPtrs; \
852 src_p = src + SIZEOF_StgMutArrPtrs + WDS(offset); \
853 while: \
854 if (n != 0) { \
855 n = n - 1; \
856 W_[dst_p] = W_[src_p]; \
857 dst_p = dst_p + WDS(1); \
858 src_p = src_p + WDS(1); \
859 goto while; \
860 } \
861 \
862 return (dst);
863
864 #define copyArray(src, src_off, dst, dst_off, n) \
865 W_ dst_elems_p, dst_p, src_p, dst_cards_p, bytes; \
866 \
867 if ((n) != 0) { \
868 SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
869 \
870 dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
871 dst_p = dst_elems_p + WDS(dst_off); \
872 src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
873 bytes = WDS(n); \
874 \
875 prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
876 \
877 dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
878 setCards(dst_cards_p, dst_off, n); \
879 } \
880 \
881 return ();
882
883 #define copyMutableArray(src, src_off, dst, dst_off, n) \
884 W_ dst_elems_p, dst_p, src_p, dst_cards_p, bytes; \
885 \
886 if ((n) != 0) { \
887 SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
888 \
889 dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
890 dst_p = dst_elems_p + WDS(dst_off); \
891 src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
892 bytes = WDS(n); \
893 \
894 if ((src) == (dst)) { \
895 prim %memmove(dst_p, src_p, bytes, SIZEOF_W); \
896 } else { \
897 prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
898 } \
899 \
900 dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
901 setCards(dst_cards_p, dst_off, n); \
902 } \
903 \
904 return ();
905
906 /*
907 * Set the cards in the cards table pointed to by dst_cards_p for an
908 * update to n elements, starting at element dst_off.
909 */
910 #define setCards(dst_cards_p, dst_off, n) \
911 W_ __start_card, __end_card, __cards; \
912 __start_card = mutArrPtrCardDown(dst_off); \
913 __end_card = mutArrPtrCardDown((dst_off) + (n) - 1); \
914 __cards = __end_card - __start_card + 1; \
915 prim %memset((dst_cards_p) + __start_card, 1, __cards, 1);
916
917 /* Complete function body for the clone family of small (mutable)
918 array ops. Defined as a macro to avoid function call overhead or
919 code duplication. */
920 #define cloneSmallArray(info, src, offset, n) \
921 W_ words, size; \
922 gcptr dst, dst_p, src_p; \
923 \
924 again: MAYBE_GC(again); \
925 \
926 words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n; \
927 ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
928 TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0); \
929 \
930 SET_HDR(dst, info, CCCS); \
931 StgSmallMutArrPtrs_ptrs(dst) = n; \
932 \
933 dst_p = dst + SIZEOF_StgSmallMutArrPtrs; \
934 src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(offset); \
935 while: \
936 if (n != 0) { \
937 n = n - 1; \
938 W_[dst_p] = W_[src_p]; \
939 dst_p = dst_p + WDS(1); \
940 src_p = src_p + WDS(1); \
941 goto while; \
942 } \
943 \
944 return (dst);
945
946 #endif /* CMM_H */