ci: push perf test metrics even when the testsuite doesn't pass
[ghc.git] / includes / Cmm.h
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The University of Glasgow 2004-2013
4 *
5 * This file is included at the top of all .cmm source files (and
6 * *only* .cmm files). It defines a collection of useful macros for
7 * making .cmm code a bit less error-prone to write, and a bit easier
8 * on the eye for the reader.
9 *
10 * For the syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 *
12 * Accessing fields of structures defined in the RTS header files is
13 * done via automatically-generated macros in DerivedConstants.h. For
14 * example, where previously we used
15 *
16 * CurrentTSO->what_next = x
17 *
18 * in C-- we now use
19 *
20 * StgTSO_what_next(CurrentTSO) = x
21 *
22 * where the StgTSO_what_next() macro is automatically generated by
23 * mkDerivedConstants.c. If you need to access a field that doesn't
24 * already have a macro, edit that file (it's pretty self-explanatory).
25 *
26 * -------------------------------------------------------------------------- */
27
28 #pragma once
29
30 /*
31 * In files that are included into both C and C-- (and perhaps
32 * Haskell) sources, we sometimes need to conditionally compile bits
33 * depending on the language. CMINUSMINUS==1 in .cmm sources:
34 */
35 #define CMINUSMINUS 1
36
37 #include "ghcconfig.h"
38
39 /* -----------------------------------------------------------------------------
40 Types
41
42 The following synonyms for C-- types are declared here:
43
44 I8, I16, I32, I64 MachRep-style names for convenience
45
46 W_ is shorthand for the word type (== StgWord)
47 F_ shorthand for float (F_ == StgFloat == C's float)
48 D_ shorthand for double (D_ == StgDouble == C's double)
49
50 CInt has the same size as an int in C on this platform
51 CLong has the same size as a long in C on this platform
52 CBool has the same size as a bool in C on this platform
53
54 --------------------------------------------------------------------------- */
55
56 #define I8 bits8
57 #define I16 bits16
58 #define I32 bits32
59 #define I64 bits64
60 #define P_ gcptr
61
62 #if SIZEOF_VOID_P == 4
63 #define W_ bits32
64 /* Maybe it's better to include MachDeps.h */
65 #define TAG_BITS 2
66 #elif SIZEOF_VOID_P == 8
67 #define W_ bits64
68 /* Maybe it's better to include MachDeps.h */
69 #define TAG_BITS 3
70 #else
71 #error Unknown word size
72 #endif
73
74 /*
75 * The RTS must sometimes UNTAG a pointer before dereferencing it.
76 * See the wiki page commentary/rts/haskell-execution/pointer-tagging
77 */
78 #define TAG_MASK ((1 << TAG_BITS) - 1)
79 #define UNTAG(p) (p & ~TAG_MASK)
80 #define GETTAG(p) (p & TAG_MASK)
81
82 #if SIZEOF_INT == 4
83 #define CInt bits32
84 #elif SIZEOF_INT == 8
85 #define CInt bits64
86 #else
87 #error Unknown int size
88 #endif
89
90 #if SIZEOF_LONG == 4
91 #define CLong bits32
92 #elif SIZEOF_LONG == 8
93 #define CLong bits64
94 #else
95 #error Unknown long size
96 #endif
97
98 #define CBool bits8
99
100 #define F_ float32
101 #define D_ float64
102 #define L_ bits64
103 #define V16_ bits128
104 #define V32_ bits256
105 #define V64_ bits512
106
107 #define SIZEOF_StgDouble 8
108 #define SIZEOF_StgWord64 8
109
110 /* -----------------------------------------------------------------------------
111 Misc useful stuff
112 -------------------------------------------------------------------------- */
113
114 #define ccall foreign "C"
115
116 #define NULL (0::W_)
117
118 #define STRING(name,str) \
119 section "rodata" { \
120 name : bits8[] str; \
121 } \
122
123 #if defined(TABLES_NEXT_TO_CODE)
124 #define RET_LBL(f) f##_info
125 #else
126 #define RET_LBL(f) f##_ret
127 #endif
128
129 #if defined(TABLES_NEXT_TO_CODE)
130 #define ENTRY_LBL(f) f##_info
131 #else
132 #define ENTRY_LBL(f) f##_entry
133 #endif
134
135 /* -----------------------------------------------------------------------------
136 Byte/word macros
137
138 Everything in C-- is in byte offsets (well, most things). We use
139 some macros to allow us to express offsets in words and to try to
140 avoid byte/word confusion.
141 -------------------------------------------------------------------------- */
142
143 #define SIZEOF_W SIZEOF_VOID_P
144 #define W_MASK (SIZEOF_W-1)
145
146 #if SIZEOF_W == 4
147 #define W_SHIFT 2
148 #elif SIZEOF_W == 8
149 #define W_SHIFT 3
150 #endif
151
152 /* Converting quantities of words to bytes */
153 #define WDS(n) ((n)*SIZEOF_W)
154
155 /*
156 * Converting quantities of bytes to words
157 * NB. these work on *unsigned* values only
158 */
159 #define BYTES_TO_WDS(n) ((n) / SIZEOF_W)
160 #define ROUNDUP_BYTES_TO_WDS(n) (((n) + SIZEOF_W - 1) / SIZEOF_W)
161
162 /*
163 * TO_W_(n) and TO_ZXW_(n) convert n to W_ type from a smaller type,
164 * with and without sign extension respectively
165 */
166 #if SIZEOF_W == 4
167 #define TO_I64(x) %sx64(x)
168 #define TO_W_(x) %sx32(x)
169 #define TO_ZXW_(x) %zx32(x)
170 #define HALF_W_(x) %lobits16(x)
171 #elif SIZEOF_W == 8
172 #define TO_I64(x) (x)
173 #define TO_W_(x) %sx64(x)
174 #define TO_ZXW_(x) %zx64(x)
175 #define HALF_W_(x) %lobits32(x)
176 #endif
177
178 #if SIZEOF_INT == 4 && SIZEOF_W == 8
179 #define W_TO_INT(x) %lobits32(x)
180 #elif SIZEOF_INT == SIZEOF_W
181 #define W_TO_INT(x) (x)
182 #endif
183
184 #if SIZEOF_LONG == 4 && SIZEOF_W == 8
185 #define W_TO_LONG(x) %lobits32(x)
186 #elif SIZEOF_LONG == SIZEOF_W
187 #define W_TO_LONG(x) (x)
188 #endif
189
190 /* -----------------------------------------------------------------------------
191 Atomic memory operations.
192 -------------------------------------------------------------------------- */
193
194 #if SIZEOF_W == 4
195 #define cmpxchgW cmpxchg32
196 #elif SIZEOF_W == 8
197 #define cmpxchgW cmpxchg64
198 #endif
199
200 /* -----------------------------------------------------------------------------
201 Heap/stack access, and adjusting the heap/stack pointers.
202 -------------------------------------------------------------------------- */
203
204 #define Sp(n) W_[Sp + WDS(n)]
205 #define Hp(n) W_[Hp + WDS(n)]
206
207 #define Sp_adj(n) Sp = Sp + WDS(n) /* pronounced "spadge" */
208 #define Hp_adj(n) Hp = Hp + WDS(n)
209
210 /* -----------------------------------------------------------------------------
211 Assertions and Debuggery
212 -------------------------------------------------------------------------- */
213
214 #if defined(DEBUG)
215 #define ASSERT(predicate) \
216 if (predicate) { \
217 /*null*/; \
218 } else { \
219 foreign "C" _assertFail(__FILE__, __LINE__) never returns; \
220 }
221 #else
222 #define ASSERT(p) /* nothing */
223 #endif
224
225 #if defined(DEBUG)
226 #define DEBUG_ONLY(s) s
227 #else
228 #define DEBUG_ONLY(s) /* nothing */
229 #endif
230
231 /*
232 * The IF_DEBUG macro is useful for debug messages that depend on one
233 * of the RTS debug options. For example:
234 *
235 * IF_DEBUG(RtsFlags_DebugFlags_apply,
236 * foreign "C" fprintf(stderr, stg_ap_0_ret_str));
237 *
238 * Note the syntax is slightly different to the C version of this macro.
239 */
240 #if defined(DEBUG)
241 #define IF_DEBUG(c,s) if (RtsFlags_DebugFlags_##c(RtsFlags) != 0::CBool) { s; }
242 #else
243 #define IF_DEBUG(c,s) /* nothing */
244 #endif
245
246 /* -----------------------------------------------------------------------------
247 Entering
248
249 It isn't safe to "enter" every closure. Functions in particular
250 have no entry code as such; their entry point contains the code to
251 apply the function.
252
253 ToDo: range should end in N_CLOSURE_TYPES-1, not N_CLOSURE_TYPES,
254 but switch doesn't allow us to use exprs there yet.
255
256 If R1 points to a tagged object it points either to
257 * A constructor.
258 * A function with arity <= TAG_MASK.
259 In both cases the right thing to do is to return.
260 Note: it is rather lucky that we can use the tag bits to do this
261 for both objects. Maybe it points to a brittle design?
262
263 Indirections can contain tagged pointers, so their tag is checked.
264 -------------------------------------------------------------------------- */
265
266 #if defined(PROFILING)
267
268 // When profiling, we cannot shortcut ENTER() by checking the tag,
269 // because LDV profiling relies on entering closures to mark them as
270 // "used".
271
272 #define LOAD_INFO(ret,x) \
273 info = %INFO_PTR(UNTAG(x));
274
275 #define UNTAG_IF_PROF(x) UNTAG(x)
276
277 #else
278
279 #define LOAD_INFO(ret,x) \
280 if (GETTAG(x) != 0) { \
281 ret(x); \
282 } \
283 info = %INFO_PTR(x);
284
285 #define UNTAG_IF_PROF(x) (x) /* already untagged */
286
287 #endif
288
289 // We need two versions of ENTER():
290 // - ENTER(x) takes the closure as an argument and uses return(),
291 // for use in civilized code where the stack is handled by GHC
292 //
293 // - ENTER_NOSTACK() where the closure is in R1, and returns are
294 // explicit jumps, for use when we are doing the stack management
295 // ourselves.
296
297 #if defined(PROFILING)
298 // See Note [Evaluating functions with profiling] in rts/Apply.cmm
299 #define ENTER(x) jump stg_ap_0_fast(x);
300 #else
301 #define ENTER(x) ENTER_(return,x)
302 #endif
303
304 #define ENTER_R1() ENTER_(RET_R1,R1)
305
306 #define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1]
307
308 #define ENTER_(ret,x) \
309 again: \
310 W_ info; \
311 LOAD_INFO(ret,x) \
312 /* See Note [Heap memory barriers] in SMP.h */ \
313 prim_read_barrier; \
314 switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \
315 (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \
316 case \
317 IND, \
318 IND_STATIC: \
319 { \
320 x = StgInd_indirectee(x); \
321 goto again; \
322 } \
323 case \
324 FUN, \
325 FUN_1_0, \
326 FUN_0_1, \
327 FUN_2_0, \
328 FUN_1_1, \
329 FUN_0_2, \
330 FUN_STATIC, \
331 BCO, \
332 PAP: \
333 { \
334 ret(x); \
335 } \
336 default: \
337 { \
338 x = UNTAG_IF_PROF(x); \
339 jump %ENTRY_CODE(info) (x); \
340 } \
341 }
342
343 // The FUN cases almost never happen: a pointer to a non-static FUN
344 // should always be tagged. This unfortunately isn't true for the
345 // interpreter right now, which leaves untagged FUNs on the stack.
346
347 /* -----------------------------------------------------------------------------
348 Constants.
349 -------------------------------------------------------------------------- */
350
351 #include "rts/Constants.h"
352 #include "DerivedConstants.h"
353 #include "rts/storage/ClosureTypes.h"
354 #include "rts/storage/FunTypes.h"
355 #include "rts/OSThreads.h"
356
357 /*
358 * Need MachRegs, because some of the RTS code is conditionally
359 * compiled based on REG_R1, REG_R2, etc.
360 */
361 #include "stg/MachRegsForHost.h"
362
363 #include "rts/prof/LDV.h"
364
365 #undef BLOCK_SIZE
366 #undef MBLOCK_SIZE
367 #include "rts/storage/Block.h" /* For Bdescr() */
368
369
370 #define MyCapability() (BaseReg - OFFSET_Capability_r)
371
372 /* -------------------------------------------------------------------------
373 Info tables
374 ------------------------------------------------------------------------- */
375
376 #if defined(PROFILING)
377 #define PROF_HDR_FIELDS(w_,hdr1,hdr2) \
378 w_ hdr1, \
379 w_ hdr2,
380 #else
381 #define PROF_HDR_FIELDS(w_,hdr1,hdr2) /* nothing */
382 #endif
383
384 /* -------------------------------------------------------------------------
385 Allocation and garbage collection
386 ------------------------------------------------------------------------- */
387
388 /*
389 * ALLOC_PRIM is for allocating memory on the heap for a primitive
390 * object. It is used all over PrimOps.cmm.
391 *
392 * We make the simplifying assumption that the "admin" part of a
393 * primitive closure is just the header when calculating sizes for
394 * ticky-ticky. It's not clear whether eg. the size field of an array
395 * should be counted as "admin", or the various fields of a BCO.
396 */
397 #define ALLOC_PRIM(bytes) \
398 HP_CHK_GEN_TICKY(bytes); \
399 TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
400 CCCS_ALLOC(bytes);
401
402 #define HEAP_CHECK(bytes,failure) \
403 TICK_BUMP(HEAP_CHK_ctr); \
404 Hp = Hp + (bytes); \
405 if (Hp > HpLim) { HpAlloc = (bytes); failure; } \
406 TICK_ALLOC_HEAP_NOCTR(bytes);
407
408 #define ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,failure) \
409 HEAP_CHECK(bytes,failure) \
410 TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
411 CCCS_ALLOC(bytes);
412
413 #define ALLOC_PRIM_(bytes,fun) \
414 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM(fun));
415
416 #define ALLOC_PRIM_P(bytes,fun,arg) \
417 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_P(fun,arg));
418
419 #define ALLOC_PRIM_N(bytes,fun,arg) \
420 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_N(fun,arg));
421
422 /* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */
423 #define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS)
424
425 #define HP_CHK_GEN_TICKY(bytes) \
426 HP_CHK_GEN(bytes); \
427 TICK_ALLOC_HEAP_NOCTR(bytes);
428
429 #define HP_CHK_P(bytes, fun, arg) \
430 HEAP_CHECK(bytes, GC_PRIM_P(fun,arg))
431
432 // TODO I'm not seeing where ALLOC_P_TICKY is used; can it be removed?
433 // -NSF March 2013
434 #define ALLOC_P_TICKY(bytes, fun, arg) \
435 HP_CHK_P(bytes); \
436 TICK_ALLOC_HEAP_NOCTR(bytes);
437
438 #define CHECK_GC() \
439 (bdescr_link(CurrentNursery) == NULL || \
440 generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim]))
441
442 // allocate() allocates from the nursery, so we check to see
443 // whether the nursery is nearly empty in any function that uses
444 // allocate() - this includes many of the primops.
445 //
446 // HACK alert: the __L__ stuff is here to coax the common-block
447 // eliminator into commoning up the call stg_gc_noregs() with the same
448 // code that gets generated by a STK_CHK_GEN() in the same proc. We
449 // also need an if (0) { goto __L__; } so that the __L__ label isn't
450 // optimised away by the control-flow optimiser prior to common-block
451 // elimination (it will be optimised away later).
452 //
453 // This saves some code in gmp-wrappers.cmm where we have lots of
454 // MAYBE_GC() in the same proc as STK_CHK_GEN().
455 //
456 #define MAYBE_GC(retry) \
457 if (CHECK_GC()) { \
458 HpAlloc = 0; \
459 goto __L__; \
460 __L__: \
461 call stg_gc_noregs(); \
462 goto retry; \
463 } \
464 if (0) { goto __L__; }
465
466 #define GC_PRIM(fun) \
467 jump stg_gc_prim(fun);
468
469 // Version of GC_PRIM for use in low-level Cmm. We can call
470 // stg_gc_prim, because it takes one argument and therefore has a
471 // platform-independent calling convention (Note [Syntax of .cmm
472 // files] in CmmParse.y).
473 #define GC_PRIM_LL(fun) \
474 R1 = fun; \
475 jump stg_gc_prim [R1];
476
477 // We pass the fun as the second argument, because the arg is
478 // usually already in the first argument position (R1), so this
479 // avoids moving it to a different register / stack slot.
480 #define GC_PRIM_N(fun,arg) \
481 jump stg_gc_prim_n(arg,fun);
482
483 #define GC_PRIM_P(fun,arg) \
484 jump stg_gc_prim_p(arg,fun);
485
486 #define GC_PRIM_P_LL(fun,arg) \
487 R1 = arg; \
488 R2 = fun; \
489 jump stg_gc_prim_p_ll [R1,R2];
490
491 #define GC_PRIM_PP(fun,arg1,arg2) \
492 jump stg_gc_prim_pp(arg1,arg2,fun);
493
494 #define MAYBE_GC_(fun) \
495 if (CHECK_GC()) { \
496 HpAlloc = 0; \
497 GC_PRIM(fun) \
498 }
499
500 #define MAYBE_GC_N(fun,arg) \
501 if (CHECK_GC()) { \
502 HpAlloc = 0; \
503 GC_PRIM_N(fun,arg) \
504 }
505
506 #define MAYBE_GC_P(fun,arg) \
507 if (CHECK_GC()) { \
508 HpAlloc = 0; \
509 GC_PRIM_P(fun,arg) \
510 }
511
512 #define MAYBE_GC_PP(fun,arg1,arg2) \
513 if (CHECK_GC()) { \
514 HpAlloc = 0; \
515 GC_PRIM_PP(fun,arg1,arg2) \
516 }
517
518 #define STK_CHK_LL(n, fun) \
519 TICK_BUMP(STK_CHK_ctr); \
520 if (Sp - (n) < SpLim) { \
521 GC_PRIM_LL(fun) \
522 }
523
524 #define STK_CHK_P_LL(n, fun, arg) \
525 TICK_BUMP(STK_CHK_ctr); \
526 if (Sp - (n) < SpLim) { \
527 GC_PRIM_P_LL(fun,arg) \
528 }
529
530 #define STK_CHK_PP(n, fun, arg1, arg2) \
531 TICK_BUMP(STK_CHK_ctr); \
532 if (Sp - (n) < SpLim) { \
533 GC_PRIM_PP(fun,arg1,arg2) \
534 }
535
536 #define STK_CHK_ENTER(n, closure) \
537 TICK_BUMP(STK_CHK_ctr); \
538 if (Sp - (n) < SpLim) { \
539 jump __stg_gc_enter_1(closure); \
540 }
541
542 // A funky heap check used by AutoApply.cmm
543
544 #define HP_CHK_NP_ASSIGN_SP0(size,f) \
545 HEAP_CHECK(size, Sp(0) = f; jump __stg_gc_enter_1 [R1];)
546
547 /* -----------------------------------------------------------------------------
548 Closure headers
549 -------------------------------------------------------------------------- */
550
551 /*
552 * This is really ugly, since we don't do the rest of StgHeader this
553 * way. The problem is that values from DerivedConstants.h cannot be
554 * dependent on the way (SMP, PROF etc.). For SIZEOF_StgHeader we get
555 * the value from GHC, but it seems like too much trouble to do that
556 * for StgThunkHeader.
557 */
558 #define SIZEOF_StgThunkHeader SIZEOF_StgHeader+SIZEOF_StgSMPThunkHeader
559
560 #define StgThunk_payload(__ptr__,__ix__) \
561 W_[__ptr__+SIZEOF_StgThunkHeader+ WDS(__ix__)]
562
563 /* -----------------------------------------------------------------------------
564 Closures
565 -------------------------------------------------------------------------- */
566
567 /* The offset of the payload of an array */
568 #define BYTE_ARR_CTS(arr) ((arr) + SIZEOF_StgArrBytes)
569
570 /* The number of words allocated in an array payload */
571 #define BYTE_ARR_WDS(arr) ROUNDUP_BYTES_TO_WDS(StgArrBytes_bytes(arr))
572
573 /* Getting/setting the info pointer of a closure */
574 #define SET_INFO(p,info) StgHeader_info(p) = info
575 #define GET_INFO(p) StgHeader_info(p)
576
577 /* Determine the size of an ordinary closure from its info table */
578 #define sizeW_fromITBL(itbl) \
579 SIZEOF_StgHeader + WDS(%INFO_PTRS(itbl)) + WDS(%INFO_NPTRS(itbl))
580
581 /* NB. duplicated from InfoTables.h! */
582 #define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK)
583 #define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT)
584
585 /* Debugging macros */
586 #define LOOKS_LIKE_INFO_PTR(p) \
587 ((p) != NULL && \
588 LOOKS_LIKE_INFO_PTR_NOT_NULL(p))
589
590 #define LOOKS_LIKE_INFO_PTR_NOT_NULL(p) \
591 ( (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \
592 (TO_W_(%INFO_TYPE(%STD_INFO(p))) < N_CLOSURE_TYPES))
593
594 #define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p))))
595
596 /*
597 * The layout of the StgFunInfoExtra part of an info table changes
598 * depending on TABLES_NEXT_TO_CODE. So we define field access
599 * macros which use the appropriate version here:
600 */
601 #if defined(TABLES_NEXT_TO_CODE)
602 /*
603 * when TABLES_NEXT_TO_CODE, slow_apply is stored as an offset
604 * instead of the normal pointer.
605 */
606
607 #define StgFunInfoExtra_slow_apply(fun_info) \
608 (TO_W_(StgFunInfoExtraRev_slow_apply_offset(fun_info)) \
609 + (fun_info) + SIZEOF_StgFunInfoExtraRev + SIZEOF_StgInfoTable)
610
611 #define StgFunInfoExtra_fun_type(i) StgFunInfoExtraRev_fun_type(i)
612 #define StgFunInfoExtra_arity(i) StgFunInfoExtraRev_arity(i)
613 #define StgFunInfoExtra_bitmap(i) StgFunInfoExtraRev_bitmap(i)
614 #else
615 #define StgFunInfoExtra_slow_apply(i) StgFunInfoExtraFwd_slow_apply(i)
616 #define StgFunInfoExtra_fun_type(i) StgFunInfoExtraFwd_fun_type(i)
617 #define StgFunInfoExtra_arity(i) StgFunInfoExtraFwd_arity(i)
618 #define StgFunInfoExtra_bitmap(i) StgFunInfoExtraFwd_bitmap(i)
619 #endif
620
621 #define mutArrCardMask ((1 << MUT_ARR_PTRS_CARD_BITS) - 1)
622 #define mutArrPtrCardDown(i) ((i) >> MUT_ARR_PTRS_CARD_BITS)
623 #define mutArrPtrCardUp(i) (((i) + mutArrCardMask) >> MUT_ARR_PTRS_CARD_BITS)
624 #define mutArrPtrsCardWords(n) ROUNDUP_BYTES_TO_WDS(mutArrPtrCardUp(n))
625
626 #if defined(PROFILING) || (!defined(THREADED_RTS) && defined(DEBUG))
627 #define OVERWRITING_CLOSURE_SIZE(c, size) foreign "C" overwritingClosureSize(c "ptr", size)
628 #define OVERWRITING_CLOSURE(c) foreign "C" overwritingClosure(c "ptr")
629 #define OVERWRITING_CLOSURE_OFS(c,n) foreign "C" overwritingClosureOfs(c "ptr", n)
630 #else
631 #define OVERWRITING_CLOSURE_SIZE(c, size) /* nothing */
632 #define OVERWRITING_CLOSURE(c) /* nothing */
633 #define OVERWRITING_CLOSURE_OFS(c,n) /* nothing */
634 #endif
635
636 // Memory barriers.
637 // For discussion of how these are used to fence heap object
638 // accesses see Note [Heap memory barriers] in SMP.h.
639 #if defined(THREADED_RTS)
640 #define prim_read_barrier prim %read_barrier()
641 #else
642 #define prim_read_barrier /* nothing */
643 #endif
644 #if defined(THREADED_RTS)
645 #define prim_write_barrier prim %write_barrier()
646 #else
647 #define prim_write_barrier /* nothing */
648 #endif
649
650 /* -----------------------------------------------------------------------------
651 Ticky macros
652 -------------------------------------------------------------------------- */
653
654 #if defined(TICKY_TICKY)
655 #define TICK_BUMP_BY(ctr,n) CLong[ctr] = CLong[ctr] + n
656 #else
657 #define TICK_BUMP_BY(ctr,n) /* nothing */
658 #endif
659
660 #define TICK_BUMP(ctr) TICK_BUMP_BY(ctr,1)
661
662 #define TICK_ENT_DYN_IND() TICK_BUMP(ENT_DYN_IND_ctr)
663 #define TICK_ENT_DYN_THK() TICK_BUMP(ENT_DYN_THK_ctr)
664 #define TICK_ENT_VIA_NODE() TICK_BUMP(ENT_VIA_NODE_ctr)
665 #define TICK_ENT_STATIC_IND() TICK_BUMP(ENT_STATIC_IND_ctr)
666 #define TICK_ENT_PERM_IND() TICK_BUMP(ENT_PERM_IND_ctr)
667 #define TICK_ENT_PAP() TICK_BUMP(ENT_PAP_ctr)
668 #define TICK_ENT_AP() TICK_BUMP(ENT_AP_ctr)
669 #define TICK_ENT_AP_STACK() TICK_BUMP(ENT_AP_STACK_ctr)
670 #define TICK_ENT_BH() TICK_BUMP(ENT_BH_ctr)
671 #define TICK_ENT_LNE() TICK_BUMP(ENT_LNE_ctr)
672 #define TICK_UNKNOWN_CALL() TICK_BUMP(UNKNOWN_CALL_ctr)
673 #define TICK_UPDF_PUSHED() TICK_BUMP(UPDF_PUSHED_ctr)
674 #define TICK_CATCHF_PUSHED() TICK_BUMP(CATCHF_PUSHED_ctr)
675 #define TICK_UPDF_OMITTED() TICK_BUMP(UPDF_OMITTED_ctr)
676 #define TICK_UPD_NEW_IND() TICK_BUMP(UPD_NEW_IND_ctr)
677 #define TICK_UPD_NEW_PERM_IND() TICK_BUMP(UPD_NEW_PERM_IND_ctr)
678 #define TICK_UPD_OLD_IND() TICK_BUMP(UPD_OLD_IND_ctr)
679 #define TICK_UPD_OLD_PERM_IND() TICK_BUMP(UPD_OLD_PERM_IND_ctr)
680
681 #define TICK_SLOW_CALL_FUN_TOO_FEW() TICK_BUMP(SLOW_CALL_FUN_TOO_FEW_ctr)
682 #define TICK_SLOW_CALL_FUN_CORRECT() TICK_BUMP(SLOW_CALL_FUN_CORRECT_ctr)
683 #define TICK_SLOW_CALL_FUN_TOO_MANY() TICK_BUMP(SLOW_CALL_FUN_TOO_MANY_ctr)
684 #define TICK_SLOW_CALL_PAP_TOO_FEW() TICK_BUMP(SLOW_CALL_PAP_TOO_FEW_ctr)
685 #define TICK_SLOW_CALL_PAP_CORRECT() TICK_BUMP(SLOW_CALL_PAP_CORRECT_ctr)
686 #define TICK_SLOW_CALL_PAP_TOO_MANY() TICK_BUMP(SLOW_CALL_PAP_TOO_MANY_ctr)
687
688 #define TICK_SLOW_CALL_fast_v16() TICK_BUMP(SLOW_CALL_fast_v16_ctr)
689 #define TICK_SLOW_CALL_fast_v() TICK_BUMP(SLOW_CALL_fast_v_ctr)
690 #define TICK_SLOW_CALL_fast_p() TICK_BUMP(SLOW_CALL_fast_p_ctr)
691 #define TICK_SLOW_CALL_fast_pv() TICK_BUMP(SLOW_CALL_fast_pv_ctr)
692 #define TICK_SLOW_CALL_fast_pp() TICK_BUMP(SLOW_CALL_fast_pp_ctr)
693 #define TICK_SLOW_CALL_fast_ppv() TICK_BUMP(SLOW_CALL_fast_ppv_ctr)
694 #define TICK_SLOW_CALL_fast_ppp() TICK_BUMP(SLOW_CALL_fast_ppp_ctr)
695 #define TICK_SLOW_CALL_fast_pppv() TICK_BUMP(SLOW_CALL_fast_pppv_ctr)
696 #define TICK_SLOW_CALL_fast_pppp() TICK_BUMP(SLOW_CALL_fast_pppp_ctr)
697 #define TICK_SLOW_CALL_fast_ppppp() TICK_BUMP(SLOW_CALL_fast_ppppp_ctr)
698 #define TICK_SLOW_CALL_fast_pppppp() TICK_BUMP(SLOW_CALL_fast_pppppp_ctr)
699 #define TICK_VERY_SLOW_CALL() TICK_BUMP(VERY_SLOW_CALL_ctr)
700
701 /* NOTE: TICK_HISTO_BY and TICK_HISTO
702 currently have no effect.
703 The old code for it didn't typecheck and I
704 just commented it out to get ticky to work.
705 - krc 1/2007 */
706
707 #define TICK_HISTO_BY(histo,n,i) /* nothing */
708
709 #define TICK_HISTO(histo,n) TICK_HISTO_BY(histo,n,1)
710
711 /* An unboxed tuple with n components. */
712 #define TICK_RET_UNBOXED_TUP(n) \
713 TICK_BUMP(RET_UNBOXED_TUP_ctr++); \
714 TICK_HISTO(RET_UNBOXED_TUP,n)
715
716 /*
717 * A slow call with n arguments. In the unevald case, this call has
718 * already been counted once, so don't count it again.
719 */
720 #define TICK_SLOW_CALL(n) \
721 TICK_BUMP(SLOW_CALL_ctr); \
722 TICK_HISTO(SLOW_CALL,n)
723
724 /*
725 * This slow call was found to be to an unevaluated function; undo the
726 * ticks we did in TICK_SLOW_CALL.
727 */
728 #define TICK_SLOW_CALL_UNEVALD(n) \
729 TICK_BUMP(SLOW_CALL_UNEVALD_ctr); \
730 TICK_BUMP_BY(SLOW_CALL_ctr,-1); \
731 TICK_HISTO_BY(SLOW_CALL,n,-1);
732
733 /* Updating a closure with a new CON */
734 #define TICK_UPD_CON_IN_NEW(n) \
735 TICK_BUMP(UPD_CON_IN_NEW_ctr); \
736 TICK_HISTO(UPD_CON_IN_NEW,n)
737
738 #define TICK_ALLOC_HEAP_NOCTR(bytes) \
739 TICK_BUMP(ALLOC_RTS_ctr); \
740 TICK_BUMP_BY(ALLOC_RTS_tot,bytes)
741
742 /* -----------------------------------------------------------------------------
743 Saving and restoring STG registers
744
745 STG registers must be saved around a C call, just in case the STG
746 register is mapped to a caller-saves machine register. Normally we
747 don't need to worry about this the code generator has already
748 loaded any live STG registers into variables for us, but in
749 hand-written low-level Cmm code where we don't know which registers
750 are live, we might have to save them all.
751 -------------------------------------------------------------------------- */
752
753 #define SAVE_STGREGS \
754 W_ r1, r2, r3, r4, r5, r6, r7, r8; \
755 F_ f1, f2, f3, f4, f5, f6; \
756 D_ d1, d2, d3, d4, d5, d6; \
757 L_ l1; \
758 \
759 r1 = R1; \
760 r2 = R2; \
761 r3 = R3; \
762 r4 = R4; \
763 r5 = R5; \
764 r6 = R6; \
765 r7 = R7; \
766 r8 = R8; \
767 \
768 f1 = F1; \
769 f2 = F2; \
770 f3 = F3; \
771 f4 = F4; \
772 f5 = F5; \
773 f6 = F6; \
774 \
775 d1 = D1; \
776 d2 = D2; \
777 d3 = D3; \
778 d4 = D4; \
779 d5 = D5; \
780 d6 = D6; \
781 \
782 l1 = L1;
783
784
785 #define RESTORE_STGREGS \
786 R1 = r1; \
787 R2 = r2; \
788 R3 = r3; \
789 R4 = r4; \
790 R5 = r5; \
791 R6 = r6; \
792 R7 = r7; \
793 R8 = r8; \
794 \
795 F1 = f1; \
796 F2 = f2; \
797 F3 = f3; \
798 F4 = f4; \
799 F5 = f5; \
800 F6 = f6; \
801 \
802 D1 = d1; \
803 D2 = d2; \
804 D3 = d3; \
805 D4 = d4; \
806 D5 = d5; \
807 D6 = d6; \
808 \
809 L1 = l1;
810
811 /* -----------------------------------------------------------------------------
812 Misc junk
813 -------------------------------------------------------------------------- */
814
815 #define NO_TREC stg_NO_TREC_closure
816 #define END_TSO_QUEUE stg_END_TSO_QUEUE_closure
817 #define STM_AWOKEN stg_STM_AWOKEN_closure
818
819 #define recordMutableCap(p, gen) \
820 W_ __bd; \
821 W_ mut_list; \
822 mut_list = Capability_mut_lists(MyCapability()) + WDS(gen); \
823 __bd = W_[mut_list]; \
824 if (bdescr_free(__bd) >= bdescr_start(__bd) + BLOCK_SIZE) { \
825 W_ __new_bd; \
826 ("ptr" __new_bd) = foreign "C" allocBlock_lock(); \
827 bdescr_link(__new_bd) = __bd; \
828 __bd = __new_bd; \
829 W_[mut_list] = __bd; \
830 } \
831 W_ free; \
832 free = bdescr_free(__bd); \
833 W_[free] = p; \
834 bdescr_free(__bd) = free + WDS(1);
835
836 #define recordMutable(p) \
837 P_ __p; \
838 W_ __bd; \
839 W_ __gen; \
840 __p = p; \
841 __bd = Bdescr(__p); \
842 __gen = TO_W_(bdescr_gen_no(__bd)); \
843 if (__gen > 0) { recordMutableCap(__p, __gen); }
844
845 /* -----------------------------------------------------------------------------
846 Update remembered set write barrier
847 -------------------------------------------------------------------------- */
848
849 /* -----------------------------------------------------------------------------
850 Arrays
851 -------------------------------------------------------------------------- */
852
853 /* Complete function body for the clone family of (mutable) array ops.
854 Defined as a macro to avoid function call overhead or code
855 duplication. */
856 #define cloneArray(info, src, offset, n) \
857 W_ words, size; \
858 gcptr dst, dst_p, src_p; \
859 \
860 again: MAYBE_GC(again); \
861 \
862 size = n + mutArrPtrsCardWords(n); \
863 words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; \
864 ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
865 TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0); \
866 \
867 SET_HDR(dst, info, CCCS); \
868 StgMutArrPtrs_ptrs(dst) = n; \
869 StgMutArrPtrs_size(dst) = size; \
870 \
871 dst_p = dst + SIZEOF_StgMutArrPtrs; \
872 src_p = src + SIZEOF_StgMutArrPtrs + WDS(offset); \
873 prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
874 \
875 return (dst);
876
877 #define copyArray(src, src_off, dst, dst_off, n) \
878 W_ dst_elems_p, dst_p, src_p, dst_cards_p, bytes; \
879 \
880 if ((n) != 0) { \
881 SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
882 \
883 dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
884 dst_p = dst_elems_p + WDS(dst_off); \
885 src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
886 bytes = WDS(n); \
887 \
888 prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
889 \
890 dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
891 setCards(dst_cards_p, dst_off, n); \
892 } \
893 \
894 return ();
895
896 #define copyMutableArray(src, src_off, dst, dst_off, n) \
897 W_ dst_elems_p, dst_p, src_p, dst_cards_p, bytes; \
898 \
899 if ((n) != 0) { \
900 SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
901 \
902 dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
903 dst_p = dst_elems_p + WDS(dst_off); \
904 src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
905 bytes = WDS(n); \
906 \
907 if ((src) == (dst)) { \
908 prim %memmove(dst_p, src_p, bytes, SIZEOF_W); \
909 } else { \
910 prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
911 } \
912 \
913 dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
914 setCards(dst_cards_p, dst_off, n); \
915 } \
916 \
917 return ();
918
919 /*
920 * Set the cards in the cards table pointed to by dst_cards_p for an
921 * update to n elements, starting at element dst_off.
922 */
923 #define setCards(dst_cards_p, dst_off, n) \
924 W_ __start_card, __end_card, __cards; \
925 __start_card = mutArrPtrCardDown(dst_off); \
926 __end_card = mutArrPtrCardDown((dst_off) + (n) - 1); \
927 __cards = __end_card - __start_card + 1; \
928 prim %memset((dst_cards_p) + __start_card, 1, __cards, 1);
929
930 /* Complete function body for the clone family of small (mutable)
931 array ops. Defined as a macro to avoid function call overhead or
932 code duplication. */
933 #define cloneSmallArray(info, src, offset, n) \
934 W_ words, size; \
935 gcptr dst, dst_p, src_p; \
936 \
937 again: MAYBE_GC(again); \
938 \
939 words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n; \
940 ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
941 TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0); \
942 \
943 SET_HDR(dst, info, CCCS); \
944 StgSmallMutArrPtrs_ptrs(dst) = n; \
945 \
946 dst_p = dst + SIZEOF_StgSmallMutArrPtrs; \
947 src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(offset); \
948 prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
949 \
950 return (dst);
951
952
953 //
954 // Nonmoving write barrier helpers
955 //
956 // See Note [Update remembered set] in NonMovingMark.c.
957
958 #if defined(THREADED_RTS)
959 #define IF_NONMOVING_WRITE_BARRIER_ENABLED \
960 if (W_[nonmoving_write_barrier_enabled] != 0) (likely: False)
961 #else
962 // A similar measure is also taken in rts/NonMoving.h, but that isn't visible from C--
963 #define IF_NONMOVING_WRITE_BARRIER_ENABLED \
964 if (0)
965 #define nonmoving_write_barrier_enabled 0
966 #endif
967
968 // A useful helper for pushing a pointer to the update remembered set.
969 #define updateRemembSetPushPtr(p) \
970 IF_NONMOVING_WRITE_BARRIER_ENABLED { \
971 ccall updateRemembSetPushClosure_(BaseReg "ptr", p "ptr"); \
972 }