Improve accuracy of get/setAllocationCounter
[ghc.git] / includes / Cmm.h
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The University of Glasgow 2004-2013
4 *
5 * This file is included at the top of all .cmm source files (and
6 * *only* .cmm files). It defines a collection of useful macros for
7 * making .cmm code a bit less error-prone to write, and a bit easier
8 * on the eye for the reader.
9 *
10 * For the syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 *
12 * Accessing fields of structures defined in the RTS header files is
13 * done via automatically-generated macros in DerivedConstants.h. For
14 * example, where previously we used
15 *
16 * CurrentTSO->what_next = x
17 *
18 * in C-- we now use
19 *
20 * StgTSO_what_next(CurrentTSO) = x
21 *
22 * where the StgTSO_what_next() macro is automatically generated by
23 * mkDerivedConstants.c. If you need to access a field that doesn't
24 * already have a macro, edit that file (it's pretty self-explanatory).
25 *
26 * -------------------------------------------------------------------------- */
27
28 #pragma once
29
30 /*
31 * In files that are included into both C and C-- (and perhaps
32 * Haskell) sources, we sometimes need to conditionally compile bits
33 * depending on the language. CMINUSMINUS==1 in .cmm sources:
34 */
35 #define CMINUSMINUS 1
36
37 #include "ghcconfig.h"
38
39 /* -----------------------------------------------------------------------------
40 Types
41
42 The following synonyms for C-- types are declared here:
43
44 I8, I16, I32, I64 MachRep-style names for convenience
45
46 W_ is shorthand for the word type (== StgWord)
47 F_ shorthand for float (F_ == StgFloat == C's float)
48 D_ shorthand for double (D_ == StgDouble == C's double)
49
50 CInt has the same size as an int in C on this platform
51 CLong has the same size as a long in C on this platform
52 CBool has the same size as a bool in C on this platform
53
54 --------------------------------------------------------------------------- */
55
56 #define I8 bits8
57 #define I16 bits16
58 #define I32 bits32
59 #define I64 bits64
60 #define P_ gcptr
61
62 #if SIZEOF_VOID_P == 4
63 #define W_ bits32
64 /* Maybe it's better to include MachDeps.h */
65 #define TAG_BITS 2
66 #elif SIZEOF_VOID_P == 8
67 #define W_ bits64
68 /* Maybe it's better to include MachDeps.h */
69 #define TAG_BITS 3
70 #else
71 #error Unknown word size
72 #endif
73
74 /*
75 * The RTS must sometimes UNTAG a pointer before dereferencing it.
76 * See the wiki page Commentary/Rts/HaskellExecution/PointerTagging
77 */
78 #define TAG_MASK ((1 << TAG_BITS) - 1)
79 #define UNTAG(p) (p & ~TAG_MASK)
80 #define GETTAG(p) (p & TAG_MASK)
81
82 #if SIZEOF_INT == 4
83 #define CInt bits32
84 #elif SIZEOF_INT == 8
85 #define CInt bits64
86 #else
87 #error Unknown int size
88 #endif
89
90 #if SIZEOF_LONG == 4
91 #define CLong bits32
92 #elif SIZEOF_LONG == 8
93 #define CLong bits64
94 #else
95 #error Unknown long size
96 #endif
97
98 #define CBool bits8
99
100 #define F_ float32
101 #define D_ float64
102 #define L_ bits64
103 #define V16_ bits128
104 #define V32_ bits256
105 #define V64_ bits512
106
107 #define SIZEOF_StgDouble 8
108 #define SIZEOF_StgWord64 8
109
110 /* -----------------------------------------------------------------------------
111 Misc useful stuff
112 -------------------------------------------------------------------------- */
113
114 #define ccall foreign "C"
115
116 #define NULL (0::W_)
117
118 #define STRING(name,str) \
119 section "rodata" { \
120 name : bits8[] str; \
121 } \
122
123 #if defined(TABLES_NEXT_TO_CODE)
124 #define RET_LBL(f) f##_info
125 #else
126 #define RET_LBL(f) f##_ret
127 #endif
128
129 #if defined(TABLES_NEXT_TO_CODE)
130 #define ENTRY_LBL(f) f##_info
131 #else
132 #define ENTRY_LBL(f) f##_entry
133 #endif
134
135 /* -----------------------------------------------------------------------------
136 Byte/word macros
137
138 Everything in C-- is in byte offsets (well, most things). We use
139 some macros to allow us to express offsets in words and to try to
140 avoid byte/word confusion.
141 -------------------------------------------------------------------------- */
142
143 #define SIZEOF_W SIZEOF_VOID_P
144 #define W_MASK (SIZEOF_W-1)
145
146 #if SIZEOF_W == 4
147 #define W_SHIFT 2
148 #elif SIZEOF_W == 8
149 #define W_SHIFT 3
150 #endif
151
152 /* Converting quantities of words to bytes */
153 #define WDS(n) ((n)*SIZEOF_W)
154
155 /*
156 * Converting quantities of bytes to words
157 * NB. these work on *unsigned* values only
158 */
159 #define BYTES_TO_WDS(n) ((n) / SIZEOF_W)
160 #define ROUNDUP_BYTES_TO_WDS(n) (((n) + SIZEOF_W - 1) / SIZEOF_W)
161
162 /* TO_W_(n) converts n to W_ type from a smaller type */
163 #if SIZEOF_W == 4
164 #define TO_I64(x) %sx64(x)
165 #define TO_W_(x) %sx32(x)
166 #define HALF_W_(x) %lobits16(x)
167 #elif SIZEOF_W == 8
168 #define TO_I64(x) (x)
169 #define TO_W_(x) %sx64(x)
170 #define HALF_W_(x) %lobits32(x)
171 #endif
172
173 #if SIZEOF_INT == 4 && SIZEOF_W == 8
174 #define W_TO_INT(x) %lobits32(x)
175 #elif SIZEOF_INT == SIZEOF_W
176 #define W_TO_INT(x) (x)
177 #endif
178
179 #if SIZEOF_LONG == 4 && SIZEOF_W == 8
180 #define W_TO_LONG(x) %lobits32(x)
181 #elif SIZEOF_LONG == SIZEOF_W
182 #define W_TO_LONG(x) (x)
183 #endif
184
185 /* -----------------------------------------------------------------------------
186 Atomic memory operations.
187 -------------------------------------------------------------------------- */
188
189 #if SIZEOF_W == 4
190 #define cmpxchgW cmpxchg32
191 #elif SIZEOF_W == 8
192 #define cmpxchgW cmpxchg64
193 #endif
194
195 /* -----------------------------------------------------------------------------
196 Heap/stack access, and adjusting the heap/stack pointers.
197 -------------------------------------------------------------------------- */
198
199 #define Sp(n) W_[Sp + WDS(n)]
200 #define Hp(n) W_[Hp + WDS(n)]
201
202 #define Sp_adj(n) Sp = Sp + WDS(n) /* pronounced "spadge" */
203 #define Hp_adj(n) Hp = Hp + WDS(n)
204
205 /* -----------------------------------------------------------------------------
206 Assertions and Debuggery
207 -------------------------------------------------------------------------- */
208
209 #if defined(DEBUG)
210 #define ASSERT(predicate) \
211 if (predicate) { \
212 /*null*/; \
213 } else { \
214 foreign "C" _assertFail(NULL, __LINE__) never returns; \
215 }
216 #else
217 #define ASSERT(p) /* nothing */
218 #endif
219
220 #if defined(DEBUG)
221 #define DEBUG_ONLY(s) s
222 #else
223 #define DEBUG_ONLY(s) /* nothing */
224 #endif
225
226 /*
227 * The IF_DEBUG macro is useful for debug messages that depend on one
228 * of the RTS debug options. For example:
229 *
230 * IF_DEBUG(RtsFlags_DebugFlags_apply,
231 * foreign "C" fprintf(stderr, stg_ap_0_ret_str));
232 *
233 * Note the syntax is slightly different to the C version of this macro.
234 */
235 #if defined(DEBUG)
236 #define IF_DEBUG(c,s) if (RtsFlags_DebugFlags_##c(RtsFlags) != 0::CBool) { s; }
237 #else
238 #define IF_DEBUG(c,s) /* nothing */
239 #endif
240
241 /* -----------------------------------------------------------------------------
242 Entering
243
244 It isn't safe to "enter" every closure. Functions in particular
245 have no entry code as such; their entry point contains the code to
246 apply the function.
247
248 ToDo: range should end in N_CLOSURE_TYPES-1, not N_CLOSURE_TYPES,
249 but switch doesn't allow us to use exprs there yet.
250
251 If R1 points to a tagged object it points either to
252 * A constructor.
253 * A function with arity <= TAG_MASK.
254 In both cases the right thing to do is to return.
255 Note: it is rather lucky that we can use the tag bits to do this
256 for both objects. Maybe it points to a brittle design?
257
258 Indirections can contain tagged pointers, so their tag is checked.
259 -------------------------------------------------------------------------- */
260
261 #if defined(PROFILING)
262
263 // When profiling, we cannot shortcut ENTER() by checking the tag,
264 // because LDV profiling relies on entering closures to mark them as
265 // "used".
266
267 #define LOAD_INFO(ret,x) \
268 info = %INFO_PTR(UNTAG(x));
269
270 #define UNTAG_IF_PROF(x) UNTAG(x)
271
272 #else
273
274 #define LOAD_INFO(ret,x) \
275 if (GETTAG(x) != 0) { \
276 ret(x); \
277 } \
278 info = %INFO_PTR(x);
279
280 #define UNTAG_IF_PROF(x) (x) /* already untagged */
281
282 #endif
283
284 // We need two versions of ENTER():
285 // - ENTER(x) takes the closure as an argument and uses return(),
286 // for use in civilized code where the stack is handled by GHC
287 //
288 // - ENTER_NOSTACK() where the closure is in R1, and returns are
289 // explicit jumps, for use when we are doing the stack management
290 // ourselves.
291
292 #if defined(PROFILING)
293 // See Note [Evaluating functions with profiling] in rts/Apply.cmm
294 #define ENTER(x) jump stg_ap_0_fast(x);
295 #else
296 #define ENTER(x) ENTER_(return,x)
297 #endif
298
299 #define ENTER_R1() ENTER_(RET_R1,R1)
300
301 #define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1]
302
303 #define ENTER_(ret,x) \
304 again: \
305 W_ info; \
306 LOAD_INFO(ret,x) \
307 switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \
308 (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \
309 case \
310 IND, \
311 IND_STATIC: \
312 { \
313 x = StgInd_indirectee(x); \
314 goto again; \
315 } \
316 case \
317 FUN, \
318 FUN_1_0, \
319 FUN_0_1, \
320 FUN_2_0, \
321 FUN_1_1, \
322 FUN_0_2, \
323 FUN_STATIC, \
324 BCO, \
325 PAP: \
326 { \
327 ret(x); \
328 } \
329 default: \
330 { \
331 x = UNTAG_IF_PROF(x); \
332 jump %ENTRY_CODE(info) (x); \
333 } \
334 }
335
336 // The FUN cases almost never happen: a pointer to a non-static FUN
337 // should always be tagged. This unfortunately isn't true for the
338 // interpreter right now, which leaves untagged FUNs on the stack.
339
340 /* -----------------------------------------------------------------------------
341 Constants.
342 -------------------------------------------------------------------------- */
343
344 #include "rts/Constants.h"
345 #include "DerivedConstants.h"
346 #include "rts/storage/ClosureTypes.h"
347 #include "rts/storage/FunTypes.h"
348 #include "rts/OSThreads.h"
349
350 /*
351 * Need MachRegs, because some of the RTS code is conditionally
352 * compiled based on REG_R1, REG_R2, etc.
353 */
354 #include "stg/RtsMachRegs.h"
355
356 #include "rts/prof/LDV.h"
357
358 #undef BLOCK_SIZE
359 #undef MBLOCK_SIZE
360 #include "rts/storage/Block.h" /* For Bdescr() */
361
362
363 #define MyCapability() (BaseReg - OFFSET_Capability_r)
364
365 /* -------------------------------------------------------------------------
366 Info tables
367 ------------------------------------------------------------------------- */
368
369 #if defined(PROFILING)
370 #define PROF_HDR_FIELDS(w_,hdr1,hdr2) \
371 w_ hdr1, \
372 w_ hdr2,
373 #else
374 #define PROF_HDR_FIELDS(w_,hdr1,hdr2) /* nothing */
375 #endif
376
377 /* -------------------------------------------------------------------------
378 Allocation and garbage collection
379 ------------------------------------------------------------------------- */
380
381 /*
382 * ALLOC_PRIM is for allocating memory on the heap for a primitive
383 * object. It is used all over PrimOps.cmm.
384 *
385 * We make the simplifying assumption that the "admin" part of a
386 * primitive closure is just the header when calculating sizes for
387 * ticky-ticky. It's not clear whether eg. the size field of an array
388 * should be counted as "admin", or the various fields of a BCO.
389 */
390 #define ALLOC_PRIM(bytes) \
391 HP_CHK_GEN_TICKY(bytes); \
392 TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
393 CCCS_ALLOC(bytes);
394
395 #define HEAP_CHECK(bytes,failure) \
396 TICK_BUMP(HEAP_CHK_ctr); \
397 Hp = Hp + (bytes); \
398 if (Hp > HpLim) { HpAlloc = (bytes); failure; } \
399 TICK_ALLOC_HEAP_NOCTR(bytes);
400
401 #define ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,failure) \
402 HEAP_CHECK(bytes,failure) \
403 TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
404 CCCS_ALLOC(bytes);
405
406 #define ALLOC_PRIM_(bytes,fun) \
407 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM(fun));
408
409 #define ALLOC_PRIM_P(bytes,fun,arg) \
410 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_P(fun,arg));
411
412 #define ALLOC_PRIM_N(bytes,fun,arg) \
413 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_N(fun,arg));
414
415 /* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */
416 #define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS)
417
418 #define HP_CHK_GEN_TICKY(bytes) \
419 HP_CHK_GEN(bytes); \
420 TICK_ALLOC_HEAP_NOCTR(bytes);
421
422 #define HP_CHK_P(bytes, fun, arg) \
423 HEAP_CHECK(bytes, GC_PRIM_P(fun,arg))
424
425 // TODO I'm not seeing where ALLOC_P_TICKY is used; can it be removed?
426 // -NSF March 2013
427 #define ALLOC_P_TICKY(bytes, fun, arg) \
428 HP_CHK_P(bytes); \
429 TICK_ALLOC_HEAP_NOCTR(bytes);
430
431 #define CHECK_GC() \
432 (bdescr_link(CurrentNursery) == NULL || \
433 generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim]))
434
435 // allocate() allocates from the nursery, so we check to see
436 // whether the nursery is nearly empty in any function that uses
437 // allocate() - this includes many of the primops.
438 //
439 // HACK alert: the __L__ stuff is here to coax the common-block
440 // eliminator into commoning up the call stg_gc_noregs() with the same
441 // code that gets generated by a STK_CHK_GEN() in the same proc. We
442 // also need an if (0) { goto __L__; } so that the __L__ label isn't
443 // optimised away by the control-flow optimiser prior to common-block
444 // elimination (it will be optimised away later).
445 //
446 // This saves some code in gmp-wrappers.cmm where we have lots of
447 // MAYBE_GC() in the same proc as STK_CHK_GEN().
448 //
449 #define MAYBE_GC(retry) \
450 if (CHECK_GC()) { \
451 HpAlloc = 0; \
452 goto __L__; \
453 __L__: \
454 call stg_gc_noregs(); \
455 goto retry; \
456 } \
457 if (0) { goto __L__; }
458
459 #define GC_PRIM(fun) \
460 jump stg_gc_prim(fun);
461
462 // Version of GC_PRIM for use in low-level Cmm. We can call
463 // stg_gc_prim, because it takes one argument and therefore has a
464 // platform-independent calling convention (Note [Syntax of .cmm
465 // files] in CmmParse.y).
466 #define GC_PRIM_LL(fun) \
467 R1 = fun; \
468 jump stg_gc_prim [R1];
469
470 // We pass the fun as the second argument, because the arg is
471 // usually already in the first argument position (R1), so this
472 // avoids moving it to a different register / stack slot.
473 #define GC_PRIM_N(fun,arg) \
474 jump stg_gc_prim_n(arg,fun);
475
476 #define GC_PRIM_P(fun,arg) \
477 jump stg_gc_prim_p(arg,fun);
478
479 #define GC_PRIM_P_LL(fun,arg) \
480 R1 = arg; \
481 R2 = fun; \
482 jump stg_gc_prim_p_ll [R1,R2];
483
484 #define GC_PRIM_PP(fun,arg1,arg2) \
485 jump stg_gc_prim_pp(arg1,arg2,fun);
486
487 #define MAYBE_GC_(fun) \
488 if (CHECK_GC()) { \
489 HpAlloc = 0; \
490 GC_PRIM(fun) \
491 }
492
493 #define MAYBE_GC_N(fun,arg) \
494 if (CHECK_GC()) { \
495 HpAlloc = 0; \
496 GC_PRIM_N(fun,arg) \
497 }
498
499 #define MAYBE_GC_P(fun,arg) \
500 if (CHECK_GC()) { \
501 HpAlloc = 0; \
502 GC_PRIM_P(fun,arg) \
503 }
504
505 #define MAYBE_GC_PP(fun,arg1,arg2) \
506 if (CHECK_GC()) { \
507 HpAlloc = 0; \
508 GC_PRIM_PP(fun,arg1,arg2) \
509 }
510
511 #define STK_CHK_LL(n, fun) \
512 TICK_BUMP(STK_CHK_ctr); \
513 if (Sp - (n) < SpLim) { \
514 GC_PRIM_LL(fun) \
515 }
516
517 #define STK_CHK_P_LL(n, fun, arg) \
518 TICK_BUMP(STK_CHK_ctr); \
519 if (Sp - (n) < SpLim) { \
520 GC_PRIM_P_LL(fun,arg) \
521 }
522
523 #define STK_CHK_PP(n, fun, arg1, arg2) \
524 TICK_BUMP(STK_CHK_ctr); \
525 if (Sp - (n) < SpLim) { \
526 GC_PRIM_PP(fun,arg1,arg2) \
527 }
528
529 #define STK_CHK_ENTER(n, closure) \
530 TICK_BUMP(STK_CHK_ctr); \
531 if (Sp - (n) < SpLim) { \
532 jump __stg_gc_enter_1(closure); \
533 }
534
535 // A funky heap check used by AutoApply.cmm
536
537 #define HP_CHK_NP_ASSIGN_SP0(size,f) \
538 HEAP_CHECK(size, Sp(0) = f; jump __stg_gc_enter_1 [R1];)
539
540 /* -----------------------------------------------------------------------------
541 Closure headers
542 -------------------------------------------------------------------------- */
543
544 /*
545 * This is really ugly, since we don't do the rest of StgHeader this
546 * way. The problem is that values from DerivedConstants.h cannot be
547 * dependent on the way (SMP, PROF etc.). For SIZEOF_StgHeader we get
548 * the value from GHC, but it seems like too much trouble to do that
549 * for StgThunkHeader.
550 */
551 #define SIZEOF_StgThunkHeader SIZEOF_StgHeader+SIZEOF_StgSMPThunkHeader
552
553 #define StgThunk_payload(__ptr__,__ix__) \
554 W_[__ptr__+SIZEOF_StgThunkHeader+ WDS(__ix__)]
555
556 /* -----------------------------------------------------------------------------
557 Closures
558 -------------------------------------------------------------------------- */
559
560 /* The offset of the payload of an array */
561 #define BYTE_ARR_CTS(arr) ((arr) + SIZEOF_StgArrBytes)
562
563 /* The number of words allocated in an array payload */
564 #define BYTE_ARR_WDS(arr) ROUNDUP_BYTES_TO_WDS(StgArrBytes_bytes(arr))
565
566 /* Getting/setting the info pointer of a closure */
567 #define SET_INFO(p,info) StgHeader_info(p) = info
568 #define GET_INFO(p) StgHeader_info(p)
569
570 /* Determine the size of an ordinary closure from its info table */
571 #define sizeW_fromITBL(itbl) \
572 SIZEOF_StgHeader + WDS(%INFO_PTRS(itbl)) + WDS(%INFO_NPTRS(itbl))
573
574 /* NB. duplicated from InfoTables.h! */
575 #define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK)
576 #define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT)
577
578 /* Debugging macros */
579 #define LOOKS_LIKE_INFO_PTR(p) \
580 ((p) != NULL && \
581 LOOKS_LIKE_INFO_PTR_NOT_NULL(p))
582
583 #define LOOKS_LIKE_INFO_PTR_NOT_NULL(p) \
584 ( (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \
585 (TO_W_(%INFO_TYPE(%STD_INFO(p))) < N_CLOSURE_TYPES))
586
587 #define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p))))
588
589 /*
590 * The layout of the StgFunInfoExtra part of an info table changes
591 * depending on TABLES_NEXT_TO_CODE. So we define field access
592 * macros which use the appropriate version here:
593 */
594 #if defined(TABLES_NEXT_TO_CODE)
595 /*
596 * when TABLES_NEXT_TO_CODE, slow_apply is stored as an offset
597 * instead of the normal pointer.
598 */
599
600 #define StgFunInfoExtra_slow_apply(fun_info) \
601 (TO_W_(StgFunInfoExtraRev_slow_apply_offset(fun_info)) \
602 + (fun_info) + SIZEOF_StgFunInfoExtraRev + SIZEOF_StgInfoTable)
603
604 #define StgFunInfoExtra_fun_type(i) StgFunInfoExtraRev_fun_type(i)
605 #define StgFunInfoExtra_arity(i) StgFunInfoExtraRev_arity(i)
606 #define StgFunInfoExtra_bitmap(i) StgFunInfoExtraRev_bitmap(i)
607 #else
608 #define StgFunInfoExtra_slow_apply(i) StgFunInfoExtraFwd_slow_apply(i)
609 #define StgFunInfoExtra_fun_type(i) StgFunInfoExtraFwd_fun_type(i)
610 #define StgFunInfoExtra_arity(i) StgFunInfoExtraFwd_arity(i)
611 #define StgFunInfoExtra_bitmap(i) StgFunInfoExtraFwd_bitmap(i)
612 #endif
613
614 #define mutArrCardMask ((1 << MUT_ARR_PTRS_CARD_BITS) - 1)
615 #define mutArrPtrCardDown(i) ((i) >> MUT_ARR_PTRS_CARD_BITS)
616 #define mutArrPtrCardUp(i) (((i) + mutArrCardMask) >> MUT_ARR_PTRS_CARD_BITS)
617 #define mutArrPtrsCardWords(n) ROUNDUP_BYTES_TO_WDS(mutArrPtrCardUp(n))
618
619 #if defined(PROFILING) || (!defined(THREADED_RTS) && defined(DEBUG))
620 #define OVERWRITING_CLOSURE(c) foreign "C" overwritingClosure(c "ptr")
621 #define OVERWRITING_CLOSURE_OFS(c,n) \
622 foreign "C" overwritingClosureOfs(c "ptr", n)
623 #else
624 #define OVERWRITING_CLOSURE(c) /* nothing */
625 #define OVERWRITING_CLOSURE_OFS(c,n) /* nothing */
626 #endif
627
628 #if defined(THREADED_RTS)
629 #define prim_write_barrier prim %write_barrier()
630 #else
631 #define prim_write_barrier /* nothing */
632 #endif
633
634 /* -----------------------------------------------------------------------------
635 Ticky macros
636 -------------------------------------------------------------------------- */
637
638 #if defined(TICKY_TICKY)
639 #define TICK_BUMP_BY(ctr,n) CLong[ctr] = CLong[ctr] + n
640 #else
641 #define TICK_BUMP_BY(ctr,n) /* nothing */
642 #endif
643
644 #define TICK_BUMP(ctr) TICK_BUMP_BY(ctr,1)
645
646 #define TICK_ENT_DYN_IND() TICK_BUMP(ENT_DYN_IND_ctr)
647 #define TICK_ENT_DYN_THK() TICK_BUMP(ENT_DYN_THK_ctr)
648 #define TICK_ENT_VIA_NODE() TICK_BUMP(ENT_VIA_NODE_ctr)
649 #define TICK_ENT_STATIC_IND() TICK_BUMP(ENT_STATIC_IND_ctr)
650 #define TICK_ENT_PERM_IND() TICK_BUMP(ENT_PERM_IND_ctr)
651 #define TICK_ENT_PAP() TICK_BUMP(ENT_PAP_ctr)
652 #define TICK_ENT_AP() TICK_BUMP(ENT_AP_ctr)
653 #define TICK_ENT_AP_STACK() TICK_BUMP(ENT_AP_STACK_ctr)
654 #define TICK_ENT_BH() TICK_BUMP(ENT_BH_ctr)
655 #define TICK_ENT_LNE() TICK_BUMP(ENT_LNE_ctr)
656 #define TICK_UNKNOWN_CALL() TICK_BUMP(UNKNOWN_CALL_ctr)
657 #define TICK_UPDF_PUSHED() TICK_BUMP(UPDF_PUSHED_ctr)
658 #define TICK_CATCHF_PUSHED() TICK_BUMP(CATCHF_PUSHED_ctr)
659 #define TICK_UPDF_OMITTED() TICK_BUMP(UPDF_OMITTED_ctr)
660 #define TICK_UPD_NEW_IND() TICK_BUMP(UPD_NEW_IND_ctr)
661 #define TICK_UPD_NEW_PERM_IND() TICK_BUMP(UPD_NEW_PERM_IND_ctr)
662 #define TICK_UPD_OLD_IND() TICK_BUMP(UPD_OLD_IND_ctr)
663 #define TICK_UPD_OLD_PERM_IND() TICK_BUMP(UPD_OLD_PERM_IND_ctr)
664
665 #define TICK_SLOW_CALL_FUN_TOO_FEW() TICK_BUMP(SLOW_CALL_FUN_TOO_FEW_ctr)
666 #define TICK_SLOW_CALL_FUN_CORRECT() TICK_BUMP(SLOW_CALL_FUN_CORRECT_ctr)
667 #define TICK_SLOW_CALL_FUN_TOO_MANY() TICK_BUMP(SLOW_CALL_FUN_TOO_MANY_ctr)
668 #define TICK_SLOW_CALL_PAP_TOO_FEW() TICK_BUMP(SLOW_CALL_PAP_TOO_FEW_ctr)
669 #define TICK_SLOW_CALL_PAP_CORRECT() TICK_BUMP(SLOW_CALL_PAP_CORRECT_ctr)
670 #define TICK_SLOW_CALL_PAP_TOO_MANY() TICK_BUMP(SLOW_CALL_PAP_TOO_MANY_ctr)
671
672 #define TICK_SLOW_CALL_fast_v16() TICK_BUMP(SLOW_CALL_fast_v16_ctr)
673 #define TICK_SLOW_CALL_fast_v() TICK_BUMP(SLOW_CALL_fast_v_ctr)
674 #define TICK_SLOW_CALL_fast_p() TICK_BUMP(SLOW_CALL_fast_p_ctr)
675 #define TICK_SLOW_CALL_fast_pv() TICK_BUMP(SLOW_CALL_fast_pv_ctr)
676 #define TICK_SLOW_CALL_fast_pp() TICK_BUMP(SLOW_CALL_fast_pp_ctr)
677 #define TICK_SLOW_CALL_fast_ppv() TICK_BUMP(SLOW_CALL_fast_ppv_ctr)
678 #define TICK_SLOW_CALL_fast_ppp() TICK_BUMP(SLOW_CALL_fast_ppp_ctr)
679 #define TICK_SLOW_CALL_fast_pppv() TICK_BUMP(SLOW_CALL_fast_pppv_ctr)
680 #define TICK_SLOW_CALL_fast_pppp() TICK_BUMP(SLOW_CALL_fast_pppp_ctr)
681 #define TICK_SLOW_CALL_fast_ppppp() TICK_BUMP(SLOW_CALL_fast_ppppp_ctr)
682 #define TICK_SLOW_CALL_fast_pppppp() TICK_BUMP(SLOW_CALL_fast_pppppp_ctr)
683 #define TICK_VERY_SLOW_CALL() TICK_BUMP(VERY_SLOW_CALL_ctr)
684
685 /* NOTE: TICK_HISTO_BY and TICK_HISTO
686 currently have no effect.
687 The old code for it didn't typecheck and I
688 just commented it out to get ticky to work.
689 - krc 1/2007 */
690
691 #define TICK_HISTO_BY(histo,n,i) /* nothing */
692
693 #define TICK_HISTO(histo,n) TICK_HISTO_BY(histo,n,1)
694
695 /* An unboxed tuple with n components. */
696 #define TICK_RET_UNBOXED_TUP(n) \
697 TICK_BUMP(RET_UNBOXED_TUP_ctr++); \
698 TICK_HISTO(RET_UNBOXED_TUP,n)
699
700 /*
701 * A slow call with n arguments. In the unevald case, this call has
702 * already been counted once, so don't count it again.
703 */
704 #define TICK_SLOW_CALL(n) \
705 TICK_BUMP(SLOW_CALL_ctr); \
706 TICK_HISTO(SLOW_CALL,n)
707
708 /*
709 * This slow call was found to be to an unevaluated function; undo the
710 * ticks we did in TICK_SLOW_CALL.
711 */
712 #define TICK_SLOW_CALL_UNEVALD(n) \
713 TICK_BUMP(SLOW_CALL_UNEVALD_ctr); \
714 TICK_BUMP_BY(SLOW_CALL_ctr,-1); \
715 TICK_HISTO_BY(SLOW_CALL,n,-1);
716
717 /* Updating a closure with a new CON */
718 #define TICK_UPD_CON_IN_NEW(n) \
719 TICK_BUMP(UPD_CON_IN_NEW_ctr); \
720 TICK_HISTO(UPD_CON_IN_NEW,n)
721
722 #define TICK_ALLOC_HEAP_NOCTR(bytes) \
723 TICK_BUMP(ALLOC_RTS_ctr); \
724 TICK_BUMP_BY(ALLOC_RTS_tot,bytes)
725
726 /* -----------------------------------------------------------------------------
727 Saving and restoring STG registers
728
729 STG registers must be saved around a C call, just in case the STG
730 register is mapped to a caller-saves machine register. Normally we
731 don't need to worry about this the code generator has already
732 loaded any live STG registers into variables for us, but in
733 hand-written low-level Cmm code where we don't know which registers
734 are live, we might have to save them all.
735 -------------------------------------------------------------------------- */
736
737 #define SAVE_STGREGS \
738 W_ r1, r2, r3, r4, r5, r6, r7, r8; \
739 F_ f1, f2, f3, f4, f5, f6; \
740 D_ d1, d2, d3, d4, d5, d6; \
741 L_ l1; \
742 \
743 r1 = R1; \
744 r2 = R2; \
745 r3 = R3; \
746 r4 = R4; \
747 r5 = R5; \
748 r6 = R6; \
749 r7 = R7; \
750 r8 = R8; \
751 \
752 f1 = F1; \
753 f2 = F2; \
754 f3 = F3; \
755 f4 = F4; \
756 f5 = F5; \
757 f6 = F6; \
758 \
759 d1 = D1; \
760 d2 = D2; \
761 d3 = D3; \
762 d4 = D4; \
763 d5 = D5; \
764 d6 = D6; \
765 \
766 l1 = L1;
767
768
769 #define RESTORE_STGREGS \
770 R1 = r1; \
771 R2 = r2; \
772 R3 = r3; \
773 R4 = r4; \
774 R5 = r5; \
775 R6 = r6; \
776 R7 = r7; \
777 R8 = r8; \
778 \
779 F1 = f1; \
780 F2 = f2; \
781 F3 = f3; \
782 F4 = f4; \
783 F5 = f5; \
784 F6 = f6; \
785 \
786 D1 = d1; \
787 D2 = d2; \
788 D3 = d3; \
789 D4 = d4; \
790 D5 = d5; \
791 D6 = d6; \
792 \
793 L1 = l1;
794
795 /* -----------------------------------------------------------------------------
796 Misc junk
797 -------------------------------------------------------------------------- */
798
799 #define NO_TREC stg_NO_TREC_closure
800 #define END_TSO_QUEUE stg_END_TSO_QUEUE_closure
801 #define STM_AWOKEN stg_STM_AWOKEN_closure
802 #define END_INVARIANT_CHECK_QUEUE stg_END_INVARIANT_CHECK_QUEUE_closure
803
804 #define recordMutableCap(p, gen) \
805 W_ __bd; \
806 W_ mut_list; \
807 mut_list = Capability_mut_lists(MyCapability()) + WDS(gen); \
808 __bd = W_[mut_list]; \
809 if (bdescr_free(__bd) >= bdescr_start(__bd) + BLOCK_SIZE) { \
810 W_ __new_bd; \
811 ("ptr" __new_bd) = foreign "C" allocBlock_lock(); \
812 bdescr_link(__new_bd) = __bd; \
813 __bd = __new_bd; \
814 W_[mut_list] = __bd; \
815 } \
816 W_ free; \
817 free = bdescr_free(__bd); \
818 W_[free] = p; \
819 bdescr_free(__bd) = free + WDS(1);
820
821 #define recordMutable(p) \
822 P_ __p; \
823 W_ __bd; \
824 W_ __gen; \
825 __p = p; \
826 __bd = Bdescr(__p); \
827 __gen = TO_W_(bdescr_gen_no(__bd)); \
828 if (__gen > 0) { recordMutableCap(__p, __gen); }
829
830 /* -----------------------------------------------------------------------------
831 Arrays
832 -------------------------------------------------------------------------- */
833
834 /* Complete function body for the clone family of (mutable) array ops.
835 Defined as a macro to avoid function call overhead or code
836 duplication. */
837 #define cloneArray(info, src, offset, n) \
838 W_ words, size; \
839 gcptr dst, dst_p, src_p; \
840 \
841 again: MAYBE_GC(again); \
842 \
843 size = n + mutArrPtrsCardWords(n); \
844 words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; \
845 ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
846 TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0); \
847 \
848 SET_HDR(dst, info, CCCS); \
849 StgMutArrPtrs_ptrs(dst) = n; \
850 StgMutArrPtrs_size(dst) = size; \
851 \
852 dst_p = dst + SIZEOF_StgMutArrPtrs; \
853 src_p = src + SIZEOF_StgMutArrPtrs + WDS(offset); \
854 prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
855 \
856 return (dst);
857
858 #define copyArray(src, src_off, dst, dst_off, n) \
859 W_ dst_elems_p, dst_p, src_p, dst_cards_p, bytes; \
860 \
861 if ((n) != 0) { \
862 SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
863 \
864 dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
865 dst_p = dst_elems_p + WDS(dst_off); \
866 src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
867 bytes = WDS(n); \
868 \
869 prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
870 \
871 dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
872 setCards(dst_cards_p, dst_off, n); \
873 } \
874 \
875 return ();
876
877 #define copyMutableArray(src, src_off, dst, dst_off, n) \
878 W_ dst_elems_p, dst_p, src_p, dst_cards_p, bytes; \
879 \
880 if ((n) != 0) { \
881 SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
882 \
883 dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
884 dst_p = dst_elems_p + WDS(dst_off); \
885 src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
886 bytes = WDS(n); \
887 \
888 if ((src) == (dst)) { \
889 prim %memmove(dst_p, src_p, bytes, SIZEOF_W); \
890 } else { \
891 prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
892 } \
893 \
894 dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
895 setCards(dst_cards_p, dst_off, n); \
896 } \
897 \
898 return ();
899
900 /*
901 * Set the cards in the cards table pointed to by dst_cards_p for an
902 * update to n elements, starting at element dst_off.
903 */
904 #define setCards(dst_cards_p, dst_off, n) \
905 W_ __start_card, __end_card, __cards; \
906 __start_card = mutArrPtrCardDown(dst_off); \
907 __end_card = mutArrPtrCardDown((dst_off) + (n) - 1); \
908 __cards = __end_card - __start_card + 1; \
909 prim %memset((dst_cards_p) + __start_card, 1, __cards, 1);
910
911 /* Complete function body for the clone family of small (mutable)
912 array ops. Defined as a macro to avoid function call overhead or
913 code duplication. */
914 #define cloneSmallArray(info, src, offset, n) \
915 W_ words, size; \
916 gcptr dst, dst_p, src_p; \
917 \
918 again: MAYBE_GC(again); \
919 \
920 words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n; \
921 ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
922 TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0); \
923 \
924 SET_HDR(dst, info, CCCS); \
925 StgSmallMutArrPtrs_ptrs(dst) = n; \
926 \
927 dst_p = dst + SIZEOF_StgSmallMutArrPtrs; \
928 src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(offset); \
929 prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
930 \
931 return (dst);