stg_floatToWord32zh: zero-extend the Word32 (#16617)
[ghc.git] / includes / Cmm.h
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The University of Glasgow 2004-2013
4 *
5 * This file is included at the top of all .cmm source files (and
6 * *only* .cmm files). It defines a collection of useful macros for
7 * making .cmm code a bit less error-prone to write, and a bit easier
8 * on the eye for the reader.
9 *
10 * For the syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 *
12 * Accessing fields of structures defined in the RTS header files is
13 * done via automatically-generated macros in DerivedConstants.h. For
14 * example, where previously we used
15 *
16 * CurrentTSO->what_next = x
17 *
18 * in C-- we now use
19 *
20 * StgTSO_what_next(CurrentTSO) = x
21 *
22 * where the StgTSO_what_next() macro is automatically generated by
23 * mkDerivedConstants.c. If you need to access a field that doesn't
24 * already have a macro, edit that file (it's pretty self-explanatory).
25 *
26 * -------------------------------------------------------------------------- */
27
28 #pragma once
29
30 /*
31 * In files that are included into both C and C-- (and perhaps
32 * Haskell) sources, we sometimes need to conditionally compile bits
33 * depending on the language. CMINUSMINUS==1 in .cmm sources:
34 */
35 #define CMINUSMINUS 1
36
37 #include "ghcconfig.h"
38
39 /* -----------------------------------------------------------------------------
40 Types
41
42 The following synonyms for C-- types are declared here:
43
44 I8, I16, I32, I64 MachRep-style names for convenience
45
46 W_ is shorthand for the word type (== StgWord)
47 F_ shorthand for float (F_ == StgFloat == C's float)
48 D_ shorthand for double (D_ == StgDouble == C's double)
49
50 CInt has the same size as an int in C on this platform
51 CLong has the same size as a long in C on this platform
52 CBool has the same size as a bool in C on this platform
53
54 --------------------------------------------------------------------------- */
55
56 #define I8 bits8
57 #define I16 bits16
58 #define I32 bits32
59 #define I64 bits64
60 #define P_ gcptr
61
62 #if SIZEOF_VOID_P == 4
63 #define W_ bits32
64 /* Maybe it's better to include MachDeps.h */
65 #define TAG_BITS 2
66 #elif SIZEOF_VOID_P == 8
67 #define W_ bits64
68 /* Maybe it's better to include MachDeps.h */
69 #define TAG_BITS 3
70 #else
71 #error Unknown word size
72 #endif
73
74 /*
75 * The RTS must sometimes UNTAG a pointer before dereferencing it.
76 * See the wiki page commentary/rts/haskell-execution/pointer-tagging
77 */
78 #define TAG_MASK ((1 << TAG_BITS) - 1)
79 #define UNTAG(p) (p & ~TAG_MASK)
80 #define GETTAG(p) (p & TAG_MASK)
81
82 #if SIZEOF_INT == 4
83 #define CInt bits32
84 #elif SIZEOF_INT == 8
85 #define CInt bits64
86 #else
87 #error Unknown int size
88 #endif
89
90 #if SIZEOF_LONG == 4
91 #define CLong bits32
92 #elif SIZEOF_LONG == 8
93 #define CLong bits64
94 #else
95 #error Unknown long size
96 #endif
97
98 #define CBool bits8
99
100 #define F_ float32
101 #define D_ float64
102 #define L_ bits64
103 #define V16_ bits128
104 #define V32_ bits256
105 #define V64_ bits512
106
107 #define SIZEOF_StgDouble 8
108 #define SIZEOF_StgWord64 8
109
110 /* -----------------------------------------------------------------------------
111 Misc useful stuff
112 -------------------------------------------------------------------------- */
113
114 #define ccall foreign "C"
115
116 #define NULL (0::W_)
117
118 #define STRING(name,str) \
119 section "rodata" { \
120 name : bits8[] str; \
121 } \
122
123 #if defined(TABLES_NEXT_TO_CODE)
124 #define RET_LBL(f) f##_info
125 #else
126 #define RET_LBL(f) f##_ret
127 #endif
128
129 #if defined(TABLES_NEXT_TO_CODE)
130 #define ENTRY_LBL(f) f##_info
131 #else
132 #define ENTRY_LBL(f) f##_entry
133 #endif
134
135 /* -----------------------------------------------------------------------------
136 Byte/word macros
137
138 Everything in C-- is in byte offsets (well, most things). We use
139 some macros to allow us to express offsets in words and to try to
140 avoid byte/word confusion.
141 -------------------------------------------------------------------------- */
142
143 #define SIZEOF_W SIZEOF_VOID_P
144 #define W_MASK (SIZEOF_W-1)
145
146 #if SIZEOF_W == 4
147 #define W_SHIFT 2
148 #elif SIZEOF_W == 8
149 #define W_SHIFT 3
150 #endif
151
152 /* Converting quantities of words to bytes */
153 #define WDS(n) ((n)*SIZEOF_W)
154
155 /*
156 * Converting quantities of bytes to words
157 * NB. these work on *unsigned* values only
158 */
159 #define BYTES_TO_WDS(n) ((n) / SIZEOF_W)
160 #define ROUNDUP_BYTES_TO_WDS(n) (((n) + SIZEOF_W - 1) / SIZEOF_W)
161
162 /*
163 * TO_W_(n) and TO_ZXW_(n) convert n to W_ type from a smaller type,
164 * with and without sign extension respectively
165 */
166 #if SIZEOF_W == 4
167 #define TO_I64(x) %sx64(x)
168 #define TO_W_(x) %sx32(x)
169 #define TO_ZXW_(x) %zx32(x)
170 #define HALF_W_(x) %lobits16(x)
171 #elif SIZEOF_W == 8
172 #define TO_I64(x) (x)
173 #define TO_W_(x) %sx64(x)
174 #define TO_ZXW_(x) %zx64(x)
175 #define HALF_W_(x) %lobits32(x)
176 #endif
177
178 #if SIZEOF_INT == 4 && SIZEOF_W == 8
179 #define W_TO_INT(x) %lobits32(x)
180 #elif SIZEOF_INT == SIZEOF_W
181 #define W_TO_INT(x) (x)
182 #endif
183
184 #if SIZEOF_LONG == 4 && SIZEOF_W == 8
185 #define W_TO_LONG(x) %lobits32(x)
186 #elif SIZEOF_LONG == SIZEOF_W
187 #define W_TO_LONG(x) (x)
188 #endif
189
190 /* -----------------------------------------------------------------------------
191 Atomic memory operations.
192 -------------------------------------------------------------------------- */
193
194 #if SIZEOF_W == 4
195 #define cmpxchgW cmpxchg32
196 #elif SIZEOF_W == 8
197 #define cmpxchgW cmpxchg64
198 #endif
199
200 /* -----------------------------------------------------------------------------
201 Heap/stack access, and adjusting the heap/stack pointers.
202 -------------------------------------------------------------------------- */
203
204 #define Sp(n) W_[Sp + WDS(n)]
205 #define Hp(n) W_[Hp + WDS(n)]
206
207 #define Sp_adj(n) Sp = Sp + WDS(n) /* pronounced "spadge" */
208 #define Hp_adj(n) Hp = Hp + WDS(n)
209
210 /* -----------------------------------------------------------------------------
211 Assertions and Debuggery
212 -------------------------------------------------------------------------- */
213
214 #if defined(DEBUG)
215 #define ASSERT(predicate) \
216 if (predicate) { \
217 /*null*/; \
218 } else { \
219 foreign "C" _assertFail(__FILE__, __LINE__) never returns; \
220 }
221 #else
222 #define ASSERT(p) /* nothing */
223 #endif
224
225 #if defined(DEBUG)
226 #define DEBUG_ONLY(s) s
227 #else
228 #define DEBUG_ONLY(s) /* nothing */
229 #endif
230
231 /*
232 * The IF_DEBUG macro is useful for debug messages that depend on one
233 * of the RTS debug options. For example:
234 *
235 * IF_DEBUG(RtsFlags_DebugFlags_apply,
236 * foreign "C" fprintf(stderr, stg_ap_0_ret_str));
237 *
238 * Note the syntax is slightly different to the C version of this macro.
239 */
240 #if defined(DEBUG)
241 #define IF_DEBUG(c,s) if (RtsFlags_DebugFlags_##c(RtsFlags) != 0::CBool) { s; }
242 #else
243 #define IF_DEBUG(c,s) /* nothing */
244 #endif
245
246 /* -----------------------------------------------------------------------------
247 Entering
248
249 It isn't safe to "enter" every closure. Functions in particular
250 have no entry code as such; their entry point contains the code to
251 apply the function.
252
253 ToDo: range should end in N_CLOSURE_TYPES-1, not N_CLOSURE_TYPES,
254 but switch doesn't allow us to use exprs there yet.
255
256 If R1 points to a tagged object it points either to
257 * A constructor.
258 * A function with arity <= TAG_MASK.
259 In both cases the right thing to do is to return.
260 Note: it is rather lucky that we can use the tag bits to do this
261 for both objects. Maybe it points to a brittle design?
262
263 Indirections can contain tagged pointers, so their tag is checked.
264 -------------------------------------------------------------------------- */
265
266 #if defined(PROFILING)
267
268 // When profiling, we cannot shortcut ENTER() by checking the tag,
269 // because LDV profiling relies on entering closures to mark them as
270 // "used".
271
272 #define LOAD_INFO(ret,x) \
273 info = %INFO_PTR(UNTAG(x));
274
275 #define UNTAG_IF_PROF(x) UNTAG(x)
276
277 #else
278
279 #define LOAD_INFO(ret,x) \
280 if (GETTAG(x) != 0) { \
281 ret(x); \
282 } \
283 info = %INFO_PTR(x);
284
285 #define UNTAG_IF_PROF(x) (x) /* already untagged */
286
287 #endif
288
289 // We need two versions of ENTER():
290 // - ENTER(x) takes the closure as an argument and uses return(),
291 // for use in civilized code where the stack is handled by GHC
292 //
293 // - ENTER_NOSTACK() where the closure is in R1, and returns are
294 // explicit jumps, for use when we are doing the stack management
295 // ourselves.
296
297 #if defined(PROFILING)
298 // See Note [Evaluating functions with profiling] in rts/Apply.cmm
299 #define ENTER(x) jump stg_ap_0_fast(x);
300 #else
301 #define ENTER(x) ENTER_(return,x)
302 #endif
303
304 #define ENTER_R1() ENTER_(RET_R1,R1)
305
306 #define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1]
307
308 #define ENTER_(ret,x) \
309 again: \
310 W_ info; \
311 LOAD_INFO(ret,x) \
312 switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \
313 (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \
314 case \
315 IND, \
316 IND_STATIC: \
317 { \
318 x = StgInd_indirectee(x); \
319 goto again; \
320 } \
321 case \
322 FUN, \
323 FUN_1_0, \
324 FUN_0_1, \
325 FUN_2_0, \
326 FUN_1_1, \
327 FUN_0_2, \
328 FUN_STATIC, \
329 BCO, \
330 PAP: \
331 { \
332 ret(x); \
333 } \
334 default: \
335 { \
336 x = UNTAG_IF_PROF(x); \
337 jump %ENTRY_CODE(info) (x); \
338 } \
339 }
340
341 // The FUN cases almost never happen: a pointer to a non-static FUN
342 // should always be tagged. This unfortunately isn't true for the
343 // interpreter right now, which leaves untagged FUNs on the stack.
344
345 /* -----------------------------------------------------------------------------
346 Constants.
347 -------------------------------------------------------------------------- */
348
349 #include "rts/Constants.h"
350 #include "DerivedConstants.h"
351 #include "rts/storage/ClosureTypes.h"
352 #include "rts/storage/FunTypes.h"
353 #include "rts/OSThreads.h"
354
355 /*
356 * Need MachRegs, because some of the RTS code is conditionally
357 * compiled based on REG_R1, REG_R2, etc.
358 */
359 #include "stg/RtsMachRegs.h"
360
361 #include "rts/prof/LDV.h"
362
363 #undef BLOCK_SIZE
364 #undef MBLOCK_SIZE
365 #include "rts/storage/Block.h" /* For Bdescr() */
366
367
368 #define MyCapability() (BaseReg - OFFSET_Capability_r)
369
370 /* -------------------------------------------------------------------------
371 Info tables
372 ------------------------------------------------------------------------- */
373
374 #if defined(PROFILING)
375 #define PROF_HDR_FIELDS(w_,hdr1,hdr2) \
376 w_ hdr1, \
377 w_ hdr2,
378 #else
379 #define PROF_HDR_FIELDS(w_,hdr1,hdr2) /* nothing */
380 #endif
381
382 /* -------------------------------------------------------------------------
383 Allocation and garbage collection
384 ------------------------------------------------------------------------- */
385
386 /*
387 * ALLOC_PRIM is for allocating memory on the heap for a primitive
388 * object. It is used all over PrimOps.cmm.
389 *
390 * We make the simplifying assumption that the "admin" part of a
391 * primitive closure is just the header when calculating sizes for
392 * ticky-ticky. It's not clear whether eg. the size field of an array
393 * should be counted as "admin", or the various fields of a BCO.
394 */
395 #define ALLOC_PRIM(bytes) \
396 HP_CHK_GEN_TICKY(bytes); \
397 TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
398 CCCS_ALLOC(bytes);
399
400 #define HEAP_CHECK(bytes,failure) \
401 TICK_BUMP(HEAP_CHK_ctr); \
402 Hp = Hp + (bytes); \
403 if (Hp > HpLim) { HpAlloc = (bytes); failure; } \
404 TICK_ALLOC_HEAP_NOCTR(bytes);
405
406 #define ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,failure) \
407 HEAP_CHECK(bytes,failure) \
408 TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
409 CCCS_ALLOC(bytes);
410
411 #define ALLOC_PRIM_(bytes,fun) \
412 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM(fun));
413
414 #define ALLOC_PRIM_P(bytes,fun,arg) \
415 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_P(fun,arg));
416
417 #define ALLOC_PRIM_N(bytes,fun,arg) \
418 ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_N(fun,arg));
419
420 /* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */
421 #define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS)
422
423 #define HP_CHK_GEN_TICKY(bytes) \
424 HP_CHK_GEN(bytes); \
425 TICK_ALLOC_HEAP_NOCTR(bytes);
426
427 #define HP_CHK_P(bytes, fun, arg) \
428 HEAP_CHECK(bytes, GC_PRIM_P(fun,arg))
429
430 // TODO I'm not seeing where ALLOC_P_TICKY is used; can it be removed?
431 // -NSF March 2013
432 #define ALLOC_P_TICKY(bytes, fun, arg) \
433 HP_CHK_P(bytes); \
434 TICK_ALLOC_HEAP_NOCTR(bytes);
435
436 #define CHECK_GC() \
437 (bdescr_link(CurrentNursery) == NULL || \
438 generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim]))
439
440 // allocate() allocates from the nursery, so we check to see
441 // whether the nursery is nearly empty in any function that uses
442 // allocate() - this includes many of the primops.
443 //
444 // HACK alert: the __L__ stuff is here to coax the common-block
445 // eliminator into commoning up the call stg_gc_noregs() with the same
446 // code that gets generated by a STK_CHK_GEN() in the same proc. We
447 // also need an if (0) { goto __L__; } so that the __L__ label isn't
448 // optimised away by the control-flow optimiser prior to common-block
449 // elimination (it will be optimised away later).
450 //
451 // This saves some code in gmp-wrappers.cmm where we have lots of
452 // MAYBE_GC() in the same proc as STK_CHK_GEN().
453 //
454 #define MAYBE_GC(retry) \
455 if (CHECK_GC()) { \
456 HpAlloc = 0; \
457 goto __L__; \
458 __L__: \
459 call stg_gc_noregs(); \
460 goto retry; \
461 } \
462 if (0) { goto __L__; }
463
464 #define GC_PRIM(fun) \
465 jump stg_gc_prim(fun);
466
467 // Version of GC_PRIM for use in low-level Cmm. We can call
468 // stg_gc_prim, because it takes one argument and therefore has a
469 // platform-independent calling convention (Note [Syntax of .cmm
470 // files] in CmmParse.y).
471 #define GC_PRIM_LL(fun) \
472 R1 = fun; \
473 jump stg_gc_prim [R1];
474
475 // We pass the fun as the second argument, because the arg is
476 // usually already in the first argument position (R1), so this
477 // avoids moving it to a different register / stack slot.
478 #define GC_PRIM_N(fun,arg) \
479 jump stg_gc_prim_n(arg,fun);
480
481 #define GC_PRIM_P(fun,arg) \
482 jump stg_gc_prim_p(arg,fun);
483
484 #define GC_PRIM_P_LL(fun,arg) \
485 R1 = arg; \
486 R2 = fun; \
487 jump stg_gc_prim_p_ll [R1,R2];
488
489 #define GC_PRIM_PP(fun,arg1,arg2) \
490 jump stg_gc_prim_pp(arg1,arg2,fun);
491
492 #define MAYBE_GC_(fun) \
493 if (CHECK_GC()) { \
494 HpAlloc = 0; \
495 GC_PRIM(fun) \
496 }
497
498 #define MAYBE_GC_N(fun,arg) \
499 if (CHECK_GC()) { \
500 HpAlloc = 0; \
501 GC_PRIM_N(fun,arg) \
502 }
503
504 #define MAYBE_GC_P(fun,arg) \
505 if (CHECK_GC()) { \
506 HpAlloc = 0; \
507 GC_PRIM_P(fun,arg) \
508 }
509
510 #define MAYBE_GC_PP(fun,arg1,arg2) \
511 if (CHECK_GC()) { \
512 HpAlloc = 0; \
513 GC_PRIM_PP(fun,arg1,arg2) \
514 }
515
516 #define STK_CHK_LL(n, fun) \
517 TICK_BUMP(STK_CHK_ctr); \
518 if (Sp - (n) < SpLim) { \
519 GC_PRIM_LL(fun) \
520 }
521
522 #define STK_CHK_P_LL(n, fun, arg) \
523 TICK_BUMP(STK_CHK_ctr); \
524 if (Sp - (n) < SpLim) { \
525 GC_PRIM_P_LL(fun,arg) \
526 }
527
528 #define STK_CHK_PP(n, fun, arg1, arg2) \
529 TICK_BUMP(STK_CHK_ctr); \
530 if (Sp - (n) < SpLim) { \
531 GC_PRIM_PP(fun,arg1,arg2) \
532 }
533
534 #define STK_CHK_ENTER(n, closure) \
535 TICK_BUMP(STK_CHK_ctr); \
536 if (Sp - (n) < SpLim) { \
537 jump __stg_gc_enter_1(closure); \
538 }
539
540 // A funky heap check used by AutoApply.cmm
541
542 #define HP_CHK_NP_ASSIGN_SP0(size,f) \
543 HEAP_CHECK(size, Sp(0) = f; jump __stg_gc_enter_1 [R1];)
544
545 /* -----------------------------------------------------------------------------
546 Closure headers
547 -------------------------------------------------------------------------- */
548
549 /*
550 * This is really ugly, since we don't do the rest of StgHeader this
551 * way. The problem is that values from DerivedConstants.h cannot be
552 * dependent on the way (SMP, PROF etc.). For SIZEOF_StgHeader we get
553 * the value from GHC, but it seems like too much trouble to do that
554 * for StgThunkHeader.
555 */
556 #define SIZEOF_StgThunkHeader SIZEOF_StgHeader+SIZEOF_StgSMPThunkHeader
557
558 #define StgThunk_payload(__ptr__,__ix__) \
559 W_[__ptr__+SIZEOF_StgThunkHeader+ WDS(__ix__)]
560
561 /* -----------------------------------------------------------------------------
562 Closures
563 -------------------------------------------------------------------------- */
564
565 /* The offset of the payload of an array */
566 #define BYTE_ARR_CTS(arr) ((arr) + SIZEOF_StgArrBytes)
567
568 /* The number of words allocated in an array payload */
569 #define BYTE_ARR_WDS(arr) ROUNDUP_BYTES_TO_WDS(StgArrBytes_bytes(arr))
570
571 /* Getting/setting the info pointer of a closure */
572 #define SET_INFO(p,info) StgHeader_info(p) = info
573 #define GET_INFO(p) StgHeader_info(p)
574
575 /* Determine the size of an ordinary closure from its info table */
576 #define sizeW_fromITBL(itbl) \
577 SIZEOF_StgHeader + WDS(%INFO_PTRS(itbl)) + WDS(%INFO_NPTRS(itbl))
578
579 /* NB. duplicated from InfoTables.h! */
580 #define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK)
581 #define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT)
582
583 /* Debugging macros */
584 #define LOOKS_LIKE_INFO_PTR(p) \
585 ((p) != NULL && \
586 LOOKS_LIKE_INFO_PTR_NOT_NULL(p))
587
588 #define LOOKS_LIKE_INFO_PTR_NOT_NULL(p) \
589 ( (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \
590 (TO_W_(%INFO_TYPE(%STD_INFO(p))) < N_CLOSURE_TYPES))
591
592 #define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p))))
593
594 /*
595 * The layout of the StgFunInfoExtra part of an info table changes
596 * depending on TABLES_NEXT_TO_CODE. So we define field access
597 * macros which use the appropriate version here:
598 */
599 #if defined(TABLES_NEXT_TO_CODE)
600 /*
601 * when TABLES_NEXT_TO_CODE, slow_apply is stored as an offset
602 * instead of the normal pointer.
603 */
604
605 #define StgFunInfoExtra_slow_apply(fun_info) \
606 (TO_W_(StgFunInfoExtraRev_slow_apply_offset(fun_info)) \
607 + (fun_info) + SIZEOF_StgFunInfoExtraRev + SIZEOF_StgInfoTable)
608
609 #define StgFunInfoExtra_fun_type(i) StgFunInfoExtraRev_fun_type(i)
610 #define StgFunInfoExtra_arity(i) StgFunInfoExtraRev_arity(i)
611 #define StgFunInfoExtra_bitmap(i) StgFunInfoExtraRev_bitmap(i)
612 #else
613 #define StgFunInfoExtra_slow_apply(i) StgFunInfoExtraFwd_slow_apply(i)
614 #define StgFunInfoExtra_fun_type(i) StgFunInfoExtraFwd_fun_type(i)
615 #define StgFunInfoExtra_arity(i) StgFunInfoExtraFwd_arity(i)
616 #define StgFunInfoExtra_bitmap(i) StgFunInfoExtraFwd_bitmap(i)
617 #endif
618
619 #define mutArrCardMask ((1 << MUT_ARR_PTRS_CARD_BITS) - 1)
620 #define mutArrPtrCardDown(i) ((i) >> MUT_ARR_PTRS_CARD_BITS)
621 #define mutArrPtrCardUp(i) (((i) + mutArrCardMask) >> MUT_ARR_PTRS_CARD_BITS)
622 #define mutArrPtrsCardWords(n) ROUNDUP_BYTES_TO_WDS(mutArrPtrCardUp(n))
623
624 #if defined(PROFILING) || (!defined(THREADED_RTS) && defined(DEBUG))
625 #define OVERWRITING_CLOSURE_SIZE(c, size) foreign "C" overwritingClosureSize(c "ptr", size)
626 #define OVERWRITING_CLOSURE(c) foreign "C" overwritingClosure(c "ptr")
627 #define OVERWRITING_CLOSURE_OFS(c,n) foreign "C" overwritingClosureOfs(c "ptr", n)
628 #else
629 #define OVERWRITING_CLOSURE_SIZE(c, size) /* nothing */
630 #define OVERWRITING_CLOSURE(c) /* nothing */
631 #define OVERWRITING_CLOSURE_OFS(c,n) /* nothing */
632 #endif
633
634 #if defined(THREADED_RTS)
635 #define prim_write_barrier prim %write_barrier()
636 #else
637 #define prim_write_barrier /* nothing */
638 #endif
639
640 /* -----------------------------------------------------------------------------
641 Ticky macros
642 -------------------------------------------------------------------------- */
643
644 #if defined(TICKY_TICKY)
645 #define TICK_BUMP_BY(ctr,n) CLong[ctr] = CLong[ctr] + n
646 #else
647 #define TICK_BUMP_BY(ctr,n) /* nothing */
648 #endif
649
650 #define TICK_BUMP(ctr) TICK_BUMP_BY(ctr,1)
651
652 #define TICK_ENT_DYN_IND() TICK_BUMP(ENT_DYN_IND_ctr)
653 #define TICK_ENT_DYN_THK() TICK_BUMP(ENT_DYN_THK_ctr)
654 #define TICK_ENT_VIA_NODE() TICK_BUMP(ENT_VIA_NODE_ctr)
655 #define TICK_ENT_STATIC_IND() TICK_BUMP(ENT_STATIC_IND_ctr)
656 #define TICK_ENT_PERM_IND() TICK_BUMP(ENT_PERM_IND_ctr)
657 #define TICK_ENT_PAP() TICK_BUMP(ENT_PAP_ctr)
658 #define TICK_ENT_AP() TICK_BUMP(ENT_AP_ctr)
659 #define TICK_ENT_AP_STACK() TICK_BUMP(ENT_AP_STACK_ctr)
660 #define TICK_ENT_BH() TICK_BUMP(ENT_BH_ctr)
661 #define TICK_ENT_LNE() TICK_BUMP(ENT_LNE_ctr)
662 #define TICK_UNKNOWN_CALL() TICK_BUMP(UNKNOWN_CALL_ctr)
663 #define TICK_UPDF_PUSHED() TICK_BUMP(UPDF_PUSHED_ctr)
664 #define TICK_CATCHF_PUSHED() TICK_BUMP(CATCHF_PUSHED_ctr)
665 #define TICK_UPDF_OMITTED() TICK_BUMP(UPDF_OMITTED_ctr)
666 #define TICK_UPD_NEW_IND() TICK_BUMP(UPD_NEW_IND_ctr)
667 #define TICK_UPD_NEW_PERM_IND() TICK_BUMP(UPD_NEW_PERM_IND_ctr)
668 #define TICK_UPD_OLD_IND() TICK_BUMP(UPD_OLD_IND_ctr)
669 #define TICK_UPD_OLD_PERM_IND() TICK_BUMP(UPD_OLD_PERM_IND_ctr)
670
671 #define TICK_SLOW_CALL_FUN_TOO_FEW() TICK_BUMP(SLOW_CALL_FUN_TOO_FEW_ctr)
672 #define TICK_SLOW_CALL_FUN_CORRECT() TICK_BUMP(SLOW_CALL_FUN_CORRECT_ctr)
673 #define TICK_SLOW_CALL_FUN_TOO_MANY() TICK_BUMP(SLOW_CALL_FUN_TOO_MANY_ctr)
674 #define TICK_SLOW_CALL_PAP_TOO_FEW() TICK_BUMP(SLOW_CALL_PAP_TOO_FEW_ctr)
675 #define TICK_SLOW_CALL_PAP_CORRECT() TICK_BUMP(SLOW_CALL_PAP_CORRECT_ctr)
676 #define TICK_SLOW_CALL_PAP_TOO_MANY() TICK_BUMP(SLOW_CALL_PAP_TOO_MANY_ctr)
677
678 #define TICK_SLOW_CALL_fast_v16() TICK_BUMP(SLOW_CALL_fast_v16_ctr)
679 #define TICK_SLOW_CALL_fast_v() TICK_BUMP(SLOW_CALL_fast_v_ctr)
680 #define TICK_SLOW_CALL_fast_p() TICK_BUMP(SLOW_CALL_fast_p_ctr)
681 #define TICK_SLOW_CALL_fast_pv() TICK_BUMP(SLOW_CALL_fast_pv_ctr)
682 #define TICK_SLOW_CALL_fast_pp() TICK_BUMP(SLOW_CALL_fast_pp_ctr)
683 #define TICK_SLOW_CALL_fast_ppv() TICK_BUMP(SLOW_CALL_fast_ppv_ctr)
684 #define TICK_SLOW_CALL_fast_ppp() TICK_BUMP(SLOW_CALL_fast_ppp_ctr)
685 #define TICK_SLOW_CALL_fast_pppv() TICK_BUMP(SLOW_CALL_fast_pppv_ctr)
686 #define TICK_SLOW_CALL_fast_pppp() TICK_BUMP(SLOW_CALL_fast_pppp_ctr)
687 #define TICK_SLOW_CALL_fast_ppppp() TICK_BUMP(SLOW_CALL_fast_ppppp_ctr)
688 #define TICK_SLOW_CALL_fast_pppppp() TICK_BUMP(SLOW_CALL_fast_pppppp_ctr)
689 #define TICK_VERY_SLOW_CALL() TICK_BUMP(VERY_SLOW_CALL_ctr)
690
691 /* NOTE: TICK_HISTO_BY and TICK_HISTO
692 currently have no effect.
693 The old code for it didn't typecheck and I
694 just commented it out to get ticky to work.
695 - krc 1/2007 */
696
697 #define TICK_HISTO_BY(histo,n,i) /* nothing */
698
699 #define TICK_HISTO(histo,n) TICK_HISTO_BY(histo,n,1)
700
701 /* An unboxed tuple with n components. */
702 #define TICK_RET_UNBOXED_TUP(n) \
703 TICK_BUMP(RET_UNBOXED_TUP_ctr++); \
704 TICK_HISTO(RET_UNBOXED_TUP,n)
705
706 /*
707 * A slow call with n arguments. In the unevald case, this call has
708 * already been counted once, so don't count it again.
709 */
710 #define TICK_SLOW_CALL(n) \
711 TICK_BUMP(SLOW_CALL_ctr); \
712 TICK_HISTO(SLOW_CALL,n)
713
714 /*
715 * This slow call was found to be to an unevaluated function; undo the
716 * ticks we did in TICK_SLOW_CALL.
717 */
718 #define TICK_SLOW_CALL_UNEVALD(n) \
719 TICK_BUMP(SLOW_CALL_UNEVALD_ctr); \
720 TICK_BUMP_BY(SLOW_CALL_ctr,-1); \
721 TICK_HISTO_BY(SLOW_CALL,n,-1);
722
723 /* Updating a closure with a new CON */
724 #define TICK_UPD_CON_IN_NEW(n) \
725 TICK_BUMP(UPD_CON_IN_NEW_ctr); \
726 TICK_HISTO(UPD_CON_IN_NEW,n)
727
728 #define TICK_ALLOC_HEAP_NOCTR(bytes) \
729 TICK_BUMP(ALLOC_RTS_ctr); \
730 TICK_BUMP_BY(ALLOC_RTS_tot,bytes)
731
732 /* -----------------------------------------------------------------------------
733 Saving and restoring STG registers
734
735 STG registers must be saved around a C call, just in case the STG
736 register is mapped to a caller-saves machine register. Normally we
737 don't need to worry about this the code generator has already
738 loaded any live STG registers into variables for us, but in
739 hand-written low-level Cmm code where we don't know which registers
740 are live, we might have to save them all.
741 -------------------------------------------------------------------------- */
742
743 #define SAVE_STGREGS \
744 W_ r1, r2, r3, r4, r5, r6, r7, r8; \
745 F_ f1, f2, f3, f4, f5, f6; \
746 D_ d1, d2, d3, d4, d5, d6; \
747 L_ l1; \
748 \
749 r1 = R1; \
750 r2 = R2; \
751 r3 = R3; \
752 r4 = R4; \
753 r5 = R5; \
754 r6 = R6; \
755 r7 = R7; \
756 r8 = R8; \
757 \
758 f1 = F1; \
759 f2 = F2; \
760 f3 = F3; \
761 f4 = F4; \
762 f5 = F5; \
763 f6 = F6; \
764 \
765 d1 = D1; \
766 d2 = D2; \
767 d3 = D3; \
768 d4 = D4; \
769 d5 = D5; \
770 d6 = D6; \
771 \
772 l1 = L1;
773
774
775 #define RESTORE_STGREGS \
776 R1 = r1; \
777 R2 = r2; \
778 R3 = r3; \
779 R4 = r4; \
780 R5 = r5; \
781 R6 = r6; \
782 R7 = r7; \
783 R8 = r8; \
784 \
785 F1 = f1; \
786 F2 = f2; \
787 F3 = f3; \
788 F4 = f4; \
789 F5 = f5; \
790 F6 = f6; \
791 \
792 D1 = d1; \
793 D2 = d2; \
794 D3 = d3; \
795 D4 = d4; \
796 D5 = d5; \
797 D6 = d6; \
798 \
799 L1 = l1;
800
801 /* -----------------------------------------------------------------------------
802 Misc junk
803 -------------------------------------------------------------------------- */
804
805 #define NO_TREC stg_NO_TREC_closure
806 #define END_TSO_QUEUE stg_END_TSO_QUEUE_closure
807 #define STM_AWOKEN stg_STM_AWOKEN_closure
808
809 #define recordMutableCap(p, gen) \
810 W_ __bd; \
811 W_ mut_list; \
812 mut_list = Capability_mut_lists(MyCapability()) + WDS(gen); \
813 __bd = W_[mut_list]; \
814 if (bdescr_free(__bd) >= bdescr_start(__bd) + BLOCK_SIZE) { \
815 W_ __new_bd; \
816 ("ptr" __new_bd) = foreign "C" allocBlock_lock(); \
817 bdescr_link(__new_bd) = __bd; \
818 __bd = __new_bd; \
819 W_[mut_list] = __bd; \
820 } \
821 W_ free; \
822 free = bdescr_free(__bd); \
823 W_[free] = p; \
824 bdescr_free(__bd) = free + WDS(1);
825
826 #define recordMutable(p) \
827 P_ __p; \
828 W_ __bd; \
829 W_ __gen; \
830 __p = p; \
831 __bd = Bdescr(__p); \
832 __gen = TO_W_(bdescr_gen_no(__bd)); \
833 if (__gen > 0) { recordMutableCap(__p, __gen); }
834
835 /* -----------------------------------------------------------------------------
836 Arrays
837 -------------------------------------------------------------------------- */
838
839 /* Complete function body for the clone family of (mutable) array ops.
840 Defined as a macro to avoid function call overhead or code
841 duplication. */
842 #define cloneArray(info, src, offset, n) \
843 W_ words, size; \
844 gcptr dst, dst_p, src_p; \
845 \
846 again: MAYBE_GC(again); \
847 \
848 size = n + mutArrPtrsCardWords(n); \
849 words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; \
850 ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
851 TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0); \
852 \
853 SET_HDR(dst, info, CCCS); \
854 StgMutArrPtrs_ptrs(dst) = n; \
855 StgMutArrPtrs_size(dst) = size; \
856 \
857 dst_p = dst + SIZEOF_StgMutArrPtrs; \
858 src_p = src + SIZEOF_StgMutArrPtrs + WDS(offset); \
859 prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
860 \
861 return (dst);
862
863 #define copyArray(src, src_off, dst, dst_off, n) \
864 W_ dst_elems_p, dst_p, src_p, dst_cards_p, bytes; \
865 \
866 if ((n) != 0) { \
867 SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
868 \
869 dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
870 dst_p = dst_elems_p + WDS(dst_off); \
871 src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
872 bytes = WDS(n); \
873 \
874 prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
875 \
876 dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
877 setCards(dst_cards_p, dst_off, n); \
878 } \
879 \
880 return ();
881
882 #define copyMutableArray(src, src_off, dst, dst_off, n) \
883 W_ dst_elems_p, dst_p, src_p, dst_cards_p, bytes; \
884 \
885 if ((n) != 0) { \
886 SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
887 \
888 dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
889 dst_p = dst_elems_p + WDS(dst_off); \
890 src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
891 bytes = WDS(n); \
892 \
893 if ((src) == (dst)) { \
894 prim %memmove(dst_p, src_p, bytes, SIZEOF_W); \
895 } else { \
896 prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
897 } \
898 \
899 dst_cards_p = dst_elems_p + WDS(StgMutArrPtrs_ptrs(dst)); \
900 setCards(dst_cards_p, dst_off, n); \
901 } \
902 \
903 return ();
904
905 /*
906 * Set the cards in the cards table pointed to by dst_cards_p for an
907 * update to n elements, starting at element dst_off.
908 */
909 #define setCards(dst_cards_p, dst_off, n) \
910 W_ __start_card, __end_card, __cards; \
911 __start_card = mutArrPtrCardDown(dst_off); \
912 __end_card = mutArrPtrCardDown((dst_off) + (n) - 1); \
913 __cards = __end_card - __start_card + 1; \
914 prim %memset((dst_cards_p) + __start_card, 1, __cards, 1);
915
916 /* Complete function body for the clone family of small (mutable)
917 array ops. Defined as a macro to avoid function call overhead or
918 code duplication. */
919 #define cloneSmallArray(info, src, offset, n) \
920 W_ words, size; \
921 gcptr dst, dst_p, src_p; \
922 \
923 again: MAYBE_GC(again); \
924 \
925 words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n; \
926 ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
927 TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0); \
928 \
929 SET_HDR(dst, info, CCCS); \
930 StgSmallMutArrPtrs_ptrs(dst) = n; \
931 \
932 dst_p = dst + SIZEOF_StgSmallMutArrPtrs; \
933 src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(offset); \
934 prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
935 \
936 return (dst);