Add explicit imports for RTS-external variables
[ghc.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 import __gmpz_init;
31 import __gmpz_add;
32 import __gmpz_sub;
33 import __gmpz_mul;
34 import __gmpz_gcd;
35 import __gmpn_gcd_1;
36 import __gmpn_cmp;
37 import __gmpz_tdiv_q;
38 import __gmpz_tdiv_r;
39 import __gmpz_tdiv_qr;
40 import __gmpz_fdiv_qr;
41 import __gmpz_divexact;
42 import __gmpz_and;
43 import __gmpz_xor;
44 import __gmpz_ior;
45 import __gmpz_com;
46 import base_GHCziIOBase_NestedAtomically_closure;
47 import pthread_mutex_lock;
48 import pthread_mutex_unlock;
49
50 /*-----------------------------------------------------------------------------
51   Array Primitives
52
53   Basically just new*Array - the others are all inline macros.
54
55   The size arg is always passed in R1, and the result returned in R1.
56
57   The slow entry point is for returning from a heap check, the saved
58   size argument must be re-loaded from the stack.
59   -------------------------------------------------------------------------- */
60
61 /* for objects that are *less* than the size of a word, make sure we
62  * round up to the nearest word for the size of the array.
63  */
64
/* Allocate a new mutable byte array holding R1 bytes.
 * The byte count is rounded up to a whole number of words for the
 * ArrWords payload.  Returns the new ByteArray# via RET_P. */
newByteArrayzh_fast
{
    W_ words, payload_words, n, p;
    MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
78
/* Allocate a new pinned (non-moving) mutable byte array of R1 bytes,
 * with the payload aligned to an 8-byte boundary.
 * Returns the new ByteArray# via RET_P. */
newPinnedByteArrayzh_fast
{
    W_ words, payload_words, n, p;

    MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
    n = R1;
    payload_words = ROUNDUP_BYTES_TO_WDS(n);

    // We want an 8-byte aligned array.  allocatePinned() gives us
    // 8-byte aligned memory by default, but we want to align the
    // *goods* inside the ArrWords object, so we have to check the
    // size of the ArrWords header and adjust our size accordingly.
    words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
    if ((SIZEOF_StgArrWords & 7) != 0) {
        words = words + 1;
    }

    ("ptr" p) = foreign "C" allocatePinned(words) [];
    TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);

    // Again, if the ArrWords header isn't a multiple of 8 bytes, we
    // have to push the object forward one word so that the goods
    // fall on an 8-byte boundary.
    if ((SIZEOF_StgArrWords & 7) != 0) {
        p = p + WDS(1);
    }

    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = payload_words;
    RET_P(p);
}
110
/* Allocate a new mutable boxed array of R1 elements, with every slot
 * initialised to the closure in R2.  Returns the new MutableArray#
 * via RET_P. */
newArrayzh_fast
{
    W_ words, n, init, arr, p;
    /* Args: R1 = words, R2 = initialisation value */

    n = R1;
    MAYBE_GC(R2_PTR,newArrayzh_fast);

    words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
    ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
    TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);

    SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(arr) = n;

    // Initialise all elements of the array with the value in R2
    init = R2;
    p = arr + SIZEOF_StgMutArrPtrs;
  for:
    if (p < arr + WDS(words)) {
        W_[p] = init;
        p = p + WDS(1);
        goto for;
    }

    RET_P(arr);
}
138
/* Thaw a frozen array (R1) back into a mutable one in place,
 * re-adding it to the mutable list only when it is not already there. */
unsafeThawArrayzh_fast
{
  // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
  //
  // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
  // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
  // it on the mutable list for the GC to remove (removing something from
  // the mutable list is not easy, because the mut_list is only singly-linked).
  //
  // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
  // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
  // to indicate that it is still on the mutable list.
  //
  // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
  // either it is on a mut_list, or it isn't.  We adopt the convention that
  // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
  // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
  // we put it on the mutable list more than once, but it would get scavenged
  // multiple times during GC, which would be unnecessarily slow.
  //
  if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        recordMutable(R1, R1);
        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
        RET_P(R1);
  } else {
        SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
        RET_P(R1);
  }
}
169
170 /* -----------------------------------------------------------------------------
171    MutVar primitives
172    -------------------------------------------------------------------------- */
173
/* Allocate a new MutVar# whose initial contents is the closure in R1.
 * Returns the new MutVar# via RET_P. */
newMutVarzh_fast
{
    W_ mv;
    /* Args: R1 = initialisation value */

    ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);

    mv = Hp - SIZEOF_StgMutVar + WDS(1);
    SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
    StgMutVar_var(mv) = R1;

    RET_P(mv);
}
187
/* atomicModifyMutVar# primop: atomically replace the contents of the
 * MutVar# in R1 with a thunk selecting the first component of (f x),
 * returning a thunk selecting the second component.  In THREADED_RTS
 * the whole update is serialised by atomic_modify_mutvar_mutex. */
atomicModifyMutVarzh_fast
{
    W_ mv, z, x, y, r;
    /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */

    /* If x is the current contents of the MutVar#, then
       We want to make the new contents point to

         (sel_0 (f x))

       and the return value is

         (sel_1 (f x))

        obviously we can share (f x).

         z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
         y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
         r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
    */

#if MIN_UPD_SIZE > 1
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
#else
#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
#endif

#if MIN_UPD_SIZE > 2
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
#else
#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
#endif

#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)

   HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);

#if defined(THREADED_RTS)
    foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
#endif

   x = StgMutVar_var(R1);

   // z = [stg_ap_2 f x]: the shared application thunk (f x)
   TICK_ALLOC_THUNK_2();
   CCCS_ALLOC(THUNK_2_SIZE);
   z = Hp - THUNK_2_SIZE + WDS(1);
   SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(z);
   StgThunk_payload(z,0) = R2;
   StgThunk_payload(z,1) = x;

   // y = [stg_sel_0 z]: the new contents of the MutVar#
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   y = z - THUNK_1_SIZE;
   SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(y);
   StgThunk_payload(y,0) = z;

   StgMutVar_var(R1) = y;
   foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];

   // r = [stg_sel_1 z]: the value returned to the caller
   TICK_ALLOC_THUNK_1();
   CCCS_ALLOC(THUNK_1_SIZE);
   r = y - THUNK_1_SIZE;
   SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
   LDV_RECORD_CREATE(r);
   StgThunk_payload(r,0) = z;

#if defined(THREADED_RTS)
    foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
#endif

   RET_P(r);
}
266
267 /* -----------------------------------------------------------------------------
268    Weak Pointer Primitives
269    -------------------------------------------------------------------------- */
270
STRING(stg_weak_msg,"New weak pointer at %p\n")

/* Allocate a new weak pointer linking key R1 to value R2 with optional
 * finalizer R3 (NULL means no finalizer), and push it onto the global
 * weak_ptr_list.  Returns the new Weak# via RET_P. */
mkWeakzh_fast
{
  /* R1 = key
     R2 = value
     R3 = finalizer (or NULL)
  */
  W_ w;

  if (R3 == NULL) {
    R3 = stg_NO_FINALIZER_closure;
  }

  ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );

  w = Hp - SIZEOF_StgWeak + WDS(1);
  SET_HDR(w, stg_WEAK_info, W_[CCCS]);

  StgWeak_key(w)       = R1;
  StgWeak_value(w)     = R2;
  StgWeak_finalizer(w) = R3;

  StgWeak_link(w)       = W_[weak_ptr_list];
  W_[weak_ptr_list]     = w;

  IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);

  RET_P(w);
}
301
302
/* Kill the weak pointer in R1, turning it into a DEAD_WEAK closure.
 * Returns (0, NO_FINALIZER) if it was already dead or had no finalizer,
 * otherwise (1, finalizer). */
finalizzeWeakzh_fast
{
  /* R1 = weak ptr
   */
  W_ w, f;

  w = R1;

  // already dead?
  if (GET_INFO(w) == stg_DEAD_WEAK_info) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  }

  // kill it
#ifdef PROFILING
  // @LDV profiling
  // A weak pointer is inherently used, so we do not need to call
  // LDV_recordDead_FILL_SLOP_DYNAMIC():
  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
  // or, LDV_recordDead():
  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
  // large as weak pointers, so there is no need to fill the slop, either.
  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
#endif

  //
  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
  //
  SET_INFO(w,stg_DEAD_WEAK_info);
  LDV_RECORD_CREATE(w);

  f = StgWeak_finalizer(w);
  StgDeadWeak_link(w) = StgWeak_link(w);

  /* return the finalizer */
  if (f == stg_NO_FINALIZER_closure) {
      RET_NP(0,stg_NO_FINALIZER_closure);
  } else {
      RET_NP(1,f);
  }
}
345
/* Dereference the weak pointer in R1.
 * Returns (1, value) if the weak pointer is still alive,
 * (0, weak-ptr-itself) if it has died. */
deRefWeakzh_fast
{
  /* R1 = weak ptr */
  W_ w, code, val;

  w = R1;
  if (GET_INFO(w) == stg_WEAK_info) {
    code = 1;
    val = StgWeak_value(w);
  } else {
    code = 0;
    val = w;
  }
  RET_NP(code,val);
}
361
362 /* -----------------------------------------------------------------------------
363    Arbitrary-precision Integer operations.
364
365    There are some assumptions in this code that mp_limb_t == W_.  This is
366    the case for all the platforms that GHC supports, currently.
367    -------------------------------------------------------------------------- */
368
/* Convert the Int# in R1 to an Integer (sign word + one-limb ByteArray#).
 * The sign/size word s is -1, 0 or 1; the limb holds the magnitude. */
int2Integerzh_fast
{
   /* arguments: R1 = Int# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;
   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   /* mpz_set_si is inlined here, makes things simpler */
   if (%lt(val,0)) {
        s  = -1;
        Hp(0) = -val;
   } else {
     if (%gt(val,0)) {
        s = 1;
        Hp(0) = val;
     } else {
        s = 0;
     }
  }

   /* returns (# size  :: Int#,
                 data  :: ByteArray#
               #)
   */
   RET_NP(s,p);
}
401
/* Convert the Word# in R1 to a non-negative Integer
 * (size word 0 or 1, plus a one-limb ByteArray#). */
word2Integerzh_fast
{
   /* arguments: R1 = Word# */

   W_ val, s, p;        /* to avoid aliasing */

   val = R1;

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);

   p = Hp - SIZEOF_StgArrWords;
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = 1;

   if (val != 0) {
        s = 1;
        W_[Hp] = val;
   } else {
        s = 0;
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
428
429
430 /*
431  * 'long long' primops for converting to/from Integers.
432  */
433
434 #ifdef SUPPORT_LONG_LONGS
435
/* Convert the Int64# in L1 to an Integer on a 32-bit platform
 * (only compiled under SUPPORT_LONG_LONGS).  Produces a one- or
 * two-limb magnitude with a signed size word in s. */
int64ToIntegerzh_fast
{
   /* arguments: L1 = Int64# */

   L_ val;
   W_ hi, lo, s, neg, words_needed, p;

   val = L1;
   neg = 0;

   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   // hi is all zeros (small non-negative) or all ones (small negative)
   // exactly when the value fits in one limb.
   if ( hi != 0 && hi != 0xFFFFFFFF )  {
       words_needed = 2;
   } else {
       // minimum is one word
       words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, int64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   // Negate a two-limb value: borrow propagates into hi only when lo == 0.
   if ( %lt(hi,0) ) {
     neg = 1;
     lo = -lo;
     if(lo == 0) {
       hi = -hi;
     } else {
       hi = -hi - 1;
     }
   }

   if ( words_needed == 2 )  {
      s = 2;
      Hp(-1) = lo;
      Hp(0) = hi;
   } else {
       if ( lo != 0 ) {
           s = 1;
           Hp(0) = lo;
       } else /* val==0 */  {
           s = 0;
       }
   }
   if ( neg != 0 ) {
        s = -s;
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
/* Convert the Word64# in L1 to a non-negative Integer on a 32-bit
 * platform (only compiled under SUPPORT_LONG_LONGS). */
word64ToIntegerzh_fast
{
   /* arguments: L1 = Word64# */

   L_ val;
   W_ hi, lo, s, words_needed, p;

   val = L1;
   hi = TO_W_(val >> 32);
   lo = TO_W_(val);

   if ( hi != 0 ) {
      words_needed = 2;
   } else {
      words_needed = 1;
   }

   ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
               NO_PTRS, word64ToIntegerzh_fast );

   p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
   SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
   StgArrWords_words(p) = words_needed;

   if ( hi != 0 ) {
      s = 2;
      Hp(-1) = lo;
      Hp(0)  = hi;
   } else {
      if ( lo != 0 ) {
         s = 1;
         Hp(0) = lo;
      } else /* val==0 */  {
         s = 0;
      }
   }

   /* returns (# size  :: Int#,
                 data  :: ByteArray# #)
   */
   RET_NP(s,p);
}
536
537
538
539 #endif /* SUPPORT_LONG_LONGS */
540
541 /* ToDo: this is shockingly inefficient */
542
/* Scratch MP_INT workspace for the GMP primops below.
 *
 * In the single-threaded RTS these live in static bss sections;
 * FETCH_MP_TEMP then expands to nothing and the static symbols are
 * used directly.  In THREADED_RTS each Capability carries its own
 * copies in its register table, and FETCH_MP_TEMP declares a local
 * and points it at the per-Capability slot, keeping the primops
 * re-entrant. */
#ifndef THREADED_RTS
section "bss" {
  mp_tmp1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_tmp2:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result1:
    bits8 [SIZEOF_MP_INT];
}

section "bss" {
  mp_result2:
    bits8 [SIZEOF_MP_INT];
}
#endif

#ifdef THREADED_RTS
#define FETCH_MP_TEMP(X) \
W_ X; \
X = BaseReg + (OFFSET_StgRegTable_r ## X);
#else
#define FETCH_MP_TEMP(X) /* Nothing */
#endif
572
/* Build a primop `name` wrapping the binary GMP function `mp_fun`:
 * takes two Integers as (size,data) pairs in R1-R4, returns one
 * Integer as a (size,data) pair.  The ByteArray# limbs are wrapped in
 * temporary MP_INT structs without copying. */
#define GMP_TAKE2_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1, s2;                                                          \
  W_ d1, d2;                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_tmp2);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
  FETCH_MP_TEMP(mp_result2);                                            \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
                                                                        \
  s1 = W_TO_INT(R1);                                                    \
  d1 = R2;                                                              \
  s2 = W_TO_INT(R3);                                                    \
  d2 = R4;                                                              \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
  MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
  MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
  MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
  MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
  MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
606
/* Build a primop `name` wrapping the unary GMP function `mp_fun`:
 * takes one Integer as a (size,data) pair in R1-R2, returns one
 * Integer as a (size,data) pair. */
#define GMP_TAKE1_RET1(name,mp_fun)                                     \
name                                                                    \
{                                                                       \
  CInt s1;                                                              \
  W_ d1;                                                                \
  FETCH_MP_TEMP(mp_tmp1);                                               \
  FETCH_MP_TEMP(mp_result1)                                             \
                                                                        \
  /* call doYouWantToGC() */                                            \
  MAYBE_GC(R2_PTR, name);                                               \
                                                                        \
  d1 = R2;                                                              \
  s1 = W_TO_INT(R1);                                                    \
                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
                                                                        \
  /* Perform the operation */                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
                                                                        \
  RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
         MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
}
633
/* Build a primop `name` wrapping a binary GMP function `mp_fun` that
 * produces two results (e.g. quotient and remainder): takes two
 * Integers in R1-R4, returns two Integers as (size,data,size,data). */
#define GMP_TAKE2_RET2(name,mp_fun)                                                     \
name                                                                                    \
{                                                                                       \
  CInt s1, s2;                                                                          \
  W_ d1, d2;                                                                            \
  FETCH_MP_TEMP(mp_tmp1);                                                               \
  FETCH_MP_TEMP(mp_tmp2);                                                               \
  FETCH_MP_TEMP(mp_result1)                                                             \
  FETCH_MP_TEMP(mp_result2)                                                             \
                                                                                        \
  /* call doYouWantToGC() */                                                            \
  MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
                                                                                        \
  s1 = W_TO_INT(R1);                                                                    \
  d1 = R2;                                                                              \
  s2 = W_TO_INT(R3);                                                                    \
  d2 = R4;                                                                              \
                                                                                        \
  MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
  MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
  MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
  MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
  MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
  MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
                                                                                        \
  foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
  foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
                                                                                        \
  /* Perform the operation */                                                           \
  foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
                                                                                        \
  RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
           MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
           TO_W_(MP_INT__mp_size(mp_result2)),                                          \
           MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
}
670
/* Instantiate the GMP wrapper macros for the Integer primops. */
GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)

GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
685
#ifndef THREADED_RTS
section "bss" {
  mp_tmp_w:  W_; // NB. mp_tmp_w is really an mp_limb_t here
}
#endif
691
/* gcd of two Int#s (R1, R2), computed via GMP's mpn_gcd_1 on a
 * one-limb number.  Result is returned in R1 via the info pointer
 * at the top of the stack. */
gcdIntzh_fast
{
    /* R1 = the first Int#; R2 = the second Int# */
    W_ r;
    FETCH_MP_TEMP(mp_tmp_w);

    W_[mp_tmp_w] = R1;
    (r) = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];

    R1 = r;
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
705
706
/* gcd of an Integer (size R1, limbs R2) and an Int# (R3), via GMP's
 * mpn_gcd_1.  Result is returned in R1 via the info pointer at TOS. */
gcdIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ s1;
    (s1) = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
    R1 = s1;

    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
717
718
/* Compare an Integer (size R1, limbs R2) against an Int# (R3).
 * Returns negative/zero/positive in R1, following mpz_cmp_si. */
cmpIntegerIntzh_fast
{
    /* R1 = s1; R2 = d1; R3 = the int */
    W_ usize, vsize, v_digit, u_digit;

    usize = R1;
    vsize = 0;
    v_digit = R3;

    // paraphrased from __gmpz_cmp_si() in the GMP sources
    if (%gt(v_digit,0)) {
        vsize = 1;
    } else {
        if (%lt(v_digit,0)) {
            vsize = -1;
            v_digit = -v_digit;
        }
    }

    // differing signs (or magnitudes in limbs) decide immediately
    if (usize != vsize) {
        R1 = usize - vsize;
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    u_digit = W_[BYTE_ARR_CTS(R2)];

    if (u_digit == v_digit) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
        R1 = usize;
    } else {
        R1 = -usize;
    }

    jump %ENTRY_CODE(Sp(0));
}
763
/* Compare two Integers given as (size,limbs) pairs in R1-R4.
 * Returns negative/zero/positive in R1, following mpz_cmp. */
cmpIntegerzh_fast
{
    /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
    W_ usize, vsize, size, up, vp;
    CInt cmp;

    // paraphrased from __gmpz_cmp() in the GMP sources
    usize = R1;
    vsize = R3;

    // differing signed sizes decide the comparison outright
    if (usize != vsize) {
        R1 = usize - vsize;
        jump %ENTRY_CODE(Sp(0));
    }

    if (usize == 0) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    if (%lt(usize,0)) { // NB. not <, which is unsigned
        size = -usize;
    } else {
        size = usize;
    }

    up = BYTE_ARR_CTS(R2);
    vp = BYTE_ARR_CTS(R4);

    (cmp) = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];

    if (cmp == 0 :: CInt) {
        R1 = 0;
        jump %ENTRY_CODE(Sp(0));
    }

    // same sign: the magnitude comparison flips for negative numbers
    if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
        R1 = 1;
    } else {
        R1 = (-1);
    }
    /* Result parked in R1, return via info-pointer at TOS */
    jump %ENTRY_CODE(Sp(0));
}
808
/* Truncate an Integer (size R1, limbs R2) to an Int#: take the low
 * limb and negate it when the size word is negative. */
integer2Intzh_fast
{
    /* R1 = s; R2 = d */
    W_ r, s;

    s = R1;
    if (s == 0) {
        r = 0;
    } else {
        r = W_[R2 + SIZEOF_StgArrWords];
        if (%lt(s,0)) {
            r = -r;
        }
    }
    /* Result parked in R1, return via info-pointer at TOS */
    R1 = r;
    jump %ENTRY_CODE(Sp(0));
}
827
/* Truncate an Integer (size R1, limbs R2) to a Word#: take the low
 * limb, two's-complement negated when the size word is negative. */
integer2Wordzh_fast
{
  /* R1 = s; R2 = d */
  W_ r, s;

  s = R1;
  if (s == 0) {
    r = 0;
  } else {
    r = W_[R2 + SIZEOF_StgArrWords];
    if (%lt(s,0)) {
        r = -r;
    }
  }
  /* Result parked in R1, return via info-pointer at TOS */
  R1 = r;
  jump %ENTRY_CODE(Sp(0));
}
846
/* Decode the Float# in F1 into (exponent, mantissa-size, mantissa
 * ByteArray#) via the RTS's __decodeFloat.
 * NOTE: the opening brace was lost in the scraped copy of this file;
 * restored here to keep the definition well-formed. */
decodeFloatzh_fast
{
    W_ p;
    F_ arg;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: F1 = Float# */
    arg = F1;

    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );

    /* Be prepared to tell Lennart-coded __decodeFloat
       where mantissa._mp_d can be put (it does not care about the rest) */
    p = Hp - SIZEOF_StgArrWords;
    SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
    StgArrWords_words(p) = 1;
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];

    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
872
#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)

/* Decode the Double# in D1 into (exponent, mantissa-size, mantissa
 * ByteArray#) via the RTS's __decodeDouble.
 * NOTE: the opening brace was lost in the scraped copy of this file;
 * restored here to keep the definition well-formed. */
decodeDoublezh_fast
{
    D_ arg;
    W_ p;
    FETCH_MP_TEMP(mp_tmp1);
    FETCH_MP_TEMP(mp_tmp_w);

    /* arguments: D1 = Double# */
    arg = D1;

    ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );

    /* Be prepared to tell Lennart-coded __decodeDouble
       where mantissa.d can be put (it does not care about the rest) */
    p = Hp - ARR_SIZE + WDS(1);
    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
    MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);

    /* Perform the operation */
    foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];

    /* returns: (Int# (expn), Int#, ByteArray#) */
    RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
}
901
902 /* -----------------------------------------------------------------------------
903  * Concurrency primitives
904  * -------------------------------------------------------------------------- */
905
/* fork# primop: create a new IO thread running the closure in R1 and
 * schedule it on the current Capability.  Returns the new ThreadId#. */
forkzh_fast
{
  /* args: R1 = closure to spark */

  MAYBE_GC(R1_PTR, forkzh_fast);

  W_ closure;
  W_ threadid;
  closure = R1;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
                                RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                closure "ptr") [];
  foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;

  RET_P(threadid);
}
926
/* forkOn# primop: like fork#, but pin the new thread to the
 * capability number given in R1; the closure to run is in R2. */
forkOnzh_fast
{
  /* args: R1 = cpu, R2 = closure to spark */

  MAYBE_GC(R2_PTR, forkOnzh_fast);

  W_ cpu;
  W_ closure;
  W_ threadid;
  cpu = R1;
  closure = R2;

  ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr",
                                RtsFlags_GcFlags_initialStkSize(RtsFlags),
                                closure "ptr") [];
  foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];

  // switch at the earliest opportunity
  CInt[context_switch] = 1 :: CInt;

  RET_P(threadid);
}
949
/* yield# primop: give up the current time slice. */
yieldzh_fast
{
  jump stg_yield_noregs;
}
954
/* myThreadId# primop: return the current TSO as a ThreadId#. */
myThreadIdzh_fast
{
  /* no args. */
  RET_P(CurrentTSO);
}
960
/* labelThread# : attach a label (C string at R2) to the thread in R1.
 * Only does anything in the DEBUG RTS; otherwise it is a no-op and
 * simply returns to the continuation on the stack. */
labelThreadzh_fast
{
  /* args: 
        R1 = ThreadId#
        R2 = Addr# */
#ifdef DEBUG
  foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
#endif
  jump %ENTRY_CODE(Sp(0));
}
971
/* isCurrentThreadBound# : ask the RTS whether the current thread is
 * bound to an OS thread; returns 0 or 1 as an unboxed Int#. */
isCurrentThreadBoundzh_fast
{
  /* no args */
  W_ r;
  (r) = foreign "C" isThreadBound(CurrentTSO) [];
  RET_N(r);
}
979
980
981 /* -----------------------------------------------------------------------------
982  * TVar primitives
983  * -------------------------------------------------------------------------- */
984
/* SP_OFF is the offset (in words) of the frame's return address on the
 * stack.  In the unregisterised build (no REG_R1) the returned value is
 * passed on the stack instead of in R1, so the return address sits one
 * word further up; IF_NOT_REG_R1 expands its argument only in that case. */
#ifdef REG_R1
#define SP_OFF 0
#define IF_NOT_REG_R1(x) 
#else
#define SP_OFF 1
#define IF_NOT_REG_R1(x) x
#endif
992
993 // Catch retry frame ------------------------------------------------------------
994
/* Return frame pushed by catchRetry#.  Entered when the code running
 * inside the frame returns a value: try to commit the nested transaction;
 * on success pop the frame and return, on failure restart whichever
 * branch (first/alt) was running. */
INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  W_ unused3, "ptr" W_ unused4, "ptr" W_ unused5)
{
   W_ r, frame, trec, outer;
   /* unregisterised: the returned value arrives on the stack, not in R1 */
   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

   frame = Sp;
   trec = StgTSO_trec(CurrentTSO);
   ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
   (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
   if (r != 0) {
     /* Succeeded (either first branch or second branch) */
     StgTSO_trec(CurrentTSO) = outer;
     Sp = Sp + SIZEOF_StgCatchRetryFrame;
     IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
     jump %ENTRY_CODE(Sp(SP_OFF));
   } else {
     /* Did not commit: re-execute */
     W_ new_trec;
     ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
     StgTSO_trec(CurrentTSO) = new_trec;
     /* re-run whichever branch was active when the conflict happened */
     if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
       R1 = StgCatchRetryFrame_alt_code(frame);
     } else {
       R1 = StgCatchRetryFrame_first_code(frame);
     }
     jump stg_ap_v_fast;
   }
}
1027
1028
1029 // Atomically frame ------------------------------------------------------------
1030
/* Return frame pushed by atomically#.  Entered when the transaction body
 * (or an invariant check) returns.  First visit: collect the invariants
 * to check.  Later visits: abort the just-finished invariant check and
 * move to the next one.  When no invariants remain, try to commit; on
 * conflict, restart the whole transaction from scratch. */
INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid, next_invariant, q, outer;
  /* unregisterised: returned value travels on the stack, not in R1 */
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (outer == NO_TREC) {
    /* First time back at the atomically frame -- pick up invariants */
    ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;

  } else {
    /* Second/subsequent time back at the atomically frame -- abort the
     * tx that's checking the invariant and move on to the next one */
    StgTSO_trec(CurrentTSO) = outer;
    q = StgAtomicallyFrame_next_invariant_to_check(frame);
    StgInvariantCheckQueue_my_execution(q) = trec;
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    /* Don't free trec -- it's linked from q and will be stashed in the
     * invariant if we eventually commit. */
    q = StgInvariantCheckQueue_next_queue_entry(q);
    StgAtomicallyFrame_next_invariant_to_check(frame) = q;
    trec = outer;
  }

  q = StgAtomicallyFrame_next_invariant_to_check(frame);

  if (q != END_INVARIANT_CHECK_QUEUE) {
    /* We can't commit yet: another invariant to check */
    /* run the invariant's code in a fresh nested transaction */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;

    next_invariant = StgInvariantCheckQueue_invariant(q);
    R1 = StgAtomicInvariant_code(next_invariant);
    jump stg_ap_v_fast;

  } else {

    /* We've got no more invariants to check, try to commit */
    (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
    if (valid != 0) {
      /* Transaction was valid: commit succeeded */
      StgTSO_trec(CurrentTSO) = NO_TREC;
      Sp = Sp + SIZEOF_StgAtomicallyFrame;
      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
      jump %ENTRY_CODE(Sp(SP_OFF));
    } else {
      /* Transaction was not valid: try again */
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
      R1 = StgAtomicallyFrame_code(frame);
      jump stg_ap_v_fast;
    }
  }
}
1094
/* Variant of the atomically frame used while the TSO is blocked in
 * retry#, waiting for one of its TVars to change.  Entered when the
 * thread is woken: if the original transaction is still valid there is
 * no point re-running, so block again; otherwise restart the body. */
INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
{
  W_ frame, trec, valid;
  IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )

  frame = Sp;

  /* The TSO is currently waiting: should we stop waiting? */
  (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
  if (valid != 0) {
    /* Previous attempt is still valid: no point trying again yet */
    /* unregisterised: set up the stack for the no-regs return convention */
          IF_NOT_REG_R1(Sp_adj(-2);
                        Sp(1) = stg_NO_FINALIZER_closure;
                        Sp(0) = stg_ut_1_0_unreg_info;)
    jump stg_block_noregs;
  } else {
    /* Previous attempt is no longer valid: try again */
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    /* flip the frame back to the normal atomically frame before re-running */
    StgHeader_info(frame) = stg_atomically_frame_info;
    R1 = StgAtomicallyFrame_code(frame);
    jump stg_ap_v_fast;
  }
}
1123
1124 // STM catch frame --------------------------------------------------------------
1125
/* NOTE(review): SP_OFF is re-#defined here with the same values as above
 * but without an intervening #undef -- harmless for the preprocessor since
 * the definitions are identical, but worth confirming it is intentional. */
#ifdef REG_R1
#define SP_OFF 0
#else
#define SP_OFF 1
#endif
1131
1132 /* Catch frames are very similar to update frames, but when entering
1133  * one we just pop the frame off the stack and perform the correct
1134  * kind of return to the activation record underneath us on the stack.
1135  */
1136
/* Return frame pushed by catchSTM#.  Entered when the protected code
 * returns normally: commit the nested transaction; on success pop the
 * frame and return the value, on conflict re-run the protected code in
 * a fresh nested transaction. */
INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME,
#if defined(PROFILING)
  W_ unused1, W_ unused2,
#endif
  "ptr" W_ unused3, "ptr" W_ unused4)
   {
      /* unregisterised: returned value travels on the stack, not in R1 */
      IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
      W_ r, frame, trec, outer;
      frame = Sp;
      trec = StgTSO_trec(CurrentTSO);
      ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
      (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
      if (r != 0) {
        /* Commit succeeded */
        StgTSO_trec(CurrentTSO) = outer;
        Sp = Sp + SIZEOF_StgCatchSTMFrame;
        IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
        jump Sp(SP_OFF);
      } else {
        /* Commit failed */
        W_ new_trec;
        ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
        StgTSO_trec(CurrentTSO) = new_trec;
        R1 = StgCatchSTMFrame_code(frame);
        jump stg_ap_v_fast;
      }
   }
1164
1165
1166 // Primop definition ------------------------------------------------------------
1167
/* atomically# : run an STM action as a top-level transaction.  Pushes an
 * stg_atomically_frame, starts a fresh transaction record, and applies
 * the action.  Nested use (called while a transaction is already active)
 * raises NestedAtomically. */
atomicallyzh_fast
{
  W_ frame;
  W_ old_trec;
  W_ new_trec;
  
  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR, atomicallyzh_fast); 

  /* Args: R1 = m :: STM a */
  STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);

  old_trec = StgTSO_trec(CurrentTSO);

  /* Nested transactions are not allowed; raise an exception */
  if (old_trec != NO_TREC) {
     R1 = base_GHCziIOBase_NestedAtomically_closure;
     jump raisezh_fast;
  }

  /* Set up the atomically frame */
  Sp = Sp - SIZEOF_StgAtomicallyFrame;
  frame = Sp;

  SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
  StgAtomicallyFrame_code(frame) = R1;
  StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;

  /* Start the memory transaction */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1203
1204
/* catchSTM# : run the STM action in R1 inside a nested transaction with
 * the handler in R2 recorded in a catch-stm frame on the stack. */
catchSTMzh_fast
{
  W_ frame;
  
  /* Args: R1 :: STM a */
  /* Args: R2 :: Exception -> STM a */
  STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);

  /* Set up the catch frame */
  Sp = Sp - SIZEOF_StgCatchSTMFrame;
  frame = Sp;

  SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
  StgCatchSTMFrame_handler(frame) = R2;
  StgCatchSTMFrame_code(frame) = R1;

  /* Start a nested transaction to run the body of the try block in */
  W_ cur_trec;  
  W_ new_trec;
  cur_trec = StgTSO_trec(CurrentTSO);
  /* NOTE(review): this is the only foreign call in the file without a
   * register-liveness annotation ("[]" or "[R1,R2]"), and unlike
   * atomicallyzh_fast/catchRetryzh_fast there is no MAYBE_GC here even
   * though stmStartTransaction may allocate -- confirm both are intended. */
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1231
1232
/* catchRetry# : run R1 in a nested transaction; if it calls retry#, run
 * the alternative R2 instead (see stg_catch_retry_frame / retryzh_fast). */
catchRetryzh_fast
{
  W_ frame;
  W_ new_trec;
  W_ trec;

  // stmStartTransaction may allocate
  MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 

  /* Args: R1 :: STM a */
  /* Args: R2 :: STM a */
  STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);

  /* Start a nested transaction within which to run the first code */
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
  StgTSO_trec(CurrentTSO) = new_trec;

  /* Set up the catch-retry frame */
  Sp = Sp - SIZEOF_StgCatchRetryFrame;
  frame = Sp;
  
  SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
  /* start in the first branch; the frame flips this if retry# fires */
  StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
  StgCatchRetryFrame_first_code(frame) = R1;
  StgCatchRetryFrame_alt_code(frame) = R2;

  /* Apply R1 to the realworld token */
  jump stg_ap_v_fast;
}
1263
1264
/* retry# : abandon the current transaction attempt.  Walk up the stack
 * to the nearest CATCH_RETRY_FRAME (switch to its alternative, or
 * propagate if the alternative already ran) or ATOMICALLY_FRAME (wait on
 * the TVars read so far and block until one of them changes). */
retryzh_fast
{
  W_ frame_type;
  W_ frame;
  W_ trec;
  W_ outer;
  W_ r;

  MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate

  // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
retry_pop_stack:
  /* findRetryFrameHelper works on the saved TSO stack pointer, so sync it */
  StgTSO_sp(CurrentTSO) = Sp;
  (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
  Sp = StgTSO_sp(CurrentTSO);
  frame = Sp;
  trec = StgTSO_trec(CurrentTSO);
  ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];

  if (frame_type == CATCH_RETRY_FRAME) {
    // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
    ASSERT(outer != NO_TREC);
    // Abort the transaction attempting the current branch
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    /* NOTE(review): this condition appears to parse as
     * (!running_alt_code) != 0, i.e. true when the flag is 0; the
     * clearer spelling would be `... == 0::I32` -- confirm precedence. */
    if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
      // Retry in the first branch: try the alternative
      ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
      StgTSO_trec(CurrentTSO) = trec;
      StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
      R1 = StgCatchRetryFrame_alt_code(frame);
      jump stg_ap_v_fast;
    } else {
      // Retry in the alternative code: propagate the retry
      StgTSO_trec(CurrentTSO) = outer;
      Sp = Sp + SIZEOF_StgCatchRetryFrame;
      goto retry_pop_stack;
    }
  }

  // We've reached the ATOMICALLY_FRAME: attempt to wait 
  ASSERT(frame_type == ATOMICALLY_FRAME);
  if (outer != NO_TREC) {
    // We called retry while checking invariants, so abort the current
    // invariant check (merging its TVar accesses into the parent's read
    // set so we'll wait on them)
    foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
    foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
    trec = outer;
    StgTSO_trec(CurrentTSO) = trec;
    ("ptr" outer) = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
  }
  ASSERT(outer == NO_TREC);

  (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
  if (r != 0) {
    // Transaction was valid: stmWait put us on the TVars' queues, we now block
    StgHeader_info(frame) = stg_atomically_waiting_frame_info;
    Sp = frame;
    // Fix up the stack in the unregisterised case: the return convention is different.
    IF_NOT_REG_R1(Sp_adj(-2); 
                  Sp(1) = stg_NO_FINALIZER_closure;
                  Sp(0) = stg_ut_1_0_unreg_info;)
    R3 = trec; // passing to stmWaitUnblock()
    jump stg_block_stmwait;
  } else {
    // Transaction was not valid: retry immediately
    ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
    StgTSO_trec(CurrentTSO) = trec;
    R1 = StgAtomicallyFrame_code(frame);
    Sp = frame;
    jump stg_ap_v_fast;
  }
}
1339
1340
/* check# : queue the invariant closure in R1 on the current transaction;
 * it will be checked when the enclosing atomically frame commits. */
checkzh_fast
{
  W_ trec, closure;

  /* Args: R1 = invariant closure */
  MAYBE_GC (R1_PTR, checkzh_fast); 

  trec = StgTSO_trec(CurrentTSO);
  closure = R1;
  foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
                                     trec "ptr",
                                     closure "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1356
1357
/* newTVar# : allocate a fresh TVar holding the value in R1. */
newTVarzh_fast
{
  W_ tv;
  W_ new_value;

  /* Args: R1 = initialisation value */

  /* stmNewTVar allocates, so GC first if necessary */
  MAYBE_GC (R1_PTR, newTVarzh_fast); 
  new_value = R1;
  ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
  RET_P(tv);
}
1370
1371
/* readTVar# : read a TVar through the current transaction record,
 * logging the access for later validation. */
readTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ result;

  /* Args: R1 = TVar closure */

  MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];

  RET_P(result);
}
1387
1388
/* writeTVar# : record a write to a TVar in the current transaction
 * record; the TVar itself is updated only at commit time. */
writeTVarzh_fast
{
  W_ trec;
  W_ tvar;
  W_ new_value;
  
  /* Args: R1 = TVar closure */
  /*       R2 = New value    */

  MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
  trec = StgTSO_trec(CurrentTSO);
  tvar = R1;
  new_value = R2;
  foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];

  jump %ENTRY_CODE(Sp(0));
}
1406
1407
1408 /* -----------------------------------------------------------------------------
1409  * MVar primitives
1410  *
1411  * take & putMVar work as follows.  Firstly, an important invariant:
1412  *
1413  *    If the MVar is full, then the blocking queue contains only
1414  *    threads blocked on putMVar, and if the MVar is empty then the
1415  *    blocking queue contains only threads blocked on takeMVar.
1416  *
 * takeMVar:
1418  *    MVar empty : then add ourselves to the blocking queue
1419  *    MVar full  : remove the value from the MVar, and
1420  *                 blocking queue empty     : return
1421  *                 blocking queue non-empty : perform the first blocked putMVar
1422  *                                            from the queue, and wake up the
1423  *                                            thread (MVar is now full again)
1424  *
1425  * putMVar is just the dual of the above algorithm.
1426  *
1427  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1428  * the stack of the thread waiting to do the putMVar.  See
1429  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1430  * the stack layout, and the PerformPut and PerformTake macros below.
1431  *
1432  * It is important that a blocked take or put is woken up with the
1433  * take/put already performed, because otherwise there would be a
1434  * small window of vulnerability where the thread could receive an
1435  * exception and never perform its take or put, and we'd end up with a
1436  * deadlock.
1437  *
1438  * -------------------------------------------------------------------------- */
1439
/* isEmptyMVar# : test the MVar's info pointer; returns 1 if empty,
 * 0 if full.  Note: no lock is taken, so in THREADED_RTS the answer is
 * only a snapshot. */
isEmptyMVarzh_fast
{
    /* args: R1 = MVar closure */

    if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
        RET_N(1);
    } else {
        RET_N(0);
    }
}
1450
/* newMVar# : allocate an empty MVar with empty blocking queues. */
newMVarzh_fast
{
    /* args: none */
    W_ mvar;

    ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
  
    mvar = Hp - SIZEOF_StgMVar + WDS(1);
    SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
    /* END_TSO_QUEUE doubles as both "no waiting threads" and "no value" */
    StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
    StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
    RET_P(mvar);
}
1465
1466
/* If R1 isn't available, pass it on the stack */
/* PerformTake: complete a blocked takeMVar on behalf of another thread by
 * writing the taken value and the appropriate return-frame info pointer
 * onto that thread's stack (see stg_block_takemvar in HeapStackCheck.cmm
 * for the stack layout these offsets match). */
#ifdef REG_R1
#define PerformTake(tso, value)                         \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
#else
#define PerformTake(tso, value)                                 \
    W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
    W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
#endif

/* PerformPut: complete a blocked putMVar for another thread -- pop its
 * block frame (3 words) and read the value it was trying to put. */
#define PerformPut(tso,lval)                    \
    StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
    lval = W_[StgTSO_sp(tso) - WDS(1)];
1481
/* takeMVar# : if the MVar is empty, enqueue ourselves on its blocking
 * queue and block; if full, take the value and, if a putMVar is waiting,
 * perform that put on the waiter's stack and wake it (see the invariant
 * in the comment block above). */
takeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */
    mvar = R1;

#if defined(THREADED_RTS)
    /* lockClosure also returns the info pointer it replaced */
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    /* If the MVar is empty, put ourselves on its blocking queue,
     * and wait until we're woken up.
     */
    if (info == stg_EMPTY_MVAR_info) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        /* stg_block_takemvar also unlocks the closure in THREADED_RTS */
        jump stg_block_takemvar;
  }

  /* we got the value... */
  val = StgMVar_value(mvar);

  if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
  {
      /* There are putMVar(s) waiting... 
       * wake up the first thread on the queue
       */
      ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

      /* actually perform the putMVar for the thread that we just woke up */
      tso = StgMVar_head(mvar);
      PerformPut(tso,StgMVar_value(mvar));
      /* the woken TSO's stack was mutated, so mark it dirty for the GC */
      dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
      /* ToDo: check 2nd arg (mvar) is right */
      ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
      StgMVar_head(mvar) = tso;
#else
      ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", 
                                         StgMVar_head(mvar) "ptr") [];
      StgMVar_head(mvar) = tso;
#endif

      if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
          StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
      }

#if defined(THREADED_RTS)
      /* a pending put was performed, so the MVar stays full */
      unlockClosure(mvar, stg_FULL_MVAR_info);
#endif
      RET_P(val);
  } 
  else
  {
      /* No further putMVars, MVar is now empty */
      StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
 
#if defined(THREADED_RTS)
      unlockClosure(mvar, stg_EMPTY_MVAR_info);
#else
      SET_INFO(mvar,stg_EMPTY_MVAR_info);
#endif

      RET_P(val);
  }
}
1560
1561
/* tryTakeMVar# : non-blocking takeMVar.  Returns (0, NO_FINALIZER) if
 * the MVar is empty, otherwise (1, value), performing a pending putMVar
 * exactly as takeMVarzh_fast does. */
tryTakeMVarzh_fast
{
    W_ mvar, val, info, tso;

    /* args: R1 = MVar closure */

    mvar = R1;

#if defined(THREADED_RTS)
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_EMPTY_MVAR_info) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_EMPTY_MVAR_info);
#endif
        /* HACK: we need a pointer to pass back, 
         * so we abuse NO_FINALIZER_closure
         */
        RET_NP(0, stg_NO_FINALIZER_closure);
    }

    /* we got the value... */
    val = StgMVar_value(mvar);

    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are putMVar(s) waiting... 
         * wake up the first thread on the queue
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the putMVar for the thread that we just woke up */
        tso = StgMVar_head(mvar);
        PerformPut(tso,StgMVar_value(mvar));
        /* the woken TSO's stack was mutated, so mark it dirty for the GC */
        dirtyTSO(tso);

#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr",
                                           StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }
#if defined(THREADED_RTS)
        /* a pending put was performed, so the MVar stays full */
        unlockClosure(mvar, stg_FULL_MVAR_info);
#endif
    }
    else 
    {
        /* No further putMVars, MVar is now empty */
        StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_EMPTY_MVAR_info);
#else
        SET_INFO(mvar,stg_EMPTY_MVAR_info);
#endif
    }
    
    RET_NP(1, val);
}
1631
1632
/* putMVar# : if the MVar is full, enqueue ourselves on its blocking
 * queue and block; otherwise either hand the value directly to the first
 * blocked takeMVar (performing its take on its stack and waking it) or
 * store the value, making the MVar full. */
putMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    /* R2 is live across the call, hence the [R2] annotation */
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_FULL_MVAR_info) {
        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_head(mvar) = CurrentTSO;
        } else {
            StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
        }
        StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
        StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
        StgTSO_block_info(CurrentTSO)  = mvar;
        StgMVar_tail(mvar) = CurrentTSO;
        
        /* stg_block_putmvar also unlocks the closure in THREADED_RTS */
        jump stg_block_putmvar;
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);

        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        /* the woken TSO's stack was mutated, so mark it dirty for the GC */
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        /* NOTE(review): this GRAN/PAR call passes three args while the
         * GRAN/PAR branch in takeMVarzh_fast passes two -- confirm which
         * signature unblockOne has in those builds. */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        /* the value went straight to a waiting taker, so the MVar is empty */
        unlockClosure(mvar, stg_EMPTY_MVAR_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_FULL_MVAR_info);
#else
        SET_INFO(mvar,stg_FULL_MVAR_info);
#endif
        jump %ENTRY_CODE(Sp(0));
    }
    
    /* ToDo: yield afterward for better communication performance? */
}
1704
1705
/* tryPutMVar# : non-blocking putMVar.  Returns 0 if the MVar is already
 * full, otherwise stores/hands over the value as putMVarzh_fast does and
 * returns 1. */
tryPutMVarzh_fast
{
    W_ mvar, info, tso;

    /* args: R1 = MVar, R2 = value */
    mvar = R1;

#if defined(THREADED_RTS)
    /* R2 is live across the call, hence the [R2] annotation */
    ("ptr" info) = foreign "C" lockClosure(mvar "ptr") [R2];
#else
    info = GET_INFO(mvar);
#endif

    if (info == stg_FULL_MVAR_info) {
#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_FULL_MVAR_info);
#endif
        RET_N(0);
    }
  
    if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {

        /* There are takeMVar(s) waiting: wake up the first one
         */
        ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
        
        /* actually perform the takeMVar */
        tso = StgMVar_head(mvar);
        PerformTake(tso, R2);
        /* the woken TSO's stack was mutated, so mark it dirty for the GC */
        dirtyTSO(tso);
      
#if defined(GRAN) || defined(PAR)
        /* ToDo: check 2nd arg (mvar) is right */
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
        StgMVar_head(mvar) = tso;
#else
        ("ptr" tso) = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
        StgMVar_head(mvar) = tso;
#endif

        if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
            StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
        }

#if defined(THREADED_RTS)
        /* the value went straight to a waiting taker, so the MVar is empty */
        unlockClosure(mvar, stg_EMPTY_MVAR_info);
#endif
    }
    else
    {
        /* No further takes, the MVar is now full. */
        StgMVar_value(mvar) = R2;

#if defined(THREADED_RTS)
        unlockClosure(mvar, stg_FULL_MVAR_info);
#else
        SET_INFO(mvar,stg_FULL_MVAR_info);
#endif
    }
    
    RET_N(1);
    /* ToDo: yield afterward for better communication performance? */
}
1769
1770
1771 /* -----------------------------------------------------------------------------
1772    Stable pointer primitives
1773    -------------------------------------------------------------------------  */
1774
/* makeStableName# : look up (or create) the stable-name table entry for
 * the closure in R1 and return its StgStableName heap object, allocating
 * one only the first time this object is named. */
makeStableNamezh_fast
{
    W_ index, sn_obj;

    ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
  
    (index) = foreign "C" lookupStableName(R1 "ptr") [];

    /* Is there already a StableName for this heap object?
     *  stable_ptr_table is a pointer to an array of snEntry structs.
     */
    if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
        /* no: allocate a fresh StgStableName and cache it in the table */
        sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
        SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
        StgStableName_sn(sn_obj) = index;
        snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
    } else {
        sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
    }
    
    RET_P(sn_obj);
}
1797
1798
/* makeStablePtr# : register the closure in R1 with the stable-pointer
 * table and return the resulting StablePtr (an unboxed value). */
makeStablePtrzh_fast
{
    /* Args: R1 = a */
    W_ sp;
    /* getStablePtr may enlarge the stable pointer table */
    MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
    ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") [];
    RET_N(sp);
}
1807
/* deRefStablePtr# : index the stable-pointer table with the StablePtr in
 * R1 and return the closure it refers to. */
deRefStablePtrzh_fast
{
    /* Args: R1 = the stable ptr */
    W_ r, sp;
    sp = R1;
    r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
    RET_P(r);
}
1816
1817 /* -----------------------------------------------------------------------------
1818    Bytecode object primitives
1819    -------------------------------------------------------------------------  */
1820
/* newBCO# : allocate a byte-code object.  The BCO is sized to hold the
 * fixed header plus a copy of the arity/bitmap words from R5. */
newBCOzh_fast
{
    /* R1 = instrs
       R2 = literals
       R3 = ptrs
       R4 = arity
       R5 = bitmap array
    */
    W_ bco, bitmap_arr, bytes, words;
    
    bitmap_arr = R5;

    words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
    bytes = WDS(words);

    ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );

    bco = Hp - bytes + WDS(1);
    SET_HDR(bco, stg_BCO_info, W_[CCCS]);
    
    StgBCO_instrs(bco)     = R1;
    StgBCO_literals(bco)   = R2;
    StgBCO_ptrs(bco)       = R3;
    /* arity and size are half-word fields in the BCO */
    StgBCO_arity(bco)      = HALF_W_(R4);
    StgBCO_size(bco)       = HALF_W_(words);
    
    // Copy the arity/bitmap info into the BCO
    W_ i;
    i = 0;
for:
    if (i < StgArrWords_words(bitmap_arr)) {
        StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
        i = i + 1;
        goto for;
    }
    
    RET_P(bco);
}
1859
1860
/* mkApUpd0# : wrap a zero-arity BCO in an updatable AP thunk. */
mkApUpd0zh_fast
{
    // R1 = the BCO# for the AP
    //  
    W_ ap;

    // This function is *only* used to wrap zero-arity BCOs in an
    // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
    // saturated and always points directly to a FUN or BCO.
    ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
           StgBCO_arity(R1) == HALF_W_(0));

    HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
    TICK_ALLOC_UP_THK(0, 0);
    CCCS_ALLOC(SIZEOF_StgAP);

    ap = Hp - SIZEOF_StgAP + WDS(1);
    SET_HDR(ap, stg_AP_info, W_[CCCS]);
    
    /* zero arguments: the AP's payload is just the function itself */
    StgAP_n_args(ap) = HALF_W_(0);
    StgAP_fun(ap) = R1;
    
    RET_P(ap);
}
1885
/* unpackClosure# : split a closure into (info ptr, array of pointer
 * fields, array of non-pointer words) for heap inspection (used by
 * GHC.Exts/vacuum-style tools). */
unpackClosurezh_fast
{
/* args: R1 = closure to analyze */
// TODO: Consider the absence of ptrs or nonptrs as a special case ?

    W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
    info  = %GET_STD_INFO(UNTAG(R1));

    // Some closures have non-standard layout, so we omit those here.
    W_ type;
    type = TO_W_(%INFO_TYPE(info));
    switch [0 .. N_CLOSURE_TYPES] type {
    case THUNK_SELECTOR : {
        /* a selector has exactly one pointer (the selectee) */
        ptrs = 1;
        nptrs = 0;
        goto out;
    }
    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
        /* non-standard layouts: report no payload rather than mis-read it */
        ptrs = 0;
        nptrs = 0;
        goto out;
    }
    default: {
        /* standard layout: counts come straight from the info table */
        ptrs  = TO_W_(%INFO_PTRS(info)); 
        nptrs = TO_W_(%INFO_NPTRS(info));
        goto out;
    }}
out:

    W_ ptrs_arr_sz, nptrs_arr_sz;
    nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
    ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);

    ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);

    W_ clos;
    clos = UNTAG(R1);

    /* both arrays come from the same allocation: ptrs_arr below nptrs_arr */
    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);

    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
    p = 0;
for:
    if(p < ptrs) {
         W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
         p = p + 1;
         goto for;
    }
    
    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
    StgArrWords_words(nptrs_arr) = nptrs;
    p = 0;
for2:
    if(p < nptrs) {
         /* non-pointer words follow the pointer fields in the payload */
         W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
         p = p + 1;
         goto for2;
    }
    RET_NPP(info, ptrs_arr, nptrs_arr);
}
1949
1950 /* -----------------------------------------------------------------------------
1951    Thread I/O blocking primitives
1952    -------------------------------------------------------------------------- */
1953
/* Add a thread to the end of the blocked queue. (C-- version of the C
 * macro in Schedule.h).
 *
 * The thread must not already be on a queue (its link field must be
 * END_TSO_QUEUE).  Maintains both blocked_queue_hd (head, used when the
 * queue is empty) and blocked_queue_tl (tail, linked onto otherwise).
 * No locking: only used by the non-threaded RTS.
 */
#define APPEND_TO_BLOCKED_QUEUE(tso)                    \
    ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
    if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
      W_[blocked_queue_hd] = tso;                       \
    } else {                                            \
      StgTSO_link(W_[blocked_queue_tl]) = tso;          \
    }                                                   \
    W_[blocked_queue_tl] = tso;
1965
waitReadzh_fast
{
    /* args: R1 = file descriptor to wait on for reading */
#ifdef THREADED_RTS
    // The threaded RTS handles blocking I/O differently; this primop
    // must never be reached there.
    foreign "C" barf("waitRead# on threaded RTS");
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
    // block_info records the fd the scheduler should wait on.
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
1982
waitWritezh_fast
{
    /* args: R1 = file descriptor to wait on for writing */
#ifdef THREADED_RTS
    // The threaded RTS handles blocking I/O differently; this primop
    // must never be reached there.
    foreign "C" barf("waitWrite# on threaded RTS");
#else

    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
    // block_info records the fd the scheduler should wait on.
    StgTSO_block_info(CurrentTSO) = R1;
    // No locking - we're not going to use this interface in the
    // threaded RTS anyway.
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_noregs;
#endif
}
1999
2000
STRING(stg_delayzh_malloc_str, "delayzh_fast")
// delay# : block the current thread for (at least) R1 microseconds.
delayzh_fast
{
#ifdef mingw32_HOST_OS
    W_ ares;            // StgAsyncIOResult describing the pending request
    CInt reqID;
#else
    W_ t, prev, target;
#endif

#ifdef THREADED_RTS
    // The threaded RTS implements threadDelay in Haskell/C instead.
    foreign "C" barf("delay# on threaded RTS");
#else

    /* args: R1 (microsecond delay amount) */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;

#ifdef mingw32_HOST_OS

    /* could probably allocate this on the heap instead */
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_delayzh_malloc_str);
    // Register the delay with the Win32 async-I/O worker.
    (reqID) = foreign "C" addDelayRequest(R1);
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;

    /* Having all async-blocked threads reside on the blocked_queue
     * simplifies matters, so change the status to OnDoProc and put the
     * delayed thread on the blocked_queue.
     */
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async_void;

#else

    /* Convert the delay from microseconds to timer ticks, rounding up,
     * and compute the absolute tick count at which to wake.
     */
    W_ time;
    W_ divisor;
    (time) = foreign "C" getourtimeofday() [R1];
    divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000;
    target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
           + time + 1; /* Add 1 as getourtimeofday rounds down */
    StgTSO_block_info(CurrentTSO) = target;

    /* Insert the new thread in the sleeping queue, which is kept
     * sorted by wake-up time (earliest first). */
    prev = NULL;
    t = W_[sleeping_queue];
while:
    if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
        prev = t;
        t = StgTSO_link(t);
        goto while;
    }

    // Splice CurrentTSO in between prev and t.
    StgTSO_link(CurrentTSO) = t;
    if (prev == NULL) {
        W_[sleeping_queue] = CurrentTSO;
    } else {
        StgTSO_link(prev) = CurrentTSO;
    }
    jump stg_block_noregs;
#endif
#endif /* !THREADED_RTS */
}
2068
2069
2070 #ifdef mingw32_HOST_OS
STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
// asyncRead# : start an asynchronous read via the Win32 I/O worker and
// block the current thread until it completes.
asyncReadzh_fast
{
    W_ ares;            // StgAsyncIOResult describing the pending request
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncRead# on threaded RTS");
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;

    /* could probably allocate this on the heap instead */
    // [R1,R2,R3,R4] keeps the argument registers live across the call.
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncReadzh_malloc_str)
                        [R1,R2,R3,R4];
    // FALSE = read request (asyncWritezh_fast passes TRUE here).
    (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2098
STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
// asyncWrite# : start an asynchronous write via the Win32 I/O worker and
// block the current thread until it completes.
asyncWritezh_fast
{
    W_ ares;            // StgAsyncIOResult describing the pending request
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncWrite# on threaded RTS");
#else

    /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

    // [R1,R2,R3,R4] keeps the argument registers live across the call.
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncWritezh_malloc_str)
                        [R1,R2,R3,R4];
    // TRUE = write request (asyncReadzh_fast passes FALSE here).
    (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];

    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO)  = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2126
STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
// asyncDoProc# : run a procedure on the Win32 worker thread and block
// the current thread until it finishes.
asyncDoProczh_fast
{
    W_ ares;            // StgAsyncIOResult describing the pending request
    CInt reqID;

#ifdef THREADED_RTS
    foreign "C" barf("asyncDoProc# on threaded RTS");
#else

    /* args: R1 = proc, R2 = param */
    ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
    StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;

    /* could probably allocate this on the heap instead */
    // [R1,R2] keeps the argument registers live across the call.
    ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
                                            stg_asyncDoProczh_malloc_str) 
                                [R1,R2];
    (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
    StgAsyncIOResult_reqID(ares)   = reqID;
    StgAsyncIOResult_len(ares)     = 0;
    StgAsyncIOResult_errCode(ares) = 0;
    StgTSO_block_info(CurrentTSO) = ares;
    APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
    jump stg_block_async;
#endif
}
2154 #endif
2155
// noDuplicate# tries to ensure that none of the thunks under
// evaluation by the current thread are also under evaluation by
// another thread.  It relies on *both* threads doing noDuplicate#;
// the second one will get blocked if they are duplicating some work.
noDuplicatezh_fast
{
    // threadPaused inspects our stack, so flush the live thread state
    // out to the TSO first.
    SAVE_THREAD_STATE();
    ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
    foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
    
    // threadPaused may have changed what_next; if the thread was killed
    // (presumably when the work turned out to be a duplicate -- TODO
    // confirm against Threads.c), finish it here.
    if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
        jump stg_threadFinished;
    } else {
        // Still runnable: restore the thread state and resume at the
        // return address on top of the stack.
        LOAD_THREAD_STATE();
        ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
        jump %ENTRY_CODE(Sp(0));
    }
}
2174
// getApStackVal# : fetch a payload word out of an AP_STACK closure.
// Returns (# ok, val #): ok == 1 and the payload word at the given
// offset when R1 really is an AP_STACK, otherwise ok == 0 and the
// original closure unchanged.
getApStackValzh_fast
{
   W_ stack, idx, result, success;

   /* args: R1 = AP_STACK closure, R2 = payload offset */
   stack = R1;
   idx   = R2;

   // Assume failure: hand the original closure straight back.
   success = 0;
   result  = R1;

   // Only a genuine AP_STACK can be inspected.
   if (%INFO_PTR(stack) == stg_AP_STACK_info) {
        success = 1;
        result  = StgAP_STACK_payload(stack,idx);
   }

   RET_NP(success,result);
}