Make the threaded RTS compilable using -fasm
[ghc.git] / rts / PrimOps.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Out-of-line primitive operations
6  *
7  * This file contains the implementations of all the primitive
8  * operations ("primops") which are not expanded inline.  See
9  * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
10  * this file contains code for most of those with the attribute
11  * out_of_line=True.
12  *
13  * Entry convention: the entry convention for a primop is that all the
14  * args are in Stg registers (R1, R2, etc.).  This is to make writing
15  * the primops easier.  (see compiler/codeGen/CgCallConv.hs).
16  *
17  * Return convention: results from a primop are generally returned
18  * using the ordinary unboxed tuple return convention.  The C-- parser
19  * implements the RET_xxxx() macros to perform unboxed-tuple returns
20  * based on the prevailing return convention.
21  *
22  * This file is written in a subset of C--, extended with various
23  * features specific to GHC.  It is compiled by GHC directly.  For the
24  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
25  *
26  * ---------------------------------------------------------------------------*/
27
28 #include "Cmm.h"
29
30 /*-----------------------------------------------------------------------------
31   Array Primitives
32
33   Basically just new*Array - the others are all inline macros.
34
35   The size arg is always passed in R1, and the result returned in R1.
36
37   The slow entry point is for returning from a heap check, the saved
38   size argument must be re-loaded from the stack.
39   -------------------------------------------------------------------------- */
40
41 /* for objects that are *less* than the size of a word, make sure we
42  * round up to the nearest word for the size of the array.
43  */
44
45 newByteArrayzh_fast
46 {
47     W_ words, payload_words, n, p;
48     MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
49     n = R1;
50     payload_words = ROUNDUP_BYTES_TO_WDS(n);
51     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
52     "ptr" p = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
53     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
54     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
55     StgArrWords_words(p) = payload_words;
56     RET_P(p);
57 }
58
59 newPinnedByteArrayzh_fast
60 {
61     W_ words, payload_words, n, p;
62
63     MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
64     n = R1;
65     payload_words = ROUNDUP_BYTES_TO_WDS(n);
66
67     // We want an 8-byte aligned array.  allocatePinned() gives us
68     // 8-byte aligned memory by default, but we want to align the
69     // *goods* inside the ArrWords object, so we have to check the
70     // size of the ArrWords header and adjust our size accordingly.
71     words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
72     if ((SIZEOF_StgArrWords & 7) != 0) {
73         words = words + 1;
74     }
75
76     "ptr" p = foreign "C" allocatePinned(words) [];
77     TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
78
79     // Again, if the ArrWords header isn't a multiple of 8 bytes, we
80     // have to push the object forward one word so that the goods
81     // fall on an 8-byte boundary.
82     if ((SIZEOF_StgArrWords & 7) != 0) {
83         p = p + WDS(1);
84     }
85
86     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
87     StgArrWords_words(p) = payload_words;
88     RET_P(p);
89 }
90
91 newArrayzh_fast
92 {
93     W_ words, n, init, arr, p;
94     /* Args: R1 = words, R2 = initialisation value */
95
96     n = R1;
97     MAYBE_GC(R2_PTR,newArrayzh_fast);
98
99     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
100     "ptr" arr = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
101     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
102
103     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
104     StgMutArrPtrs_ptrs(arr) = n;
105
106     // Initialise all elements of the the array with the value in R2
107     init = R2;
108     p = arr + SIZEOF_StgMutArrPtrs;
109   for:
110     if (p < arr + WDS(words)) {
111         W_[p] = init;
112         p = p + WDS(1);
113         goto for;
114     }
115
116     RET_P(arr);
117 }
118
119 unsafeThawArrayzh_fast
120 {
121   // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
122   //
123   // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN 
124   // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
125   // it on the mutable list for the GC to remove (removing something from
126   // the mutable list is not easy, because the mut_list is only singly-linked).
127   // 
128   // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
129   // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
130   // to indicate that it is still on the mutable list.
131   //
132   // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
133   // either it is on a mut_list, or it isn't.  We adopt the convention that
134   // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
135   // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
136   // we put it on the mutable list more than once, but it would get scavenged
137   // multiple times during GC, which would be unnecessarily slow.
138   //
139   if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
140         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
141         recordMutable(R1, R1);
142         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
143         RET_P(R1);
144   } else {
145         SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
146         RET_P(R1);
147   }
148 }
149
150 /* -----------------------------------------------------------------------------
151    MutVar primitives
152    -------------------------------------------------------------------------- */
153
154 newMutVarzh_fast
155 {
156     W_ mv;
157     /* Args: R1 = initialisation value */
158
159     ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
160
161     mv = Hp - SIZEOF_StgMutVar + WDS(1);
162     SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
163     StgMutVar_var(mv) = R1;
164     
165     RET_P(mv);
166 }
167
168 atomicModifyMutVarzh_fast
169 {
170     W_ mv, z, x, y, r;
171     /* Args: R1 :: MutVar#,  R2 :: a -> (a,b) */
172
173     /* If x is the current contents of the MutVar#, then 
174        We want to make the new contents point to
175
176          (sel_0 (f x))
177  
178        and the return value is
179          
180          (sel_1 (f x))
181
182         obviously we can share (f x).
183
184          z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
185          y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
186          r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
187     */
188
189 #if MIN_UPD_SIZE > 1
190 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
191 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
192 #else
193 #define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
194 #define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
195 #endif
196
197 #if MIN_UPD_SIZE > 2
198 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
199 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
200 #else
201 #define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
202 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
203 #endif
204
205 #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
206
207    HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
208
209 #if defined(THREADED_RTS)
210     foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
211 #endif
212
213    x = StgMutVar_var(R1);
214
215    TICK_ALLOC_THUNK_2();
216    CCCS_ALLOC(THUNK_2_SIZE);
217    z = Hp - THUNK_2_SIZE + WDS(1);
218    SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
219    LDV_RECORD_CREATE(z);
220    StgThunk_payload(z,0) = R2;
221    StgThunk_payload(z,1) = x;
222
223    TICK_ALLOC_THUNK_1();
224    CCCS_ALLOC(THUNK_1_SIZE);
225    y = z - THUNK_1_SIZE;
226    SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
227    LDV_RECORD_CREATE(y);
228    StgThunk_payload(y,0) = z;
229
230    StgMutVar_var(R1) = y;
231    foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
232
233    TICK_ALLOC_THUNK_1();
234    CCCS_ALLOC(THUNK_1_SIZE);
235    r = y - THUNK_1_SIZE;
236    SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
237    LDV_RECORD_CREATE(r);
238    StgThunk_payload(r,0) = z;
239
240 #if defined(THREADED_RTS)
241     foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
242 #endif
243
244    RET_P(r);
245 }
246
247 /* -----------------------------------------------------------------------------
248    Weak Pointer Primitives
249    -------------------------------------------------------------------------- */
250
251 STRING(stg_weak_msg,"New weak pointer at %p\n")
252
253 mkWeakzh_fast
254 {
255   /* R1 = key
256      R2 = value
257      R3 = finalizer (or NULL)
258   */
259   W_ w;
260
261   if (R3 == NULL) {
262     R3 = stg_NO_FINALIZER_closure;
263   }
264
265   ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
266
267   w = Hp - SIZEOF_StgWeak + WDS(1);
268   SET_HDR(w, stg_WEAK_info, W_[CCCS]);
269
270   StgWeak_key(w)       = R1;
271   StgWeak_value(w)     = R2;
272   StgWeak_finalizer(w) = R3;
273
274   StgWeak_link(w)       = W_[weak_ptr_list];
275   W_[weak_ptr_list]     = w;
276
277   IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
278
279   RET_P(w);
280 }
281
282
283 finalizzeWeakzh_fast
284 {
285   /* R1 = weak ptr
286    */
287   W_ w, f;
288
289   w = R1;
290
291   // already dead?
292   if (GET_INFO(w) == stg_DEAD_WEAK_info) {
293       RET_NP(0,stg_NO_FINALIZER_closure);
294   }
295
296   // kill it
297 #ifdef PROFILING
298   // @LDV profiling
299   // A weak pointer is inherently used, so we do not need to call
300   // LDV_recordDead_FILL_SLOP_DYNAMIC():
301   //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
302   // or, LDV_recordDead():
303   //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
304   // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
305   // large as weak pointers, so there is no need to fill the slop, either.
306   // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
307 #endif
308
309   //
310   // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
311   //
312   SET_INFO(w,stg_DEAD_WEAK_info);
313   LDV_RECORD_CREATE(w);
314
315   f = StgWeak_finalizer(w);
316   StgDeadWeak_link(w) = StgWeak_link(w);
317
318   /* return the finalizer */
319   if (f == stg_NO_FINALIZER_closure) {
320       RET_NP(0,stg_NO_FINALIZER_closure);
321   } else {
322       RET_NP(1,f);
323   }
324 }
325
326 deRefWeakzh_fast
327 {
328   /* R1 = weak ptr */
329   W_ w, code, val;
330
331   w = R1;
332   if (GET_INFO(w) == stg_WEAK_info) {
333     code = 1;
334     val = StgWeak_value(w);
335   } else {
336     code = 0;
337     val = w;
338   }
339   RET_NP(code,val);
340 }
341
342 /* -----------------------------------------------------------------------------
343    Arbitrary-precision Integer operations.
344
345    There are some assumptions in this code that mp_limb_t == W_.  This is
346    the case for all the platforms that GHC supports, currently.
347    -------------------------------------------------------------------------- */
348
349 int2Integerzh_fast
350 {
351    /* arguments: R1 = Int# */
352
353    W_ val, s, p;        /* to avoid aliasing */
354
355    val = R1;
356    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
357
358    p = Hp - SIZEOF_StgArrWords;
359    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
360    StgArrWords_words(p) = 1;
361
362    /* mpz_set_si is inlined here, makes things simpler */
363    if (%lt(val,0)) { 
364         s  = -1;
365         Hp(0) = -val;
366    } else { 
367      if (%gt(val,0)) {
368         s = 1;
369         Hp(0) = val;
370      } else {
371         s = 0;
372      }
373   }
374
375    /* returns (# size  :: Int#, 
376                  data  :: ByteArray# 
377                #)
378    */
379    RET_NP(s,p);
380 }
381
382 word2Integerzh_fast
383 {
384    /* arguments: R1 = Word# */
385
386    W_ val, s, p;        /* to avoid aliasing */
387
388    val = R1;
389
390    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
391
392    p = Hp - SIZEOF_StgArrWords;
393    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
394    StgArrWords_words(p) = 1;
395
396    if (val != 0) {
397         s = 1;
398         W_[Hp] = val;
399    } else {
400         s = 0;
401    }
402
403    /* returns (# size  :: Int#, 
404                  data  :: ByteArray# #)
405    */
406    RET_NP(s,p);
407 }
408
409
410 /*
411  * 'long long' primops for converting to/from Integers.
412  */
413
414 #ifdef SUPPORT_LONG_LONGS
415
416 int64ToIntegerzh_fast
417 {
418    /* arguments: L1 = Int64# */
419
420    L_ val;
421    W_ hi, lo, s, neg, words_needed, p;
422
423    val = L1;
424    neg = 0;
425
426    hi = TO_W_(val >> 32);
427    lo = TO_W_(val);
428
429    if ( hi != 0 && hi != 0xFFFFFFFF )  { 
430        words_needed = 2;
431    } else { 
432        // minimum is one word
433        words_needed = 1;
434    }
435
436    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
437                NO_PTRS, int64ToIntegerzh_fast );
438
439    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
440    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
441    StgArrWords_words(p) = words_needed;
442
443    if ( %lt(hi,0) ) {
444      neg = 1;
445      lo = -lo;
446      if(lo == 0) {
447        hi = -hi;
448      } else {
449        hi = -hi - 1;
450      }
451    }
452
453    if ( words_needed == 2 )  { 
454       s = 2;
455       Hp(-1) = lo;
456       Hp(0) = hi;
457    } else { 
458        if ( lo != 0 ) {
459            s = 1;
460            Hp(0) = lo;
461        } else /* val==0 */  {
462            s = 0;
463        }
464    }
465    if ( neg != 0 ) {
466         s = -s;
467    }
468
469    /* returns (# size  :: Int#, 
470                  data  :: ByteArray# #)
471    */
472    RET_NP(s,p);
473 }
474 word64ToIntegerzh_fast
475 {
476    /* arguments: L1 = Word64# */
477
478    L_ val;
479    W_ hi, lo, s, words_needed, p;
480
481    val = L1;
482    hi = TO_W_(val >> 32);
483    lo = TO_W_(val);
484
485    if ( hi != 0 ) {
486       words_needed = 2;
487    } else {
488       words_needed = 1;
489    }
490
491    ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
492                NO_PTRS, word64ToIntegerzh_fast );
493
494    p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
495    SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
496    StgArrWords_words(p) = words_needed;
497
498    if ( hi != 0 ) { 
499      s = 2;
500      Hp(-1) = lo;
501      Hp(0)  = hi;
502    } else {
503       if ( lo != 0 ) {
504         s = 1;
505         Hp(0) = lo;
506      } else /* val==0 */  {
507       s = 0;
508      }
509   }
510
511    /* returns (# size  :: Int#, 
512                  data  :: ByteArray# #)
513    */
514    RET_NP(s,p);
515 }
516
517
518
519 #endif /* SUPPORT_LONG_LONGS */
520
521 /* ToDo: this is shockingly inefficient */
522
523 #ifndef THREADED_RTS
524 section "bss" {
525   mp_tmp1:
526     bits8 [SIZEOF_MP_INT];
527 }
528
529 section "bss" {
530   mp_tmp2:
531     bits8 [SIZEOF_MP_INT];
532 }
533
534 section "bss" {
535   mp_result1:
536     bits8 [SIZEOF_MP_INT];
537 }
538
539 section "bss" {
540   mp_result2:
541     bits8 [SIZEOF_MP_INT];
542 }
543 #endif
544
545 #ifdef THREADED_RTS
546 #define FETCH_MP_TEMP(X) \
547 W_ X; \
548 X = BaseReg + (OFFSET_StgRegTable_r ## X);
549 #else
550 #define FETCH_MP_TEMP(X) /* Nothing */
551 #endif
552
553 #define GMP_TAKE2_RET1(name,mp_fun)                                     \
554 name                                                                    \
555 {                                                                       \
556   CInt s1, s2;                                                          \
557   W_ d1, d2;                                                            \
558   FETCH_MP_TEMP(mp_tmp1);                                               \
559   FETCH_MP_TEMP(mp_tmp2);                                               \
560   FETCH_MP_TEMP(mp_result1)                                             \
561   FETCH_MP_TEMP(mp_result2);                                            \
562                                                                         \
563   /* call doYouWantToGC() */                                            \
564   MAYBE_GC(R2_PTR & R4_PTR, name);                                      \
565                                                                         \
566   s1 = W_TO_INT(R1);                                                    \
567   d1 = R2;                                                              \
568   s2 = W_TO_INT(R3);                                                    \
569   d2 = R4;                                                              \
570                                                                         \
571   MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1));          \
572   MP_INT__mp_size(mp_tmp1)  = (s1);                                     \
573   MP_INT__mp_d(mp_tmp1)     = BYTE_ARR_CTS(d1);                         \
574   MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2));          \
575   MP_INT__mp_size(mp_tmp2)  = (s2);                                     \
576   MP_INT__mp_d(mp_tmp2)     = BYTE_ARR_CTS(d2);                         \
577                                                                         \
578   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
579                                                                         \
580   /* Perform the operation */                                           \
581   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1  "ptr",mp_tmp2  "ptr") []; \
582                                                                         \
583   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
584          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
585 }
586
587 #define GMP_TAKE1_RET1(name,mp_fun)                                     \
588 name                                                                    \
589 {                                                                       \
590   CInt s1;                                                              \
591   W_ d1;                                                                \
592   FETCH_MP_TEMP(mp_tmp1);                                               \
593   FETCH_MP_TEMP(mp_result1)                                             \
594                                                                         \
595   /* call doYouWantToGC() */                                            \
596   MAYBE_GC(R2_PTR, name);                                               \
597                                                                         \
598   d1 = R2;                                                              \
599   s1 = W_TO_INT(R1);                                                    \
600                                                                         \
601   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));      \
602   MP_INT__mp_size(mp_tmp1)      = (s1);                                 \
603   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                     \
604                                                                         \
605   foreign "C" __gmpz_init(mp_result1 "ptr") [];                            \
606                                                                         \
607   /* Perform the operation */                                           \
608   foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") [];                \
609                                                                         \
610   RET_NP(TO_W_(MP_INT__mp_size(mp_result1)),                            \
611          MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords);                \
612 }
613
614 #define GMP_TAKE2_RET2(name,mp_fun)                                                     \
615 name                                                                                    \
616 {                                                                                       \
617   CInt s1, s2;                                                                          \
618   W_ d1, d2;                                                                            \
619   FETCH_MP_TEMP(mp_tmp1);                                                               \
620   FETCH_MP_TEMP(mp_tmp2);                                                               \
621   FETCH_MP_TEMP(mp_result1)                                                             \
622   FETCH_MP_TEMP(mp_result2)                                                             \
623                                                                                         \
624   /* call doYouWantToGC() */                                                            \
625   MAYBE_GC(R2_PTR & R4_PTR, name);                                                      \
626                                                                                         \
627   s1 = W_TO_INT(R1);                                                                    \
628   d1 = R2;                                                                              \
629   s2 = W_TO_INT(R3);                                                                    \
630   d2 = R4;                                                                              \
631                                                                                         \
632   MP_INT__mp_alloc(mp_tmp1)     = W_TO_INT(StgArrWords_words(d1));                      \
633   MP_INT__mp_size(mp_tmp1)      = (s1);                                                 \
634   MP_INT__mp_d(mp_tmp1)         = BYTE_ARR_CTS(d1);                                     \
635   MP_INT__mp_alloc(mp_tmp2)     = W_TO_INT(StgArrWords_words(d2));                      \
636   MP_INT__mp_size(mp_tmp2)      = (s2);                                                 \
637   MP_INT__mp_d(mp_tmp2)         = BYTE_ARR_CTS(d2);                                     \
638                                                                                         \
639   foreign "C" __gmpz_init(mp_result1 "ptr") [];                                               \
640   foreign "C" __gmpz_init(mp_result2 "ptr") [];                                               \
641                                                                                         \
642   /* Perform the operation */                                                           \
643   foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") [];    \
644                                                                                         \
645   RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)),                                          \
646            MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords,                               \
647            TO_W_(MP_INT__mp_size(mp_result2)),                                          \
648            MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords);                              \
649 }
650
651 GMP_TAKE2_RET1(plusIntegerzh_fast,     __gmpz_add)
652 GMP_TAKE2_RET1(minusIntegerzh_fast,    __gmpz_sub)
653 GMP_TAKE2_RET1(timesIntegerzh_fast,    __gmpz_mul)
654 GMP_TAKE2_RET1(gcdIntegerzh_fast,      __gmpz_gcd)
655 GMP_TAKE2_RET1(quotIntegerzh_fast,     __gmpz_tdiv_q)
656 GMP_TAKE2_RET1(remIntegerzh_fast,      __gmpz_tdiv_r)
657 GMP_TAKE2_RET1(divExactIntegerzh_fast, __gmpz_divexact)
658 GMP_TAKE2_RET1(andIntegerzh_fast,      __gmpz_and)
659 GMP_TAKE2_RET1(orIntegerzh_fast,       __gmpz_ior)
660 GMP_TAKE2_RET1(xorIntegerzh_fast,      __gmpz_xor)
661 GMP_TAKE1_RET1(complementIntegerzh_fast, __gmpz_com)
662
663 GMP_TAKE2_RET2(quotRemIntegerzh_fast, __gmpz_tdiv_qr)
664 GMP_TAKE2_RET2(divModIntegerzh_fast,  __gmpz_fdiv_qr)
665
666 #ifndef THREADED_RTS
667 section "bss" {
668   mp_tmp_w:  W_; // NB. mp_tmp_w is really an here mp_limb_t
669 }
670 #endif
671
672 gcdIntzh_fast
673 {
674     /* R1 = the first Int#; R2 = the second Int# */
675     W_ r; 
676     FETCH_MP_TEMP(mp_tmp_w);
677
678     W_[mp_tmp_w] = R1;
679     r = foreign "C" __gmpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
680
681     R1 = r;
682     /* Result parked in R1, return via info-pointer at TOS */
683     jump %ENTRY_CODE(Sp(0));
684 }
685
686
687 gcdIntegerIntzh_fast
688 {
689     /* R1 = s1; R2 = d1; R3 = the int */
690     R1 = foreign "C" __gmpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
691     
692     /* Result parked in R1, return via info-pointer at TOS */
693     jump %ENTRY_CODE(Sp(0));
694 }
695
696
697 cmpIntegerIntzh_fast
698 {
699     /* R1 = s1; R2 = d1; R3 = the int */
700     W_ usize, vsize, v_digit, u_digit;
701
702     usize = R1;
703     vsize = 0;
704     v_digit = R3;
705
706     // paraphrased from __gmpz_cmp_si() in the GMP sources
707     if (%gt(v_digit,0)) {
708         vsize = 1;
709     } else { 
710         if (%lt(v_digit,0)) {
711             vsize = -1;
712             v_digit = -v_digit;
713         }
714     }
715
716     if (usize != vsize) {
717         R1 = usize - vsize; 
718         jump %ENTRY_CODE(Sp(0));
719     }
720
721     if (usize == 0) {
722         R1 = 0; 
723         jump %ENTRY_CODE(Sp(0));
724     }
725
726     u_digit = W_[BYTE_ARR_CTS(R2)];
727
728     if (u_digit == v_digit) {
729         R1 = 0; 
730         jump %ENTRY_CODE(Sp(0));
731     }
732
733     if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
734         R1 = usize; 
735     } else {
736         R1 = -usize; 
737     }
738
739     jump %ENTRY_CODE(Sp(0));
740 }
741
742 cmpIntegerzh_fast
743 {
744     /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
745     W_ usize, vsize, size, up, vp;
746     CInt cmp;
747
748     // paraphrased from __gmpz_cmp() in the GMP sources
749     usize = R1;
750     vsize = R3;
751
752     if (usize != vsize) {
753         R1 = usize - vsize; 
754         jump %ENTRY_CODE(Sp(0));
755     }
756
757     if (usize == 0) {
758         R1 = 0; 
759         jump %ENTRY_CODE(Sp(0));
760     }
761
762     if (%lt(usize,0)) { // NB. not <, which is unsigned
763         size = -usize;
764     } else {
765         size = usize;
766     }
767
768     up = BYTE_ARR_CTS(R2);
769     vp = BYTE_ARR_CTS(R4);
770
771     cmp = foreign "C" __gmpn_cmp(up "ptr", vp "ptr", size) [];
772
773     if (cmp == 0 :: CInt) {
774         R1 = 0; 
775         jump %ENTRY_CODE(Sp(0));
776     }
777
778     if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
779         R1 = 1;
780     } else {
781         R1 = (-1); 
782     }
783     /* Result parked in R1, return via info-pointer at TOS */
784     jump %ENTRY_CODE(Sp(0));
785 }
786
787 integer2Intzh_fast
788 {
789     /* R1 = s; R2 = d */
790     W_ r, s;
791
792     s = R1;
793     if (s == 0) {
794         r = 0;
795     } else {
796         r = W_[R2 + SIZEOF_StgArrWords];
797         if (%lt(s,0)) {
798             r = -r;
799         }
800     }
801     /* Result parked in R1, return via info-pointer at TOS */
802     R1 = r;
803     jump %ENTRY_CODE(Sp(0));
804 }
805
806 integer2Wordzh_fast
807 {
808   /* R1 = s; R2 = d */
809   W_ r, s;
810
811   s = R1;
812   if (s == 0) {
813     r = 0;
814   } else {
815     r = W_[R2 + SIZEOF_StgArrWords];
816     if (%lt(s,0)) {
817         r = -r;
818     }
819   }
820   /* Result parked in R1, return via info-pointer at TOS */
821   R1 = r;
822   jump %ENTRY_CODE(Sp(0));
823 }
824
825 decodeFloatzh_fast
826
827     W_ p;
828     F_ arg;
829     FETCH_MP_TEMP(mp_tmp1);
830     FETCH_MP_TEMP(mp_tmp_w);
831     
832     /* arguments: F1 = Float# */
833     arg = F1;
834     
835     ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
836     
837     /* Be prepared to tell Lennart-coded __decodeFloat
838        where mantissa._mp_d can be put (it does not care about the rest) */
839     p = Hp - SIZEOF_StgArrWords;
840     SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
841     StgArrWords_words(p) = 1;
842     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
843     
844     /* Perform the operation */
845     foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
846     
847     /* returns: (Int# (expn), Int#, ByteArray#) */
848     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
849 }
850
851 #define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
852 #define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
853
854 decodeDoublezh_fast
855
856     D_ arg;
857     W_ p;
858     FETCH_MP_TEMP(mp_tmp1);
859     FETCH_MP_TEMP(mp_tmp_w);
860
861     /* arguments: D1 = Double# */
862     arg = D1;
863
864     ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
865     
866     /* Be prepared to tell Lennart-coded __decodeDouble
867        where mantissa.d can be put (it does not care about the rest) */
868     p = Hp - ARR_SIZE + WDS(1);
869     SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
870     StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
871     MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
872
873     /* Perform the operation */
874     foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
875     
876     /* returns: (Int# (expn), Int#, ByteArray#) */
877     RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
878 }
879
880 /* -----------------------------------------------------------------------------
881  * Concurrency primitives
882  * -------------------------------------------------------------------------- */
883
884 forkzh_fast
885 {
886   /* args: R1 = closure to spark */
887
888   MAYBE_GC(R1_PTR, forkzh_fast);
889
890   W_ closure;
891   W_ threadid;
892   closure = R1;
893
894   "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr", 
895                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
896                                 closure "ptr") [];
897   foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
898
899   // switch at the earliest opportunity
900   CInt[context_switch] = 1 :: CInt;
901   
902   RET_P(threadid);
903 }
904
905 forkOnzh_fast
906 {
907   /* args: R1 = cpu, R2 = closure to spark */
908
909   MAYBE_GC(R2_PTR, forkOnzh_fast);
910
911   W_ cpu;
912   W_ closure;
913   W_ threadid;
914   cpu = R1;
915   closure = R2;
916
917   "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr", 
918                                 RtsFlags_GcFlags_initialStkSize(RtsFlags), 
919                                 closure "ptr") [];
920   foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
921
922   // switch at the earliest opportunity
923   CInt[context_switch] = 1 :: CInt;
924   
925   RET_P(threadid);
926 }
927
928 yieldzh_fast
929 {
930   jump stg_yield_noregs;
931 }
932
933 myThreadIdzh_fast
934 {
935   /* no args. */
936   RET_P(CurrentTSO);
937 }
938
939 labelThreadzh_fast
940 {
941   /* args: 
942         R1 = ThreadId#
943         R2 = Addr# */
944 #ifdef DEBUG
945   foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
946 #endif
947   jump %ENTRY_CODE(Sp(0));
948 }
949
950 isCurrentThreadBoundzh_fast
951 {
952   /* no args */
953   W_ r;
954   r = foreign "C" isThreadBound(CurrentTSO) [];
955   RET_N(r);
956 }
957
958
959 /* -----------------------------------------------------------------------------
960  * TVar primitives
961  * -------------------------------------------------------------------------- */
962
963 #ifdef REG_R1
964 #define SP_OFF 0
965 #define IF_NOT_REG_R1(x) 
966 #else
967 #define SP_OFF 1
968 #define IF_NOT_REG_R1(x) x
969 #endif
970
971 // Catch retry frame ------------------------------------------------------------
972
973 #if defined(PROFILING)
974 #define CATCH_RETRY_FRAME_BITMAP 7
975 #define CATCH_RETRY_FRAME_WORDS  5
976 #else
977 #define CATCH_RETRY_FRAME_BITMAP 1
978 #define CATCH_RETRY_FRAME_WORDS  3
979 #endif
980
981 INFO_TABLE_RET(stg_catch_retry_frame,
982                CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
983                CATCH_RETRY_FRAME)
984 {
985    W_ r, frame, trec, outer;
986    IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
987
988    frame = Sp;
989    trec = StgTSO_trec(CurrentTSO);
990    "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
991    r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
992    if (r != 0) {
993      /* Succeeded (either first branch or second branch) */
994      StgTSO_trec(CurrentTSO) = outer;
995      Sp = Sp + SIZEOF_StgCatchRetryFrame;
996      IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
997      jump %ENTRY_CODE(Sp(SP_OFF));
998    } else {
999      /* Did not commit: re-execute */
1000      W_ new_trec;
1001      "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1002      StgTSO_trec(CurrentTSO) = new_trec;
1003      if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1004        R1 = StgCatchRetryFrame_alt_code(frame);
1005      } else {
1006        R1 = StgCatchRetryFrame_first_code(frame);
1007      }
1008      jump stg_ap_v_fast;
1009    }
1010 }
1011
1012
1013 // Atomically frame ------------------------------------------------------------
1014
1015 #if defined(PROFILING)
1016 #define ATOMICALLY_FRAME_BITMAP 3
1017 #define ATOMICALLY_FRAME_WORDS  4
1018 #else
1019 #define ATOMICALLY_FRAME_BITMAP 0
1020 #define ATOMICALLY_FRAME_WORDS  2
1021 #endif
1022
1023 INFO_TABLE_RET(stg_atomically_frame,
1024                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1025                ATOMICALLY_FRAME)
1026 {
1027   W_ frame, trec, valid, next_invariant, q, outer;
1028   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1029
1030   frame = Sp;
1031   trec = StgTSO_trec(CurrentTSO);
1032   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1033
1034   if (outer == NO_TREC) {
1035     /* First time back at the atomically frame -- pick up invariants */
1036     "ptr" q = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") [];
1037     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1038
1039   } else {
1040     /* Second/subsequent time back at the atomically frame -- abort the
1041      * tx that's checking the invariant and move on to the next one */
1042     StgTSO_trec(CurrentTSO) = outer;
1043     q = StgAtomicallyFrame_next_invariant_to_check(frame);
1044     StgInvariantCheckQueue_my_execution(q) = trec;
1045     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1046     /* Don't free trec -- it's linked from q and will be stashed in the
1047      * invariant if we eventually commit. */
1048     q = StgInvariantCheckQueue_next_queue_entry(q);
1049     StgAtomicallyFrame_next_invariant_to_check(frame) = q;
1050     trec = outer;
1051   }
1052
1053   q = StgAtomicallyFrame_next_invariant_to_check(frame);
1054
1055   if (q != END_INVARIANT_CHECK_QUEUE) {
1056     /* We can't commit yet: another invariant to check */
1057     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [];
1058     StgTSO_trec(CurrentTSO) = trec;
1059
1060     next_invariant = StgInvariantCheckQueue_invariant(q);
1061     R1 = StgAtomicInvariant_code(next_invariant);
1062     jump stg_ap_v_fast;
1063
1064   } else {
1065
1066     /* We've got no more invariants to check, try to commit */
1067     valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
1068     if (valid != 0) {
1069       /* Transaction was valid: commit succeeded */
1070       StgTSO_trec(CurrentTSO) = NO_TREC;
1071       Sp = Sp + SIZEOF_StgAtomicallyFrame;
1072       IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1073       jump %ENTRY_CODE(Sp(SP_OFF));
1074     } else {
1075       /* Transaction was not valid: try again */
1076       "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1077       StgTSO_trec(CurrentTSO) = trec;
1078       StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1079       R1 = StgAtomicallyFrame_code(frame);
1080       jump stg_ap_v_fast;
1081     }
1082   }
1083 }
1084
1085 INFO_TABLE_RET(stg_atomically_waiting_frame,
1086                ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
1087                ATOMICALLY_FRAME)
1088 {
1089   W_ frame, trec, valid;
1090   IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1091
1092   frame = Sp;
1093
1094   /* The TSO is currently waiting: should we stop waiting? */
1095   valid = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
1096   if (valid != 0) {
1097     /* Previous attempt is still valid: no point trying again yet */
1098           IF_NOT_REG_R1(Sp_adj(-2);
1099                         Sp(1) = stg_NO_FINALIZER_closure;
1100                         Sp(0) = stg_ut_1_0_unreg_info;)
1101     jump stg_block_noregs;
1102   } else {
1103     /* Previous attempt is no longer valid: try again */
1104     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
1105     StgTSO_trec(CurrentTSO) = trec;
1106     StgHeader_info(frame) = stg_atomically_frame_info;
1107     R1 = StgAtomicallyFrame_code(frame);
1108     jump stg_ap_v_fast;
1109   }
1110 }
1111
1112 // STM catch frame --------------------------------------------------------------
1113
1114 #ifdef REG_R1
1115 #define SP_OFF 0
1116 #else
1117 #define SP_OFF 1
1118 #endif
1119
1120 #if defined(PROFILING)
1121 #define CATCH_STM_FRAME_BITMAP 3
1122 #define CATCH_STM_FRAME_WORDS  4
1123 #else
1124 #define CATCH_STM_FRAME_BITMAP 0
1125 #define CATCH_STM_FRAME_WORDS  2
1126 #endif
1127
1128 /* Catch frames are very similar to update frames, but when entering
1129  * one we just pop the frame off the stack and perform the correct
1130  * kind of return to the activation record underneath us on the stack.
1131  */
1132
1133 INFO_TABLE_RET(stg_catch_stm_frame,
1134                CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
1135                CATCH_STM_FRAME)
1136    {
1137       IF_NOT_REG_R1(W_ rval;  rval = Sp(0);  Sp_adj(1); )
1138       W_ r, frame, trec, outer;
1139       frame = Sp;
1140       trec = StgTSO_trec(CurrentTSO);
1141       "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1142       r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
1143       if (r != 0) {
1144         /* Commit succeeded */
1145         StgTSO_trec(CurrentTSO) = outer;
1146         Sp = Sp + SIZEOF_StgCatchSTMFrame;
1147         IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
1148         jump Sp(SP_OFF);
1149       } else {
1150         /* Commit failed */
1151         W_ new_trec;
1152         "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1153         StgTSO_trec(CurrentTSO) = new_trec;
1154         R1 = StgCatchSTMFrame_code(frame);
1155         jump stg_ap_v_fast;
1156       }
1157    }
1158
1159
1160 // Primop definition ------------------------------------------------------------
1161
1162 atomicallyzh_fast
1163 {
1164   W_ frame;
1165   W_ old_trec;
1166   W_ new_trec;
1167   
1168   // stmStartTransaction may allocate
1169   MAYBE_GC (R1_PTR, atomicallyzh_fast); 
1170
1171   /* Args: R1 = m :: STM a */
1172   STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
1173
1174   old_trec = StgTSO_trec(CurrentTSO);
1175
1176   /* Nested transactions are not allowed; raise an exception */
1177   if (old_trec != NO_TREC) {
1178      R1 = base_GHCziIOBase_NestedAtomically_closure;
1179      jump raisezh_fast;
1180   }
1181
1182   /* Set up the atomically frame */
1183   Sp = Sp - SIZEOF_StgAtomicallyFrame;
1184   frame = Sp;
1185
1186   SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
1187   StgAtomicallyFrame_code(frame) = R1;
1188   StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
1189
1190   /* Start the memory transcation */
1191   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
1192   StgTSO_trec(CurrentTSO) = new_trec;
1193
1194   /* Apply R1 to the realworld token */
1195   jump stg_ap_v_fast;
1196 }
1197
1198
1199 catchSTMzh_fast
1200 {
1201   W_ frame;
1202   
1203   /* Args: R1 :: STM a */
1204   /* Args: R2 :: Exception -> STM a */
1205   STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
1206
1207   /* Set up the catch frame */
1208   Sp = Sp - SIZEOF_StgCatchSTMFrame;
1209   frame = Sp;
1210
1211   SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
1212   StgCatchSTMFrame_handler(frame) = R2;
1213   StgCatchSTMFrame_code(frame) = R1;
1214
1215   /* Start a nested transaction to run the body of the try block in */
1216   W_ cur_trec;  
1217   W_ new_trec;
1218   cur_trec = StgTSO_trec(CurrentTSO);
1219   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr");
1220   StgTSO_trec(CurrentTSO) = new_trec;
1221
1222   /* Apply R1 to the realworld token */
1223   jump stg_ap_v_fast;
1224 }
1225
1226
1227 catchRetryzh_fast
1228 {
1229   W_ frame;
1230   W_ new_trec;
1231   W_ trec;
1232
1233   // stmStartTransaction may allocate
1234   MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast); 
1235
1236   /* Args: R1 :: STM a */
1237   /* Args: R2 :: STM a */
1238   STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
1239
1240   /* Start a nested transaction within which to run the first code */
1241   trec = StgTSO_trec(CurrentTSO);
1242   "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
1243   StgTSO_trec(CurrentTSO) = new_trec;
1244
1245   /* Set up the catch-retry frame */
1246   Sp = Sp - SIZEOF_StgCatchRetryFrame;
1247   frame = Sp;
1248   
1249   SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
1250   StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
1251   StgCatchRetryFrame_first_code(frame) = R1;
1252   StgCatchRetryFrame_alt_code(frame) = R2;
1253
1254   /* Apply R1 to the realworld token */
1255   jump stg_ap_v_fast;
1256 }
1257
1258
1259 retryzh_fast
1260 {
1261   W_ frame_type;
1262   W_ frame;
1263   W_ trec;
1264   W_ outer;
1265   W_ r;
1266
1267   MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
1268
1269   // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
1270 retry_pop_stack:
1271   StgTSO_sp(CurrentTSO) = Sp;
1272   frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
1273   Sp = StgTSO_sp(CurrentTSO);
1274   frame = Sp;
1275   trec = StgTSO_trec(CurrentTSO);
1276   "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1277
1278   if (frame_type == CATCH_RETRY_FRAME) {
1279     // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
1280     ASSERT(outer != NO_TREC);
1281     // Abort the transaction attempting the current branch
1282     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1283     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1284     if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) {
1285       // Retry in the first branch: try the alternative
1286       "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1287       StgTSO_trec(CurrentTSO) = trec;
1288       StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
1289       R1 = StgCatchRetryFrame_alt_code(frame);
1290       jump stg_ap_v_fast;
1291     } else {
1292       // Retry in the alternative code: propagate the retry
1293       StgTSO_trec(CurrentTSO) = outer;
1294       Sp = Sp + SIZEOF_StgCatchRetryFrame;
1295       goto retry_pop_stack;
1296     }
1297   }
1298
1299   // We've reached the ATOMICALLY_FRAME: attempt to wait 
1300   ASSERT(frame_type == ATOMICALLY_FRAME);
1301   if (outer != NO_TREC) {
1302     // We called retry while checking invariants, so abort the current
1303     // invariant check (merging its TVar accesses into the parents read
1304     // set so we'll wait on them)
1305     foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
1306     foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") [];
1307     trec = outer;
1308      StgTSO_trec(CurrentTSO) = trec;
1309     "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
1310   }
1311   ASSERT(outer == NO_TREC);
1312
1313   r = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
1314   if (r != 0) {
1315     // Transaction was valid: stmWait put us on the TVars' queues, we now block
1316     StgHeader_info(frame) = stg_atomically_waiting_frame_info;
1317     Sp = frame;
1318     // Fix up the stack in the unregisterised case: the return convention is different.
1319     IF_NOT_REG_R1(Sp_adj(-2); 
1320                   Sp(1) = stg_NO_FINALIZER_closure;
1321                   Sp(0) = stg_ut_1_0_unreg_info;)
1322     R3 = trec; // passing to stmWaitUnblock()
1323     jump stg_block_stmwait;
1324   } else {
1325     // Transaction was not valid: retry immediately
1326     "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
1327     StgTSO_trec(CurrentTSO) = trec;
1328     R1 = StgAtomicallyFrame_code(frame);
1329     Sp = frame;
1330     jump stg_ap_v_fast;
1331   }
1332 }
1333
1334
1335 checkzh_fast
1336 {
1337   W_ trec, closure;
1338
1339   /* Args: R1 = invariant closure */
1340   MAYBE_GC (R1_PTR, checkzh_fast); 
1341
1342   trec = StgTSO_trec(CurrentTSO);
1343   closure = R1;
1344   foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", 
1345                                      trec "ptr",
1346                                      closure "ptr") [];
1347
1348   jump %ENTRY_CODE(Sp(0));
1349 }
1350
1351
1352 newTVarzh_fast
1353 {
1354   W_ tv;
1355   W_ new_value;
1356
1357   /* Args: R1 = initialisation value */
1358
1359   MAYBE_GC (R1_PTR, newTVarzh_fast); 
1360   new_value = R1;
1361   "ptr" tv = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
1362   RET_P(tv);
1363 }
1364
1365
1366 readTVarzh_fast
1367 {
1368   W_ trec;
1369   W_ tvar;
1370   W_ result;
1371
1372   /* Args: R1 = TVar closure */
1373
1374   MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
1375   trec = StgTSO_trec(CurrentTSO);
1376   tvar = R1;
1377   "ptr" result = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
1378
1379   RET_P(result);
1380 }
1381
1382
1383 writeTVarzh_fast
1384 {
1385   W_ trec;
1386   W_ tvar;
1387   W_ new_value;
1388   
1389   /* Args: R1 = TVar closure */
1390   /*       R2 = New value    */
1391
1392   MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
1393   trec = StgTSO_trec(CurrentTSO);
1394   tvar = R1;
1395   new_value = R2;
1396   foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
1397
1398   jump %ENTRY_CODE(Sp(0));
1399 }
1400
1401
1402 /* -----------------------------------------------------------------------------
1403  * MVar primitives
1404  *
1405  * take & putMVar work as follows.  Firstly, an important invariant:
1406  *
1407  *    If the MVar is full, then the blocking queue contains only
1408  *    threads blocked on putMVar, and if the MVar is empty then the
1409  *    blocking queue contains only threads blocked on takeMVar.
1410  *
1411  * takeMvar:
1412  *    MVar empty : then add ourselves to the blocking queue
1413  *    MVar full  : remove the value from the MVar, and
1414  *                 blocking queue empty     : return
1415  *                 blocking queue non-empty : perform the first blocked putMVar
1416  *                                            from the queue, and wake up the
1417  *                                            thread (MVar is now full again)
1418  *
1419  * putMVar is just the dual of the above algorithm.
1420  *
1421  * How do we "perform a putMVar"?  Well, we have to fiddle around with
1422  * the stack of the thread waiting to do the putMVar.  See
1423  * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
1424  * the stack layout, and the PerformPut and PerformTake macros below.
1425  *
1426  * It is important that a blocked take or put is woken up with the
1427  * take/put already performed, because otherwise there would be a
1428  * small window of vulnerability where the thread could receive an
1429  * exception and never perform its take or put, and we'd end up with a
1430  * deadlock.
1431  *
1432  * -------------------------------------------------------------------------- */
1433
1434 isEmptyMVarzh_fast
1435 {
1436     /* args: R1 = MVar closure */
1437
1438     if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
1439         RET_N(1);
1440     } else {
1441         RET_N(0);
1442     }
1443 }
1444
1445 newMVarzh_fast
1446 {
1447     /* args: none */
1448     W_ mvar;
1449
1450     ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
1451   
1452     mvar = Hp - SIZEOF_StgMVar + WDS(1);
1453     SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
1454     StgMVar_head(mvar)  = stg_END_TSO_QUEUE_closure;
1455     StgMVar_tail(mvar)  = stg_END_TSO_QUEUE_closure;
1456     StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1457     RET_P(mvar);
1458 }
1459
1460
1461 /* If R1 isn't available, pass it on the stack */
1462 #ifdef REG_R1
1463 #define PerformTake(tso, value)                         \
1464     W_[StgTSO_sp(tso) + WDS(1)] = value;                \
1465     W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
1466 #else
1467 #define PerformTake(tso, value)                                 \
1468     W_[StgTSO_sp(tso) + WDS(1)] = value;                        \
1469     W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
1470 #endif
1471
1472 #define PerformPut(tso,lval)                    \
1473     StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3);   \
1474     lval = W_[StgTSO_sp(tso) - WDS(1)];
1475
1476 takeMVarzh_fast
1477 {
1478     W_ mvar, val, info, tso;
1479
1480     /* args: R1 = MVar closure */
1481     mvar = R1;
1482
1483 #if defined(THREADED_RTS)
1484     "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
1485 #else
1486     info = GET_INFO(mvar);
1487 #endif
1488
1489     /* If the MVar is empty, put ourselves on its blocking queue,
1490      * and wait until we're woken up.
1491      */
1492     if (info == stg_EMPTY_MVAR_info) {
1493         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1494             StgMVar_head(mvar) = CurrentTSO;
1495         } else {
1496             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1497         }
1498         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1499         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1500         StgTSO_block_info(CurrentTSO)  = mvar;
1501         StgMVar_tail(mvar) = CurrentTSO;
1502         
1503         jump stg_block_takemvar;
1504   }
1505
1506   /* we got the value... */
1507   val = StgMVar_value(mvar);
1508
1509   if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
1510   {
1511       /* There are putMVar(s) waiting... 
1512        * wake up the first thread on the queue
1513        */
1514       ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1515
1516       /* actually perform the putMVar for the thread that we just woke up */
1517       tso = StgMVar_head(mvar);
1518       PerformPut(tso,StgMVar_value(mvar));
1519       dirtyTSO(tso);
1520
1521 #if defined(GRAN) || defined(PAR)
1522       /* ToDo: check 2nd arg (mvar) is right */
1523       "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
1524       StgMVar_head(mvar) = tso;
1525 #else
1526       "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", 
1527                                          StgMVar_head(mvar) "ptr") [];
1528       StgMVar_head(mvar) = tso;
1529 #endif
1530
1531       if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1532           StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1533       }
1534
1535 #if defined(THREADED_RTS)
1536       unlockClosure(mvar, stg_FULL_MVAR_info);
1537 #endif
1538       RET_P(val);
1539   } 
1540   else
1541   {
1542       /* No further putMVars, MVar is now empty */
1543       StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1544  
1545 #if defined(THREADED_RTS)
1546       unlockClosure(mvar, stg_EMPTY_MVAR_info);
1547 #else
1548       SET_INFO(mvar,stg_EMPTY_MVAR_info);
1549 #endif
1550
1551       RET_P(val);
1552   }
1553 }
1554
1555
1556 tryTakeMVarzh_fast
1557 {
1558     W_ mvar, val, info, tso;
1559
1560     /* args: R1 = MVar closure */
1561
1562     mvar = R1;
1563
1564 #if defined(THREADED_RTS)
1565     "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
1566 #else
1567     info = GET_INFO(mvar);
1568 #endif
1569
1570     if (info == stg_EMPTY_MVAR_info) {
1571 #if defined(THREADED_RTS)
1572         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1573 #endif
1574         /* HACK: we need a pointer to pass back, 
1575          * so we abuse NO_FINALIZER_closure
1576          */
1577         RET_NP(0, stg_NO_FINALIZER_closure);
1578     }
1579
1580     /* we got the value... */
1581     val = StgMVar_value(mvar);
1582
1583     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1584
1585         /* There are putMVar(s) waiting... 
1586          * wake up the first thread on the queue
1587          */
1588         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1589
1590         /* actually perform the putMVar for the thread that we just woke up */
1591         tso = StgMVar_head(mvar);
1592         PerformPut(tso,StgMVar_value(mvar));
1593         dirtyTSO(tso);
1594
1595 #if defined(GRAN) || defined(PAR)
1596         /* ToDo: check 2nd arg (mvar) is right */
1597         "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
1598         StgMVar_head(mvar) = tso;
1599 #else
1600         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr",
1601                                            StgMVar_head(mvar) "ptr") [];
1602         StgMVar_head(mvar) = tso;
1603 #endif
1604
1605         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1606             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1607         }
1608 #if defined(THREADED_RTS)
1609         unlockClosure(mvar, stg_FULL_MVAR_info);
1610 #endif
1611     }
1612     else 
1613     {
1614         /* No further putMVars, MVar is now empty */
1615         StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
1616 #if defined(THREADED_RTS)
1617         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1618 #else
1619         SET_INFO(mvar,stg_EMPTY_MVAR_info);
1620 #endif
1621     }
1622     
1623     RET_NP(1, val);
1624 }
1625
1626
1627 putMVarzh_fast
1628 {
1629     W_ mvar, info, tso;
1630
1631     /* args: R1 = MVar, R2 = value */
1632     mvar = R1;
1633
1634 #if defined(THREADED_RTS)
1635     "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
1636 #else
1637     info = GET_INFO(mvar);
1638 #endif
1639
1640     if (info == stg_FULL_MVAR_info) {
1641         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1642             StgMVar_head(mvar) = CurrentTSO;
1643         } else {
1644             StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
1645         }
1646         StgTSO_link(CurrentTSO)        = stg_END_TSO_QUEUE_closure;
1647         StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
1648         StgTSO_block_info(CurrentTSO)  = mvar;
1649         StgMVar_tail(mvar) = CurrentTSO;
1650         
1651         jump stg_block_putmvar;
1652     }
1653   
1654     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1655
1656         /* There are takeMVar(s) waiting: wake up the first one
1657          */
1658         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1659
1660         /* actually perform the takeMVar */
1661         tso = StgMVar_head(mvar);
1662         PerformTake(tso, R2);
1663         dirtyTSO(tso);
1664       
1665 #if defined(GRAN) || defined(PAR)
1666         /* ToDo: check 2nd arg (mvar) is right */
1667         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1668         StgMVar_head(mvar) = tso;
1669 #else
1670         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1671         StgMVar_head(mvar) = tso;
1672 #endif
1673
1674         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1675             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1676         }
1677
1678 #if defined(THREADED_RTS)
1679         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1680 #endif
1681         jump %ENTRY_CODE(Sp(0));
1682     }
1683     else
1684     {
1685         /* No further takes, the MVar is now full. */
1686         StgMVar_value(mvar) = R2;
1687
1688 #if defined(THREADED_RTS)
1689         unlockClosure(mvar, stg_FULL_MVAR_info);
1690 #else
1691         SET_INFO(mvar,stg_FULL_MVAR_info);
1692 #endif
1693         jump %ENTRY_CODE(Sp(0));
1694     }
1695     
1696     /* ToDo: yield afterward for better communication performance? */
1697 }
1698
1699
1700 tryPutMVarzh_fast
1701 {
1702     W_ mvar, info, tso;
1703
1704     /* args: R1 = MVar, R2 = value */
1705     mvar = R1;
1706
1707 #if defined(THREADED_RTS)
1708     "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
1709 #else
1710     info = GET_INFO(mvar);
1711 #endif
1712
1713     if (info == stg_FULL_MVAR_info) {
1714 #if defined(THREADED_RTS)
1715         unlockClosure(mvar, stg_FULL_MVAR_info);
1716 #endif
1717         RET_N(0);
1718     }
1719   
1720     if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
1721
1722         /* There are takeMVar(s) waiting: wake up the first one
1723          */
1724         ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
1725         
1726         /* actually perform the takeMVar */
1727         tso = StgMVar_head(mvar);
1728         PerformTake(tso, R2);
1729         dirtyTSO(tso);
1730       
1731 #if defined(GRAN) || defined(PAR)
1732         /* ToDo: check 2nd arg (mvar) is right */
1733         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
1734         StgMVar_head(mvar) = tso;
1735 #else
1736         "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
1737         StgMVar_head(mvar) = tso;
1738 #endif
1739
1740         if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
1741             StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
1742         }
1743
1744 #if defined(THREADED_RTS)
1745         unlockClosure(mvar, stg_EMPTY_MVAR_info);
1746 #endif
1747     }
1748     else
1749     {
1750         /* No further takes, the MVar is now full. */
1751         StgMVar_value(mvar) = R2;
1752
1753 #if defined(THREADED_RTS)
1754         unlockClosure(mvar, stg_FULL_MVAR_info);
1755 #else
1756         SET_INFO(mvar,stg_FULL_MVAR_info);
1757 #endif
1758     }
1759     
1760     RET_N(1);
1761     /* ToDo: yield afterward for better communication performance? */
1762 }
1763
1764
1765 /* -----------------------------------------------------------------------------
1766    Stable pointer primitives
1767    -------------------------------------------------------------------------  */
1768
1769 makeStableNamezh_fast
1770 {
1771     W_ index, sn_obj;
1772
1773     ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
1774   
1775     index = foreign "C" lookupStableName(R1 "ptr") [];
1776
1777     /* Is there already a StableName for this heap object?
1778      *  stable_ptr_table is a pointer to an array of snEntry structs.
1779      */
1780     if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
1781         sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
1782         SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
1783         StgStableName_sn(sn_obj) = index;
1784         snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
1785     } else {
1786         sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
1787     }
1788     
1789     RET_P(sn_obj);
1790 }
1791
1792
1793 makeStablePtrzh_fast
1794 {
1795     /* Args: R1 = a */
1796     W_ sp;
1797     MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
1798     "ptr" sp = foreign "C" getStablePtr(R1 "ptr") [];
1799     RET_N(sp);
1800 }
1801
1802 deRefStablePtrzh_fast
1803 {
1804     /* Args: R1 = the stable ptr */
1805     W_ r, sp;
1806     sp = R1;
1807     r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
1808     RET_P(r);
1809 }
1810
1811 /* -----------------------------------------------------------------------------
1812    Bytecode object primitives
1813    -------------------------------------------------------------------------  */
1814
1815 newBCOzh_fast
1816 {
1817     /* R1 = instrs
1818        R2 = literals
1819        R3 = ptrs
1820        R4 = arity
1821        R5 = bitmap array
1822     */
1823     W_ bco, bitmap_arr, bytes, words;
1824     
1825     bitmap_arr = R5;
1826
1827     words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
1828     bytes = WDS(words);
1829
1830     ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, newBCOzh_fast );
1831
1832     bco = Hp - bytes + WDS(1);
1833     SET_HDR(bco, stg_BCO_info, W_[CCCS]);
1834     
1835     StgBCO_instrs(bco)     = R1;
1836     StgBCO_literals(bco)   = R2;
1837     StgBCO_ptrs(bco)       = R3;
1838     StgBCO_arity(bco)      = HALF_W_(R4);
1839     StgBCO_size(bco)       = HALF_W_(words);
1840     
1841     // Copy the arity/bitmap info into the BCO
1842     W_ i;
1843     i = 0;
1844 for:
1845     if (i < StgArrWords_words(bitmap_arr)) {
1846         StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
1847         i = i + 1;
1848         goto for;
1849     }
1850     
1851     RET_P(bco);
1852 }
1853
1854
1855 mkApUpd0zh_fast
1856 {
1857     // R1 = the BCO# for the AP
1858     //  
1859     W_ ap;
1860
1861     // This function is *only* used to wrap zero-arity BCOs in an
1862     // updatable wrapper (see ByteCodeLink.lhs).  An AP thunk is always
1863     // saturated and always points directly to a FUN or BCO.
1864     ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
1865            StgBCO_arity(R1) == HALF_W_(0));
1866
1867     HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
1868     TICK_ALLOC_UP_THK(0, 0);
1869     CCCS_ALLOC(SIZEOF_StgAP);
1870
1871     ap = Hp - SIZEOF_StgAP + WDS(1);
1872     SET_HDR(ap, stg_AP_info, W_[CCCS]);
1873     
1874     StgAP_n_args(ap) = HALF_W_(0);
1875     StgAP_fun(ap) = R1;
1876     
1877     RET_P(ap);
1878 }
1879
1880 unpackClosurezh_fast
1881 {
1882 /* args: R1 = closure to analyze */
1883 // TODO: Consider the absence of ptrs or nonptrs as a special case ?
1884
1885     W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
1886     info  = %GET_STD_INFO(R1);
1887
1888     // Some closures have non-standard layout, so we omit those here.
1889     W_ type;
1890     type = TO_W_(%INFO_TYPE(info));
1891     switch [0 .. N_CLOSURE_TYPES] type {
1892     case THUNK_SELECTOR : {
1893         ptrs = 1;
1894         nptrs = 0;
1895         goto out;
1896     }
1897     case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, 
1898          THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
1899         ptrs = 0;
1900         nptrs = 0;
1901         goto out;
1902     }
1903     default: {
1904         ptrs  = TO_W_(%INFO_PTRS(info)); 
1905         nptrs = TO_W_(%INFO_NPTRS(info));
1906         goto out;
1907     }}
1908 out:
1909
1910     W_ ptrs_arr_sz, nptrs_arr_sz;
1911     nptrs_arr_sz = SIZEOF_StgArrWords   + WDS(nptrs);
1912     ptrs_arr_sz  = SIZEOF_StgMutArrPtrs + WDS(ptrs);
1913
1914     ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
1915
1916     ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
1917     nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
1918
1919     SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
1920     StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
1921     p = 0;
1922 for:
1923     if(p < ptrs) {
1924          W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(R1,p);
1925          p = p + 1;
1926          goto for;
1927     }
1928     
1929     SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
1930     StgArrWords_words(nptrs_arr) = nptrs;
1931     p = 0;
1932 for2:
1933     if(p < nptrs) {
1934          W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(R1, p+ptrs);
1935          p = p + 1;
1936          goto for2;
1937     }
1938     RET_NPP(info, ptrs_arr, nptrs_arr);
1939 }
1940
1941 /* -----------------------------------------------------------------------------
1942    Thread I/O blocking primitives
1943    -------------------------------------------------------------------------- */
1944
1945 /* Add a thread to the end of the blocked queue. (C-- version of the C
1946  * macro in Schedule.h).
1947  */
1948 #define APPEND_TO_BLOCKED_QUEUE(tso)                    \
1949     ASSERT(StgTSO_link(tso) == END_TSO_QUEUE);          \
1950     if (W_[blocked_queue_hd] == END_TSO_QUEUE) {        \
1951       W_[blocked_queue_hd] = tso;                       \
1952     } else {                                            \
1953       StgTSO_link(W_[blocked_queue_tl]) = tso;          \
1954     }                                                   \
1955     W_[blocked_queue_tl] = tso;
1956
1957 waitReadzh_fast
1958 {
1959     /* args: R1 */
1960 #ifdef THREADED_RTS
1961     foreign "C" barf("waitRead# on threaded RTS");
1962 #else
1963
1964     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1965     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
1966     StgTSO_block_info(CurrentTSO) = R1;
1967     // No locking - we're not going to use this interface in the
1968     // threaded RTS anyway.
1969     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1970     jump stg_block_noregs;
1971 #endif
1972 }
1973
1974 waitWritezh_fast
1975 {
1976     /* args: R1 */
1977 #ifdef THREADED_RTS
1978     foreign "C" barf("waitWrite# on threaded RTS");
1979 #else
1980
1981     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
1982     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
1983     StgTSO_block_info(CurrentTSO) = R1;
1984     // No locking - we're not going to use this interface in the
1985     // threaded RTS anyway.
1986     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
1987     jump stg_block_noregs;
1988 #endif
1989 }
1990
1991
1992 STRING(stg_delayzh_malloc_str, "delayzh_fast")
1993 delayzh_fast
1994 {
1995 #ifdef mingw32_HOST_OS
1996     W_ ares;
1997     CInt reqID;
1998 #else
1999     W_ t, prev, target;
2000 #endif
2001
2002 #ifdef THREADED_RTS
2003     foreign "C" barf("delay# on threaded RTS");
2004 #else
2005
2006     /* args: R1 (microsecond delay amount) */
2007     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2008     StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
2009
2010 #ifdef mingw32_HOST_OS
2011
2012     /* could probably allocate this on the heap instead */
2013     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2014                                             stg_delayzh_malloc_str);
2015     reqID = foreign "C" addDelayRequest(R1);
2016     StgAsyncIOResult_reqID(ares)   = reqID;
2017     StgAsyncIOResult_len(ares)     = 0;
2018     StgAsyncIOResult_errCode(ares) = 0;
2019     StgTSO_block_info(CurrentTSO)  = ares;
2020
2021     /* Having all async-blocked threads reside on the blocked_queue
2022      * simplifies matters, so change the status to OnDoProc put the
2023      * delayed thread on the blocked_queue.
2024      */
2025     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2026     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2027     jump stg_block_async_void;
2028
2029 #else
2030
2031     W_ time;
2032     W_ divisor;
2033     time = foreign "C" getourtimeofday() [R1];
2034     divisor = TO_W_(RtsFlags_MiscFlags_tickInterval(RtsFlags))*1000;
2035     target = ((R1 + divisor - 1) / divisor) /* divide rounding up */
2036            + time + 1; /* Add 1 as getourtimeofday rounds down */
2037     StgTSO_block_info(CurrentTSO) = target;
2038
2039     /* Insert the new thread in the sleeping queue. */
2040     prev = NULL;
2041     t = W_[sleeping_queue];
2042 while:
2043     if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
2044         prev = t;
2045         t = StgTSO_link(t);
2046         goto while;
2047     }
2048
2049     StgTSO_link(CurrentTSO) = t;
2050     if (prev == NULL) {
2051         W_[sleeping_queue] = CurrentTSO;
2052     } else {
2053         StgTSO_link(prev) = CurrentTSO;
2054     }
2055     jump stg_block_noregs;
2056 #endif
2057 #endif /* !THREADED_RTS */
2058 }
2059
2060
2061 #ifdef mingw32_HOST_OS
2062 STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
2063 asyncReadzh_fast
2064 {
2065     W_ ares;
2066     CInt reqID;
2067
2068 #ifdef THREADED_RTS
2069     foreign "C" barf("asyncRead# on threaded RTS");
2070 #else
2071
2072     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2073     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2074     StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
2075
2076     /* could probably allocate this on the heap instead */
2077     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2078                                             stg_asyncReadzh_malloc_str)
2079                         [R1,R2,R3,R4];
2080     reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
2081     StgAsyncIOResult_reqID(ares)   = reqID;
2082     StgAsyncIOResult_len(ares)     = 0;
2083     StgAsyncIOResult_errCode(ares) = 0;
2084     StgTSO_block_info(CurrentTSO)  = ares;
2085     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2086     jump stg_block_async;
2087 #endif
2088 }
2089
2090 STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
2091 asyncWritezh_fast
2092 {
2093     W_ ares;
2094     CInt reqID;
2095
2096 #ifdef THREADED_RTS
2097     foreign "C" barf("asyncWrite# on threaded RTS");
2098 #else
2099
2100     /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
2101     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2102     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
2103
2104     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2105                                             stg_asyncWritezh_malloc_str)
2106                         [R1,R2,R3,R4];
2107     reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
2108
2109     StgAsyncIOResult_reqID(ares)   = reqID;
2110     StgAsyncIOResult_len(ares)     = 0;
2111     StgAsyncIOResult_errCode(ares) = 0;
2112     StgTSO_block_info(CurrentTSO)  = ares;
2113     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2114     jump stg_block_async;
2115 #endif
2116 }
2117
2118 STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
2119 asyncDoProczh_fast
2120 {
2121     W_ ares;
2122     CInt reqID;
2123
2124 #ifdef THREADED_RTS
2125     foreign "C" barf("asyncDoProc# on threaded RTS");
2126 #else
2127
2128     /* args: R1 = proc, R2 = param */
2129     ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
2130     StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
2131
2132     /* could probably allocate this on the heap instead */
2133     "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
2134                                             stg_asyncDoProczh_malloc_str) 
2135                                 [R1,R2];
2136     reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
2137     StgAsyncIOResult_reqID(ares)   = reqID;
2138     StgAsyncIOResult_len(ares)     = 0;
2139     StgAsyncIOResult_errCode(ares) = 0;
2140     StgTSO_block_info(CurrentTSO) = ares;
2141     APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
2142     jump stg_block_async;
2143 #endif
2144 }
2145 #endif
2146
2147 // noDuplicate# tries to ensure that none of the thunks under
2148 // evaluation by the current thread are also under evaluation by
2149 // another thread.  It relies on *both* threads doing noDuplicate#;
2150 // the second one will get blocked if they are duplicating some work.
2151 noDuplicatezh_fast
2152 {
2153     SAVE_THREAD_STATE();
2154     ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2155     foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") [];
2156     
2157     if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
2158         jump stg_threadFinished;
2159     } else {
2160         LOAD_THREAD_STATE();
2161         ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
2162         jump %ENTRY_CODE(Sp(0));
2163     }
2164 }
2165
2166 getApStackValzh_fast
2167 {
2168    W_ ap_stack, offset, val, ok;
2169
2170    /* args: R1 = AP_STACK, R2 = offset */
2171    ap_stack = R1;
2172    offset   = R2;
2173
2174    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
2175         ok = 1;
2176         val = StgAP_STACK_payload(ap_stack,offset); 
2177    } else {
2178         ok = 0;
2179         val = R1;
2180    }
2181    RET_NP(ok,val);
2182 }