Fixes to "Retract Hp *before* checking for HpLim==0"
[ghc.git] / rts / HeapStackCheck.cmm
1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 1998-2004
4  *
5  * Canned Heap-Check and Stack-Check sequences.
6  *
7  * This file is written in a subset of C--, extended with various
8  * features specific to GHC.  It is compiled by GHC directly.  For the
9  * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
10  *
11  * ---------------------------------------------------------------------------*/
12
13 #include "Cmm.h"
14
15 #ifdef __PIC__
16 import pthread_mutex_unlock;
17 #endif
18 import EnterCriticalSection;
19 import LeaveCriticalSection;
20
21 /* Stack/Heap Check Failure
22  * ------------------------
23  *
24  * On discovering that a stack or heap check has failed, we do the following:
25  *
26  *    - If HpLim==0, indicating that we should context-switch, we yield
27  *      to the scheduler (return ThreadYielding).
28  *
29  * Note that we must leave no slop in the heap (this is a requirement
30  * for LDV profiling, at least), so if we just had a heap-check
31  * failure, then we must retract Hp by HpAlloc.  How do we know
32  * whether there was a heap-check failure?  HpLim might be zero, and
33  * yet we got here as a result of a stack-check failure.  Hence, we
34  * require that HpAlloc is only non-zero if there was a heap-check
35  * failure, otherwise it is zero, so we can always safely subtract
36  * HpAlloc from Hp.
37  *
38  * Hence, HpAlloc is zeroed in LOAD_THREAD_STATE().
39  *
40  *    - If the context_switch flag is set (the backup plan if setting HpLim
41  *      to 0 didn't trigger a context switch), we yield to the scheduler
42  *      (return ThreadYielding).
43  *
44  *    - If Hp > HpLim, we've had a heap check failure.  This means we've
45  *      come to the end of the current heap block, so we try to chain
46  *      another block on with ExtendNursery().  
47  *
48  *           - If this succeeds, we carry on without returning to the 
49  *             scheduler.  
50  *
51  *           - If it fails, we return to the scheduler claiming HeapOverflow
52  *             so that a garbage collection can be performed.
53  *
54  *    - If Hp <= HpLim, it must have been a stack check that failed.  In
55  *      which case, we return to the scheduler claiming StackOverflow, the
56  *      scheduler will either increase the size of our stack, or raise
57  *      an exception if the stack is already too big.
58  *
59  * The effect of checking for context switch only in the heap/stack check
60  * failure code is that we'll switch threads after the current thread has
61  * reached the end of its heap block.  If a thread isn't allocating
62  * at all, it won't yield.  Hopefully this won't be a problem in practice.
63  */
64  
65 #define PRE_RETURN(why,what_next)                       \
66   StgTSO_what_next(CurrentTSO) = what_next::I16;        \
67   StgRegTable_rRet(BaseReg) = why;                      \
68   R1 = BaseReg;
69
70 /* Remember that the return address is *removed* when returning to a
71  * ThreadRunGHC thread.
72  */
73
74 #define GC_GENERIC                                              \
75     DEBUG_ONLY(foreign "C" heapCheckFail());                    \
76     if (Hp > HpLim) {                                           \
77         Hp = Hp - HpAlloc/*in bytes*/;                          \
78         if (HpLim == 0) { \
79                 R1 = ThreadYielding;                            \
80                 goto sched;                                     \
81         }                                               \
82         if (HpAlloc <= BLOCK_SIZE                               \
83             && bdescr_link(CurrentNursery) != NULL) {           \
84             HpAlloc = 0;                                        \
85             CLOSE_NURSERY();                                    \
86             CurrentNursery = bdescr_link(CurrentNursery);       \
87             OPEN_NURSERY();                                     \
88             if (Capability_context_switch(MyCapability()) != 0 :: CInt) { \
89                 R1 = ThreadYielding;                            \
90                 goto sched;                                     \
91             } else {                                            \
92                 jump %ENTRY_CODE(Sp(0));                        \
93             }                                                   \
94         } else {                                                \
95             R1 = HeapOverflow;                                  \
96             goto sched;                                         \
97         }                                                       \
98     } else {                                                    \
99         R1 = StackOverflow;                                     \
100     }                                                           \
101   sched:                                                        \
102     PRE_RETURN(R1,ThreadRunGHC);                                \
103     jump stg_returnToSched;
104
105 #define HP_GENERIC                              \
106    PRE_RETURN(HeapOverflow, ThreadRunGHC)       \
107   jump stg_returnToSched;
108
109 #define BLOCK_GENERIC                           \
110    PRE_RETURN(ThreadBlocked,  ThreadRunGHC)     \
111   jump stg_returnToSched;
112
113 #define YIELD_GENERIC                           \
114   PRE_RETURN(ThreadYielding, ThreadRunGHC)      \
115   jump stg_returnToSched;
116
117 #define BLOCK_BUT_FIRST(c)                      \
118   PRE_RETURN(ThreadBlocked, ThreadRunGHC)       \
119   R2 = c;                                       \
120   jump stg_returnToSchedButFirst;
121
122 #define YIELD_TO_INTERPRETER                    \
123   PRE_RETURN(ThreadYielding, ThreadInterpret)   \
124   jump stg_returnToSchedNotPaused;
125
126 /* -----------------------------------------------------------------------------
127    Heap checks in thunks/functions.
128
129    In these cases, node always points to the function closure.  This gives
130    us an easy way to return to the function: just leave R1 on the top of
131    the stack, and have the scheduler enter it to return.
132
133    There are canned sequences for 'n' pointer values in registers.
134    -------------------------------------------------------------------------- */
135
136 INFO_TABLE_RET( stg_enter, RET_SMALL, P_ unused)
137 {
138     R1 = Sp(1);
139     Sp_adj(2);
140     ENTER();
141 }
142
143 __stg_gc_enter_1
144 {
145     Sp_adj(-2);
146     Sp(1) = R1;
147     Sp(0) = stg_enter_info;
148     GC_GENERIC
149 }
150
151 #if defined(GRAN)
152 /*
153   ToDo: merge the block and yield macros, calling something like BLOCK(N)
154         at the end;
155 */
156
157 /* 
158    Should we actually ever do a yield in such a case?? -- HWL
159 */
160 gran_yield_0
161 {
162     SAVE_THREAD_STATE();                                        
163     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
164     R1 = ThreadYielding;
165     jump StgReturn;
166 }
167
168 gran_yield_1
169 {
170     Sp_adj(-1);
171     Sp(0) = R1;
172     SAVE_THREAD_STATE();                                        
173     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
174     R1 = ThreadYielding;
175     jump StgReturn;
176 }
177
178 /*- 2 Regs--------------------------------------------------------------------*/
179
180 gran_yield_2
181 {
182     Sp_adj(-2);
183     Sp(1) = R2;
184     Sp(0) = R1;
185     SAVE_THREAD_STATE();                                        
186     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
187     R1 = ThreadYielding;
188     jump StgReturn;
189 }
190
191 /*- 3 Regs -------------------------------------------------------------------*/
192
193 gran_yield_3
194 {
195     Sp_adj(-3);
196     Sp(2) = R3;
197     Sp(1) = R2;
198     Sp(0) = R1;
199     SAVE_THREAD_STATE();                                        
200     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
201     R1 = ThreadYielding;
202     jump StgReturn;
203 }
204
205 /*- 4 Regs -------------------------------------------------------------------*/
206
207 gran_yield_4
208 {
209     Sp_adj(-4);
210     Sp(3) = R4;
211     Sp(2) = R3;
212     Sp(1) = R2;
213     Sp(0) = R1;
214     SAVE_THREAD_STATE();                                        
215     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
216     R1 = ThreadYielding;
217     jump StgReturn;
218 }
219
220 /*- 5 Regs -------------------------------------------------------------------*/
221
222 gran_yield_5
223 {
224     Sp_adj(-5);
225     Sp(4) = R5;
226     Sp(3) = R4;
227     Sp(2) = R3;
228     Sp(1) = R2;
229     Sp(0) = R1;
230     SAVE_THREAD_STATE();                                        
231     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
232     R1 = ThreadYielding;
233     jump StgReturn;
234 }
235
236 /*- 6 Regs -------------------------------------------------------------------*/
237
238 gran_yield_6
239 {
240     Sp_adj(-6);
241     Sp(5) = R6;
242     Sp(4) = R5;
243     Sp(3) = R4;
244     Sp(2) = R3;
245     Sp(1) = R2;
246     Sp(0) = R1;
247     SAVE_THREAD_STATE();                                        
248     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
249     R1 = ThreadYielding;
250     jump StgReturn;
251 }
252
253 /*- 7 Regs -------------------------------------------------------------------*/
254
255 gran_yield_7
256 {
257     Sp_adj(-7);
258     Sp(6) = R7;
259     Sp(5) = R6;
260     Sp(4) = R5;
261     Sp(3) = R4;
262     Sp(2) = R3;
263     Sp(1) = R2;
264     Sp(0) = R1;
265     SAVE_THREAD_STATE();                                        
266     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
267     R1 = ThreadYielding;
268     jump StgReturn;
269 }
270
271 /*- 8 Regs -------------------------------------------------------------------*/
272
273 gran_yield_8
274 {
275     Sp_adj(-8);
276     Sp(7) = R8;
277     Sp(6) = R7;
278     Sp(5) = R6;
279     Sp(4) = R5;
280     Sp(3) = R4;
281     Sp(2) = R3;
282     Sp(1) = R2;
283     Sp(0) = R1;
284     SAVE_THREAD_STATE();                                        
285     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
286     R1 = ThreadYielding;
287     jump StgReturn;
288 }
289
290 // the same routines but with a block rather than a yield
291
292 gran_block_1
293 {
294     Sp_adj(-1);
295     Sp(0) = R1;
296     SAVE_THREAD_STATE();                                        
297     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
298     R1 = ThreadBlocked;
299     jump StgReturn;
300 }
301
302 /*- 2 Regs--------------------------------------------------------------------*/
303
304 gran_block_2
305 {
306     Sp_adj(-2);
307     Sp(1) = R2;
308     Sp(0) = R1;
309     SAVE_THREAD_STATE();                                        
310     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
311     R1 = ThreadBlocked;
312     jump StgReturn;
313 }
314
315 /*- 3 Regs -------------------------------------------------------------------*/
316
317 gran_block_3
318 {
319     Sp_adj(-3);
320     Sp(2) = R3;
321     Sp(1) = R2;
322     Sp(0) = R1;
323     SAVE_THREAD_STATE();                                        
324     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
325     R1 = ThreadBlocked;
326     jump StgReturn;
327 }
328
329 /*- 4 Regs -------------------------------------------------------------------*/
330
331 gran_block_4
332 {
333     Sp_adj(-4);
334     Sp(3) = R4;
335     Sp(2) = R3;
336     Sp(1) = R2;
337     Sp(0) = R1;
338     SAVE_THREAD_STATE();                                        
339     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
340     R1 = ThreadBlocked;
341     jump StgReturn;
342 }
343
344 /*- 5 Regs -------------------------------------------------------------------*/
345
346 gran_block_5
347 {
348     Sp_adj(-5);
349     Sp(4) = R5;
350     Sp(3) = R4;
351     Sp(2) = R3;
352     Sp(1) = R2;
353     Sp(0) = R1;
354     SAVE_THREAD_STATE();                                        
355     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
356     R1 = ThreadBlocked;
357     jump StgReturn;
358 }
359
360 /*- 6 Regs -------------------------------------------------------------------*/
361
362 gran_block_6
363 {
364     Sp_adj(-6);
365     Sp(5) = R6;
366     Sp(4) = R5;
367     Sp(3) = R4;
368     Sp(2) = R3;
369     Sp(1) = R2;
370     Sp(0) = R1;
371     SAVE_THREAD_STATE();                                        
372     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
373     R1 = ThreadBlocked;
374     jump StgReturn;
375 }
376
377 /*- 7 Regs -------------------------------------------------------------------*/
378
379 gran_block_7
380 {
381     Sp_adj(-7);
382     Sp(6) = R7;
383     Sp(5) = R6;
384     Sp(4) = R5;
385     Sp(3) = R4;
386     Sp(2) = R3;
387     Sp(1) = R2;
388     Sp(0) = R1;
389     SAVE_THREAD_STATE();                                        
390     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
391     R1 = ThreadBlocked;
392     jump StgReturn;
393 }
394
395 /*- 8 Regs -------------------------------------------------------------------*/
396
397 gran_block_8
398 {
399     Sp_adj(-8);
400     Sp(7) = R8;
401     Sp(6) = R7;
402     Sp(5) = R6;
403     Sp(4) = R5;
404     Sp(3) = R4;
405     Sp(2) = R3;
406     Sp(1) = R2;
407     Sp(0) = R1;
408     SAVE_THREAD_STATE();                                        
409     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
410     R1 = ThreadBlocked;
411     jump StgReturn;
412 }
413
414 #endif
415
416 #if 0 && defined(PAR)
417
418 /*
419   Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
420   saving of the thread state from the actual jump via an StgReturn.
421   We need this separation because we call RTS routines in blocking entry codes
422   before jumping back into the RTS (see parallel/FetchMe.hc).
423 */
424
425 par_block_1_no_jump
426 {
427     Sp_adj(-1);
428     Sp(0) = R1;
429     SAVE_THREAD_STATE();                                        
430 }
431
432 par_jump
433 {
434     TSO_what_next(CurrentTSO) = ThreadRunGHC;           
435     R1 = ThreadBlocked;
436     jump StgReturn;
437 }
438
439 #endif
440
441 /* -----------------------------------------------------------------------------
442    Heap checks in Primitive case alternatives
443
444    A primitive case alternative is entered with a value either in 
445    R1, FloatReg1 or D1 depending on the return convention.  All the
446    cases are covered below.
447    -------------------------------------------------------------------------- */
448
449 /*-- No Registers live ------------------------------------------------------ */
450
451 stg_gc_noregs
452 {
453     GC_GENERIC
454 }
455
456 /*-- void return ------------------------------------------------------------ */
457
458 INFO_TABLE_RET( stg_gc_void, RET_SMALL)
459 {
460     Sp_adj(1);
461     jump %ENTRY_CODE(Sp(0));
462 }
463
464 /*-- R1 is boxed/unpointed -------------------------------------------------- */
465
466 INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, P_ unused)
467 {
468     R1 = Sp(1);
469     Sp_adj(2);
470     jump %ENTRY_CODE(Sp(0));
471 }
472
473 stg_gc_unpt_r1
474 {
475     Sp_adj(-2);
476     Sp(1) = R1;
477     Sp(0) = stg_gc_unpt_r1_info;
478     GC_GENERIC
479 }
480
481 /*-- R1 is unboxed -------------------------------------------------- */
482
483 /* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
484 INFO_TABLE_RET( stg_gc_unbx_r1, RET_SMALL, W_ unused )
485 {
486     R1 = Sp(1);
487     Sp_adj(2);
488     jump %ENTRY_CODE(Sp(0));
489 }
490
491 stg_gc_unbx_r1
492 {
493     Sp_adj(-2);
494     Sp(1) = R1;
495     Sp(0) = stg_gc_unbx_r1_info;
496     GC_GENERIC
497 }
498
499 /*-- F1 contains a float ------------------------------------------------- */
500
501 INFO_TABLE_RET( stg_gc_f1, RET_SMALL, F_ unused )
502 {
503     F1 = F_[Sp+WDS(1)];
504     Sp_adj(2);
505     jump %ENTRY_CODE(Sp(0));
506 }
507
508 stg_gc_f1
509 {
510     Sp_adj(-2);
511     F_[Sp + WDS(1)] = F1;
512     Sp(0) = stg_gc_f1_info;
513     GC_GENERIC
514 }
515
516 /*-- D1 contains a double ------------------------------------------------- */
517
518 INFO_TABLE_RET( stg_gc_d1, RET_SMALL, D_ unused )
519 {
520     D1 = D_[Sp + WDS(1)];
521     Sp = Sp + WDS(1) + SIZEOF_StgDouble;
522     jump %ENTRY_CODE(Sp(0));
523 }
524
525 stg_gc_d1
526 {
527     Sp = Sp - WDS(1) - SIZEOF_StgDouble;
528     D_[Sp + WDS(1)] = D1;
529     Sp(0) = stg_gc_d1_info;
530     GC_GENERIC
531 }
532
533
534 /*-- L1 contains an int64 ------------------------------------------------- */
535
536 INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused )
537 {
538     L1 = L_[Sp + WDS(1)];
539     Sp_adj(1) + SIZEOF_StgWord64;
540     jump %ENTRY_CODE(Sp(0));
541 }
542
543 stg_gc_l1
544 {
545     Sp_adj(-1) - SIZEOF_StgWord64;
546     L_[Sp + WDS(1)] = L1;
547     Sp(0) = stg_gc_l1_info;
548     GC_GENERIC
549 }
550
551 /*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */
552
553 INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, P_ unused )
554 {
555     Sp_adj(1);
556     // one ptr is on the stack (Sp(0))
557     jump %ENTRY_CODE(Sp(1));
558 }
559
560 /* -----------------------------------------------------------------------------
561    Generic function entry heap check code.
562
563    At a function entry point, the arguments are as per the calling convention,
564    i.e. some in regs and some on the stack.  There may or may not be 
565    a pointer to the function closure in R1 - if there isn't, then the heap
566    check failure code in the function will arrange to load it.
567
568    The function's argument types are described in its info table, so we
569    can just jump to this bit of generic code to save away all the
570    registers and return to the scheduler.
571
572    This code arranges the stack like this:
573          
574          |        ....         |
575          |        args         |
576          +---------------------+
577          |      f_closure      |
578          +---------------------+
579          |        size         |
580          +---------------------+
581          |   stg_gc_fun_info   |
582          +---------------------+
583
584    The size is the number of words of arguments on the stack, and is cached
585    in the frame in order to simplify stack walking: otherwise the size of
586    this stack frame would have to be calculated by looking at f's info table.
587
588    -------------------------------------------------------------------------- */
589
590 __stg_gc_fun
591 {
592     W_ size;
593     W_ info;
594     W_ type;
595
596     info = %GET_FUN_INFO(UNTAG(R1));
597
598     // cache the size
599     type = TO_W_(StgFunInfoExtra_fun_type(info));
600     if (type == ARG_GEN) {
601         size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
602     } else { 
603         if (type == ARG_GEN_BIG) {
604 #ifdef TABLES_NEXT_TO_CODE
605             // bitmap field holds an offset
606             size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
607                                         + %GET_ENTRY(UNTAG(R1)) /* ### */ );
608 #else
609             size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
610 #endif
611         } else {
612             size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
613         }
614     }
615     
616 #ifdef NO_ARG_REGS
617     // we don't have to save any registers away
618     Sp_adj(-3);
619     Sp(2) = R1;
620     Sp(1) = size;
621     Sp(0) = stg_gc_fun_info;
622     GC_GENERIC
623 #else
624     W_ type;
625     type = TO_W_(StgFunInfoExtra_fun_type(info));
626     // cache the size
627     if (type == ARG_GEN || type == ARG_GEN_BIG) {
628         // regs already saved by the heap check code
629         Sp_adj(-3);
630         Sp(2) = R1;
631         Sp(1) = size;
632         Sp(0) = stg_gc_fun_info;
633         // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
634         GC_GENERIC
635     } else { 
636         jump W_[stg_stack_save_entries + WDS(type)];
637             // jumps to stg_gc_noregs after saving stuff
638     }
639 #endif /* !NO_ARG_REGS */
640 }
641
642 /* -----------------------------------------------------------------------------
643    Generic Apply (return point)
644
645    The dual to stg_fun_gc_gen (above): this fragment returns to the
646    function, passing arguments in the stack and in registers
647    appropriately.  The stack layout is given above.
648    -------------------------------------------------------------------------- */
649
650 INFO_TABLE_RET( stg_gc_fun, RET_FUN )
651 {
652     R1 = Sp(2);
653     Sp_adj(3);
654 #ifdef NO_ARG_REGS
655     // Minor optimisation: there are no argument registers to load up,
656     // so we can just jump straight to the function's entry point.
657     jump %GET_ENTRY(UNTAG(R1));
658 #else
659     W_ info;
660     W_ type;
661     
662     info = %GET_FUN_INFO(UNTAG(R1));
663     type = TO_W_(StgFunInfoExtra_fun_type(info));
664     if (type == ARG_GEN || type == ARG_GEN_BIG) {
665         jump StgFunInfoExtra_slow_apply(info);
666     } else { 
667         if (type == ARG_BCO) {
668             // cover this case just to be on the safe side
669             Sp_adj(-2);
670             Sp(1) = R1;
671             Sp(0) = stg_apply_interp_info;
672             jump stg_yield_to_interpreter;
673         } else {
674             jump W_[stg_ap_stack_entries + WDS(type)];
675         }
676     }
677 #endif
678 }
679
680 /* -----------------------------------------------------------------------------
681    Generic Heap Check Code.
682
683    Called with Liveness mask in R9,  Return address in R10.
684    Stack must be consistent (containing all necessary info pointers
685    to relevant SRTs).
686
687    See StgMacros.h for a description of the RET_DYN stack frame.
688
689    We also define an stg_gen_yield here, because it's very similar.
690    -------------------------------------------------------------------------- */
691
692 // For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
693 // on a 64-bit machine, we'll end up wasting a couple of words, but
694 // it's not a big deal.
695
696 #define RESTORE_EVERYTHING                      \
697     L1   = L_[Sp + WDS(19)];                    \
698     D2   = D_[Sp + WDS(17)];                    \
699     D1   = D_[Sp + WDS(15)];                    \
700     F4   = F_[Sp + WDS(14)];                    \
701     F3   = F_[Sp + WDS(13)];                    \
702     F2   = F_[Sp + WDS(12)];                    \
703     F1   = F_[Sp + WDS(11)];                    \
704     R8 = Sp(10);                                \
705     R7 = Sp(9);                                 \
706     R6 = Sp(8);                                 \
707     R5 = Sp(7);                                 \
708     R4 = Sp(6);                                 \
709     R3 = Sp(5);                                 \
710     R2 = Sp(4);                                 \
711     R1 = Sp(3);                                 \
712     Sp_adj(21);
713
714 #define RET_OFFSET (-19)
715
716 #define SAVE_EVERYTHING                         \
717     Sp_adj(-21);                                \
718     L_[Sp + WDS(19)] = L1;                      \
719     D_[Sp + WDS(17)] = D2;                      \
720     D_[Sp + WDS(15)] = D1;                      \
721     F_[Sp + WDS(14)] = F4;                      \
722     F_[Sp + WDS(13)] = F3;                      \
723     F_[Sp + WDS(12)] = F2;                      \
724     F_[Sp + WDS(11)] = F1;                      \
725     Sp(10) = R8;                                \
726     Sp(9) = R7;                                 \
727     Sp(8) = R6;                                 \
728     Sp(7) = R5;                                 \
729     Sp(6) = R4;                                 \
730     Sp(5) = R3;                                 \
731     Sp(4) = R2;                                 \
732     Sp(3) = R1;                                 \
733     Sp(2) = R10;    /* return address */        \
734     Sp(1) = R9;     /* liveness mask  */        \
735     Sp(0) = stg_gc_gen_info;
736
737 INFO_TABLE_RET( stg_gc_gen, RET_DYN )
738 /* bitmap in the above info table is unused, the real one is on the stack. */
739 {
740     RESTORE_EVERYTHING;
741     jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
742 }
743
744 stg_gc_gen
745 {
746     SAVE_EVERYTHING;
747     GC_GENERIC
748 }         
749
750 // A heap check at an unboxed tuple return point.  The return address
751 // is on the stack, and we can find it by using the offsets given
752 // to us in the liveness mask.
753 stg_gc_ut
754 {
755     R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
756     SAVE_EVERYTHING;
757     GC_GENERIC
758 }
759
760 /*
761  * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
762  * because we've just failed doYouWantToGC(), not a standard heap
763  * check.  GC_GENERIC would end up returning StackOverflow.
764  */
765 stg_gc_gen_hp
766 {
767     SAVE_EVERYTHING;
768     HP_GENERIC
769 }         
770
771 /* -----------------------------------------------------------------------------
772    Yields
773    -------------------------------------------------------------------------- */
774
775 stg_gen_yield
776 {
777     SAVE_EVERYTHING;
778     YIELD_GENERIC
779 }
780
781 stg_yield_noregs
782 {
783     YIELD_GENERIC;
784 }
785
786 /* -----------------------------------------------------------------------------
787    Yielding to the interpreter... top of stack says what to do next.
788    -------------------------------------------------------------------------- */
789
790 stg_yield_to_interpreter
791 {
792     YIELD_TO_INTERPRETER;
793 }
794
795 /* -----------------------------------------------------------------------------
796    Blocks
797    -------------------------------------------------------------------------- */
798
799 stg_gen_block
800 {
801     SAVE_EVERYTHING;
802     BLOCK_GENERIC;
803 }
804
805 stg_block_noregs
806 {
807     BLOCK_GENERIC;
808 }
809
810 stg_block_1
811 {
812     Sp_adj(-2);
813     Sp(1) = R1;
814     Sp(0) = stg_enter_info;
815     BLOCK_GENERIC;
816 }
817
818 /* -----------------------------------------------------------------------------
819  * takeMVar/putMVar-specific blocks
820  *
821  * Stack layout for a thread blocked in takeMVar:
822  *      
823  *       ret. addr
824  *       ptr to MVar   (R1)
825  *       stg_block_takemvar_info
826  *
827  * Stack layout for a thread blocked in putMVar:
828  *      
829  *       ret. addr
830  *       ptr to Value  (R2)
831  *       ptr to MVar   (R1)
832  *       stg_block_putmvar_info
833  *
834  * See PrimOps.hc for a description of the workings of take/putMVar.
835  * 
836  * -------------------------------------------------------------------------- */
837
838 INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, P_ unused )
839 {
840     R1 = Sp(1);
841     Sp_adj(2);
842     jump takeMVarzh_fast;
843 }
844
845 // code fragment executed just before we return to the scheduler
846 stg_block_takemvar_finally
847 {
848 #ifdef THREADED_RTS
849     unlockClosure(R3, stg_MVAR_DIRTY_info);
850 #else
851     SET_INFO(R3, stg_MVAR_DIRTY_info);
852 #endif
853     jump StgReturn;
854 }
855
856 stg_block_takemvar
857 {
858     Sp_adj(-2);
859     Sp(1) = R1;
860     Sp(0) = stg_block_takemvar_info;
861     R3 = R1;
862     BLOCK_BUT_FIRST(stg_block_takemvar_finally);
863 }
864
865 INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, P_ unused1, P_ unused2 )
866 {
867     R2 = Sp(2);
868     R1 = Sp(1);
869     Sp_adj(3);
870     jump putMVarzh_fast;
871 }
872
873 // code fragment executed just before we return to the scheduler
874 stg_block_putmvar_finally
875 {
876 #ifdef THREADED_RTS
877     unlockClosure(R3, stg_MVAR_DIRTY_info);
878 #else
879     SET_INFO(R3, stg_MVAR_DIRTY_info);
880 #endif
881     jump StgReturn;
882 }
883
884 stg_block_putmvar
885 {
886     Sp_adj(-3);
887     Sp(2) = R2;
888     Sp(1) = R1;
889     Sp(0) = stg_block_putmvar_info;
890     R3 = R1;
891     BLOCK_BUT_FIRST(stg_block_putmvar_finally);
892 }
893
894 // code fragment executed just before we return to the scheduler
895 stg_block_blackhole_finally
896 {
897 #if defined(THREADED_RTS)
898     // The last thing we do is release sched_lock, which is
899     // preventing other threads from accessing blackhole_queue and
900     // picking up this thread before we are finished with it.
901     RELEASE_LOCK(sched_mutex "ptr");
902 #endif
903     jump StgReturn;
904 }
905
906 stg_block_blackhole
907 {
908     Sp_adj(-2);
909     Sp(1) = R1;
910     Sp(0) = stg_enter_info;
911     BLOCK_BUT_FIRST(stg_block_blackhole_finally);
912 }
913
914 INFO_TABLE_RET( stg_block_throwto, RET_SMALL, P_ unused, P_ unused )
915 {
916     R2 = Sp(2);
917     R1 = Sp(1);
918     Sp_adj(3);
919     jump killThreadzh_fast;
920 }
921
922 stg_block_throwto_finally
923 {
924 #ifdef THREADED_RTS
925     foreign "C" throwToReleaseTarget (R3 "ptr");
926 #endif
927     jump StgReturn;
928 }
929
930 stg_block_throwto
931 {
932     Sp_adj(-3);
933     Sp(2) = R2;
934     Sp(1) = R1;
935     Sp(0) = stg_block_throwto_info;
936     BLOCK_BUT_FIRST(stg_block_throwto_finally);
937 }
938
939 #ifdef mingw32_HOST_OS
940 INFO_TABLE_RET( stg_block_async, RET_SMALL )
941 {
942     W_ ares;
943     W_ len, errC;
944
945     ares = StgTSO_block_info(CurrentTSO);
946     len = StgAsyncIOResult_len(ares);
947     errC = StgAsyncIOResult_errCode(ares);
948     StgTSO_block_info(CurrentTSO) = NULL;
949     foreign "C" free(ares "ptr");
950     R1 = len;
951     Sp(0) = errC;
952     jump %ENTRY_CODE(Sp(1));
953 }
954
955 stg_block_async
956 {
957     Sp_adj(-1);
958     Sp(0) = stg_block_async_info;
959     BLOCK_GENERIC;
960 }
961
962 /* Used by threadDelay implementation; it would be desirable to get rid of
963  * this free()'ing void return continuation.
964  */
965 INFO_TABLE_RET( stg_block_async_void, RET_SMALL )
966 {
967     W_ ares;
968
969     ares = StgTSO_block_info(CurrentTSO);
970     StgTSO_block_info(CurrentTSO) = NULL;
971     foreign "C" free(ares "ptr");
972     Sp_adj(1);
973     jump %ENTRY_CODE(Sp(0));
974 }
975
976 stg_block_async_void
977 {
978     Sp_adj(-1);
979     Sp(0) = stg_block_async_void_info;
980     BLOCK_GENERIC;
981 }
982
983 #endif
984
985 /* -----------------------------------------------------------------------------
986    STM-specific waiting
987    -------------------------------------------------------------------------- */
988
989 stg_block_stmwait_finally
990 {
991     foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
992     jump StgReturn;
993 }
994
995 stg_block_stmwait
996 {
997     BLOCK_BUT_FIRST(stg_block_stmwait_finally);
998 }