1 /* -----------------------------------------------------------------------------
3 * (c) The University of Glasgow 2004
5 * Application-related bits.
7 * This file is written in a subset of C--, extended with various
8 * features specific to GHC. It is compiled by GHC directly. For the
9 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
11 * -------------------------------------------------------------------------- */
15 /* ----------------------------------------------------------------------------
16 * Evaluate a closure and return it.
18 * There isn't an info table / return address version of stg_ap_0, because
19 * everything being returned is guaranteed evaluated, so it would be a no-op.
22 STRING(stg_ap_0_ret_str,"stg_ap_0_ret... ")
24 stg_ap_0_fast ( P_ fun )
27 ccall debugBelch(stg_ap_0_ret_str);
28 ccall printClosure(R1 "ptr"));
31 ccall checkStackFrame(Sp "ptr"));
33 #if !defined(PROFILING)
40 Note [Evaluating functions with profiling]
42 If we evaluate something like
44 let f = {-# SCC "f" #-} g
46 where g is a function, then updating the thunk for f to point to g
47 would be incorrect: we've lost the SCC annotation. In general, when
48 we evaluate a function and the current CCS is different from the one
49 stored in the function, we need to return a function with the
52 The mechanism we use to wrap the function is to create a
53 zero-argument PAP as a proxy object to hold the new CCS, and return
56 If the closure we evaluated is itself a PAP, we cannot make a nested
57 PAP, so we copy the original PAP and set the CCS in the new PAP to
58 enterFunCCS(pap->header.prof.ccs).
65 // We must obey the correct heap object observation pattern in
66 // Note [Heap memory barriers] in SMP.h.
67 untaggedfun = UNTAG(fun);
68 info = %INFO_PTR(untaggedfun);
69 switch [INVALID_OBJECT .. N_CLOSURE_TYPES]
70 (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) {
75 fun = StgInd_indirectee(fun);
80 arity = TO_W_(StgBCO_arity(untaggedfun));
92 arity = TO_W_(StgFunInfoExtra_arity(%FUN_INFO(info)));
94 if (CCCS == StgHeader_ccs(untaggedfun)) {
97 // We're going to build a new PAP, with zero extra
98 // arguments and therefore the same arity as the
99 // original function. In other words, we're using a
100 // zero-argument PAP as an indirection to the
101 // function, so that we can attach a different CCS to
103 HP_CHK_GEN(SIZEOF_StgPAP);
104 TICK_ALLOC_PAP(SIZEOF_StgPAP, 0);
105 // attribute this allocation to the "overhead of profiling"
106 CCS_ALLOC(BYTES_TO_WDS(SIZEOF_StgPAP), CCS_OVERHEAD);
108 pap = Hp - SIZEOF_StgPAP + WDS(1);
109 SET_HDR(pap, stg_PAP_info, CCCS);
110 StgPAP_arity(pap) = arity;
111 if (arity <= TAG_MASK) {
112 // TODO: Shouldn't this already be tagged? If not why did we
113 // untag it at the beginning of this function?
114 fun = untaggedfun + arity;
116 StgPAP_fun(pap) = fun;
117 StgPAP_n_args(pap) = 0;
123 if (CCCS == StgHeader_ccs(untaggedfun)) {
126 // We're going to copy this PAP, and put the new CCS in it
128 size = SIZEOF_StgPAP + WDS(TO_W_(StgPAP_n_args(untaggedfun)));
130 TICK_ALLOC_PAP(size, 0);
131 // attribute this allocation to the "overhead of profiling"
132 CCS_ALLOC(BYTES_TO_WDS(SIZEOF_StgPAP), CCS_OVERHEAD);
134 pap = Hp - size + WDS(1);
135 // We'll lose the original PAP, so we should enter its CCS
136 ccall enterFunCCS(BaseReg "ptr", StgHeader_ccs(untaggedfun) "ptr");
137 SET_HDR(pap, stg_PAP_info, CCCS);
138 StgPAP_arity(pap) = StgPAP_arity(untaggedfun);
139 StgPAP_n_args(pap) = StgPAP_n_args(untaggedfun);
140 StgPAP_fun(pap) = StgPAP_fun(fun);
142 i = TO_W_(StgPAP_n_args(untaggedfun));
148 StgPAP_payload(pap,i) = StgPAP_payload(fun,i);
165 // We have a thunk of some kind, so evaluate it.
167 // The thunk might evaluate to a function, so we have to
168 // come back here again to adjust its CCS if necessary.
169 // Therefore we need to push a stack frame to look at the
170 // function that gets returned (a stg_restore_ccs_eval
171 // frame), and therefore we need a stack check.
174 // We can't use the value of 'info' any more, because if
175 // STK_CHK_GEN() did a GC then the closure we're looking
176 // at may have changed, e.g. a THUNK_SELECTOR may have
177 // been evaluated by the GC. So we reload the info
179 untaggedfun = UNTAG(fun);
180 info = %INFO_PTR(untaggedfun);
182 jump %ENTRY_CODE(info)
183 (stg_restore_cccs_eval_info, CCCS)
188 jump %ENTRY_CODE(info) (UNTAG(fun));
194 /* -----------------------------------------------------------------------------
195 Entry Code for a PAP.
197 This entry code is *only* called by one of the stg_ap functions.
198 On entry: Sp points to the remaining arguments on the stack. If
199 the stack check fails, we can just push the PAP on the stack and
200 return to the scheduler.
202 On entry: R1 points to the PAP. The rest of the function's
203 arguments (apart from those that are already in the PAP) are on the
204 stack, starting at Sp(0). R2 contains an info table which
205 describes these arguments, which is used in the event that the
206 stack check in the entry code below fails. The info table is
207 currently one of the stg_ap_*_ret family, as this code is always
208 entered from those functions.
210 The idea is to copy the chunk of stack from the PAP object onto the
211 stack / into registers, and enter the function.
212 -------------------------------------------------------------------------- */
214 INFO_TABLE(stg_PAP,/*special layout*/0,0,PAP,"PAP","PAP")
215 { ccall barf("PAP object (%p) entered!", R1) never returns; }
217 stg_PAP_apply /* no args => explicit stack */
224 Words = TO_W_(StgPAP_n_args(pap));
227 // Check for stack overflow and bump the stack pointer.
228 // We have a hand-rolled stack check fragment here, because none of
229 // the canned ones suit this situation.
231 if (Sp - (WDS(Words) + 2/* see ARG_BCO below */) < SpLim) {
232 // there is a return address in R2 in the event of a
233 // stack check failure. The various stg_apply functions arrange
234 // this before calling stg_PAP_entry.
237 jump stg_gc_unpt_r1 [R1];
244 #if defined(PROFILING)
245 ccall enterFunCCS(BaseReg "ptr", StgHeader_ccs(pap) "ptr");
251 p = pap + SIZEOF_StgHeader + OFFSET_StgPAP_payload;
261 R1 = StgPAP_fun(pap);
263 /* DEBUGGING CODE, ensures that arity 1 and 2 functions are entered tagged
264 if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 1 ) {
270 if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 2 ) {
280 #if defined(NO_ARG_REGS)
281 jump %GET_ENTRY(UNTAG(R1)) [R1];
284 info = %GET_FUN_INFO(UNTAG(R1));
286 type = TO_W_(StgFunInfoExtra_fun_type(info));
287 if (type == ARG_GEN) {
288 jump StgFunInfoExtra_slow_apply(info) [R1];
290 if (type == ARG_GEN_BIG) {
291 jump StgFunInfoExtra_slow_apply(info) [R1];
293 if (type == ARG_BCO) {
296 Sp(0) = stg_apply_interp_info;
297 jump stg_yield_to_interpreter [];
299 jump W_[stg_ap_stack_entries +
300 WDS(TO_W_(StgFunInfoExtra_fun_type(info)))] [R1];
304 /* -----------------------------------------------------------------------------
305 Entry Code for an AP (a PAP with arity zero).
307 The entry code is very similar to a PAP, except there are no
308 further arguments on the stack to worry about, so the stack check
309 is simpler. We must also push an update frame on the stack before
310 applying the function.
311 -------------------------------------------------------------------------- */
313 INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP")
314 /* no args => explicit stack */
321 Words = TO_W_(StgAP_n_args(ap));
324 * Check for stack overflow. IMPORTANT: use a _ENTER check here,
325 * because if the check fails, we might end up blackholing this very
326 * closure, in which case we must enter the blackhole on return rather
327 * than continuing to evaluate the now-defunct closure.
329 STK_CHK_ENTER(WDS(Words) +
330 SIZEOF_StgUpdateFrame +
331 2/* see ARG_BCO below */, R1);
333 PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1);
334 Sp = Sp - SIZEOF_StgUpdateFrame - WDS(Words);
343 p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload;
358 #if defined(NO_ARG_REGS)
359 jump %GET_ENTRY(UNTAG(R1)) [R1];
362 info = %GET_FUN_INFO(UNTAG(R1));
364 type = TO_W_(StgFunInfoExtra_fun_type(info));
365 if (type == ARG_GEN) {
366 jump StgFunInfoExtra_slow_apply(info) [R1];
368 if (type == ARG_GEN_BIG) {
369 jump StgFunInfoExtra_slow_apply(info) [R1];
371 if (type == ARG_BCO) {
374 Sp(0) = stg_apply_interp_info;
375 jump stg_yield_to_interpreter [];
377 jump W_[stg_ap_stack_entries +
378 WDS(TO_W_(StgFunInfoExtra_fun_type(info)))] [R1];
382 /* AP_NOUPD is exactly like AP, except that no update frame is pushed.
383 Use for thunks that are guaranteed to be entered once only, such as
384 those generated by the byte-code compiler for inserting breakpoints. */
386 INFO_TABLE(stg_AP_NOUPD,/*special layout*/0,0,AP,"AP_NOUPD","AP_NOUPD")
387 /* no args => explicit stack */
394 Words = TO_W_(StgAP_n_args(ap));
397 * Check for stack overflow. IMPORTANT: use a _ENTER check here,
398 * because if the check fails, we might end up blackholing this very
399 * closure, in which case we must enter the blackhole on return rather
400 * than continuing to evaluate the now-defunct closure.
402 STK_CHK_ENTER(WDS(Words) +
403 2/* see ARG_BCO below */, R1);
404 Sp = Sp - WDS(Words);
413 p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload;
428 #if defined(NO_ARG_REGS)
429 jump %GET_ENTRY(UNTAG(R1)) [R1];
432 info = %GET_FUN_INFO(UNTAG(R1));
434 type = TO_W_(StgFunInfoExtra_fun_type(info));
435 if (type == ARG_GEN) {
436 jump StgFunInfoExtra_slow_apply(info) [R1];
438 if (type == ARG_GEN_BIG) {
439 jump StgFunInfoExtra_slow_apply(info) [R1];
441 if (type == ARG_BCO) {
444 Sp(0) = stg_apply_interp_info;
445 jump stg_yield_to_interpreter [];
447 jump W_[stg_ap_stack_entries +
448 WDS(TO_W_(StgFunInfoExtra_fun_type(info)))] [R1];
452 /* -----------------------------------------------------------------------------
453 Entry Code for an AP_STACK.
455 Very similar to a PAP and AP. The layout is the same as PAP
456 and AP, except that the payload is a chunk of stack instead of
457 being described by the function's info table. Like an AP,
458 there are no further arguments on the stack to worry about.
459 However, the function closure (ap->fun) does not necessarily point
460 directly to a function, so we have to enter it using stg_ap_0.
461 -------------------------------------------------------------------------- */
464 Note [AP_STACKs must be eagerly blackholed]
465 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
466 #13615 describes a nasty concurrency issue where we can enter into the
467 middle of an ST action multiple times, resulting in duplication of effects.
468 In short, the construction of an AP_STACK allows us to suspend a computation
469 which should not be duplicated. When running with lazy blackholing, we can then
470 enter this AP_STACK multiple times, duplicating the computation with potentially
471 disastrous consequences.
473 For instance, consider the case of a simple ST program which computes a sum
474 using in─place mutation,
476 inplaceSum :: Num a => [a] ─> a
477 inplaceSum xs0 = runST $ do
479 let go [] = readSTRef y
485 Of course, it is fine if we enter an inplaceSum thunk more than once: the two
486 threads will inhabit different worlds with different STRefs. However, if we
487 suspend some part of inplaceSum (for instance, due to the heap check at the
488 beginning of go) and then multiple threads resume that suspension (as is safe in
489 pure computation) we will have multiple threads concurrently mutating the same
492 Let's consider how this might happen: Consider this situation,
494 ┌─────────┐ ┌───────┐ ┌───────┐ ┌─────────┐
495 │ TSO 1 │ ╭────→│ go │ │ fun │ │ TSO 2 │
496 └─────────┘ │ └───────┘ └───────┘ └─────────┘
498 ┌─────────┐ │ │ ┌─────────┐
499 │ │──────╯ ╰──────────────│ │
500 ├─────────┤ ┌─────────┐ ├─────────┤
501 │ UPDATE_ │──────────→│ THUNK A │ ╭────────│ UPDATE_ │
502 │ FRAME │ updatee └─────────┘ │updatee │ FRAME │
503 ├─────────┤ │ ├─────────┤
505 ├─────────┤ updatee ┌─────────┐ │
506 │ UPDATE_ │─────────────────────→│ THUNK B │←───╯
507 │ FRAME │ └─────────┘
511 Here we have two threads (TSO 1 and TSO 2) which are in currently pausing (e.g.
512 in threadPaused). Since they are pausing, their stacks are headed by a pointer
513 to the continuation code which we will run on resumption (go and fun,
514 respectively). We also see that there are two thunks on the heap: THUNK A and
515 THUNK B where THUNK B depends upon THUNK A (as in, evaluation of B will force
516 A). We see that thread 1 has THUNK A under evaluation, and both threads have
517 THUNK B under evaluation.
519 As each thread enters threadPaused, threadPaused will walk its stack looking for
520 duplicate computation (see Note [suspend duplicate work], although there is some
521 background described below as well). Let's consider what this check does:
523 Say that TSO 2 begins this check first. The check will walk TSO 2's stack, until
524 it finds the first update frame, which updates THUNK B. Upon finding this frame,
525 it will try to lock THUNK B, replacing it with a BLACKHOLE owned by its TSO. We
528 ┌─────────┐ ┌───────┐ ┌───────┐ ┌─────────┐
529 │ TSO 1 │ ╭────→│ go │ │ fun │ ╭────────→│ TSO 2 │
530 └─────────┘ │ └───────┘ └───────┘ │ └─────────┘
532 ┌─────────┐ │ │ │ ┌─────────┐
533 │ │──────╯ ╰─────────────────│ │
534 ├─────────┤ updatee ┌─────────┐ │ ├─────────┤
535 │ UPDATE_ │──────────→│ THUNK A │ │ ╭──────────│ UPDATE_ │
536 │ FRAME │ └─────────┘ │ │ updatee │ FRAME │
537 ├─────────┤ │ │ ├─────────┤
538 │ ... │ owner│ │ │ etc. │
539 ├─────────┤ updatee ┌────────────┐ │
540 │ UPDATE_ │──────────────────→│ BLACKHOLE │←─╯
541 │ FRAME │ └────────────┘
545 Now consider what happens when TSO 1 runs its duplicate-computation check.
546 Again, we start walking the stack from the top, where we find the update
547 frame updating THUNK A. We will lock this thunk, replacing it with a BLACKHOLE
548 owned by its TSO. We now have,
550 ┌─────────┐ ┌───────┐ ┌───────┐ ┌─────────┐
551 │ TSO 1 │←──╮ ╭────→│ go │ │ fun │ ╭────────→│ TSO 2 │
552 └─────────┘ │ │ └───────┘ └───────┘ │ └─────────┘
554 ┌─────────┐ ╰──│─────────╮ │ │ ┌─────────┐
555 │ │──────╯ │owner ╰─────────────────│ │
556 ├─────────┤ ┌───────────┐ │ ├─────────┤
557 │ UPDATE_ │──────────→│ BLACKHOLE │ │ ╭──────────│ UPDATE_ │
558 │ FRAME │ updatee └───────────┘ │ │ updatee │ FRAME │
559 ├─────────┤ │ │ ├─────────┤
560 │ ... │ owner│ │ │ etc. │
561 ├─────────┤ updatee ┌────────────┐ │
562 │ UPDATE_ │──────────────────→│ BLACKHOLE │←─╯
563 │ FRAME │ └────────────┘
567 Now we will continue walking down TSO 1's stack, next coming across the second
568 update frame, pointing to the now-BLACKHOLE'd THUNK B. At this point
569 threadPaused will correctly conclude that TSO 1 is duplicating a computation
570 being carried out by TSO 2 and attempt to suspend it.
572 The suspension process proceeds by invoking raiseAsync, which walks the stack
573 from the top looking for update frames. For each update frame we take any stack
574 frames preceding it and construct an AP_STACK heap object from them. We then
575 replace the updatee of the frame with an indirection pointing to the AP_STACK.
576 So, after suspending the first update frame we have,
578 ┌─────────┐ ┌───────┐ ┌───────┐ ┌─────────┐
579 │ TSO 1 │ ╭────────→│ go │←─╮ │ fun │ ╭───────→│ TSO 2 │
580 └─────────┘ │ └───────┘ │ └───────┘ │ └─────────┘
581 │ ┌───────────┐ │ ↑ ╭─────╯
582 ┌─────────┐ │ │ AP_STACK │ │ │ │ ┌─────────┐
583 │ │──╯ ├───────────┤ │ ╰────────────────│ │
584 ├─────────┤ │ │─╯ │ ├─────────┤
585 │ UPDATE_ │───────╮ └───────────┘ │ ╭──────────│ UPDATE_ │
586 │ FRAME │updatee│ ↑ │ │ updatee │ FRAME │
587 ├─────────┤ │ │indirectee │ │ ├─────────┤
588 │ ... │ ╰→┌───────────┐ │ │ │ etc. │
589 ├─────────┤updatee │ BLACKHOLE │ │ │
590 │ UPDATE_ │──╮ └───────────┘ owner│ │
591 │ FRAME │ │ ┌────────────┐ │
592 ├─────────┤ ╰───────────────→│ BLACKHOLE │←─╯
593 │ etc. │ └────────────┘
595 Finally, we will replace the second update frame with a blackhole so that TSO 1
596 will block on TSO 2's computation of THUNK B,
598 ┌─────────┐ ┌───────┐ ┌───────┐ ┌─────────┐
599 │ TSO 1 │ ╭────────→│ go │←─╮ │ fun │ ╭───────→│ TSO 2 │
600 └─────────┘ │ └───────┘ │ └───────┘ │ └─────────┘
601 │ ┌───────────┐ │ ↑ ╭─────╯
602 ┌─────────┐ │ │ AP_STACK │ │ │ │ ┌─────────┐
603 │ │──╯ ├───────────┤ │ ╰────────────────│ │
604 ├─────────┤ │ │─╯ │ ├─────────┤
605 │ UPDATE_ │───────╮ └───────────┘ │ ╭──────────│ UPDATE_ │
606 │ FRAME │updatee│ ↑ │ │ updatee │ FRAME │
607 ├─────────┤ │ │indirectee │ │ ├─────────┤
608 │ ... │ ╰→┌───────────┐ │ │ │ etc. │
609 ├─────────┤ │ BLACKHOLE │ │ │
610 │ BLACK │ └───────────┘ owner│ │
611 │ HOLE │───────────╮ ┌────────────┐ │
612 ├─────────┤indirectee ╰──────→│ BLACKHOLE │←─╯
613 │ etc. │ └────────────┘
615 At first glance there's still nothing terribly alarming here. However, consider
616 what would happen if some other closure held a reference to THUNK A. We would
617 now have leaked an AP_STACK capturing the state of a potentially
618 non-duplicatable computation to heap. Even worse, if two threads had references
619 to THUNK A and both attempted to enter at the same time, they would both succeed
620 if we allowed AP_STACKs to be lazily blackholed. This is the reason why we must
621 be very careful when entering AP_STACKS: they introduce the possibility that we
622 duplicate a computation which could never otherwise be duplicated.
624 For this reason we employ an atomic blackholing strategy when entering AP_STACK
629 INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK")
630 /* no args => explicit stack */
637 Words = StgAP_STACK_size(ap);
640 * Check for stack overflow. IMPORTANT: use a _ENTER check here,
641 * because if the check fails, we might end up blackholing this very
642 * closure, in which case we must enter the blackhole on return rather
643 * than continuing to evaluate the now-defunct closure.
645 STK_CHK_ENTER(WDS(Words) + SIZEOF_StgUpdateFrame + WDS(AP_STACK_SPLIM), R1);
648 * It is imperative that we blackhole lest we may duplicate computation which
649 * must not be duplicated. See Note [AP_STACKs must be eagerly blackholed].
652 (old_info) = prim %cmpxchgW(ap, stg_AP_STACK_info, stg_WHITEHOLE_info);
653 if (old_info != stg_AP_STACK_info) {
654 /* someone else beat us to it */
655 jump ENTRY_LBL(stg_WHITEHOLE) (ap);
657 // Can't add StgInd_indirectee(ap) to UpdRemSet here because the old value is
659 StgInd_indirectee(ap) = CurrentTSO;
661 SET_INFO(ap, __stg_EAGER_BLACKHOLE_info);
663 /* ensure there is at least AP_STACK_SPLIM words of headroom available
664 * after unpacking the AP_STACK. See bug #1466 */
665 PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1);
666 Sp = Sp - SIZEOF_StgUpdateFrame - WDS(Words);
675 p = ap + SIZEOF_StgHeader + OFFSET_StgAP_STACK_payload;
688 R1 = StgAP_STACK_fun(ap);
690 // Because of eager blackholing the closure no longer has correct size so
691 // threadPaused() can't correctly zero the slop, so we do it here. See #15571
692 // and Note [zeroing slop].
693 OVERWRITING_CLOSURE_SIZE(ap, BYTES_TO_WDS(SIZEOF_StgThunkHeader) + 2 + Words);
698 /* -----------------------------------------------------------------------------
699 AP_STACK_NOUPD - exactly like AP_STACK, but doesn't push an update frame.
700 -------------------------------------------------------------------------- */
702 INFO_TABLE(stg_AP_STACK_NOUPD,/*special layout*/0,0,AP_STACK,
703 "AP_STACK_NOUPD","AP_STACK_NOUPD")
704 /* no args => explicit stack */
711 Words = StgAP_STACK_size(ap);
714 * Check for stack overflow. IMPORTANT: use a _NP check here,
715 * because if the check fails, we might end up blackholing this very
716 * closure, in which case we must enter the blackhole on return rather
717 * than continuing to evaluate the now-defunct closure.
719 STK_CHK_ENTER(WDS(Words) + WDS(AP_STACK_SPLIM), R1);
720 /* ensure there is at least AP_STACK_SPLIM words of headroom available
721 * after unpacking the AP_STACK. See bug #1466 */
723 Sp = Sp - WDS(Words);
732 p = ap + SIZEOF_StgHeader + OFFSET_StgAP_STACK_payload;
745 R1 = StgAP_STACK_fun(ap);