{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}

-----------------------------------------------------------------------------
--
-- Stg to C-- code generation: expressions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmExpr ( cgExpr ) where

#include "HsVersions.h"

import {-# SOURCE #-} StgCmmBind ( cgBind )

import StgCmmMonad
import StgCmmHeap
import StgCmmEnv
import StgCmmCon
import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
import StgCmmLayout
import StgCmmPrim
import StgCmmHpc
import StgCmmTicky
import StgCmmUtils
import StgCmmClosure

import StgSyn

import MkGraph
import BlockId
import Cmm
import CmmInfo
import CoreSyn
import DataCon
import ForeignCall
import Id
import PrimOp
import TyCon
import Type
import CostCentre ( CostCentreStack, currentCCS )
import Maybes
import Util
import FastString
import Outputable

import Control.Monad (unless,void)
import Control.Arrow (first)

import Prelude hiding ((<*>))
------------------------------------------------------------------------
-- cgExpr: the main function
------------------------------------------------------------------------

cgExpr :: StgExpr -> FCode ReturnKind

cgExpr (StgApp fun args) = cgIdApp fun args

{- seq# a s ==> a -}
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con args)  = cgConApp con args
cgExpr (StgTick t e)         = cgTick t >> cgExpr e
cgExpr (StgLit lit)          = do cmm_lit <- cgLit lit
                                  emitReturn [CmmLit cmm_lit]

cgExpr (StgLet binds expr) = do { cgBind binds; cgExpr expr }
cgExpr (StgLetNoEscape binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr bndr alt_type alts) =
  cgCase expr bndr alt_type alts

cgExpr (StgLam {}) = panic "cgExpr: StgLam"

------------------------------------------------------------------------
-- Let no escape
------------------------------------------------------------------------

{- Generating code for a let-no-escape binding, a.k.a. a join point, is
very similar to what we do for a case expression.  The duality is
between
        let-no-escape x = b
        in e
and
        case e of ... -> b

That is, the RHS of 'x' (i.e. 'b') will execute *later*, just like
the alternative of the case; it needs to be compiled in an environment
in which all volatile bindings are forgotten, and the free vars are
bound only to stable things like stack locations.  The 'e' part will
execute *next*, just like the scrutinee of a case. -}
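
{- To make that concrete, here is a sketch (illustrative only; the
labels, register names, and exact code below are made up, not compiler
output) of what we expect for

        let-no-escape j = \x -> ...x...
        in case v of { A -> j 1#; B -> j 2# }

namely

        ...code for v...
        A: { jx = 1#; goto Lj }
        B: { jx = 2#; goto Lj }
        Lj:    ...code for j's body, with x in local register jx...
               goto Ljoin      -- branch to the join_id block
        Ljoin: ...

The body of 'j' is emitted out of line (see cgLetNoEscapeRhs below);
each call site becomes assignments to the argument registers plus a
jump, and every exit of the body branches to the join label emitted
by cgExpr. -}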

-------------------------
cgLneBinds :: BlockId -> StgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do  { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
        ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
        ; fcode
        ; addBindC info }

cgLneBinds join_id (StgRec pairs)
  = do  { local_cc <- saveCurrentCostCentre
        ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
        ; let (infos, fcodes) = unzip r
        ; addBindsC infos
        ; sequence_ fcodes
        }

-------------------------
cgLetNoEscapeRhs
    :: BlockId          -- join point for successor of let-no-escape
    -> Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
     ; let code = do { (_, body) <- getCodeScoped rhs_code
                     ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
     ; return (info, code)
     }

cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
  = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args)
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor.  It's easy; just behave as if it
        -- was an StgRhsClosure with a ConApp inside!

-------------------------
cgLetNoEscapeClosure
        :: Id                   -- binder
        -> Maybe LocalReg       -- Slot for saved current cost centre
        -> CostCentreStack      -- XXX: *** NOT USED *** why not?
        -> [NonVoid Id]         -- Args (as in \ args -> body)
        -> StgExpr              -- Body (as in above)
        -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do dflags <- getDynFlags
       return ( lneIdInfo dflags bndr args
              , code )
  where
   code = forkLneBody $ do {
            ; withNewTickyCounterLNE (idName bndr) args $ do
            ; restoreCurrentCostCentre cc_slot
            ; arg_regs <- bindArgsToRegs args
            ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }


{- Note [Compiling case expressions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is quite interesting to decide whether to put a heap-check at the
start of each alternative.  Of course we certainly have to do so if
the case forces an evaluation, or if there is a primitive op which can
trigger GC.

A more interesting situation is this (a Plan-B situation)

        !P!;
        ...P...
        case x# of
          0#      -> !Q!; ...Q...
          default -> !R!; ...R...

where !x! indicates a possible heap-check point.  The heap checks
in the alternatives *can* be omitted, in which case the topmost
heap check will take their worst case into account.

In favour of omitting !Q!, !R!:

 - *May* save a heap overflow test,
   if ...P... allocates anything.

 - We can use relative addressing from a single Hp to
   get at all the closures so allocated.

 - No need to save volatile vars etc across heap checks
   in !Q!, !R!

Against omitting !Q!, !R!

 - May put a heap-check into the inner loop.  Suppose
   the main loop is P -> R -> P -> R...
   Q is the loop exit, and only it does allocation.
   This only hurts us if P does no allocation.  If P allocates,
   then there is a heap check in the inner loop anyway.

 - May do more allocation than reqd.  This sometimes bites us
   badly.  For example, nfib (ha!) allocates about 30% more space if the
   worst-casing is done, because many many calls to nfib are leaf calls
   which don't need to allocate anything.

   We can un-allocate, but that costs an instruction

Neither problem hurts us if there is only one alternative.

Suppose the inner loop is P->R->P->R etc.  Then here is
how many heap checks we get in the *inner loop* under various
conditions

  Alloc   Heap check in branches (!Q!, !R!)?
  P Q R      yes     no (absorb to !P!)
--------------------------------------
  n n n      0          0
  n y n      0          1
  n . y      1          1
  y . y      2          1
  y . n      1          1

Best choices: absorb heap checks from Q and R into !P! iff
  a) P itself does some allocation
or
  b) P does allocation, or there is exactly one alternative

We adopt (b) because that is more likely to put the heap check at the
entry to a function, when not many things are live.  After a bunch of
single-branch cases, we may have lots of things live.

Hence: two basic plans for

        case e of r { alts }

------ Plan A: the general case ---------

        ...save current cost centre...

        ...code for e,
           with sequel (SetLocals r)

        ...restore current cost centre...
        ...code for alts...
        ...alts do their own heap checks

------ Plan B: special case when ---------
  (i)  e does not allocate or call GC
  (ii) either upstream code performs allocation
       or there is just one alternative

  Then heap allocation in the (single) case branch
  is absorbed by the upstream check.
  Very common example: primops on unboxed values

        ...code for e,
           with sequel (SetLocals r)...

        ...code for alts...
        ...no heap check...
-}
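
{- As a concrete instance of Plan B (an illustrative sketch, not taken
from compiler output):

        case x# +# 1# of r# -> I# r#

The scrutinee is an inline primop, so it cannot block or trigger GC
(condition (i)), and there is a single alternative (condition (ii)),
so NoGcInAlts applies: the allocation of the I# box in the branch is
absorbed by whatever heap check the upstream code already performs. -}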


-------------------------------------
data GcPlan
  = GcInAlts            -- Put a GC check at the start of the case alternatives,
        [LocalReg]      -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check

-------------------------------------
cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind

cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
  | isEnumerationTyCon tycon -- Note [case on bool]
  = do { tag_expr <- do_enum_primop op args

       -- If the binder is not dead, convert the tag to a constructor
       -- and assign it.
       ; unless (isDeadBinder bndr) $ do
            { dflags <- getDynFlags
            ; tmp_reg <- bindArgToReg (NonVoid bndr)
            ; emitAssign (CmmLocal tmp_reg)
                         (tagToClosure dflags tycon tag_expr) }

       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                              (NonVoid bndr) alts
       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
       ; return AssignedDirectly
       }
  where
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
    do_enum_primop TagToEnumOp [arg]  -- No code!
      = getArgAmode (NonVoid arg)
    do_enum_primop primop args
      = do dflags <- getDynFlags
           tmp <- newTemp (bWord dflags)
           cgPrimOp [tmp] primop args
           return (CmmReg (CmmLocal tmp))
{-
Note [case on bool]
~~~~~~~~~~~~~~~~~~~
This special case handles code like

  case a <# b of
    True ->
    False ->

-->  case tagToEnum# (a <# b) of
        True -> .. ; False -> ..

-->  case (a <# b) of r ->
     case tagToEnum# r of
        True -> .. ; False -> ..

If we let the ordinary case code handle it, we'll get something like

 tmp1 = a < b
 tmp2 = Bool_closure_tbl[tmp1]
 if (tmp2 & 7 != 0) then ... // normal tagged case

but this junk won't optimise away.  What we really want is just an
inline comparison:

 if (a < b) then ...

So we add a special case to generate

 tmp1 = a < b
 if (tmp1 == 0) then ...

and later optimisations will further improve this.

Now that #6135 has been resolved it should be possible to remove that
special case.  The idea behind this special case and the pre-#6135
implementation of Bool-returning primops was that tagToEnum# was added
implicitly in the codegen and then optimized away.  Now the call to
tagToEnum# is explicit in the source code, which allows it to be
optimized away at earlier stages of compilation (i.e. at the Core
level).

Note [Scrutinising VoidRep]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Suppose we have this STG code:
   f = \[s : State# RealWorld] ->
       case s of _ -> blah
This is very odd.  Why are we scrutinising a state token?  But it
can arise with bizarre NOINLINE pragmas (Trac #9964)
    crash :: IO ()
    crash = IO (\s -> let {-# NOINLINE s' #-}
                          s' = s
                      in (# s', () #))

Now the trouble is that 's' has VoidRep, and we do not bind void
arguments in the environment; they don't live anywhere.  See the
calls to nonVoidIds in various places.  So we must not look up
's' in the environment.  Instead, just evaluate the RHS!  Simple.
-}

cgCase (StgApp v []) _ (PrimAlt _) alts
  | isVoidRep (idPrimRep v)  -- See Note [Scrutinising VoidRep]
  , [(DEFAULT, _, rhs)] <- alts
  = cgExpr rhs
{- Note [Dodgy unsafeCoerce 1]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider
    case (x :: HValue) |> co of (y :: MutVar# Int)
        DEFAULT -> ...
We want to generate an assignment
     y := x
We want to allow this assignment to be generated in the case when the
types are compatible, because this allows some slightly-dodgy but
occasionally-useful casts to be used, such as in RtClosureInspect
where we cast an HValue to a MutVar# so we can print out the contents
of the MutVar#.  If instead we generate code that enters the HValue,
then we'll get a runtime panic, because the HValue really is a
MutVar#.  The types are compatible though, so we can just generate an
assignment.
-}
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnliftedType (idType v)  -- Note [Dodgy unsafeCoerce 1]
    || reps_compatible
  = -- assignment suffices for unlifted types
    do { dflags <- getDynFlags
       ; unless reps_compatible $
           panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr)))
                    (idInfoToAmode v_info)
       ; bindArgToReg (NonVoid bndr)
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = idPrimRep v == idPrimRep bndr

{- Note [Dodgy unsafeCoerce 2, #3132]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In all other cases of a lifted Id being cast to an unlifted type, the
Id should be bound to bottom, otherwise this is an unsafe use of
unsafeCoerce.  We can generate code to enter the Id and assume that
it will never return.  Hence, we emit the usual enter/return code, and
because bottom must be untagged, it will be entered.  The Sequel is a
type-correct assignment, albeit bogus.  The (dead) continuation loops;
it would be better to invoke some kind of panic function here.
-}
cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = do { dflags <- getDynFlags
       ; mb_cc <- maybeSaveCostCentre True
       ; withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newLabelC
       ; emitLabel l
       ; emit (mkBranch l)  -- an infinite loop
       ; return AssignedDirectly
       }

{- Note [Handle seq#]
~~~~~~~~~~~~~~~~~~~~~
case seq# a s of v
  (# s', a' #) -> e

==>

case a of v
  (# s', a' #) -> e

(taking advantage of the fact that the return convention for (# State#, a #)
is the same as the return convention for just 'a')
-}

cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- Note [Handle seq#]
    -- Use the same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts
cgCase scrut bndr alt_type alts
  = -- the general case
    do { dflags <- getDynFlags
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg dflags) ret_bndrs
       ; simple_scrut <- isSimpleScrut scrut alt_type
       ; let do_gc  | not simple_scrut = True
                    | isSingleton alts = False
                    | up_hp_usg > 0    = False
                    | otherwise        = True
               -- cf Note [Compiling case expressions]
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -}
       ; ret_kind <- withSequel sequel (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }


{-
Note [scrut sequel]

The job of the scrutinee is to assign its value(s) to alt_regs.
Additionally, if we plan to do a heap-check in the alternatives (see
Note [Compiling case expressions]), then we *must* retreat Hp to
recover any unused heap before passing control to the sequel.  If we
don't do this, then any unused heap will become slop because the heap
check will reset the heap usage.  Slop in the heap breaks LDV profiling
(+RTS -hb) which needs to do a linear sweep through the nursery.
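
A sketch of what this means in practice (illustrative numbers, not
real output): if the upstream heap check reserved, say, 24 bytes but
the code for the scrutinee only used 8 of them, we retreat Hp before
control reaches the alternatives:

    Hp = Hp - 16   // give back the 16 unused bytes
    ...code for alts, each opening with its own heap check...

so the nursery stays densely packed with well-formed closures.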


Note [Inlining out-of-line primops and heap checks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If shouldInlinePrimOp returns True when called from StgCmmExpr for the
purpose of heap check placement, we *must* inline the primop later in
StgCmmPrim.  If we don't, things will go wrong.
-}

-----------------
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut
  | simple_scrut = return Nothing
  | otherwise    = saveCurrentCostCentre


-----------------
isSimpleScrut :: StgExpr -> AltType -> FCode Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
--     when it does, you'll deeply mess up allocation
isSimpleScrut (StgOpApp op args _) _           = isSimpleOp op args
isSimpleScrut (StgLit _)           _           = return True  -- case 1# of { 0# -> ..; ... }
isSimpleScrut (StgApp _ [])        (PrimAlt _) = return True  -- case x# of { 0# -> ..; ... }
isSimpleScrut _                    _           = return False

isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
-- True iff the op cannot block or allocate
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
isSimpleOp (StgPrimOp op) stg_args = do
    arg_exprs <- getNonVoidArgAmodes stg_args
    dflags <- getDynFlags
    -- See Note [Inlining out-of-line primops and heap checks]
    return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
isSimpleOp (StgPrimCallOp _) _ = return False

-----------------
chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
-- These are the binders of a case that are assigned
-- by the evaluation of the scrutinee
-- Only non-void ones come back
chooseReturnBndrs bndr (PrimAlt _) _alts
  = nonVoidIds [bndr]

chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _)]
  = nonVoidIds ids      -- 'bndr' is not assigned!

chooseReturnBndrs bndr (AlgAlt _) _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs bndr PolyAlt _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
        -- UbxTupAlt has only one alternative
-------------------------------------
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
       -> FCode ReturnKind
-- At this point the results of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do  { dflags <- getDynFlags

        ; tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let bndr_reg = CmmLocal (idToReg dflags bndr)
              (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

              tagged_cmms' = [(lit,code)
                             | (LitAlt lit, code) <- tagged_cmms]
        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
        ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { dflags <- getDynFlags

        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

        ; let fam_sz   = tyConFamilySize tycon
              bndr_reg = CmmLocal (idToReg dflags bndr)

                -- Is the constructor tag in the node reg?
        ; if isSmallFamily dflags fam_sz
            then do
              let -- Yes, bndr_reg has the constructor tag in its ls bits
                  tag_expr  = cmmConstrTag1 dflags (CmmReg bndr_reg)
                  branches' = [(tag+1,branch) | (tag,branch) <- branches]
              emitSwitch tag_expr branches' mb_deflt 1 fam_sz

            else -- No, get tag from info table
              do dflags <- getDynFlags
                 let -- Note that ptr _always_ has tag 1
                     -- when the family size is big enough
                     untagged_ptr = cmmRegOffB bndr_reg (-1)
                     tag_expr     = getConstrTag dflags (untagged_ptr)
                 emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)

        ; return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative


-- Note [alg-alt heap check]
--
-- In an algebraic case with more than one alternative, we will have
-- code like
--
-- L0:
--   x = R1
--   goto L1
-- L1:
--   if (x & 7 >= 2) then goto L2 else goto L3
-- L2:
--   Hp = Hp + 16
--   if (Hp > HpLim) then goto L4
--   ...
-- L4:
--   call gc() returns to L5
-- L5:
--   x = R1
--   goto L1

-------------------
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
             -> FCode ( Maybe CmmAGraphScoped
                      , [(ConTagZ, CmmAGraphScoped)] )
cgAlgAltRhss gc_plan bndr alts
  = do { tagged_cmms <- cgAltRhss gc_plan bndr alts

       ; let { mb_deflt = case tagged_cmms of
                           ((DEFAULT,rhs) : _) -> Just rhs
                           _other              -> Nothing
                            -- DEFAULT is always first, if present

             ; branches = [ (dataConTagZ con, cmm)
                          | (DataAlt con, cmm) <- tagged_cmms ]
             }

       ; return (mb_deflt, branches)
       }


-------------------
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
          -> FCode [(AltCon, CmmAGraphScoped)]
cgAltRhss gc_plan bndr alts = do
  dflags <- getDynFlags
  let
    base_reg = idToReg dflags bndr
    cg_alt :: StgAlt -> FCode (AltCon, CmmAGraphScoped)
    cg_alt (con, bndrs, rhs)
      = getCodeScoped           $
        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg bndrs
           ; _ <- cgExpr rhs
           ; return con }
  forkAlts (map cg_alt alts)

maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (NoGcInAlts,_)  code = code
maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
  altHeapCheck regs code
maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
  altHeapCheckReturnsTo regs lret off code

-----------------------------------------------------------------------------
-- Tail calls
-----------------------------------------------------------------------------

cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
cgConApp con stg_args
  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   -- Boxed constructors; allocate and return
  = ASSERT2( stg_args `lengthIs` dataConRepRepArity con, ppr con <+> ppr stg_args )
    do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
                                     currentCCS con stg_args
                -- The first "con" says that the name bound to this
                -- closure is "con", which is a bit of a fudge, but
                -- it only affects profiling (hence the False)

        ; emit =<< fcode_init
        ; tickyReturnNewCon (length stg_args)
        ; emitReturn [idInfoToAmode idinfo] }
cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id [] | isVoidTy (idType fun_id) = emitReturn []
cgIdApp fun_id args = do
    dflags         <- getDynFlags
    fun_info       <- getCgIdInfo fun_id
    self_loop_info <- getSelfLoop
    let cg_fun_id = cg_id fun_info
           -- NB: use (cg_id fun_info) instead of fun_id, because
           -- the former may be externalised for -split-objs.
           -- See Note [Externalise when splitting] in StgCmmMonad

        fun_arg  = StgVarArg cg_fun_id
        fun_name = idName cg_fun_id
        fun      = idInfoToAmode fun_info
        lf_info  = cg_lf fun_info
        n_args   = length args
        v_args   = length $ filter (isVoidTy . stgArgType) args
        node_points dflags = nodeMustPointToIt dflags lf_info
    case getCallMethod dflags fun_name cg_fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of

        -- A value in WHNF, so we can just return it.
        ReturnIt -> emitReturn [fun]    -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> ASSERT( null args )  -- Discarding arguments
                   emitEnter fun

        SlowCall -> do  -- A slow function call via the RTS apply routines
            { tickySlowCall lf_info args
            ; emitComment $ mkFastString "slowCall"
            ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
            { tickyDirectCall arity args
            ; if node_points dflags
                 then directCall NativeNodeCall   lbl arity (fun_arg:args)
                 else directCall NativeDirectCall lbl arity args }

        -- Let-no-escape call or self-recursive tail-call
        JumpToIt blk_id lne_regs -> do
            { adjustHpBackwards -- always do this before a tail-call
            ; cmm_args <- getNonVoidArgAmodes args
            ; emitMultiAssign lne_regs cmm_args
            ; emit (mkBranch blk_id)
            ; return AssignedDirectly }

-- Note [Self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive tail calls can be optimized into a local jump in the same
-- way as let-no-escape bindings (see Note [What is a non-escaping let] in
-- stgSyn/CoreToStg.hs). Consider this:
--
-- foo.info:
--     a = R1  -- calling convention
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: R1 = x
--     R2 = y
--     call foo(R1,R2)
--
-- Instead of putting x and y into registers (or other locations required by
-- the calling convention) and performing a call, we can put them into local
-- variables a and b and perform a jump to L1:
--
-- foo.info:
--     a = R1
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: a = x
--     b = y
--     goto L1
--
-- This can be done only when the function is calling itself in a tail
-- position, and only if the call passes a number of parameters equal to the
-- function's arity. Note that this cannot be performed if a function calls
-- itself with a continuation.
--
-- This in fact implements the optimization known as "loopification". It was
-- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
-- by Krzysztof Woś, though we use a different approach. Krzysztof performed
-- his optimization at the Cmm level, whereas we perform ours during code
-- generation (the Stg-to-Cmm pass), essentially making sure that the
-- optimized Cmm code is generated in the first place.
--
-- The implementation is spread across a couple of places in the code:
--
-- * The FCode monad stores additional information in its reader environment
--   (the cgd_self_loop field). This information tells us which function can
--   tail call itself in an optimized way (it is the function currently
--   being compiled), what the label of its loop header is (L1 in the
--   example above), and which local registers we should place the arguments
--   in before making a call (these would be a and b in the example above).
--
-- * Whenever we are compiling a function, we set that information to reflect
--   the fact that the function currently being compiled can be jumped to,
--   instead of called. This is done in closureCodeBody in StgCmmBind.
--
-- * We also have to emit a label to which we will be jumping. We make sure
--   that the label is placed after a stack check but before the heap
--   check. The reason is that making a recursive tail-call does not grow
--   the stack, so we only need to check once. But it may grow the heap, so
--   we have to repeat the heap check in every self-call. This is done in
--   do_checks in StgCmmHeap.
--
-- * When we begin compilation of another closure we remove the additional
--   information from the environment. This is done by forkClosureBody
--   in StgCmmMonad. Other functions that duplicate the environment -
--   forkLneBody, forkAlts, codeOnly - keep that information. In other
--   words, we only need to clean the environment of the self-loop
--   information when compiling the right hand side of a closure (binding).
--
-- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
--   of call will be generated. getCallMethod decides to generate a self-
--   recursive tail call when (a) the environment stores information about a
--   possible self tail-call; (b) that tail call is to the function currently
--   being compiled; (c) the number of passed non-void arguments is equal to
--   the function's arity; and (d) loopification is turned on via the
--   -floopification command-line option. A sketch of this test follows
--   this list.
--
-- * The command-line option that turns loopification on and off is
--   implemented in DynFlags.
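--
-- A sketch of the getCallMethod test described above (approximate and
-- for orientation only; the real definition lives in getCallMethod in
-- StgCmmClosure and its details may differ):
--
--     getCallMethod dflags _name id _lf_info n_args v_args _cg_loc
--                   (Just (self_loop_id, block_id, arg_regs))
--       | gopt Opt_Loopification dflags          -- (d)
--       , id == self_loop_id                     -- (a), (b)
--       , n_args - v_args == length arg_regs     -- (c), void args excluded
--       = JumpToIt block_id arg_regs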
--
--
-- Note [Void arguments in self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- State# tokens can get in the way of the loopification optimization as seen
-- in #11372. Consider this:
--
-- foo :: [a]
--     -> (a -> State# s -> (# State# s, Bool #))
--     -> State# s
--     -> (# State# s, Maybe a #)
-- foo [] f s = (# s, Nothing #)
-- foo (x:xs) f s = case f x s of
--      (# s', b #) -> case b of
--          True  -> (# s', Just x #)
--          False -> foo xs f s'
--
-- We would like to compile the call to foo as a local jump instead of a call
-- (see Note [Self-recursive tail calls]). However, the generated function has
-- an arity of 2 while we apply it to 3 arguments, one of them being of void
-- type. Thus, we mustn't count arguments of void type when checking whether
-- we can turn a call into a self-recursive jump.
--
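-- Concretely (a worked instance of the rule above): the recursive call
-- passes 3 STG arguments (xs, f, s'), of which the State# token s' is
-- void, so n_args - v_args = 3 - 1 = 2, which matches foo's arity of 2,
-- and the call can still be loopified. This subtraction is exactly what
-- the v_args computation in cgIdApp above feeds to getCallMethod.
--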

emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { dflags <- getDynFlags
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return _ -> do
        { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
        ; emit $ mkJump dflags NativeNodeCall entry
                        [cmmUntag dflags fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lcall else goto Lret
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
       { lret <- newLabelC
       ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
       ; lcall <- newLabelC
       ; updfr_off <- getUpdFrameOff
       ; let area = Young lret
       ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
                                            [fun] updfr_off []
         -- refer to fun via nodeReg after the copyout, to avoid having
         -- both live simultaneously; this sometimes enables fun to be
         -- inlined in the RHS of the R1 assignment.
       ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
             the_call = toCall entry (Just lret) updfr_off off outArgs regs
       ; tscope <- getTickScope
       ; emit $
           copyout <*>
           mkCbranch (cmmIsTagged dflags (CmmReg nodeReg))
                     lret lcall Nothing <*>
           outOfLine lcall (the_call,tscope) <*>
           mkLabel lret tscope <*>
           copyin
       ; return (ReturnedTo lret off)
       }
  }

------------------------------------------------------------------------
-- Ticks
------------------------------------------------------------------------

-- | Generate Cmm code for a tick. Depending on the type of Tickish,
-- this will either generate actual Cmm instrumentation code, or
-- simply pass on the annotation as a @CmmTickish@.
cgTick :: Tickish Id -> FCode ()
cgTick tick
  = do { dflags <- getDynFlags
       ; case tick of
           ProfNote   cc t p -> emitSetCCC cc t p
           HpcTick    m n    -> emit (mkTickBox dflags m n)
           SourceNote s n    -> emitTick $ SourceNote s n
           _other            -> return () -- ignore
       }