{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}

-----------------------------------------------------------------------------
--
-- Stg to C-- code generation: expressions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmExpr ( cgExpr ) where

#define FAST_STRING_NOT_NEEDED
#include "HsVersions.h"

import {-# SOURCE #-} StgCmmBind ( cgBind )

import StgCmmMonad
import StgCmmHeap
import StgCmmEnv
import StgCmmCon
import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
import StgCmmLayout
import StgCmmPrim
import StgCmmHpc
import StgCmmTicky
import StgCmmUtils
import StgCmmClosure

import StgSyn

import MkGraph
import BlockId
import Cmm
import CmmInfo
import CoreSyn
import DataCon
import ForeignCall
import Id
import PrimOp
import TyCon
import Type
import CostCentre ( CostCentreStack, currentCCS )
import Maybes
import Util
import FastString
import Outputable

import Control.Monad (unless,void)
import Control.Arrow (first)

import Prelude hiding ((<*>))
------------------------------------------------------------------------
--              cgExpr: the main function
------------------------------------------------------------------------

cgExpr :: StgExpr -> FCode ReturnKind

cgExpr (StgApp fun args) = cgIdApp fun args

{- seq# a s ==> a -}
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con args)  = cgConApp con args
cgExpr (StgTick t e)         = cgTick t >> cgExpr e
cgExpr (StgLit lit)          = do cmm_lit <- cgLit lit
                                  emitReturn [CmmLit cmm_lit]

cgExpr (StgLet binds expr)             = do { cgBind binds; cgExpr expr }
cgExpr (StgLetNoEscape _ _ binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr _live_vars _save_vars bndr _srt alt_type alts) =
  cgCase expr bndr alt_type alts

cgExpr (StgLam {}) = panic "cgExpr: StgLam"
------------------------------------------------------------------------
--              Let no escape
------------------------------------------------------------------------

{- Generating code for a let-no-escape binding, aka a join point, is
very similar to what we do for a case expression.  The duality is
between
        let-no-escape x = b
        in e
and
        case e of ... -> b

That is, the RHS of 'x' (ie 'b') will execute *later*, just like
the alternative of the case; it needs to be compiled in an environment
in which all volatile bindings are forgotten, and the free vars are
bound only to stable things like stack locations.  The 'e' part will
execute *next*, just like the scrutinee of a case. -}
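
{- A small illustration (a sketch with made-up names, not from the
   original comment): in

        let-no-escape j = \y -> ...y...
        in case x of { 0# -> j a; DEFAULT -> j b }

   the body of 'j' is emitted once as a labelled block; each call of
   'j' becomes an assignment to the register holding 'y' followed by
   a jump to that label, and no closure for 'j' is ever allocated. -}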

-------------------------
cgLneBinds :: BlockId -> StgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do  { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
        ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
        ; fcode
        ; addBindC info }

cgLneBinds join_id (StgRec pairs)
  = do  { local_cc <- saveCurrentCostCentre
        ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
        ; let (infos, fcodes) = unzip r
        ; addBindsC infos
        ; sequence_ fcodes
        }

-------------------------
cgLetNoEscapeRhs
    :: BlockId          -- join point for successor of let-no-escape
    -> Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
     ; let code = do { (_, body) <- getCodeScoped rhs_code
                     ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
     ; return (info, code)
     }

cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
  = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args)
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor.  It's easy; just behave as if it
        -- was an StgRhsClosure with a ConApp inside!

-------------------------
cgLetNoEscapeClosure
        :: Id                   -- binder
        -> Maybe LocalReg       -- Slot for saved current cost centre
        -> CostCentreStack      -- XXX: *** NOT USED *** why not?
        -> [NonVoid Id]         -- Args (as in \ args -> body)
        -> StgExpr              -- Body (as in above)
        -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do dflags <- getDynFlags
       return ( lneIdInfo dflags bndr args
              , code )
  where
   code = forkLneBody $ do {
            ; withNewTickyCounterLNE (idName bndr) args $ do
            ; restoreCurrentCostCentre cc_slot
            ; arg_regs <- bindArgsToRegs args
            ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }

------------------------------------------------------------------------
--              Case expressions
------------------------------------------------------------------------

{- Note [Compiling case expressions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is quite interesting to decide whether to put a heap-check at the
start of each alternative.  Of course we certainly have to do so if
the case forces an evaluation, or if there is a primitive op which can
trigger GC.

A more interesting situation is this (a Plan-B situation)

        !P!;
        ...P...
        case x# of
          0#      -> !Q!; ...Q...
          default -> !R!; ...R...

where !x! indicates a possible heap-check point.  The heap checks
in the alternatives *can* be omitted, in which case the topmost
heap check will take their worst case into account.

In favour of omitting !Q!, !R!:

 - *May* save a heap overflow test,
   if ...P... allocates anything.

 - We can use relative addressing from a single Hp to
   get at all the closures so allocated.

 - No need to save volatile vars etc across heap checks
   in !Q!, !R!

Against omitting !Q!, !R!:

 - May put a heap-check into the inner loop.  Suppose
   the main loop is P -> R -> P -> R...
   Q is the loop exit, and only it does allocation.
   This only hurts us if P does no allocation.  If P allocates,
   then there is a heap check in the inner loop anyway.

 - May do more allocation than required.  This sometimes bites us
   badly.  For example, nfib (ha!) allocates about 30% more space if
   the worst-casing is done, because many, many calls to nfib are leaf
   calls which don't need to allocate anything.

   We can un-allocate, but that costs an instruction.

Neither problem hurts us if there is only one alternative.

Suppose the inner loop is P->R->P->R etc.  Then here is
how many heap checks we get in the *inner loop* under various
conditions

  Alloc   Heap check in branches (!Q!, !R!)?
  P Q R      yes     no (absorb to !P!)
 --------------------------------------
  n n n      0          0
  n y n      0          1
  n . y      1          1
  y . y      2          1
  y . n      1          1

Best choices: absorb heap checks from Q and R into !P! iff
  a) P itself does some allocation
or
  b) P does allocation, or there is exactly one alternative

We adopt (b) because that is more likely to put the heap check at the
entry to a function, when not many things are live.  After a bunch of
single-branch cases, we may have lots of things live.

Hence: two basic plans for

        case e of r { alts }

------ Plan A: the general case ---------

        ...save current cost centre...

        ...code for e,
           with sequel (SetLocals r)

        ...restore current cost centre...
        ...code for alts...
        ...alts do their own heap checks

------ Plan B: special case when ---------
  (i)  e does not allocate or call GC
  (ii) either upstream code performs allocation
       or there is just one alternative

  Then heap allocation in the (single) case branch
  is absorbed by the upstream check.
  Very common example: primops on unboxed values

        ...code for e,
           with sequel (SetLocals r)...

        ...code for alts...
        ...no heap check...
-}
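
{- A concrete Plan-B illustration (a sketch, not from the original
   note): in

        case x# +# 1# of y# -> E

   the scrutinee is a primop that neither allocates nor calls the GC,
   and there is a single alternative, so any allocation done by E is
   absorbed into the enclosing function's heap check and no check is
   emitted in the alternative. -}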


-------------------------------------
data GcPlan
  = GcInAlts            -- Put a GC check at the start of the case alternatives,
       [LocalReg]       -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check

-------------------------------------
cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind

cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
  | isEnumerationTyCon tycon -- Note [case on bool]
  = do { tag_expr <- do_enum_primop op args

       -- If the binder is not dead, convert the tag to a constructor
       -- and assign it.
       ; unless (isDeadBinder bndr) $ do
            { dflags <- getDynFlags
            ; tmp_reg <- bindArgToReg (NonVoid bndr)
            ; emitAssign (CmmLocal tmp_reg)
                         (tagToClosure dflags tycon tag_expr) }

       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                              (NonVoid bndr) alts
       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
       ; return AssignedDirectly
       }
  where
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
    do_enum_primop TagToEnumOp [arg]  -- No code!
      = getArgAmode (NonVoid arg)
    do_enum_primop primop args
      = do dflags <- getDynFlags
           tmp <- newTemp (bWord dflags)
           cgPrimOp [tmp] primop args
           return (CmmReg (CmmLocal tmp))

{-
Note [case on bool]
~~~~~~~~~~~~~~~~~~~
This special case handles code like

  case a <# b of
    True ->
    False ->

-->  case tagToEnum# (a <# b) of
       True -> .. ; False -> ...

-->  case (a <# b) of r ->
     case tagToEnum# r of
       True -> .. ; False -> ...

If we let the ordinary case code handle it, we'll get something like

 tmp1 = a < b
 tmp2 = Bool_closure_tbl[tmp1]
 if (tmp2 & 7 != 0) then ... // normal tagged case

but this junk won't optimise away.  What we really want is just an
inline comparison:

 if (a < b) then ...

So we add a special case to generate

 tmp1 = a < b
 if (tmp1 == 0) then ...

and later optimisations will further improve this.

Now that #6135 has been resolved it should be possible to remove this
special case.  The idea behind this special case and the pre-6135
implementation of Bool-returning primops was that tagToEnum# was added
implicitly in the codegen and then optimized away.  Now the call to
tagToEnum# is explicit in the source code, which allows it to be
optimized away at earlier stages of compilation (i.e. at the Core
level).

Note [Scrutinising VoidRep]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Suppose we have this STG code:
   f = \[s : State# RealWorld] ->
       case s of _ -> blah
This is very odd.  Why are we scrutinising a state token?  But it
can arise with bizarre NOINLINE pragmas (Trac #9964)
    crash :: IO ()
    crash = IO (\s -> let {-# NOINLINE s' #-}
                          s' = s
                      in (# s', () #))

Now the trouble is that 's' has VoidRep, and we do not bind void
arguments in the environment; they don't live anywhere.  See the
calls to nonVoidIds in various places.  So we must not look up
's' in the environment.  Instead, just evaluate the RHS!  Simple.
-}

cgCase (StgApp v []) _ (PrimAlt _) alts
  | isVoidRep (idPrimRep v)  -- See Note [Scrutinising VoidRep]
  , [(DEFAULT, _, _, rhs)] <- alts
  = cgExpr rhs

{- Note [Dodgy unsafeCoerce 1]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider
    case (x :: HValue) |> co of (y :: MutVar# Int)
        DEFAULT -> ...
We want to generate an assignment
    y := x
We want to allow this assignment to be generated in the case when the
types are compatible, because this allows some slightly-dodgy but
occasionally-useful casts to be used, such as in RtClosureInspect
where we cast an HValue to a MutVar# so we can print out the contents
of the MutVar#.  If instead we generate code that enters the HValue,
then we'll get a runtime panic, because the HValue really is a
MutVar#.  The types are compatible though, so we can just generate an
assignment.
-}
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnliftedType (idType v)  -- Note [Dodgy unsafeCoerce 1]
  || reps_compatible
  = -- assignment suffices for unlifted types
    do { dflags <- getDynFlags
       ; unless reps_compatible $
           panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr)))
                    (idInfoToAmode v_info)
       ; bindArgsToRegs [NonVoid bndr]
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = idPrimRep v == idPrimRep bndr

{- Note [Dodgy unsafeCoerce 2, #3132]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In all other cases of a lifted Id being cast to an unlifted type, the
Id should be bound to bottom, otherwise this is an unsafe use of
unsafeCoerce.  We can generate code to enter the Id and assume that
it will never return.  Hence, we emit the usual enter/return code, and
because bottom must be untagged, it will be entered.  The Sequel is a
type-correct assignment, albeit bogus.  The (dead) continuation loops;
it would be better to invoke some kind of panic function here.
-}
cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = do { dflags <- getDynFlags
       ; mb_cc <- maybeSaveCostCentre True
       ; withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newLabelC
       ; emitLabel l
       ; emit (mkBranch l)  -- an infinite loop
       ; return AssignedDirectly
       }

{- Note [Handle seq#]
~~~~~~~~~~~~~~~~~~~~~
   case seq# a s of v
     (# s', a' #) -> e

==>

   case a of v
     (# s', a' #) -> e

(taking advantage of the fact that the return convention for (# State#, a #)
is the same as the return convention for just 'a')
-}

cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- Note [Handle seq#]
    -- Use the same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts

cgCase scrut bndr alt_type alts
  = -- the general case
    do { dflags <- getDynFlags
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg dflags) ret_bndrs
       ; simple_scrut <- isSimpleScrut scrut alt_type
       ; let do_gc | not simple_scrut = True
                   | isSingleton alts = False
                   | up_hp_usg > 0    = False
                   | otherwise        = True
               -- cf Note [Compiling case expressions]
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       ; let sequel = AssignTo alt_regs do_gc {- Note [scrut sequel] -}
       ; ret_kind <- withSequel sequel (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }


{-
Note [scrut sequel]
~~~~~~~~~~~~~~~~~~~
The job of the scrutinee is to assign its value(s) to alt_regs.
Additionally, if we plan to do a heap-check in the alternatives (see
Note [Compiling case expressions]), then we *must* retreat Hp to
recover any unused heap before passing control to the sequel.  If we
don't do this, then any unused heap will become slop because the heap
check will reset the heap usage.  Slop in the heap breaks LDV profiling
(+RTS -hb) which needs to do a linear sweep through the nursery.
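
A schematic sketch of the hazard (illustrative only; the numbers are
made up):

    Hp = Hp + 16   // scrutinee code reserved heap it did not use
    ...
    Hp = Hp - 16   // retreat Hp (adjustHpBackwards) before the sequel

Without the retreat, the alternative's heap check would reset the heap
usage with the 16 unused bytes still counted, leaving them behind as
slop in the nursery.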


Note [Inlining out-of-line primops and heap checks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If shouldInlinePrimOp returns True when called from StgCmmExpr for the
purpose of heap check placement, we *must* inline the primop later in
StgCmmPrim.  If we don't, things will go wrong.
-}

-----------------
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut
  | simple_scrut = return Nothing
  | otherwise    = saveCurrentCostCentre


-----------------
isSimpleScrut :: StgExpr -> AltType -> FCode Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
--     when it does, you'll deeply mess up allocation
isSimpleScrut (StgOpApp op args _) _           = isSimpleOp op args
isSimpleScrut (StgLit _)           _           = return True  -- case 1# of { 0# -> ..; ... }
isSimpleScrut (StgApp _ [])        (PrimAlt _) = return True  -- case x# of { 0# -> ..; ... }
isSimpleScrut _                    _           = return False

isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
-- True iff the op cannot block or allocate
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
isSimpleOp (StgPrimOp op) stg_args = do
    arg_exprs <- getNonVoidArgAmodes stg_args
    dflags <- getDynFlags
    -- See Note [Inlining out-of-line primops and heap checks]
    return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
isSimpleOp (StgPrimCallOp _) _ = return False

-----------------
chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
-- These are the binders of a case that are assigned
-- by the evaluation of the scrutinee
-- Only non-void ones come back
chooseReturnBndrs bndr (PrimAlt _) _alts
  = nonVoidIds [bndr]

chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)]
  = nonVoidIds ids      -- 'bndr' is not assigned!

chooseReturnBndrs bndr (AlgAlt _) _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs bndr PolyAlt _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
        -- UbxTupAlt has only one alternative

-------------------------------------
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
       -> FCode ReturnKind
-- At this point the results of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do  { dflags <- getDynFlags

        ; tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let bndr_reg = CmmLocal (idToReg dflags bndr)
              (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

              tagged_cmms' = [(lit,code)
                             | (LitAlt lit, code) <- tagged_cmms]
        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
        ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { dflags <- getDynFlags

        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

        ; let fam_sz   = tyConFamilySize tycon
              bndr_reg = CmmLocal (idToReg dflags bndr)

        -- Is the constructor tag in the node reg?
        ; if isSmallFamily dflags fam_sz
          then do
               let -- Yes, bndr_reg has the constructor tag in its ls bits
                   tag_expr  = cmmConstrTag1 dflags (CmmReg bndr_reg)
                   branches' = [(tag+1,branch) | (tag,branch) <- branches]
               emitSwitch tag_expr branches' mb_deflt 1 fam_sz
               return AssignedDirectly

          else -- No, get the tag from the info table
            do dflags <- getDynFlags
               let -- Note that ptr _always_ has tag 1
                   -- when the family size is big enough
                   untagged_ptr = cmmRegOffB bndr_reg (-1)
                   tag_expr     = getConstrTag dflags untagged_ptr
               emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
               return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative



-- Note [alg-alt heap check]
--
-- In an algebraic case with more than one alternative, we will have
-- code like
--
-- L0:
--   x = R1
--   goto L1
-- L1:
--   if (x & 7 >= 2) then goto L2 else goto L3
-- L2:
--   Hp = Hp + 16
--   if (Hp > HpLim) then goto L4
--   ...
-- L4:
--   call gc() returns to L5
-- L5:
--   x = R1
--   goto L1

-------------------
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
             -> FCode ( Maybe CmmAGraphScoped
                      , [(ConTagZ, CmmAGraphScoped)] )
cgAlgAltRhss gc_plan bndr alts
  = do { tagged_cmms <- cgAltRhss gc_plan bndr alts

       ; let { mb_deflt = case tagged_cmms of
                            ((DEFAULT,rhs) : _) -> Just rhs
                            _other              -> Nothing
                -- DEFAULT is always first, if present

             ; branches = [ (dataConTagZ con, cmm)
                          | (DataAlt con, cmm) <- tagged_cmms ]
             }

       ; return (mb_deflt, branches)
       }


-------------------
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
          -> FCode [(AltCon, CmmAGraphScoped)]
cgAltRhss gc_plan bndr alts = do
  dflags <- getDynFlags
  let
    base_reg = idToReg dflags bndr
    cg_alt :: StgAlt -> FCode (AltCon, CmmAGraphScoped)
    cg_alt (con, bndrs, _uses, rhs)
      = getCodeScoped           $
        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg bndrs
           ; _ <- cgExpr rhs
           ; return con }
  forkAlts (map cg_alt alts)

maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (NoGcInAlts,_)  code = code
maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
  altHeapCheck regs code
maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
  altHeapCheckReturnsTo regs lret off code

-----------------------------------------------------------------------------
--      Tail calls
-----------------------------------------------------------------------------

cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
cgConApp con stg_args
  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   -- Boxed constructors; allocate and return
  = ASSERT2( stg_args `lengthIs` dataConRepRepArity con, ppr con <+> ppr stg_args )
    do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
                                              currentCCS con stg_args
                -- The first "con" says that the name bound to this
                -- closure is "con", which is a bit of a fudge, but
                -- it only affects profiling (hence the False)

        ; emit =<< fcode_init
        ; emitReturn [idInfoToAmode idinfo] }

cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id [] | isVoidTy (idType fun_id) = emitReturn []
cgIdApp fun_id args = do
    dflags         <- getDynFlags
    fun_info       <- getCgIdInfo fun_id
    self_loop_info <- getSelfLoop
    let cg_fun_id = cg_id fun_info
           -- NB: use (cg_id fun_info) instead of fun_id, because
           -- the former may be externalised for -split-objs.
           -- See Note [Externalise when splitting] in StgCmmMonad

        fun_arg     = StgVarArg cg_fun_id
        fun_name    = idName cg_fun_id
        fun         = idInfoToAmode fun_info
        lf_info     = cg_lf fun_info
        n_args      = length args
        v_args      = length $ filter (isVoidTy . stgArgType) args
        node_points dflags = nodeMustPointToIt dflags lf_info
    case getCallMethod dflags fun_name cg_fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of

        -- A value in WHNF, so we can just return it.
        ReturnIt -> emitReturn [fun]    -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> ASSERT( null args )  -- Discarding arguments
                   emitEnter fun

        SlowCall -> do  -- A slow function call via the RTS apply routines
            { tickySlowCall lf_info args
            ; emitComment $ mkFastString "slowCall"
            ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
            { tickyDirectCall arity args
            ; if node_points dflags
                 then directCall NativeNodeCall   lbl arity (fun_arg:args)
                 else directCall NativeDirectCall lbl arity args }

        -- Let-no-escape call or self-recursive tail-call
        JumpToIt blk_id lne_regs -> do
            { adjustHpBackwards -- always do this before a tail-call
            ; cmm_args <- getNonVoidArgAmodes args
            ; emitMultiAssign lne_regs cmm_args
            ; emit (mkBranch blk_id)
            ; return AssignedDirectly }

-- Note [Self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive tail calls can be optimized into a local jump in the same
-- way as let-no-escape bindings (see Note [What is a non-escaping let] in
-- stgSyn/CoreToStg.hs). Consider this:
--
-- foo.info:
--     a = R1  // calling convention
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: R1 = x
--     R2 = y
--     call foo(R1,R2)
--
-- Instead of putting x and y into registers (or other locations required by
-- the calling convention) and performing a call, we can put them into local
-- variables a and b and perform a jump to L1:
--
-- foo.info:
--     a = R1
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: a = x
--     b = y
--     goto L1
--
-- This can be done only when a function is calling itself in a tail position,
-- and only if the call passes a number of parameters equal to the function's
-- arity. Note that this cannot be performed if a function calls itself with a
-- continuation.
--
-- This in fact implements the optimization known as "loopification". It was
-- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
-- by Krzysztof Woś, though we use a different approach. Krzysztof performed
-- his optimization at the Cmm level, whereas we perform ours during code
-- generation (the Stg-to-Cmm pass), essentially making sure that the
-- optimized Cmm code is generated in the first place.
--
-- The implementation is spread across a couple of places in the code:
--
-- * The FCode monad stores additional information in its reader environment
--   (the cgd_self_loop field). This information tells us which function can
--   tail call itself in an optimized way (it is the function currently
--   being compiled), what the label of its loop header is (L1 in the example
--   above), and in which local registers we should place the arguments
--   before making a call (these would be a and b in the example above).
--
-- * Whenever we are compiling a function, we set that information to reflect
--   the fact that the function currently being compiled can be jumped to,
--   instead of called. This is done in closureCodeBody in StgCmmBind.
--
-- * We also have to emit a label to which we will be jumping. We make sure
--   that the label is placed after a stack check but before the heap
--   check. The reason is that making a recursive tail-call does not increase
--   the stack so we only need to check once. But it may grow the heap, so we
--   have to repeat the heap check in every self-call. This is done in
--   do_checks in StgCmmHeap.
--
-- * When we begin compilation of another closure we remove the additional
--   information from the environment. This is done by forkClosureBody
--   in StgCmmMonad. Other functions that duplicate the environment -
--   forkLneBody, forkAlts, codeOnly - duplicate that information. In other
--   words, we only need to clean the environment of the self-loop information
--   when compiling the right hand side of a closure (binding).
--
-- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
--   of call will be generated. getCallMethod decides to generate a self
--   recursive tail call when (a) the environment stores information about a
--   possible self tail-call; (b) that tail call is to the function currently
--   being compiled; (c) the number of passed non-void arguments is equal to
--   the function's arity; and (d) loopification is turned on via the
--   -floopification command-line option.
--
-- * The command-line option to turn loopification on and off is implemented
--   in DynFlags.
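--
-- A source-level sketch (illustrative; the function below is made up
-- and not part of the original note): with -floopification enabled, a
-- self-recursive tail call such as the one in
--
--     sumTo :: Int# -> Int# -> Int#
--     sumTo acc 0# = acc
--     sumTo acc n  = sumTo (acc +# n) (n -# 1#)
--
-- meets conditions (a)-(d) above, so the recursive call compiles to a
-- jump back to the loop header rather than a full call.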
--
--
-- Note [Void arguments in self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- State# tokens can get in the way of the loopification optimization as seen
-- in #11372. Consider this:
--
-- foo :: [a]
--     -> (a -> State# s -> (# State# s, Bool #))
--     -> State# s
--     -> (# State# s, Maybe a #)
-- foo [] f s = (# s, Nothing #)
-- foo (x:xs) f s = case f x s of
--      (# s', b #) -> case b of
--                       True  -> (# s', Just x #)
--                       False -> foo xs f s'
--
-- We would like to compile the call to foo as a local jump instead of a call
-- (see Note [Self-recursive tail calls]). However, the generated function has
-- an arity of 2 while we apply it to 3 arguments, one of them being of void
-- type. Thus, we mustn't count arguments of void type when checking whether
-- we can turn a call into a self-recursive jump.
--

emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { dflags <- getDynFlags
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return _ -> do
        { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
        ; emit $ mkJump dflags NativeNodeCall entry
                        [cmmUntag dflags fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lcall else goto Lret
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
        { lret <- newLabelC
        ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
        ; lcall <- newLabelC
        ; updfr_off <- getUpdFrameOff
        ; let area = Young lret
        ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
                                                      [fun] updfr_off []
          -- refer to fun via nodeReg after the copyout, to avoid having
          -- both live simultaneously; this sometimes enables fun to be
          -- inlined in the RHS of the R1 assignment.
        ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
              the_call = toCall entry (Just lret) updfr_off off outArgs regs
        ; tscope <- getTickScope
        ; emit $
            copyout <*>
            mkCbranch (cmmIsTagged dflags (CmmReg nodeReg))
                      lret lcall Nothing <*>
            outOfLine lcall (the_call,tscope) <*>
            mkLabel lret tscope <*>
            copyin
        ; return (ReturnedTo lret off)
        }
  }

------------------------------------------------------------------------
--              Ticks
------------------------------------------------------------------------

-- | Generate Cmm code for a tick.  Depending on the type of Tickish,
-- this will either generate actual Cmm instrumentation code, or
-- simply pass on the annotation as a @CmmTickish@.
cgTick :: Tickish Id -> FCode ()
cgTick tick
  = do { dflags <- getDynFlags
       ; case tick of
           ProfNote   cc t p -> emitSetCCC cc t p
           HpcTick    m n    -> emit (mkTickBox dflags m n)
           SourceNote s n    -> emitTick $ SourceNote s n
           _other            -> return () -- ignore
       }