1 {-# LANGUAGE CPP #-}
2 {-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
3
4 -----------------------------------------------------------------------------
5 --
6 -- Stg to C-- code generation: expressions
7 --
8 -- (c) The University of Glasgow 2004-2006
9 --
10 -----------------------------------------------------------------------------
11
12 module StgCmmExpr ( cgExpr ) where
13
14 #include "HsVersions.h"
15
16 import {-# SOURCE #-} StgCmmBind ( cgBind )
17
18 import StgCmmMonad
19 import StgCmmHeap
20 import StgCmmEnv
21 import StgCmmCon
22 import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
23 import StgCmmLayout
24 import StgCmmPrim
25 import StgCmmHpc
26 import StgCmmTicky
27 import StgCmmUtils
28 import StgCmmClosure
29
30 import StgSyn
31
32 import MkGraph
33 import BlockId
34 import Cmm
35 import CmmInfo
36 import CoreSyn
37 import DataCon
38 import ForeignCall
39 import Id
40 import PrimOp
41 import TyCon
42 import Type
43 import RepType ( isVoidTy, countConRepArgs )
44 import CostCentre ( CostCentreStack, currentCCS )
45 import Maybes
46 import Util
47 import FastString
48 import Outputable
49
50 import Control.Monad (unless,void)
51 import Control.Arrow (first)
52
53 import Prelude hiding ((<*>))
54
55 ------------------------------------------------------------------------
56 -- cgExpr: the main function
57 ------------------------------------------------------------------------
58
59 cgExpr :: StgExpr -> FCode ReturnKind
60
61 cgExpr (StgApp fun args) = cgIdApp fun args
62
63 {- seq# a s ==> a -}
64 cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
65 cgIdApp a []
66
67 cgExpr (StgOpApp op args ty) = cgOpApp op args ty
68 cgExpr (StgConApp con args _) = cgConApp con args
69 cgExpr (StgTick t e) = cgTick t >> cgExpr e
70 cgExpr (StgLit lit) = do cmm_lit <- cgLit lit
71 emitReturn [CmmExprArg (CmmLit cmm_lit)]
72
73 cgExpr (StgLet binds expr) = do { cgBind binds; cgExpr expr }
74 cgExpr (StgLetNoEscape binds expr) =
75 do { u <- newUnique
76 ; let join_id = mkBlockId u
77 ; cgLneBinds join_id binds
78 ; r <- cgExpr expr
79 ; emitLabel join_id
80 ; return r }
81
82 cgExpr (StgCase expr bndr alt_type alts) =
83 cgCase expr bndr alt_type alts
84
85 cgExpr (StgLam {}) = panic "cgExpr: StgLam"
86
87 ------------------------------------------------------------------------
88 -- Let no escape
89 ------------------------------------------------------------------------
90
91 {- Generating code for a let-no-escape binding, aka a join point, is very
92 very similar to what we do for a case expression. The duality is
93 between
94 let-no-escape x = b
95 in e
96 and
97 case e of ... -> b
98
99 That is, the RHS of 'x' (ie 'b') will execute *later*, just like
100 the alternative of the case; it needs to be compiled in an environment
101 in which all volatile bindings are forgotten, and the free vars are
102 bound only to stable things like stack locations. The 'e' part will
103 execute *next*, just like the scrutinee of a case. -}
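{- As a rough illustration (block labels and register names here are invented
   for the example, not the exact output): a binding

       let-no-escape j x = <rhs> in ... j a ...

   compiles to an out-of-line block for <rhs> that falls through to the join
   label supplied by cgLneBinds, and every "call" of j becomes an assignment
   to j's argument registers followed by a branch:

       Lj:            // body of <rhs>
         ...
         goto Ljoin   // the mkBranch join_id appended in cgLetNoEscapeRhs

       // at a use site of j
         x_reg = a
         goto Lj
-}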
104
105 -------------------------
106 cgLneBinds :: BlockId -> StgBinding -> FCode ()
107 cgLneBinds join_id (StgNonRec bndr rhs)
108 = do { local_cc <- saveCurrentCostCentre
109 -- See Note [Saving the current cost centre]
110 ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
111 ; fcode
112 ; addBindC info }
113
114 cgLneBinds join_id (StgRec pairs)
115 = do { local_cc <- saveCurrentCostCentre
116 ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
117 ; let (infos, fcodes) = unzip r
118 ; addBindsC infos
119 ; sequence_ fcodes
120 }
121
122 -------------------------
123 cgLetNoEscapeRhs
124 :: BlockId -- join point for successor of let-no-escape
125 -> Maybe LocalReg -- Saved cost centre
126 -> Id
127 -> StgRhs
128 -> FCode (CgIdInfo, FCode ())
129
130 cgLetNoEscapeRhs join_id local_cc bndr rhs =
131 do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
132 ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
133 ; let code = do { (_, body) <- getCodeScoped rhs_code
134 ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
135 ; return (info, code)
136 }
137
138 cgLetNoEscapeRhsBody
139 :: Maybe LocalReg -- Saved cost centre
140 -> Id
141 -> StgRhs
142 -> FCode (CgIdInfo, FCode ())
143 cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd args body)
144 = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
145 cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
146 = cgLetNoEscapeClosure bndr local_cc cc []
147 (StgConApp con args (pprPanic "cgLetNoEscapeRhsBody" $
148 text "StgRhsCon doesn't have type args"))
149 -- For a constructor RHS we want to generate a single chunk of
150 -- code which can be jumped to from many places, which will
151 -- return the constructor. It's easy; just behave as if it
152 -- was an StgRhsClosure with a ConApp inside!
153
154 -------------------------
155 cgLetNoEscapeClosure
156 :: Id -- binder
157 -> Maybe LocalReg -- Slot for saved current cost centre
158 -> CostCentreStack -- XXX: *** NOT USED *** why not?
159 -> [NonVoid Id] -- Args (as in \ args -> body)
160 -> StgExpr -- Body (as in above)
161 -> FCode (CgIdInfo, FCode ())
162
163 cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
164 = do dflags <- getDynFlags
165 return ( lneIdInfo dflags bndr args
166 , code )
167 where
168 code = forkLneBody $ do {
169 ; withNewTickyCounterLNE (idName bndr) args $ do
170 ; restoreCurrentCostCentre cc_slot
171 ; arg_regs <- bindArgsToRegs args
172 ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
173
174
175 ------------------------------------------------------------------------
176 -- Case expressions
177 ------------------------------------------------------------------------
178
179 {- Note [Compiling case expressions]
180 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
181 It is quite interesting to decide whether to put a heap-check at the
182 start of each alternative. Of course we certainly have to do so if
183 the case forces an evaluation, or if there is a primitive op which can
184 trigger GC.
185
186 A more interesting situation is this (a Plan-B situation)
187
188 !P!;
189 ...P...
190 case x# of
191 0# -> !Q!; ...Q...
192 default -> !R!; ...R...
193
194 where !x! indicates a possible heap-check point. The heap checks
195 in the alternatives *can* be omitted, in which case the topmost
196 heapcheck will take their worst case into account.
197
198 In favour of omitting !Q!, !R!:
199
200 - *May* save a heap overflow test,
201 if ...P... allocates anything.
202
203 - We can use relative addressing from a single Hp to
204 get at all the closures so allocated.
205
206 - No need to save volatile vars etc across heap checks
207 in !Q!, !R!
208
209 Against omitting !Q!, !R!
210
211 - May put a heap-check into the inner loop. Suppose
212 the main loop is P -> R -> P -> R...
213 Q is the loop exit, and only it does allocation.
214 This only hurts us if P does no allocation. If P allocates,
215 then there is a heap check in the inner loop anyway.
216
217 - May do more allocation than reqd. This sometimes bites us
218 badly. For example, nfib (ha!) allocates about 30% more space if the
219 worst-casing is done, because many many calls to nfib are leaf calls
220 which don't need to allocate anything.
221
222 We can un-allocate, but that costs an instruction
223
224 Neither problem hurts us if there is only one alternative.
225
226 Suppose the inner loop is P->R->P->R etc. Then here is
227 how many heap checks we get in the *inner loop* under various
228 conditions
229
230 Alloc Heap check in branches (!Q!, !R!)?
231 P Q R yes no (absorb to !P!)
232 --------------------------------------
233 n n n 0 0
234 n y n 0 1
235 n . y 1 1
236 y . y 2 1
237 y . n 1 1
238
239 Best choices: absorb heap checks from Q and R into !P! iff
240 a) P itself does some allocation
241 or
242 b) P does allocation, or there is exactly one alternative
243
244 We adopt (b) because that is more likely to put the heap check at the
245 entry to a function, when not many things are live. After a bunch of
246 single-branch cases, we may have lots of things live
247
248 Hence: two basic plans for
249
250 case e of r { alts }
251
252 ------ Plan A: the general case ---------
253
254 ...save current cost centre...
255
256 ...code for e,
257 with sequel (SetLocals r)
258
259 ...restore current cost centre...
260 ...code for alts...
261 ...alts do their own heap checks
262
263 ------ Plan B: special case when ---------
264 (i) e does not allocate or call GC
265 (ii) either upstream code performs allocation
266 or there is just one alternative
267
268 Then heap allocation in the (single) case branch
269 is absorbed by the upstream check.
270 Very common example: primops on unboxed values
271
272 ...code for e,
273 with sequel (SetLocals r)...
274
275 ...code for alts...
276 ...no heap check...
277 -}
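{- A schematic example of the two plans (illustrative source fragments only;
   the real decision is made below in cgCase via isSimpleScrut and do_gc).
   Plan B:

       case x# +# 1# of r -> (# r, r #)

   The scrutinee is an inlined primop that cannot block or trigger GC, and
   there is a single alternative, so any allocation in the alternative is
   absorbed into the upstream heap check (NoGcInAlts).  Plan A:

       case f x of
         Just y  -> <alt1>
         Nothing -> <alt2>

   The scrutinee is a genuine call that may allocate or trigger GC, so each
   alternative performs its own heap check (GcInAlts).
-}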
278
279
280
281 -------------------------------------
282 data GcPlan
283 = GcInAlts -- Put a GC check at the start of the case alternatives,
284 [LocalReg] -- which binds these registers
285 | NoGcInAlts -- The scrutinee is a primitive value, or a call to a
286 -- primitive op which does no GC. Absorb the allocation
287 -- of the case alternative(s) into the upstream check
288
289 -------------------------------------
290 cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind
291
292 cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
293 | isEnumerationTyCon tycon -- Note [case on bool]
294 = do { tag_expr <- do_enum_primop op args
295
296 -- If the binder is not dead, convert the tag to a constructor
297 -- and assign it.
298 ; unless (isDeadBinder bndr) $ do
299 { dflags <- getDynFlags
300 ; tmp_reg <- bindArgToReg (NonVoid bndr)
301 ; emitAssign (CmmLocal tmp_reg)
302 (tagToClosure dflags tycon tag_expr) }
303
304 ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
305 (NonVoid bndr) alts
306 ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
307 ; return AssignedDirectly
308 }
309 where
310 do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
311 do_enum_primop TagToEnumOp [arg] -- No code!
312 = getArgAmode_no_rubbish (NonVoid arg)
313 do_enum_primop primop args
314 = do dflags <- getDynFlags
315 tmp <- newTemp (bWord dflags)
316 cgPrimOp [tmp] primop args
317 return (CmmReg (CmmLocal tmp))
318
319 {-
320 Note [case on bool]
321 ~~~~~~~~~~~~~~~~~~~
322 This special case handles code like
323
324 case a <# b of
325 True ->
326 False ->
327
328 --> case tagToEnum# (a <# b) of
329 True -> .. ; False -> ...
330
331 --> case (a <# b) of r ->
332 case tagToEnum# r of
333 True -> .. ; False -> ...
334
335 If we let the ordinary case code handle it, we'll get something like
336
337 tmp1 = a < b
338 tmp2 = Bool_closure_tbl[tmp1]
339 if (tmp2 & 7 != 0) then ... // normal tagged case
340
341 but this junk won't optimise away. What we really want is just an
342 inline comparison:
343
344 if (a < b) then ...
345
346 So we add a special case to generate
347
348 tmp1 = a < b
349 if (tmp1 == 0) then ...
350
351 and later optimisations will further improve this.
352
353 Now that #6135 has been resolved it should be possible to remove that
354 special case. The idea behind this special case and the pre-#6135 implementation
355 of Bool-returning primops was that tagToEnum# was added implicitly in the
356 codegen and then optimized away. Now the call to tagToEnum# is explicit
357 in the source code, which allows it to be optimized away at earlier stages
358 of compilation (i.e. at the Core level).
359
360 Note [Scrutinising VoidRep]
361 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
362 Suppose we have this STG code:
363 f = \[s : State# RealWorld] ->
364 case s of _ -> blah
365 This is very odd. Why are we scrutinising a state token? But it
366 can arise with bizarre NOINLINE pragmas (Trac #9964)
367 crash :: IO ()
368 crash = IO (\s -> let {-# NOINLINE s' #-}
369 s' = s
370 in (# s', () #))
371
372 Now the trouble is that 's' has VoidRep, and we do not bind void
373 arguments in the environment; they don't live anywhere. See the
374 calls to nonVoidIds in various places. So we must not look up
375 's' in the environment. Instead, just evaluate the RHS! Simple.
376 -}
377
378 cgCase (StgApp v []) _ (PrimAlt _) alts
379 | isVoidRep (idPrimRep v) -- See Note [Scrutinising VoidRep]
380 , [(DEFAULT, _, rhs)] <- alts
381 = cgExpr rhs
382
383 {- Note [Dodgy unsafeCoerce 1]
384 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
385 Consider
386 case (x :: HValue) |> co of (y :: MutVar# Int)
387 DEFAULT -> ...
388 We want to generate an assignment
389 y := x
390 We want to allow this assignment to be generated in the case when the
391 types are compatible, because this allows some slightly-dodgy but
392 occasionally-useful casts to be used, such as in RtClosureInspect
393 where we cast an HValue to a MutVar# so we can print out the contents
394 of the MutVar#. If instead we generate code that enters the HValue,
395 then we'll get a runtime panic, because the HValue really is a
396 MutVar#. The types are compatible though, so we can just generate an
397 assignment.
398 -}
399 cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
400 | isUnliftedType (idType v) -- Note [Dodgy unsafeCoerce 1]
401 || reps_compatible
402 = -- assignment suffices for unlifted types
403 do { dflags <- getDynFlags
404 ; unless reps_compatible $
405 panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
406 ; v_info <- getCgIdInfo v
407 ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr)))
408 (idInfoToAmode v_info)
409 ; bindArgToReg (NonVoid bndr)
410 ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
411 where
412 reps_compatible = idPrimRep v == idPrimRep bndr
413
414 {- Note [Dodgy unsafeCoerce 2, #3132]
415 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
416 In all other cases of a lifted Id being cast to an unlifted type, the
417 Id should be bound to bottom, otherwise this is an unsafe use of
418 unsafeCoerce. We can generate code to enter the Id and assume that
419 it will never return. Hence, we emit the usual enter/return code, and
420 because bottom must be untagged, it will be entered. The Sequel is a
421 type-correct assignment, albeit bogus. The (dead) continuation loops;
422 it would be better to invoke some kind of panic function here.
423 -}
424 cgCase scrut@(StgApp v []) _ (PrimAlt _) _
425 = do { dflags <- getDynFlags
426 ; mb_cc <- maybeSaveCostCentre True
427 ; withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
428 ; restoreCurrentCostCentre mb_cc
429 ; emitComment $ mkFastString "should be unreachable code"
430 ; l <- newLabelC
431 ; emitLabel l
432 ; emit (mkBranch l) -- an infinite loop
433 ; return AssignedDirectly
434 }
435
436 {- Note [Handle seq#]
437 ~~~~~~~~~~~~~~~~~~~~~
438 case seq# a s of v
439 (# s', a' #) -> e
440
441 ==>
442
443 case a of v
444 (# s', a' #) -> e
445
446 (taking advantage of the fact that the return convention for (# State#, a #)
447 is the same as the return convention for just 'a')
448 -}
449
450 cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
451 = -- Note [Handle seq#]
452 -- Use the same return convention as vanilla 'a'.
453 cgCase (StgApp a []) bndr alt_type alts
454
455 cgCase scrut bndr alt_type alts
456 = -- the general case
457 do { dflags <- getDynFlags
458 ; up_hp_usg <- getVirtHp -- Upstream heap usage
459 ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
460 alt_regs = map (idToReg dflags) ret_bndrs
461 ; simple_scrut <- isSimpleScrut scrut alt_type
462 ; let do_gc | not simple_scrut = True
463 | isSingleton alts = False
464 | up_hp_usg > 0 = False
465 | otherwise = True
466 -- cf Note [Compiling case expressions]
467 gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts
468
469 ; mb_cc <- maybeSaveCostCentre simple_scrut
470
471 ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -}
472 ; ret_kind <- withSequel sequel (cgExpr scrut)
473 ; restoreCurrentCostCentre mb_cc
474 ; _ <- bindArgsToRegs ret_bndrs
475 ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
476 }
477
478
479 {-
480 Note [scrut sequel]
481
482 The job of the scrutinee is to assign its value(s) to alt_regs.
483 Additionally, if we plan to do a heap-check in the alternatives (see
484 Note [Compiling case expressions]), then we *must* retreat Hp to
485 recover any unused heap before passing control to the sequel. If we
486 don't do this, then any unused heap will become slop because the heap
487 check will reset the heap usage. Slop in the heap breaks LDV profiling
488 (+RTS -hb) which needs to do a linear sweep through the nursery.
489
490
491 Note [Inlining out-of-line primops and heap checks]
492 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
493 If shouldInlinePrimOp returns True when called from StgCmmExpr for the
494 purpose of heap check placement, we *must* inline the primop later in
495 StgCmmPrim. If we don't, things will go wrong.
496 -}
497
498 -----------------
499 maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
500 maybeSaveCostCentre simple_scrut
501 | simple_scrut = return Nothing
502 | otherwise = saveCurrentCostCentre
503
504
505 -----------------
506 isSimpleScrut :: StgExpr -> AltType -> FCode Bool
507 -- Simple scrutinee, does not block or allocate; hence safe to amalgamate
508 -- heap usage from alternatives into the stuff before the case
509 -- NB: if you get this wrong, and claim that the expression doesn't allocate
510 -- when it does, you'll deeply mess up allocation
511 isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args
512 isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... }
513 isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... }
514 isSimpleScrut _ _ = return False
515
516 isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
517 -- True iff the op cannot block or allocate
518 isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
519 isSimpleOp (StgPrimOp op) stg_args = do
520 arg_exprs <- getNonVoidArgAmodes_no_rubbish stg_args
521 dflags <- getDynFlags
522 -- See Note [Inlining out-of-line primops and heap checks]
523 return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
524 isSimpleOp (StgPrimCallOp _) _ = return False
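-- For intuition, schematic examples only: a scrutinee such as
--     case x# +# y# of r -> ...
-- counts as simple, because the addition is inlined and can neither block
-- nor allocate; a safe foreign call never counts as simple because it may
-- block; and an out-of-line, allocating primop such as newMutVar# would not
-- count either, since shouldInlinePrimOp declines to inline it.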
525
526 -----------------
527 chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
528 -- These are the binders of a case that are assigned
529 -- by the evaluation of the scrutinee
530 -- Only non-void ones come back
531 chooseReturnBndrs bndr (PrimAlt _) _alts
532 = nonVoidIds [bndr]
533
534 chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)]
535 = ASSERT2(n == length (nonVoidIds ids), ppr n $$ ppr ids $$ ppr _bndr)
536 nonVoidIds ids -- 'bndr' is not assigned!
537
538 chooseReturnBndrs bndr (AlgAlt _) _alts
539 = nonVoidIds [bndr] -- Only 'bndr' is assigned
540
541 chooseReturnBndrs bndr PolyAlt _alts
542 = nonVoidIds [bndr] -- Only 'bndr' is assigned
543
544 chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
545 -- MultiValAlt has only one alternative
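-- For example (schematically): in
--     case e of b { (# x, y #) -> rhs }
-- the return convention of e delivers the components directly, so the return
-- binders are the non-void ids among x and y, and b itself is never assigned;
-- whereas in
--     case e of b { Just x -> ...; Nothing -> ... }
-- only b is assigned by the evaluation of e, and x is bound later by
-- bindConArgs once the Just branch is taken.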
546
547 -------------------------------------
548 cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
549 -> FCode ReturnKind
550 -- At this point the results of the case are in the binders
551 cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
552 = maybeAltHeapCheck gc_plan (cgExpr rhs)
553
554 cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)]
555 = maybeAltHeapCheck gc_plan (cgExpr rhs)
556 -- Here bndrs are *already* in scope, so don't rebind them
557
558 cgAlts gc_plan bndr (PrimAlt _) alts
559 = do { dflags <- getDynFlags
560
561 ; tagged_cmms <- cgAltRhss gc_plan bndr alts
562
563 ; let bndr_reg = CmmLocal (idToReg dflags bndr)
564 (DEFAULT,deflt) = head tagged_cmms
565 -- PrimAlts always have a DEFAULT case
566 -- and it always comes first
567
568 tagged_cmms' = [(lit,code)
569 | (LitAlt lit, code) <- tagged_cmms]
570 ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
571 ; return AssignedDirectly }
572
573 cgAlts gc_plan bndr (AlgAlt tycon) alts
574 = do { dflags <- getDynFlags
575
576 ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts
577
578 ; let fam_sz = tyConFamilySize tycon
579 bndr_reg = CmmLocal (idToReg dflags bndr)
580
581 -- Is the constructor tag in the node reg?
582 ; if isSmallFamily dflags fam_sz
583 then do
584 let -- Yes, bndr_reg has constr. tag in ls bits
585 tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
586 branches' = [(tag+1,branch) | (tag,branch) <- branches]
587 emitSwitch tag_expr branches' mb_deflt 1 fam_sz
588
589 else -- No, get tag from info table
590 do dflags <- getDynFlags
591 let -- Note that ptr _always_ has tag 1
592 -- when the family size is big enough
593 untagged_ptr = cmmRegOffB bndr_reg (-1)
594 tag_expr = getConstrTag dflags (untagged_ptr)
595 emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
596
597 ; return AssignedDirectly }
598
599 cgAlts _ _ _ _ = panic "cgAlts"
600 -- MultiValAlt and PolyAlt have only one alternative
601
602
603 -- Note [alg-alt heap check]
604 --
605 -- In an algebraic case with more than one alternative, we will have
606 -- code like
607 --
608 -- L0:
609 -- x = R1
610 -- goto L1
611 -- L1:
612 -- if (x & 7 >= 2) then goto L2 else goto L3
613 -- L2:
614 -- Hp = Hp + 16
615 -- if (Hp > HpLim) then goto L4
616 -- ...
617 -- L4:
618 -- call gc() returns to L5
619 -- L5:
620 -- x = R1
621 -- goto L1
622
623 -------------------
624 cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
625 -> FCode ( Maybe CmmAGraphScoped
626 , [(ConTagZ, CmmAGraphScoped)] )
627 cgAlgAltRhss gc_plan bndr alts
628 = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
629
630 ; let { mb_deflt = case tagged_cmms of
631 ((DEFAULT,rhs) : _) -> Just rhs
632 _other -> Nothing
633 -- DEFAULT is always first, if present
634
635 ; branches = [ (dataConTagZ con, cmm)
636 | (DataAlt con, cmm) <- tagged_cmms ]
637 }
638
639 ; return (mb_deflt, branches)
640 }
641
642
643 -------------------
644 cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
645 -> FCode [(AltCon, CmmAGraphScoped)]
646 cgAltRhss gc_plan bndr alts = do
647 dflags <- getDynFlags
648 let
649 base_reg = idToReg dflags bndr
650 cg_alt :: StgAlt -> FCode (AltCon, CmmAGraphScoped)
651 cg_alt (con, bndrs, rhs)
652 = getCodeScoped $
653 maybeAltHeapCheck gc_plan $
654 do { _ <- bindConArgs con base_reg bndrs
655 ; _ <- cgExpr rhs
656 ; return con }
657 forkAlts (map cg_alt alts)
658
659 maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
660 maybeAltHeapCheck (NoGcInAlts,_) code = code
661 maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
662 altHeapCheck regs code
663 maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
664 altHeapCheckReturnsTo regs lret off code
665
666 -----------------------------------------------------------------------------
667 -- Tail calls
668 -----------------------------------------------------------------------------
669
670 cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
671 cgConApp con stg_args
672 | isUnboxedTupleCon con -- Unboxed tuple: assign and return
673 = do { arg_exprs <- getNonVoidArgAmodes stg_args
674 ; tickyUnboxedTupleReturn (length arg_exprs)
675 ; emitReturn arg_exprs }
676
677 | otherwise -- Boxed constructors; allocate and return
678 = ASSERT2( stg_args `lengthIs` countConRepArgs con, ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args )
679 do { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
680 currentCCS con stg_args
681 -- The first "con" says that the name bound to this
682 -- closure is "con", which is a bit of a fudge, but
683 -- it only affects profiling (hence the False)
684
685 ; emit =<< fcode_init
686 ; tickyReturnNewCon (length stg_args)
687 ; emitReturn [CmmExprArg (idInfoToAmode idinfo)] }
688
689 cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
690 cgIdApp fun_id [] | isVoidTy (idType fun_id) = emitReturn []
691 cgIdApp fun_id args = do
692 dflags <- getDynFlags
693 fun_info <- getCgIdInfo fun_id
694 self_loop_info <- getSelfLoop
695 let cg_fun_id = cg_id fun_info
696 -- NB: use (cg_id fun_info) instead of fun_id, because
697 -- the former may be externalised for -split-objs.
698 -- See Note [Externalise when splitting] in StgCmmMonad
699
700 fun_arg = StgVarArg cg_fun_id
701 fun_name = idName cg_fun_id
702 fun = idInfoToAmode fun_info
703 lf_info = cg_lf fun_info
704 n_args = length args
705 v_args = length $ filter (isVoidTy . stgArgType) args
706 node_points dflags = nodeMustPointToIt dflags lf_info
707 case getCallMethod dflags fun_name cg_fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of
708
709 -- A value in WHNF, so we can just return it.
710 ReturnIt -> emitReturn [CmmExprArg fun] -- ToDo: does ReturnIt guarantee tagged?
711
712 EnterIt -> ASSERT( null args ) -- Discarding arguments
713 emitEnter fun
714
715 SlowCall -> do -- A slow function call via the RTS apply routines
716 { tickySlowCall lf_info args
717 ; emitComment $ mkFastString "slowCall"
718 ; slowCall fun args }
719
720 -- A direct function call (possibly with some left-over arguments)
721 DirectEntry lbl arity -> do
722 { tickyDirectCall arity args
723 ; if node_points dflags
724 then directCall NativeNodeCall lbl arity (fun_arg:args)
725 else directCall NativeDirectCall lbl arity args }
726
727 -- Let-no-escape call or self-recursive tail-call
728 JumpToIt blk_id lne_regs -> do
729 { adjustHpBackwards -- always do this before a tail-call
730 ; cmm_args <- getNonVoidArgAmodes args
731 ; emitMultiAssign lne_regs cmm_args
732 ; emit (mkBranch blk_id)
733 ; return AssignedDirectly }
734
735 -- Note [Self-recursive tail calls]
736 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
737 --
738 -- Self-recursive tail calls can be optimized into a local jump in the same
739 -- way as let-no-escape bindings (see Note [What is a non-escaping let] in
740 -- stgSyn/CoreToStg.hs). Consider this:
741 --
742 -- foo.info:
743 -- a = R1 // calling convention
744 -- b = R2
745 -- goto L1
746 -- L1: ...
747 -- ...
748 -- ...
749 -- L2: R1 = x
750 -- R2 = y
751 -- call foo(R1,R2)
752 --
753 -- Instead of putting x and y into registers (or other locations required by the
754 -- calling convention) and performing a call we can put them into local
755 -- variables a and b and perform jump to L1:
756 --
757 -- foo.info:
758 -- a = R1
759 -- b = R2
760 -- goto L1
761 -- L1: ...
762 -- ...
763 -- ...
764 -- L2: a = x
765 -- b = y
766 -- goto L1
767 --
768 -- This can be done only when a function is calling itself in a tail position
769 -- and only if the call passes a number of parameters equal to the function's arity.
770 -- Note that this cannot be performed if a function calls itself with a
771 -- continuation.
772 --
773 -- This in fact implements the optimization known as "loopification". It was
774 -- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
775 -- by Krzysztof Woś, though we use a different approach. Krzysztof performed his
776 -- optimization at the Cmm level, whereas we perform ours during code generation
777 -- (the Stg-to-Cmm pass), essentially making sure that optimized Cmm code is
778 -- generated in the first place.
779 --
780 -- Implementation is spread across a couple of places in the code:
781 --
782 -- * The FCode monad stores additional information in its reader environment
783 -- (cgd_self_loop field). This information tells us which function can
784 -- tail call itself in an optimized way (it is the function currently
785 -- being compiled), what the label of the loop header is (L1 in the example
786 -- above), and which local registers we should put the arguments in
787 -- before making a call (a and b in the example above).
788 --
789 -- * Whenever we are compiling a function, we set that information to reflect
790 -- the fact that the function currently being compiled can be jumped to, instead
791 -- of called. This is done in closureCodeBody in StgCmmBind.
792 --
793 -- * We also have to emit a label to which we will be jumping. We make sure
794 -- that the label is placed after a stack check but before the heap
795 -- check. The reason is that making a recursive tail-call does not increase
796 -- the stack so we only need to check once. But it may grow the heap, so we
797 -- have to repeat the heap check in every self-call. This is done in
798 -- do_checks in StgCmmHeap.
799 --
800 -- * When we begin compilation of another closure we remove the additional
801 -- information from the environment. This is done by forkClosureBody
802 -- in StgCmmMonad. Other functions that duplicate the environment -
803 -- forkLneBody, forkAlts, codeOnly - duplicate that information. In other
804 -- words, we only need to clean the environment of the self-loop information
805 -- when compiling the right-hand side of a closure (binding).
806 --
807 -- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
808 -- of call will be generated. getCallMethod decides to generate a self
809 -- recursive tail call when (a) environment stores information about
810 -- possible self tail-call; (b) that tail call is to the function currently
811 -- being compiled; (c) the number of non-void arguments passed is equal to
812 -- the function's arity; and (d) loopification is turned on via the
813 -- -floopification command-line option.
814 --
815 -- * The command-line option to turn loopification on and off is implemented in
816 -- DynFlags.
817 --
818 --
819 -- Note [Void arguments in self-recursive tail calls]
820 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
821 --
822 -- State# tokens can get in the way of the loopification optimization as seen in
823 -- #11372. Consider this:
824 --
825 -- foo :: [a]
826 -- -> (a -> State# s -> (# State# s, Bool #))
827 -- -> State# s
828 -- -> (# State# s, Maybe a #)
829 -- foo [] f s = (# s, Nothing #)
830 -- foo (x:xs) f s = case f x s of
831 -- (# s', b #) -> case b of
832 -- True -> (# s', Just x #)
833 -- False -> foo xs f s'
834 --
835 -- We would like to compile the call to foo as a local jump instead of a call
836 -- (see Note [Self-recursive tail calls]). However, the generated function has
837 -- an arity of 2 while we apply it to 3 arguments, one of them being of void
838 -- type. Thus, we mustn't count arguments of void type when checking whether
839 -- we can turn a call into a self-recursive jump.
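-- To make the arity check concrete (an informal sketch): in the call
--     foo xs f s'
-- above we have n_args = 3 and v_args = 1 (the State# token), so the quantity
-- getCallMethod effectively compares against foo's runtime arity is
-- n_args - v_args = 2, which matches and so permits the self-recursive jump.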
840 --
841
842 emitEnter :: CmmExpr -> FCode ReturnKind
843 emitEnter fun = do
844 { dflags <- getDynFlags
845 ; adjustHpBackwards
846 ; sequel <- getSequel
847 ; updfr_off <- getUpdFrameOff
848 ; case sequel of
849 -- For a return, we have the option of generating a tag-test or
850 -- not. If the value is tagged, we can return directly, which
851 -- is quicker than entering the value. This is a code
852 -- size/speed trade-off: when optimising for speed rather than
853 -- size we could generate the tag test.
854 --
855 -- Right now, we do what the old codegen did, and omit the tag
856 -- test, just generating an enter.
857 Return _ -> do
858 { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
859 ; emit $ mkJump dflags NativeNodeCall entry
860 [CmmExprArg (cmmUntag dflags fun)] updfr_off
861 ; return AssignedDirectly
862 }
863
864 -- The result will be scrutinised in the sequel. This is where
865 -- we generate a tag-test to avoid entering the closure if
866 -- possible.
867 --
868 -- The generated code will be something like this:
869 --
870 -- R1 = fun -- copyout
871 -- if (fun & 7 != 0) goto Lcall else goto Lret
872 -- Lcall:
873 -- call [fun] returns to Lret
874 -- Lret:
875 -- fun' = R1 -- copyin
876 -- ...
877 --
878 -- Note in particular that the label Lret is used as a
879 -- destination by both the tag-test and the call. This is
880 -- because Lret will necessarily be a proc-point, and we want to
881 -- ensure that we generate only one proc-point for this
882 -- sequence.
883 --
884 -- Furthermore, we tell the caller that we generated a native
885 -- return continuation by returning (ReturnedTo Lret off), so
886 -- that the continuation can be reused by the heap-check failure
887 -- code in the enclosing case expression.
888 --
889 AssignTo res_regs _ -> do
890 { lret <- newLabelC
891 ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
892 ; lcall <- newLabelC
893 ; updfr_off <- getUpdFrameOff
894 ; let area = Young lret
895 ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
896 [CmmExprArg fun] updfr_off []
897 -- refer to fun via nodeReg after the copyout, to avoid having
898 -- both live simultaneously; this sometimes enables fun to be
899 -- inlined in the RHS of the R1 assignment.
900 ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
901 the_call = toCall entry (Just lret) updfr_off off outArgs regs
902 ; tscope <- getTickScope
903 ; emit $
904 copyout <*>
905 mkCbranch (cmmIsTagged dflags (CmmReg nodeReg))
906 lret lcall Nothing <*>
907 outOfLine lcall (the_call,tscope) <*>
908 mkLabel lret tscope <*>
909 copyin
910 ; return (ReturnedTo lret off)
911 }
912 }
913
914 ------------------------------------------------------------------------
915 -- Ticks
916 ------------------------------------------------------------------------
917
918 -- | Generate Cmm code for a tick. Depending on the type of Tickish,
919 -- this will either generate actual Cmm instrumentation code, or
920 -- simply pass on the annotation as a @CmmTickish@.
921 cgTick :: Tickish Id -> FCode ()
922 cgTick tick
923 = do { dflags <- getDynFlags
924 ; case tick of
925 ProfNote cc t p -> emitSetCCC cc t p
926 HpcTick m n -> emit (mkTickBox dflags m n)
927 SourceNote s n -> emitTick $ SourceNote s n
928 _other -> return () -- ignore
929 }