{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}

-----------------------------------------------------------------------------
--
-- Stg to C-- code generation: expressions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmExpr ( cgExpr ) where

#include "HsVersions.h"

import {-# SOURCE #-} StgCmmBind ( cgBind )

import StgCmmMonad
import StgCmmHeap
import StgCmmEnv
import StgCmmCon
import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
import StgCmmLayout
import StgCmmPrim
import StgCmmHpc
import StgCmmTicky
import StgCmmUtils
import StgCmmClosure

import StgSyn

import MkGraph
import BlockId
import Cmm
import CmmInfo
import CoreSyn
import DataCon
import ForeignCall
import Id
import PrimOp
import TyCon
import Type ( isUnliftedType )
import RepType ( isVoidTy, countConRepArgs, primRepSlot )
import CostCentre ( CostCentreStack, currentCCS )
import Maybes
import Util
import FastString
import Outputable

import Control.Monad (unless,void)
import Control.Arrow (first)
import Data.Function ( on )

import Prelude hiding ((<*>))

------------------------------------------------------------------------
-- cgExpr: the main function
------------------------------------------------------------------------

cgExpr :: StgExpr -> FCode ReturnKind

cgExpr (StgApp fun args) = cgIdApp fun args

{- seq# a s ==> a -}
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con args _) = cgConApp con args
cgExpr (StgTick t e) = cgTick t >> cgExpr e
cgExpr (StgLit lit) = do cmm_lit <- cgLit lit
                         emitReturn [CmmLit cmm_lit]

cgExpr (StgLet binds expr) = do { cgBind binds; cgExpr expr }
cgExpr (StgLetNoEscape binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr bndr alt_type alts) =
  cgCase expr bndr alt_type alts

cgExpr (StgLam {}) = panic "cgExpr: StgLam"

------------------------------------------------------------------------
-- Let no escape
------------------------------------------------------------------------

{- Generating code for a let-no-escape binding, aka join point, is very
   very similar to what we do for a case expression.  The duality is
   between

        let-no-escape x = b
        in e
   and
        case e of ... -> b

   That is, the RHS of 'x' (ie 'b') will execute *later*, just like
   the alternative of the case; it needs to be compiled in an environment
   in which all volatile bindings are forgotten, and the free vars are
   bound only to stable things like stack locations.  The 'e' part will
   execute *next*, just like the scrutinee of a case. -}

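{- As a source-level sketch of that duality (a hypothetical example, not
   taken from the compiler or its test suite): assuming 'go' never escapes
   into a closure,

        let go n = ...big...
        in case x of
             A -> go 1     -- the 'e' part runs next, like a scrutinee
             B -> go 2

   compiles the body of 'go' once, as a join point that both branches
   jump to, rather than allocating a closure for it. -}
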
-------------------------
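-- Compile the bindings of a let-no-escape.  Each right-hand side is turned
-- into an out-of-line block that finishes by branching to join_id; for a
-- recursive group the binders are added to the environment before any of
-- the right-hand sides are compiled, so that they can refer to one another.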
cgLneBinds :: BlockId -> StgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do { local_cc <- saveCurrentCostCentre
             -- See Note [Saving the current cost centre]
       ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
       ; fcode
       ; addBindC info }

cgLneBinds join_id (StgRec pairs)
  = do { local_cc <- saveCurrentCostCentre
       ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
       ; let (infos, fcodes) = unzip r
       ; addBindsC infos
       ; sequence_ fcodes
       }

-------------------------
cgLetNoEscapeRhs
    :: BlockId          -- join point for successor of let-no-escape
    -> Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
     ; let code = do { (_, body) <- getCodeScoped rhs_code
                     ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
     ; return (info, code)
     }

cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
  = cgLetNoEscapeClosure bndr local_cc cc []
      (StgConApp con args (pprPanic "cgLetNoEscapeRhsBody" $
                           text "StgRhsCon doesn't have type args"))
    -- For a constructor RHS we want to generate a single chunk of
    -- code which can be jumped to from many places, which will
    -- return the constructor. It's easy; just behave as if it
    -- was an StgRhsClosure with a ConApp inside!

-------------------------
cgLetNoEscapeClosure
    :: Id                 -- binder
    -> Maybe LocalReg     -- Slot for saved current cost centre
    -> CostCentreStack    -- XXX: *** NOT USED *** why not?
    -> [NonVoid Id]       -- Args (as in \ args -> body)
    -> StgExpr            -- Body (as in above)
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do dflags <- getDynFlags
       return ( lneIdInfo dflags bndr args
              , code )
  where
   code = forkLneBody $ do {
            ; withNewTickyCounterLNE (idName bndr) args $ do
            ; restoreCurrentCostCentre cc_slot
            ; arg_regs <- bindArgsToRegs args
            ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }

------------------------------------------------------------------------
-- Case expressions
------------------------------------------------------------------------

{- Note [Compiling case expressions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is quite interesting to decide whether to put a heap-check at the
start of each alternative.  Of course we certainly have to do so if
the case forces an evaluation, or if there is a primitive op which can
trigger GC.

A more interesting situation is this (a Plan-B situation)

        !P!;
        ...P...
        case x# of
          0#      -> !Q!; ...Q...
          default -> !R!; ...R...

where !x! indicates a possible heap-check point.  The heap checks
in the alternatives *can* be omitted, in which case the topmost
heap check will take their worst case into account.

In favour of omitting !Q!, !R!:

 - *May* save a heap overflow test,
   if ...P... allocates anything.

 - We can use relative addressing from a single Hp to
   get at all the closures so allocated.

 - No need to save volatile vars etc across heap checks
   in !Q!, !R!

Against omitting !Q!, !R!

 - May put a heap-check into the inner loop.  Suppose
        the main loop is P -> R -> P -> R...
        Q is the loop exit, and only it does allocation.
   This only hurts us if P does no allocation.  If P allocates,
   then there is a heap check in the inner loop anyway.

 - May do more allocation than required.  This sometimes bites us
   badly.  For example, nfib (ha!) allocates about 30% more space if the
   worst-casing is done, because many, many calls to nfib are leaf calls
   which don't need to allocate anything.

   We can un-allocate, but that costs an instruction

Neither problem hurts us if there is only one alternative.

Suppose the inner loop is P->R->P->R etc.  Then here is
how many heap checks we get in the *inner loop* under various
conditions

        Alloc   Heap check in branches (!Q!, !R!)?
        P Q R      yes     no (absorb to !P!)
      --------------------------------------
        n n n       0          0
        n y n       0          1
        n . y       1          1
        y . y       2          1
        y . n       1          1

Best choices: absorb heap checks from Q and R into !P! iff
  a) P itself does some allocation
or
  b) P does allocation, or there is exactly one alternative

We adopt (b) because that is more likely to put the heap check at the
entry to a function, when not many things are live.  After a bunch of
single-branch cases, we may have lots of things live.

Hence: two basic plans for

        case e of r { alts }

------ Plan A: the general case ---------

        ...save current cost centre...

        ...code for e,
           with sequel (SetLocals r)

        ...restore current cost centre...
        ...code for alts...
        ...alts do their own heap checks

------ Plan B: special case when ---------
  (i)  e does not allocate or call GC
  (ii) either upstream code performs allocation
       or there is just one alternative

  Then heap allocation in the (single) case branch
  is absorbed by the upstream check.
  Very common example: primops on unboxed values

        ...code for e,
           with sequel (SetLocals r)...

        ...code for alts...
        ...no heap check...
-}
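
{- For instance (an illustrative sketch, not code taken from this module;
   K is a hypothetical constructor with a single unboxed field): in

        case x# +# y# of r# -> K r#

   the scrutinee can neither allocate nor trigger GC, so Plan B applies and
   the allocation of K's closure is absorbed by the enclosing heap check. -}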

-------------------------------------
data GcPlan
  = GcInAlts            -- Put a GC check at the start of the case alternatives,
        [LocalReg]      -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check

-------------------------------------
cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind

cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
  | isEnumerationTyCon tycon -- Note [case on bool]
  = do { tag_expr <- do_enum_primop op args

       -- If the binder is not dead, convert the tag to a constructor
       -- and assign it.
       ; unless (isDeadBinder bndr) $ do
            { dflags <- getDynFlags
            ; tmp_reg <- bindArgToReg (NonVoid bndr)
            ; emitAssign (CmmLocal tmp_reg)
                         (tagToClosure dflags tycon tag_expr) }

       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                              (NonVoid bndr) alts
       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
       ; return AssignedDirectly
       }
  where
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
    do_enum_primop TagToEnumOp [arg]  -- No code!
      = getArgAmode (NonVoid arg)
    do_enum_primop primop args
      = do dflags <- getDynFlags
           tmp <- newTemp (bWord dflags)
           cgPrimOp [tmp] primop args
           return (CmmReg (CmmLocal tmp))
{-
Note [case on bool]
~~~~~~~~~~~~~~~~~~~
This special case handles code like

  case a <# b of
    True ->
    False ->

-->  case tagToEnum# (a <$# b) of
        True -> .. ; False -> ...

-->  case (a <$# b) of r ->
     case tagToEnum# r of
        True -> .. ; False -> ...

If we let the ordinary case code handle it, we'll get something like

 tmp1 = a < b
 tmp2 = Bool_closure_tbl[tmp1]
 if (tmp2 & 7 != 0) then ... // normal tagged case

but this junk won't optimise away.  What we really want is just an
inline comparison:

 if (a < b) then ...

So we add a special case to generate

 tmp1 = a < b
 if (tmp1 == 0) then ...

and later optimisations will further improve this.

Now that #6135 has been resolved it should be possible to remove that
special case.  The idea behind this special case and pre-6135 implementation
of Bool-returning primops was that tagToEnum# was added implicitly in the
codegen and then optimized away.  Now the call to tagToEnum# is explicit
in the source code, which allows it to be optimized away at the earlier
stages of compilation (i.e. at the Core level).

Note [Scrutinising VoidRep]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Suppose we have this STG code:
   f = \[s : State# RealWorld] ->
       case s of _ -> blah
This is very odd.  Why are we scrutinising a state token?  But it
can arise with bizarre NOINLINE pragmas (Trac #9964)
    crash :: IO ()
    crash = IO (\s -> let {-# NOINLINE s' #-}
                          s' = s
                      in (# s', () #))

Now the trouble is that 's' has VoidRep, and we do not bind void
arguments in the environment; they don't live anywhere.  See the
calls to nonVoidIds in various places.  So we must not look up
's' in the environment.  Instead, just evaluate the RHS!  Simple.
-}

cgCase (StgApp v []) _ (PrimAlt _) alts
  | isVoidRep (idPrimRep v)  -- See Note [Scrutinising VoidRep]
  , [(DEFAULT, _, rhs)] <- alts
  = cgExpr rhs

{- Note [Dodgy unsafeCoerce 1]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider
    case (x :: HValue) |> co of (y :: MutVar# Int)
        DEFAULT -> ...
We want to generate an assignment
     y := x
We want to allow this assignment to be generated in the case when the
types are compatible, because this allows some slightly-dodgy but
occasionally-useful casts to be used, such as in RtClosureInspect
where we cast an HValue to a MutVar# so we can print out the contents
of the MutVar#.  If instead we generate code that enters the HValue,
then we'll get a runtime panic, because the HValue really is a
MutVar#.  The types are compatible though, so we can just generate an
assignment.
-}
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnliftedType (idType v)  -- Note [Dodgy unsafeCoerce 1]
    || reps_compatible
  = -- assignment suffices for unlifted types
    do { dflags <- getDynFlags
       ; unless reps_compatible $
           pprPanic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
                    (pp_bndr v $$ pp_bndr bndr)
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr)))
                    (idInfoToAmode v_info)
       ; bindArgToReg (NonVoid bndr)
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = ((==) `on` (primRepSlot . idPrimRep)) v bndr
      -- Must compare SlotTys, not proper PrimReps, because with unboxed sums,
      -- the types of the binders are generated from slotPrimRep and might not
      -- match. Test case:
      --   swap :: (# Int | Int #) -> (# Int | Int #)
      --   swap (# x | #) = (# | x #)
      --   swap (# | y #) = (# y | #)

    pp_bndr id = ppr id <+> dcolon <+> ppr (idType id) <+> parens (ppr (idPrimRep id))

{- Note [Dodgy unsafeCoerce 2, #3132]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In all other cases of a lifted Id being cast to an unlifted type, the
Id should be bound to bottom, otherwise this is an unsafe use of
unsafeCoerce.  We can generate code to enter the Id and assume that
it will never return.  Hence, we emit the usual enter/return code, and
because bottom must be untagged, it will be entered.  The Sequel is a
type-correct assignment, albeit bogus.  The (dead) continuation loops;
it would be better to invoke some kind of panic function here.
-}
cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = do { dflags <- getDynFlags
       ; mb_cc <- maybeSaveCostCentre True
       ; withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newBlockId
       ; emitLabel l
       ; emit (mkBranch l)  -- an infinite loop
       ; return AssignedDirectly
       }

{- Note [Handle seq#]
~~~~~~~~~~~~~~~~~~~~~
   case seq# a s of v
     (# s', a' #) -> e

==>

   case a of v
     (# s', a' #) -> e

(taking advantage of the fact that the return convention for (# State#, a #)
is the same as the return convention for just 'a')
-}

cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- Note [Handle seq#]
    -- Use the same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts

cgCase scrut bndr alt_type alts
  = -- the general case
    do { dflags <- getDynFlags
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg dflags) ret_bndrs
       ; simple_scrut <- isSimpleScrut scrut alt_type
       ; let do_gc  | not simple_scrut = True
                    | isSingleton alts = False
                    | up_hp_usg > 0    = False
                    | otherwise        = True
               -- cf Note [Compiling case expressions]
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -}
       ; ret_kind <- withSequel sequel (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }


{-
Note [scrut sequel]
~~~~~~~~~~~~~~~~~~~
The job of the scrutinee is to assign its value(s) to alt_regs.
Additionally, if we plan to do a heap-check in the alternatives (see
Note [Compiling case expressions]), then we *must* retreat Hp to
recover any unused heap before passing control to the sequel.  If we
don't do this, then any unused heap will become slop because the heap
check will reset the heap usage.  Slop in the heap breaks LDV profiling
(+RTS -hb) which needs to do a linear sweep through the nursery.


Note [Inlining out-of-line primops and heap checks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If shouldInlinePrimOp returns True when called from StgCmmExpr for the
purpose of heap check placement, we *must* inline the primop later in
StgCmmPrim.  If we don't, things will go wrong.
-}

-----------------
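-- Save the current cost centre only when the scrutinee is not "simple":
-- evaluating a non-trivial scrutinee may change the current cost centre,
-- so it has to be saved here and restored after the scrutinee's code.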
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut
  | simple_scrut = return Nothing
  | otherwise    = saveCurrentCostCentre


-----------------
isSimpleScrut :: StgExpr -> AltType -> FCode Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
--     when it does, you'll deeply mess up allocation
isSimpleScrut (StgOpApp op args _) _          = isSimpleOp op args
isSimpleScrut (StgLit _)           _          = return True  -- case 1# of { 0# -> ..; ... }
isSimpleScrut (StgApp _ [])        (PrimAlt _) = return True -- case x# of { 0# -> ..; ... }
isSimpleScrut _                    _          = return False

isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
-- True iff the op cannot block or allocate
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
isSimpleOp (StgPrimOp op) stg_args = do
    arg_exprs <- getNonVoidArgAmodes stg_args
    dflags <- getDynFlags
    -- See Note [Inlining out-of-line primops and heap checks]
    return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
isSimpleOp (StgPrimCallOp _) _ = return False

-----------------
chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
-- These are the binders of a case that are assigned by the evaluation of the
-- scrutinee.
-- They're non-void, see Note [Post-unarisation invariants] in UnariseStg.
chooseReturnBndrs bndr (PrimAlt _) _alts
  = assertNonVoidIds [bndr]

chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)]
  = ASSERT2(n == length ids, ppr n $$ ppr ids $$ ppr _bndr)
    assertNonVoidIds ids     -- 'bndr' is not assigned!

chooseReturnBndrs bndr (AlgAlt _) _alts
  = assertNonVoidIds [bndr]  -- Only 'bndr' is assigned

chooseReturnBndrs bndr PolyAlt _alts
  = assertNonVoidIds [bndr]  -- Only 'bndr' is assigned

chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
  -- MultiValAlt has only one alternative

-------------------------------------
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
       -> FCode ReturnKind
-- At this point the results of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
    -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do { dflags <- getDynFlags

       ; tagged_cmms <- cgAltRhss gc_plan bndr alts

       ; let bndr_reg = CmmLocal (idToReg dflags bndr)
             (DEFAULT,deflt) = head tagged_cmms
               -- PrimAlts always have a DEFAULT case
               -- and it always comes first

             tagged_cmms' = [(lit,code)
                            | (LitAlt lit, code) <- tagged_cmms]
       ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
       ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do { dflags <- getDynFlags

       ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

       ; let fam_sz   = tyConFamilySize tycon
             bndr_reg = CmmLocal (idToReg dflags bndr)

             -- Is the constructor tag in the node reg?
       ; if isSmallFamily dflags fam_sz
         then do
           let -- Yes, bndr_reg has constr. tag in ls bits
               tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
               branches' = [(tag+1,branch) | (tag,branch) <- branches]
           emitSwitch tag_expr branches' mb_deflt 1 fam_sz

         else -- No, get tag from info table
           do dflags <- getDynFlags
              let -- Note that ptr _always_ has tag 1
                  -- when the family size is big enough
                  untagged_ptr = cmmRegOffB bndr_reg (-1)
                  tag_expr = getConstrTag dflags (untagged_ptr)
              emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)

       ; return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
  -- UbxTupAlt and PolyAlt have only one alternative


-- Note [alg-alt heap check]
--
-- In an algebraic case with more than one alternative, we will have
-- code like
--
-- L0:
--   x = R1
--   goto L1
-- L1:
--   if (x & 7 >= 2) then goto L2 else goto L3
-- L2:
--   Hp = Hp + 16
--   if (Hp > HpLim) then goto L4
--   ...
-- L4:
--   call gc() returns to L5
-- L5:
--   x = R1
--   goto L1

-------------------
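-- Generate code for the RHSs of an algebraic case, returning the optional
-- DEFAULT branch (always first, if present) and the remaining branches
-- keyed by zero-based constructor tag.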
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
             -> FCode ( Maybe CmmAGraphScoped
                      , [(ConTagZ, CmmAGraphScoped)] )
cgAlgAltRhss gc_plan bndr alts
  = do { tagged_cmms <- cgAltRhss gc_plan bndr alts

       ; let { mb_deflt = case tagged_cmms of
                            ((DEFAULT,rhs) : _) -> Just rhs
                            _other              -> Nothing
                            -- DEFAULT is always first, if present

             ; branches = [ (dataConTagZ con, cmm)
                          | (DataAlt con, cmm) <- tagged_cmms ]
             }

       ; return (mb_deflt, branches)
       }


-------------------
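-- Generate code for the right-hand side of each alternative.  The
-- alternative's constructor fields (if any) are bound from the closure in
-- the case binder's register, and each RHS is wrapped in a heap check when
-- the GcPlan asks for one.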
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
          -> FCode [(AltCon, CmmAGraphScoped)]
cgAltRhss gc_plan bndr alts = do
  dflags <- getDynFlags
  let
    base_reg = idToReg dflags bndr
    cg_alt :: StgAlt -> FCode (AltCon, CmmAGraphScoped)
    cg_alt (con, bndrs, rhs)
      = getCodeScoped $
        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
                 -- alt binders are always non-void,
                 -- see Note [Post-unarisation invariants] in UnariseStg
           ; _ <- cgExpr rhs
           ; return con }
  forkAlts (map cg_alt alts)

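-- Add a heap check at the start of an alternative if the GcPlan requires
-- one, using the returns-to variant when the scrutinee returned through a
-- native return continuation.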
maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (NoGcInAlts,_)  code = code
maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
  altHeapCheck regs code
maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
  altHeapCheckReturnsTo regs lret off code

-----------------------------------------------------------------------------
-- Tail calls
-----------------------------------------------------------------------------

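-- Compile a saturated constructor application in tail position: an unboxed
-- tuple is returned directly in its registers, while a boxed constructor is
-- built with buildDynCon and a pointer to it is returned.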
cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
cgConApp con stg_args
  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   -- Boxed constructors; allocate and return
  = ASSERT2( stg_args `lengthIs` countConRepArgs con, ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args )
    do { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
                                   currentCCS con (assertNonVoidStgArgs stg_args)
           -- con args are always non-void,
           -- see Note [Post-unarisation invariants] in UnariseStg
           -- The first "con" says that the name bound to this
           -- closure is "con", which is a bit of a fudge, but
           -- it only affects profiling (hence the False)

       ; emit =<< fcode_init
       ; tickyReturnNewCon (length stg_args)
       ; emitReturn [idInfoToAmode idinfo] }

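-- Compile an application of an identifier, in tail position.  getCallMethod
-- decides whether we simply return the value (it is already in WHNF), enter
-- it, make a slow or direct call, or jump to a local join point /
-- self-recursive loop header.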
cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id [] | isVoidTy (idType fun_id) = emitReturn []
cgIdApp fun_id args = do
    dflags <- getDynFlags
    fun_info <- getCgIdInfo fun_id
    self_loop_info <- getSelfLoop
    let cg_fun_id = cg_id fun_info
          -- NB: use (cg_id fun_info) instead of fun_id, because
          -- the former may be externalised for -split-objs.
          -- See Note [Externalise when splitting] in StgCmmMonad

        fun_arg = StgVarArg cg_fun_id
        fun_name = idName cg_fun_id
        fun = idInfoToAmode fun_info
        lf_info = cg_lf fun_info
        n_args = length args
        v_args = length $ filter (isVoidTy . stgArgType) args
        node_points dflags = nodeMustPointToIt dflags lf_info
    case getCallMethod dflags fun_name cg_fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of

        -- A value in WHNF, so we can just return it.
        ReturnIt -> emitReturn [fun]    -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> ASSERT( null args )  -- Discarding arguments
                   emitEnter fun

        SlowCall -> do      -- A slow function call via the RTS apply routines
                { tickySlowCall lf_info args
                ; emitComment $ mkFastString "slowCall"
                ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
                { tickyDirectCall arity args
                ; if node_points dflags
                     then directCall NativeNodeCall   lbl arity (fun_arg:args)
                     else directCall NativeDirectCall lbl arity args }

        -- Let-no-escape call or self-recursive tail-call
        JumpToIt blk_id lne_regs -> do
                { adjustHpBackwards -- always do this before a tail-call
                ; cmm_args <- getNonVoidArgAmodes args
                ; emitMultiAssign lne_regs cmm_args
                ; emit (mkBranch blk_id)
                ; return AssignedDirectly }

-- Note [Self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive tail calls can be optimized into a local jump in the same
-- way as let-no-escape bindings (see Note [What is a non-escaping let] in
-- stgSyn/CoreToStg.hs). Consider this:
--
-- foo.info:
--     a = R1  // calling convention
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: R1 = x
--     R2 = y
--     call foo(R1,R2)
--
-- Instead of putting x and y into registers (or other locations required by the
-- calling convention) and performing a call, we can put them into the local
-- variables a and b and perform a jump to L1:
--
-- foo.info:
--     a = R1
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: a = x
--     b = y
--     goto L1
--
-- This can be done only when a function is calling itself in a tail position
-- and only if the call passes a number of parameters equal to the function's
-- arity.  Note that this cannot be performed if a function calls itself with a
-- continuation.
--
-- This in fact implements an optimization known as "loopification".  It was
-- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
-- by Krzysztof Woś, though we use a different approach.  Krzysztof performed his
-- optimization at the Cmm level, whereas we perform ours during code generation
-- (the Stg-to-Cmm pass), essentially making sure that optimized Cmm code is
-- generated in the first place.
--
-- The implementation is spread across a couple of places in the code:
--
-- * The FCode monad stores additional information in its reader environment
--   (the cgd_self_loop field).  This information tells us which function can
--   tail call itself in an optimized way (it is the function currently
--   being compiled), what the label of its loop header is (L1 in the example
--   above), and in which local registers we should place the arguments before
--   making a call (these would be a and b in the example above).
--
-- * Whenever we are compiling a function, we set that information to reflect
--   the fact that the function currently being compiled can be jumped to,
--   instead of called.  This is done in closureCodeBody in StgCmmBind.
--
-- * We also have to emit a label to which we will be jumping.  We make sure
--   that the label is placed after a stack check but before the heap
--   check.  The reason is that making a recursive tail-call does not increase
--   the stack so we only need to check once.  But it may grow the heap, so we
--   have to repeat the heap check in every self-call.  This is done in
--   do_checks in StgCmmHeap.
--
-- * When we begin compilation of another closure we remove the additional
--   information from the environment.  This is done by forkClosureBody
--   in StgCmmMonad.  Other functions that duplicate the environment -
--   forkLneBody, forkAlts, codeOnly - carry that information over.  In other
--   words, we only need to clean the self-loop information out of the
--   environment when compiling the right-hand side of a closure (binding).
--
-- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
--   of call will be generated.  getCallMethod decides to generate a self
--   recursive tail call when (a) the environment stores information about a
--   possible self tail-call; (b) that tail call is to the function currently
--   being compiled; (c) the number of non-void arguments passed is equal to
--   the function's arity; and (d) loopification is turned on via the
--   -floopification command-line option.
--
-- * The command-line option that turns loopification on and off is
--   implemented in DynFlags.
--
--
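-- As a source-level illustration (a hypothetical example, not taken from a
-- test case), a function such as
--
--     sumTo :: Int# -> Int# -> Int#
--     sumTo acc 0# = acc
--     sumTo acc n  = sumTo (acc +# n) (n -# 1#)
--
-- calls itself in tail position with exactly as many non-void arguments as
-- its arity, so with -floopification the recursive call compiles to a jump
-- back to the loop header instead of a proper call.
--
--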
-- Note [Void arguments in self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- State# tokens can get in the way of the loopification optimization as seen in
-- #11372. Consider this:
--
-- foo :: [a]
--     -> (a -> State# s -> (# State# s, Bool #))
--     -> State# s
--     -> (# State# s, Maybe a #)
-- foo [] f s = (# s, Nothing #)
-- foo (x:xs) f s = case f x s of
--      (# s', b #) -> case b of
--        True  -> (# s', Just x #)
--        False -> foo xs f s'
--
-- We would like to compile the call to foo as a local jump instead of a call
-- (see Note [Self-recursive tail calls]). However, the generated function has
-- an arity of 2 while we apply it to 3 arguments, one of them being of void
-- type. Thus, we mustn't count arguments of void type when checking whether
-- we can turn a call into a self-recursive jump.
--

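-- Enter a closure whose address is 'fun'.  With a Return sequel we simply
-- tail-jump to the closure's entry code; with an AssignTo sequel we first
-- test the pointer's tag, so that an already-evaluated closure is passed to
-- the sequel without being entered.  See the comments in the body below.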
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { dflags <- getDynFlags
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return -> do
        { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
        ; emit $ mkJump dflags NativeNodeCall entry
                        [cmmUntag dflags fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lret else goto Lcall
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
        { lret <- newBlockId
        ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
        ; lcall <- newBlockId
        ; updfr_off <- getUpdFrameOff
        ; let area = Young lret
        ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
                                            [fun] updfr_off []
          -- refer to fun via nodeReg after the copyout, to avoid having
          -- both live simultaneously; this sometimes enables fun to be
          -- inlined in the RHS of the R1 assignment.
        ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
              the_call = toCall entry (Just lret) updfr_off off outArgs regs
        ; tscope <- getTickScope
        ; emit $
            copyout <*>
            mkCbranch (cmmIsTagged dflags (CmmReg nodeReg))
                      lret lcall Nothing <*>
            outOfLine lcall (the_call,tscope) <*>
            mkLabel lret tscope <*>
            copyin
        ; return (ReturnedTo lret off)
        }
  }

------------------------------------------------------------------------
-- Ticks
------------------------------------------------------------------------

-- | Generate Cmm code for a tick. Depending on the type of Tickish,
-- this will either generate actual Cmm instrumentation code, or
-- simply pass on the annotation as a @CmmTickish@.
cgTick :: Tickish Id -> FCode ()
cgTick tick
  = do { dflags <- getDynFlags
       ; case tick of
           ProfNote cc t p -> emitSetCCC cc t p
           HpcTick m n     -> emit (mkTickBox dflags m n)
           SourceNote s n  -> emitTick $ SourceNote s n
           _other          -> return () -- ignore
       }