{-# LANGUAGE CPP #-}

-----------------------------------------------------------------------------
--
-- Stg to C-- code generation: expressions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmExpr ( cgExpr ) where

#include "HsVersions.h"

import GhcPrelude hiding ((<*>))

import {-# SOURCE #-} StgCmmBind ( cgBind )

import StgCmmMonad
import StgCmmHeap
import StgCmmEnv
import StgCmmCon
import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
import StgCmmLayout
import StgCmmPrim
import StgCmmHpc
import StgCmmTicky
import StgCmmUtils
import StgCmmClosure

import StgSyn

import MkGraph
import BlockId
import Cmm
import CmmInfo
import CoreSyn
import DataCon
import ForeignCall
import Id
import PrimOp
import TyCon
import Type ( isUnliftedType )
import RepType ( isVoidTy, countConRepArgs, primRepSlot )
import CostCentre ( CostCentreStack, currentCCS )
import Maybes
import Util
import FastString
import Outputable

import Control.Monad (unless,void)
import Control.Arrow (first)
import Data.Function ( on )

------------------------------------------------------------------------
--              cgExpr: the main function
------------------------------------------------------------------------

cgExpr :: CgStgExpr -> FCode ReturnKind

cgExpr (StgApp fun args) = cgIdApp fun args

-- seq# a s ==> a
-- See Note [seq# magic] in PrelRules
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

-- dataToTag# :: a -> Int#
-- See Note [dataToTag#] in primops.txt.pp
cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
  dflags <- getDynFlags
  emitComment (mkFastString "dataToTag#")
  tmp <- newTemp (bWord dflags)
  _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
  -- TODO: For small types look at the tag bits instead of reading info table
  emitReturn [getConstrTag dflags (cmmUntag dflags (CmmReg (CmmLocal tmp)))]

cgExpr (StgOpApp op args ty)  = cgOpApp op args ty
cgExpr (StgConApp con args _) = cgConApp con args
cgExpr (StgTick t e)          = cgTick t >> cgExpr e
cgExpr (StgLit lit)           = do cmm_lit <- cgLit lit
                                   emitReturn [CmmLit cmm_lit]

cgExpr (StgLet _ binds expr) = do { cgBind binds; cgExpr expr }
cgExpr (StgLetNoEscape _ binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr bndr alt_type alts) =
  cgCase expr bndr alt_type alts

cgExpr (StgLam {}) = panic "cgExpr: StgLam"

------------------------------------------------------------------------
--              Let no escape
------------------------------------------------------------------------
{- Generating code for a let-no-escape binding, aka a join point, is
very similar to what we do for a case expression.  The duality is
between
        let-no-escape x = b
        in e
and
        case e of ... -> b

That is, the RHS of 'x' (ie 'b') will execute *later*, just like
the alternative of the case; it needs to be compiled in an environment
in which all volatile bindings are forgotten, and the free vars are
bound only to stable things like stack locations.  The 'e' part will
execute *next*, just like the scrutinee of a case. -}
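
{- For instance (an illustrative sketch, in the same pseudo-syntax as
above, not code from this module):

        let-no-escape j = \x -> x +# 1#
        in case b of True  -> j 1#
                     False -> j 2#

compiles 'j' to a single out-of-line block that ends by branching to
the join label; each call of 'j' then just assigns to j's argument
registers and jumps to that block (see cgLneBinds below and the
JumpToIt case of cgIdApp). -}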

-------------------------
cgLneBinds :: BlockId -> CgStgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do  { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
        ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
        ; fcode
        ; addBindC info }

cgLneBinds join_id (StgRec pairs)
  = do  { local_cc <- saveCurrentCostCentre
        ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
        ; let (infos, fcodes) = unzip r
        ; addBindsC infos
        ; sequence_ fcodes
        }

-------------------------
cgLetNoEscapeRhs
    :: BlockId          -- join point for successor of let-no-escape
    -> Maybe LocalReg   -- Saved cost centre
    -> Id
    -> CgStgRhs
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
     ; let code = do { (_, body) <- getCodeScoped rhs_code
                     ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
     ; return (info, code)
     }

cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- Saved cost centre
    -> Id
    -> CgStgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure _ cc _upd args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
  = cgLetNoEscapeClosure bndr local_cc cc []
      (StgConApp con args (pprPanic "cgLetNoEscapeRhsBody" $
                           text "StgRhsCon doesn't have type args"))
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor.  It's easy; just behave as if it
        -- was an StgRhsClosure with a ConApp inside!

-------------------------
cgLetNoEscapeClosure
    :: Id                   -- binder
    -> Maybe LocalReg       -- Slot for saved current cost centre
    -> CostCentreStack      -- XXX: *** NOT USED *** why not?
    -> [NonVoid Id]         -- Args (as in \ args -> body)
    -> CgStgExpr            -- Body (as in above)
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do dflags <- getDynFlags
       return ( lneIdInfo dflags bndr args
              , code )
  where
    code = forkLneBody $ do {
             ; withNewTickyCounterLNE (idName bndr) args $ do
             ; restoreCurrentCostCentre cc_slot
             ; arg_regs <- bindArgsToRegs args
             ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }


------------------------------------------------------------------------
--              Case expressions
------------------------------------------------------------------------

{- Note [Compiling case expressions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is quite interesting to decide whether to put a heap-check at the
start of each alternative.  Of course we certainly have to do so if
the case forces an evaluation, or if there is a primitive op which can
trigger GC.

A more interesting situation is this (a Plan-B situation)

        !P!;
        ...P...
        case x# of
          0#      -> !Q!; ...Q...
          default -> !R!; ...R...

where !x! indicates a possible heap-check point.  The heap checks
in the alternatives *can* be omitted, in which case the topmost
heapcheck will take their worst case into account.

In favour of omitting !Q!, !R!:

 - *May* save a heap overflow test,
   if ...P... allocates anything.

 - We can use relative addressing from a single Hp to
   get at all the closures so allocated.

 - No need to save volatile vars etc across heap checks
   in !Q!, !R!

Against omitting !Q!, !R!

  - May put a heap-check into the inner loop.  Suppose
        the main loop is P -> R -> P -> R...
        Q is the loop exit, and only it does allocation.
    This only hurts us if P does no allocation.  If P allocates,
    then there is a heap check in the inner loop anyway.

  - May do more allocation than required.  This sometimes bites us
    badly.  For example, nfib (ha!) allocates about 30% more space if the
    worst-casing is done, because many, many calls to nfib are leaf calls
    which don't need to allocate anything.

    We can un-allocate, but that costs an instruction

Neither problem hurts us if there is only one alternative.

Suppose the inner loop is P->R->P->R etc.  Then here is
how many heap checks we get in the *inner loop* under various
conditions

  Alloc   Heap check in branches (!Q!, !R!)?
  P Q R      yes     no (absorb to !P!)
--------------------------------------
  n n n      0          0
  n y n      0          1
  n . y      1          1
  y . y      2          1
  y . n      1          1

Best choices: absorb heap checks from Q and R into !P! iff
  a) P itself does some allocation
or
  b) P does allocation, or there is exactly one alternative

We adopt (b) because that is more likely to put the heap check at the
entry to a function, when not many things are live.  After a bunch of
single-branch cases, we may have lots of things live

Hence: two basic plans for

        case e of r { alts }

------ Plan A: the general case ---------

        ...save current cost centre...

        ...code for e,
           with sequel (SetLocals r)

        ...restore current cost centre...
        ...code for alts...
        ...alts do their own heap checks

------ Plan B: special case when ---------
  (i)  e does not allocate or call GC
  (ii) either upstream code performs allocation
       or there is just one alternative

  Then heap allocation in the (single) case branch
  is absorbed by the upstream check.
  Very common example: primops on unboxed values

        ...code for e,
           with sequel (SetLocals r)...

        ...code for alts...
        ...no heap check...
-}
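
{- A concrete Plan-B instance (an illustrative sketch, not code from
this module):

        case x# +# 1# of y#
          __DEFAULT -> ...allocate something using y#...

The scrutinee is a simple primop that cannot block, allocate, or call
GC, and there is just one alternative, so NoGcInAlts is chosen and the
alternative's allocation is absorbed by the enclosing upstream heap
check (see isSimpleScrut and the general case of cgCase below). -}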


-------------------------------------
data GcPlan
  = GcInAlts            -- Put a GC check at the start of the case alternatives,
      [LocalReg]        -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check

-------------------------------------
cgCase :: CgStgExpr -> Id -> AltType -> [CgStgAlt] -> FCode ReturnKind

cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
  | isEnumerationTyCon tycon -- Note [case on bool]
  = do { tag_expr <- do_enum_primop op args

       -- If the binder is not dead, convert the tag to a constructor
       -- and assign it.  See Note [Dead-binder optimisation]
       ; unless (isDeadBinder bndr) $ do
            { dflags <- getDynFlags
            ; tmp_reg <- bindArgToReg (NonVoid bndr)
            ; emitAssign (CmmLocal tmp_reg)
                         (tagToClosure dflags tycon tag_expr) }

       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                              (NonVoid bndr) alts
                 -- See Note [GC for conditionals]
       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
       ; return AssignedDirectly
       }
  where
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
    do_enum_primop TagToEnumOp [arg]  -- No code!
      = getArgAmode (NonVoid arg)
    do_enum_primop primop args
      = do dflags <- getDynFlags
           tmp <- newTemp (bWord dflags)
           cgPrimOp [tmp] primop args
           return (CmmReg (CmmLocal tmp))

{-
Note [case on bool]
~~~~~~~~~~~~~~~~~~~
This special case handles code like

  case a <# b of
    True ->
    False ->

-->  case tagToEnum# (a <# b) of
       True -> .. ; False -> ...

-->  case (a <# b) of r ->
     case tagToEnum# r of
       True -> .. ; False -> ...

If we let the ordinary case code handle it, we'll get something like

  tmp1 = a < b
  tmp2 = Bool_closure_tbl[tmp1]
  if (tmp2 & 7 != 0) then ... // normal tagged case

but this junk won't optimise away.  What we really want is just an
inline comparison:

  if (a < b) then ...

So we add a special case to generate

  tmp1 = a < b
  if (tmp1 == 0) then ...

and later optimisations will further improve this.

Now that #6135 has been resolved it should be possible to remove this
special case.  The idea behind it, and behind the pre-#6135 implementation
of Bool-returning primops, was that tagToEnum# was added implicitly in the
codegen and then optimized away.  Now the call to tagToEnum# is explicit
in the source code, which allows it to be optimized away at earlier stages
of compilation (i.e. at the Core level).

Note [Scrutinising VoidRep]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Suppose we have this STG code:
   f = \[s : State# RealWorld] ->
       case s of _ -> blah
This is very odd.  Why are we scrutinising a state token?  But it
can arise with bizarre NOINLINE pragmas (#9964)
    crash :: IO ()
    crash = IO (\s -> let {-# NOINLINE s' #-}
                          s' = s
                      in (# s', () #))

Now the trouble is that 's' has VoidRep, and we do not bind void
arguments in the environment; they don't live anywhere.  See the
calls to nonVoidIds in various places.  So we must not look up
's' in the environment.  Instead, just evaluate the RHS!  Simple.

Note [Dead-binder optimisation]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A case-binder, or data-constructor argument, may be marked as dead,
because we preserve occurrence-info on binders in CoreTidy (see
CoreTidy.tidyIdBndr).

If the binder is dead, we can sometimes eliminate a load.  While
CmmSink will eliminate that load, it's very easy to kill it at source
(giving CmmSink less work to do), and in any case CmmSink only runs
with -O.  Since the majority of case binders are dead, this
optimisation probably still has a great benefit-cost ratio and we want
to keep it for -O0.  See also Phab:D5358.
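
For example (an illustrative sketch): in

   case a <# b of wild { True -> e1; False -> e2 }

where 'wild' does not occur in e1 or e2, the binder is marked dead, and
the enumeration case of cgCase above skips the assignment
'wild := Bool_closure_tbl[tag]' altogether, so no closure pointer is
ever loaded for it.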

This is probably also why the occurrence hack in Phab:D5339 existed;
perhaps the occurrence information preserved by 'CoreTidy.tidyIdBndr'
was insufficient.  But now that CmmSink does the job, we have deleted
the hacks.
-}

cgCase (StgApp v []) _ (PrimAlt _) alts
  | isVoidRep (idPrimRep v)  -- See Note [Scrutinising VoidRep]
  , [(DEFAULT, _, rhs)] <- alts
  = cgExpr rhs

{- Note [Dodgy unsafeCoerce 1]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider
    case (x :: HValue) |> co of (y :: MutVar# Int)
        DEFAULT -> ...
We want to generate an assignment
    y := x
We want to allow this assignment to be generated in the case when the
types are compatible, because this allows some slightly-dodgy but
occasionally-useful casts to be used, such as in RtClosureInspect
where we cast an HValue to a MutVar# so we can print out the contents
of the MutVar#.  If instead we generate code that enters the HValue,
then we'll get a runtime panic, because the HValue really is a
MutVar#.  The types are compatible though, so we can just generate an
assignment.
-}
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnliftedType (idType v)  -- Note [Dodgy unsafeCoerce 1]
  || reps_compatible
  =  -- assignment suffices for unlifted types
    do { dflags <- getDynFlags
       ; unless reps_compatible $
           pprPanic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
                    (pp_bndr v $$ pp_bndr bndr)
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr)))
                    (idInfoToAmode v_info)
       -- Add bndr to the environment
       ; _ <- bindArgToReg (NonVoid bndr)
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = ((==) `on` (primRepSlot . idPrimRep)) v bndr
    -- Must compare SlotTys, not proper PrimReps, because with unboxed sums,
    -- the types of the binders are generated from slotPrimRep and might not
    -- match.  Test case:
    --   swap :: (# Int | Int #) -> (# Int | Int #)
    --   swap (# x | #) = (# | x #)
    --   swap (# | y #) = (# y | #)

    pp_bndr id = ppr id <+> dcolon <+> ppr (idType id) <+> parens (ppr (idPrimRep id))

{- Note [Dodgy unsafeCoerce 2, #3132]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In all other cases of a lifted Id being cast to an unlifted type, the
Id should be bound to bottom, otherwise this is an unsafe use of
unsafeCoerce.  We can generate code to enter the Id and assume that
it will never return.  Hence, we emit the usual enter/return code, and
because bottom must be untagged, it will be entered.  The Sequel is a
type-correct assignment, albeit bogus.  The (dead) continuation loops;
it would be better to invoke some kind of panic function here.
-}
cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = do { dflags <- getDynFlags
       ; mb_cc <- maybeSaveCostCentre True
       ; _ <- withSequel
                (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newBlockId
       ; emitLabel l
       ; emit (mkBranch l)  -- an infinite loop
       ; return AssignedDirectly
       }

{- Note [Handle seq#]
~~~~~~~~~~~~~~~~~~~~~
See Note [seq# magic] in PrelRules.
The special case for seq# in cgCase does this:

  case seq# a s of v
    (# s', a' #) -> e
==>
  case a of v
    (# s', a' #) -> e

(taking advantage of the fact that the return convention for (# State#, a #)
is the same as the return convention for just 'a')
-}

cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- Note [Handle seq#]
    -- And see Note [seq# magic] in PrelRules
    -- Use the same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts

cgCase scrut bndr alt_type alts
  = -- the general case
    do { dflags <- getDynFlags
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg dflags) ret_bndrs
       ; simple_scrut <- isSimpleScrut scrut alt_type
       ; let do_gc  | is_cmp_op scrut  = False  -- See Note [GC for conditionals]
                    | not simple_scrut = True
                    | isSingleton alts = False
                    | up_hp_usg > 0    = False
                    | otherwise        = True
              -- cf Note [Compiling case expressions]
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       ; let sequel = AssignTo alt_regs do_gc {- Note [scrut sequel] -}
       ; ret_kind <- withSequel sequel (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }
  where
    is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op
    is_cmp_op _                             = False

{- Note [GC for conditionals]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For boolean conditionals it seems that we have always done NoGcInAlts.
That is, we have always done the GC check before the conditional.
This is enshrined in the special case for
   case tagToEnum# (a>b) of ...
See Note [case on bool]

It's odd, and it's flagrantly inconsistent with the rules described in
Note [Compiling case expressions].  However, after eliminating the
tagToEnum# (#13397) we will have:
   case (a>b) of ...
Rather than make it behave quite differently, I am testing for a
comparison operator here in the general case as well.

ToDo: figure out what the Right Rule should be.

Note [scrut sequel]
~~~~~~~~~~~~~~~~~~~
The job of the scrutinee is to assign its value(s) to alt_regs.
Additionally, if we plan to do a heap-check in the alternatives (see
Note [Compiling case expressions]), then we *must* retreat Hp to
recover any unused heap before passing control to the sequel.  If we
don't do this, then any unused heap will become slop because the heap
check will reset the heap usage.  Slop in the heap breaks LDV profiling
(+RTS -hb) which needs to do a linear sweep through the nursery.


Note [Inlining out-of-line primops and heap checks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If shouldInlinePrimOp returns True when called from StgCmmExpr for the
purpose of heap check placement, we *must* inline the primop later in
StgCmmPrim.  If we don't, things will go wrong.
-}

-----------------
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut
  | simple_scrut = return Nothing
  | otherwise    = saveCurrentCostCentre


-----------------
isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
--     when it does, you'll deeply mess up allocation
isSimpleScrut (StgOpApp op args _) _           = isSimpleOp op args
isSimpleScrut (StgLit _)           _           = return True  -- case 1# of { 0# -> ..; ... }
isSimpleScrut (StgApp _ [])        (PrimAlt _) = return True  -- case x# of { 0# -> ..; ... }
isSimpleScrut _                    _           = return False

isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
-- True iff the op cannot block or allocate
isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
-- dataToTag# evaluates its argument, see Note [dataToTag#] in primops.txt.pp
isSimpleOp (StgPrimOp DataToTagOp) _ = return False
isSimpleOp (StgPrimOp op) stg_args = do
    arg_exprs <- getNonVoidArgAmodes stg_args
    dflags <- getDynFlags
    -- See Note [Inlining out-of-line primops and heap checks]
    return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
isSimpleOp (StgPrimCallOp _) _ = return False

-----------------
chooseReturnBndrs :: Id -> AltType -> [CgStgAlt] -> [NonVoid Id]
-- These are the binders of a case that are assigned by the evaluation of the
-- scrutinee.
-- They're non-void, see Note [Post-unarisation invariants] in UnariseStg.
chooseReturnBndrs bndr (PrimAlt _) _alts
  = assertNonVoidIds [bndr]

chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)]
  = ASSERT2(ids `lengthIs` n, ppr n $$ ppr ids $$ ppr _bndr)
    assertNonVoidIds ids     -- 'bndr' is not assigned!

chooseReturnBndrs bndr (AlgAlt _) _alts
  = assertNonVoidIds [bndr]  -- Only 'bndr' is assigned

chooseReturnBndrs bndr PolyAlt _alts
  = assertNonVoidIds [bndr]  -- Only 'bndr' is assigned

chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
  -- MultiValAlt has only one alternative

-------------------------------------
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [CgStgAlt]
       -> FCode ReturnKind
-- At this point the results of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do  { dflags <- getDynFlags

        ; tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let bndr_reg = CmmLocal (idToReg dflags bndr)
              (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

              tagged_cmms' = [(lit,code)
                             | (LitAlt lit, code) <- tagged_cmms]
        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
        ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { dflags <- getDynFlags

        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

        ; let fam_sz   = tyConFamilySize tycon
              bndr_reg = CmmLocal (idToReg dflags bndr)

        -- Is the constructor tag in the node reg?
        ; if isSmallFamily dflags fam_sz
          then do
            let -- Yes, bndr_reg has constructor tag in ls bits
                tag_expr  = cmmConstrTag1 dflags (CmmReg bndr_reg)
                branches' = [(tag+1,branch) | (tag,branch) <- branches]
            emitSwitch tag_expr branches' mb_deflt 1 fam_sz

          else -- No, get tag from info table
            let -- Note that ptr _always_ has tag 1
                -- when the family size is big enough
                untagged_ptr = cmmRegOffB bndr_reg (-1)
                tag_expr = getConstrTag dflags (untagged_ptr)
            in emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)

        ; return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative


-- Note [alg-alt heap check]
--
-- In an algebraic case with more than one alternative, we will have
-- code like
--
-- L0:
--      x = R1
--      goto L1
-- L1:
--      if (x & 7 >= 2) then goto L2 else goto L3
-- L2:
--      Hp = Hp + 16
--      if (Hp > HpLim) then goto L4
--      ...
-- L4:
--      call gc() returns to L5
-- L5:
--      x = R1
--      goto L1

-------------------
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
             -> FCode ( Maybe CmmAGraphScoped
                      , [(ConTagZ, CmmAGraphScoped)] )
cgAlgAltRhss gc_plan bndr alts
  = do { tagged_cmms <- cgAltRhss gc_plan bndr alts

       ; let { mb_deflt = case tagged_cmms of
                            ((DEFAULT,rhs) : _) -> Just rhs
                            _other              -> Nothing
                            -- DEFAULT is always first, if present

             ; branches = [ (dataConTagZ con, cmm)
                          | (DataAlt con, cmm) <- tagged_cmms ]
             }

       ; return (mb_deflt, branches)
       }


-------------------
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
          -> FCode [(AltCon, CmmAGraphScoped)]
cgAltRhss gc_plan bndr alts = do
  dflags <- getDynFlags
  let
    base_reg = idToReg dflags bndr
    cg_alt :: CgStgAlt -> FCode (AltCon, CmmAGraphScoped)
    cg_alt (con, bndrs, rhs)
      = getCodeScoped $
        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
             -- alt binders are always non-void,
             -- see Note [Post-unarisation invariants] in UnariseStg
           ; _ <- cgExpr rhs
           ; return con }
  forkAlts (map cg_alt alts)

maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (NoGcInAlts,_)  code = code
maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
  altHeapCheck regs code
maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
  altHeapCheckReturnsTo regs lret off code

-----------------------------------------------------------------------------
--      Tail calls
-----------------------------------------------------------------------------

cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
cgConApp con stg_args
  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   -- Boxed constructors; allocate and return
  = ASSERT2( stg_args `lengthIs` countConRepArgs con, ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args )
    do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
                                    currentCCS con (assertNonVoidStgArgs stg_args)
                -- con args are always non-void,
                -- see Note [Post-unarisation invariants] in UnariseStg
                -- The first "con" says that the name bound to this
                -- closure is "con", which is a bit of a fudge, but
                -- it only affects profiling (hence the False)

        ; emit =<< fcode_init
        ; tickyReturnNewCon (length stg_args)
        ; emitReturn [idInfoToAmode idinfo] }

cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id args = do
    dflags         <- getDynFlags
    fun_info       <- getCgIdInfo fun_id
    self_loop_info <- getSelfLoop
    let fun_arg     = StgVarArg fun_id
        fun_name    = idName fun_id
        fun         = idInfoToAmode fun_info
        lf_info     = cg_lf fun_info
        n_args      = length args
        v_args      = length $ filter (isVoidTy . stgArgType) args
        node_points dflags = nodeMustPointToIt dflags lf_info
    case getCallMethod dflags fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of
        -- A value in WHNF, so we can just return it.
        ReturnIt
          | isVoidTy (idType fun_id) -> emitReturn []
          | otherwise                -> emitReturn [fun]
          -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> ASSERT( null args )  -- Discarding arguments
                   emitEnter fun

        SlowCall -> do  -- A slow function call via the RTS apply routines
          { tickySlowCall lf_info args
          ; emitComment $ mkFastString "slowCall"
          ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
          { tickyDirectCall arity args
          ; if node_points dflags
               then directCall NativeNodeCall   lbl arity (fun_arg:args)
               else directCall NativeDirectCall lbl arity args }

        -- Let-no-escape call or self-recursive tail-call
        JumpToIt blk_id lne_regs -> do
          { adjustHpBackwards  -- always do this before a tail-call
          ; cmm_args <- getNonVoidArgAmodes args
          ; emitMultiAssign lne_regs cmm_args
          ; emit (mkBranch blk_id)
          ; return AssignedDirectly }

-- Note [Self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Self-recursive tail calls can be optimized into a local jump in the same
-- way as let-no-escape bindings (see Note [What is a non-escaping let] in
-- stgSyn/CoreToStg.hs). Consider this:
--
-- foo.info:
--     a = R1  // calling convention
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: R1 = x
--     R2 = y
--     call foo(R1,R2)
--
-- Instead of putting x and y into registers (or other locations required by
-- the calling convention) and performing a call, we can put them into local
-- variables a and b and perform a jump to L1:
--
-- foo.info:
--     a = R1
--     b = R2
--     goto L1
-- L1: ...
--     ...
--     ...
-- L2: a = x
--     b = y
--     goto L1
--
-- This can be done only when a function is calling itself in a tail position,
-- and only if the call passes a number of parameters equal to the function's
-- arity. Note that this cannot be performed if a function calls itself with a
-- continuation.
--
-- This in fact implements an optimization known as "loopification". It was
-- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
-- by Krzysztof Woś, though we use a different approach. Krzysztof performed
-- his optimization at the Cmm level, whereas we perform ours during code
-- generation (the Stg-to-Cmm pass), essentially making sure that the
-- optimized Cmm code is generated in the first place.
--
-- Implementation is spread across a couple of places in the code:
--
-- * The FCode monad stores additional information in its reader environment
--   (the cgd_self_loop field). This information tells us which function can
--   tail call itself in an optimized way (it is the function currently being
--   compiled), what the label of its loop header is (L1 in the example
--   above), and in which local registers we should place arguments before
--   making a call (these would be a and b in the example above).
--
-- * Whenever we are compiling a function, we set that information to reflect
--   the fact that the function currently being compiled can be jumped to,
--   instead of called. This is done in closureCodeBody in StgCmmBind.
--
-- * We also have to emit a label to which we will be jumping. We make sure
--   that the label is placed after a stack check but before the heap
--   check. The reason is that making a recursive tail-call does not increase
--   the stack, so we only need to check once. But it may grow the heap, so we
--   have to repeat the heap check in every self-call. This is done in
--   do_checks in StgCmmHeap.
--
-- * When we begin compilation of another closure we remove the additional
--   information from the environment. This is done by forkClosureBody
--   in StgCmmMonad. Other functions that duplicate the environment -
--   forkLneBody, forkAlts, codeOnly - duplicate that information. In other
--   words, we only need to clean the environment of the self-loop information
--   when compiling the right hand side of a closure (binding).
--
-- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
--   of call will be generated. getCallMethod decides to generate a self
--   recursive tail call when (a) the environment stores information about a
--   possible self tail-call; (b) that tail call is to the function currently
--   being compiled; (c) the number of non-void arguments passed is equal to
--   the function's arity; and (d) loopification is turned on via the
--   -floopification command-line option.
--
-- * The command line option to turn loopification on and off is implemented
--   in DynFlags.
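--
-- For example, a function like this (an illustrative sketch, not code
-- from GHC itself):
--
--     sumTo :: Int# -> Int# -> Int#
--     sumTo acc 0# = acc
--     sumTo acc n  = sumTo (acc +# n) (n -# 1#)
--
-- makes a saturated self tail-call in its second equation, so with
-- -floopification the recursive call compiles to assignments to the
-- argument registers followed by a jump back to the loop header,
-- rather than a full function call.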
--
--
-- Note [Void arguments in self-recursive tail calls]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- State# tokens can get in the way of the loopification optimization as seen
-- in #11372. Consider this:
--
-- foo :: [a]
--     -> (a -> State# s -> (# State# s, Bool #))
--     -> State# s
--     -> (# State# s, Maybe a #)
-- foo [] f s = (# s, Nothing #)
-- foo (x:xs) f s = case f x s of
--      (# s', b #) -> case b of
--                       True  -> (# s', Just x #)
--                       False -> foo xs f s'
--
-- We would like to compile the call to foo as a local jump instead of a call
-- (see Note [Self-recursive tail calls]). However, the generated function has
-- an arity of 2 while we apply it to 3 arguments, one of them being of void
-- type. Thus, we mustn't count arguments of void type when checking whether
-- we can turn a call into a self-recursive jump.
--

emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { dflags <- getDynFlags
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return -> do
        { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
        ; emit $ mkJump dflags NativeNodeCall entry
                        [cmmUntag dflags fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lret else goto Lcall
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
       { lret <- newBlockId
       ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
       ; lcall <- newBlockId
       ; updfr_off <- getUpdFrameOff
       ; let area = Young lret
       ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
                                          [fun] updfr_off []
         -- refer to fun via nodeReg after the copyout, to avoid having
         -- both live simultaneously; this sometimes enables fun to be
         -- inlined in the RHS of the R1 assignment.
       ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
             the_call = toCall entry (Just lret) updfr_off off outArgs regs
       ; tscope <- getTickScope
       ; emit $
           copyout <*>
           mkCbranch (cmmIsTagged dflags (CmmReg nodeReg))
                     lret lcall Nothing <*>
           outOfLine lcall (the_call,tscope) <*>
           mkLabel lret tscope <*>
           copyin
       ; return (ReturnedTo lret off)
       }
  }

------------------------------------------------------------------------
--              Ticks
------------------------------------------------------------------------

-- | Generate Cmm code for a tick. Depending on the type of Tickish,
-- this will either generate actual Cmm instrumentation code, or
-- simply pass on the annotation as a @CmmTickish@.
cgTick :: Tickish Id -> FCode ()
cgTick tick
  = do { dflags <- getDynFlags
       ; case tick of
           ProfNote   cc t p -> emitSetCCC cc t p
           HpcTick    m n    -> emit (mkTickBox dflags m n)
           SourceNote s n    -> emitTick $ SourceNote s n
           _other            -> return () -- ignore
       }