00c6068fb01cfb0bfcb44148dd3b2004f3976ea9
[ghc.git] / compiler / codeGen / StgCmmExpr.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C-- code generation: expressions
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmExpr ( cgExpr ) where
10
11 #define FAST_STRING_NOT_NEEDED
12 #include "HsVersions.h"
13
14 import {-# SOURCE #-} StgCmmBind ( cgBind )
15
16 import StgCmmMonad
17 import StgCmmHeap
18 import StgCmmEnv
19 import StgCmmCon
20 import StgCmmProf
21 import StgCmmLayout
22 import StgCmmPrim
23 import StgCmmHpc
24 import StgCmmTicky
25 import StgCmmUtils
26 import StgCmmClosure
27
28 import StgSyn
29
30 import MkGraph
31 import BlockId
32 import Cmm
33 import CmmInfo
34 import CoreSyn
35 import DataCon
36 import ForeignCall
37 import Id
38 import PrimOp
39 import TyCon
40 import Type
41 import CostCentre ( CostCentreStack, currentCCS )
42 import Maybes
43 import Util
44 import FastString
45 import Outputable
46
47 import Control.Monad (when,void)
48
49 ------------------------------------------------------------------------
50 -- cgExpr: the main function
51 ------------------------------------------------------------------------
52
-- | Generate code for a single STG expression, dispatching on its shape.
-- Returns how the result is delivered to the continuation (the 'ReturnKind').
cgExpr  :: StgExpr -> FCode ReturnKind

cgExpr (StgApp fun args)     = cgIdApp fun args

{- seq# a s ==> a -}
-- seq# just evaluates its first argument; the state token generates no code.
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

cgExpr (StgOpApp op args ty) = cgOpApp op args ty
cgExpr (StgConApp con args)  = cgConApp con args
-- Profiling: set the current cost centre, then compile the body.
cgExpr (StgSCC cc tick push expr) = do { emitSetCCC cc tick push; cgExpr expr }
-- HPC coverage: emit a tick box before compiling the body.
cgExpr (StgTick m n expr) = do dflags <- getDynFlags
                               emit (mkTickBox dflags m n)
                               cgExpr expr
cgExpr (StgLit lit)       = do cmm_lit <- cgLit lit
                               emitReturn [CmmLit cmm_lit]

cgExpr (StgLet binds expr)             = do { cgBind binds; cgExpr expr }
-- Let-no-escape: compile the bindings as join points; join_id labels the
-- place where control continues after the body (see cgLneBinds below).
cgExpr (StgLetNoEscape _ _ binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr _live_vars _save_vars bndr _srt alt_type alts) =
  cgCase expr bndr alt_type alts

-- StgLam is not expected to reach the code generator, hence the panic.
cgExpr (StgLam {}) = panic "cgExpr: StgLam"
83
84 ------------------------------------------------------------------------
85 -- Let no escape
86 ------------------------------------------------------------------------
87
88 {- Generating code for a let-no-escape binding, aka join point is very
89 very similar to what we do for a case expression. The duality is
90 between
91 let-no-escape x = b
92 in e
93 and
94 case e of ... -> b
95
96 That is, the RHS of 'x' (ie 'b') will execute *later*, just like
97 the alternative of the case; it needs to be compiled in an environment
98 in which all volatile bindings are forgotten, and the free vars are
99 bound only to stable things like stack locations.. The 'e' part will
100 execute *next*, just like the scrutinee of a case. -}
101
102 -------------------------
-- | Compile the bindings of a let-no-escape as join points whose code,
-- when it finishes, branches to @join_id@.
cgLneBinds :: BlockId -> StgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs) = do
    cc_slot <- saveCurrentCostCentre
        -- See Note [Saving the current cost centre]
    (bndr_info, emit_rhs) <- cgLetNoEscapeRhs join_id cc_slot bndr rhs
    emit_rhs
    addBindC (cg_id bndr_info) bndr_info

cgLneBinds join_id (StgRec pairs) = do
    cc_slot <- saveCurrentCostCentre
    results <- sequence (unzipWith (cgLetNoEscapeRhs join_id cc_slot) pairs)
    let (bndr_infos, rhs_codes) = unzip results
    -- Bind all the binders before emitting any RHS code: the RHSs of a
    -- recursive group may refer to each other.
    addBindsC bndr_infos
    sequence_ rhs_codes
118
119 -------------------------
-------------------------
-- | Compile one let-no-escape right-hand side.  Returns the binder's
-- 'CgIdInfo' (marking it as a join point) together with a deferred action
-- that emits the join point's code out of line, terminated by a branch
-- back to @join_id@.
cgLetNoEscapeRhs
        :: BlockId          -- join point for successor of let-no-escape
        -> Maybe LocalReg   -- Saved cost centre
        -> Id
        -> StgRhs
        -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs = do
    (bndr_info, body_fcode) <- cgLetNoEscapeRhsBody local_cc bndr rhs
    let (blk_id, _) = expectJust "cgLetNoEscapeRhs" (maybeLetNoEscape bndr_info)
        emit_code = do body <- getCode body_fcode
                       emitOutOfLine blk_id (body <*> mkBranch join_id)
    return (bndr_info, emit_code)
134
-- | Dispatch on the shape of a let-no-escape RHS: closures use their own
-- args and body; constructor RHSs are compiled as if they were a closure
-- whose body is the constructor application.
cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- Saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
  = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args)
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor. It's easy; just behave as if it
        -- was an StgRhsClosure with a ConApp inside!
148
149 -------------------------
cgLetNoEscapeClosure
        :: Id                   -- binder
        -> Maybe LocalReg       -- Slot for saved current cost centre
        -> CostCentreStack      -- XXX: *** NOT USED *** why not?
        -> [NonVoid Id]         -- Args (as in \ args -> body)
        -> StgExpr              -- Body (as in above)
        -> FCode (CgIdInfo, FCode ())

-- The returned CgIdInfo marks the binder as a join point; the returned
-- FCode action, when run (in a forked proc), restores the saved cost
-- centre, binds the argument registers, and compiles the body under a
-- no-escape heap check.
cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do dflags <- getDynFlags
       return ( lneIdInfo dflags bndr args
              , code )
  where
   code = forkProc $ do {
            ; withNewTickyCounterLNE (idName bndr) args $ do
            ; restoreCurrentCostCentre cc_slot
            ; arg_regs <- bindArgsToRegs args
            ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
168
169
170 ------------------------------------------------------------------------
171 -- Case expressions
172 ------------------------------------------------------------------------
173
174 {- Note [Compiling case expressions]
175 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176 It is quite interesting to decide whether to put a heap-check at the
177 start of each alternative. Of course we certainly have to do so if
178 the case forces an evaluation, or if there is a primitive op which can
179 trigger GC.
180
181 A more interesting situation is this (a Plan-B situation)
182
183 !P!;
184 ...P...
185 case x# of
186 0# -> !Q!; ...Q...
187 default -> !R!; ...R...
188
189 where !x! indicates a possible heap-check point. The heap checks
190 in the alternatives *can* be omitted, in which case the topmost
191 heapcheck will take their worst case into account.
192
193 In favour of omitting !Q!, !R!:
194
195 - *May* save a heap overflow test,
196 if ...P... allocates anything.
197
198 - We can use relative addressing from a single Hp to
199 get at all the closures so allocated.
200
201 - No need to save volatile vars etc across heap checks
202 in !Q!, !R!
203
204 Against omitting !Q!, !R!
205
206 - May put a heap-check into the inner loop. Suppose
207 the main loop is P -> R -> P -> R...
208 Q is the loop exit, and only it does allocation.
209 This only hurts us if P does no allocation. If P allocates,
210 then there is a heap check in the inner loop anyway.
211
212 - May do more allocation than reqd. This sometimes bites us
213 badly. For example, nfib (ha!) allocates about 30\% more space if the
214 worst-casing is done, because many many calls to nfib are leaf calls
215 which don't need to allocate anything.
216
217 We can un-allocate, but that costs an instruction
218
219 Neither problem hurts us if there is only one alternative.
220
221 Suppose the inner loop is P->R->P->R etc. Then here is
222 how many heap checks we get in the *inner loop* under various
223 conditions
224
225          Alloc   Heap check in branches (!Q!, !R!)?
226 P Q R yes no (absorb to !P!)
227 --------------------------------------
228 n n n 0 0
229 n y n 0 1
230 n . y 1 1
231 y . y 2 1
232 y . n 1 1
233
234 Best choices: absorb heap checks from Q and R into !P! iff
235 a) P itself does some allocation
236 or
237 b) P does allocation, or there is exactly one alternative
238
239 We adopt (b) because that is more likely to put the heap check at the
240 entry to a function, when not many things are live. After a bunch of
241 single-branch cases, we may have lots of things live
242
243 Hence: two basic plans for
244
245 case e of r { alts }
246
247 ------ Plan A: the general case ---------
248
249 ...save current cost centre...
250
251 ...code for e,
252 with sequel (SetLocals r)
253
254 ...restore current cost centre...
255 ...code for alts...
256 ...alts do their own heap checks
257
258 ------ Plan B: special case when ---------
259 (i) e does not allocate or call GC
260 (ii) either upstream code performs allocation
261 or there is just one alternative
262
263 Then heap allocation in the (single) case branch
264 is absorbed by the upstream check.
265 Very common example: primops on unboxed values
266
267 ...code for e,
268 with sequel (SetLocals r)...
269
270 ...code for alts...
271 ...no heap check...
272 -}
273
274
275
276 -------------------------------------
-- | Where the heap check for a case's alternatives should go.
-- See Note [Compiling case expressions].
data GcPlan
  = GcInAlts            -- Put a GC check at the start the case alternatives,
        [LocalReg]      -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check
283
284 -------------------------------------
-- | Generate code for a case expression.
-- See Note [Compiling case expressions] for the heap-check strategy.
cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind

-- Special case: scrutinising a primop application at an enumeration type
-- (typically a comparison).  See Note [case on bool].
cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
  | isEnumerationTyCon tycon -- Note [case on bool]
  = do { tag_expr <- do_enum_primop op args

       -- If the binder is not dead, convert the tag to a constructor
       -- and assign it.
       ; when (not (isDeadBinder bndr)) $ do
            { dflags <- getDynFlags
            ; tmp_reg <- bindArgToReg (NonVoid bndr)
            ; emitAssign (CmmLocal tmp_reg)
                         (tagToClosure dflags tycon tag_expr) }

       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                              (NonVoid bndr) alts
       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
       ; return AssignedDirectly
       }
  where
    -- Produce a Cmm expression for the scrutinee's tag.  A direct
    -- tagToEnum# needs no code at all; any other primop is evaluated
    -- into a fresh word-sized temporary.
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
    do_enum_primop TagToEnumOp [arg]  -- No code!
      = getArgAmode (NonVoid arg)
    do_enum_primop primop args
      = do dflags <- getDynFlags
           tmp <- newTemp (bWord dflags)
           cgPrimOp [tmp] primop args
           return (CmmReg (CmmLocal tmp))
313
314 {-
315 Note [case on bool]
316 ~~~~~~~~~~~~~~~~~~~
317 This special case handles code like
318
319 case a <# b of
320 True ->
321 False ->
322
323 --> case tagToEnum# (a <$# b) of
324 True -> .. ; False -> ...
325
326 --> case (a <$# b) of r ->
327 case tagToEnum# r of
328 True -> .. ; False -> ...
329
330 If we let the ordinary case code handle it, we'll get something like
331
332 tmp1 = a < b
333 tmp2 = Bool_closure_tbl[tmp1]
334 if (tmp2 & 7 != 0) then ... // normal tagged case
335
336 but this junk won't optimise away. What we really want is just an
337 inline comparison:
338
339 if (a < b) then ...
340
341 So we add a special case to generate
342
343 tmp1 = a < b
344 if (tmp1 == 0) then ...
345
346 and later optimisations will further improve this.
347
348 Now that #6135 has been resolved it should be possible to remove that
349 special case. The idea behind this special case and pre-6135 implementation
350 of Bool-returning primops was that tagToEnum# was added implicitly in the
351 codegen and then optimized away. Now the call to tagToEnum# is explicit
352 in the source code, which allows to optimize it away at the earlier stages
353 of compilation (i.e. at the Core level).
354 -}
355
356
-- Note [ticket #3132]: we might be looking at a case of a lifted Id
-- that was cast to an unlifted type.  The Id will always be bottom,
-- but we don't want the code generator to fall over here.  If we
-- just emit an assignment here, the assignment will be
-- type-incorrect Cmm.  Hence, we emit the usual enter/return code,
-- (and because bottom must be untagged, it will be entered and the
-- program will crash).
-- The Sequel is a type-correct assignment, albeit bogus.
-- The (dead) continuation loops; it would be better to invoke some kind
-- of panic function here.
--
-- However, we also want to allow an assignment to be generated
-- in the case when the types are compatible, because this allows
-- some slightly-dodgy but occasionally-useful casts to be used,
-- such as in RtClosureInspect where we cast an HValue to a MutVar#
-- so we can print out the contents of the MutVar#.  If we generate
-- code that enters the HValue, then we'll get a runtime panic, because
-- the HValue really is a MutVar#.  The types are compatible though,
-- so we can just generate an assignment.
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnLiftedType (idType v)
  || reps_compatible
  = -- assignment suffices for unlifted types
    do { dflags <- getDynFlags
       ; when (not reps_compatible) $
           panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
       ; v_info <- getCgIdInfo v
       -- Copy the scrutinee's value straight into the case binder's
       -- register: no evaluation is needed for an unlifted value.
       ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr))) (idInfoToAmode v_info)
       ; _ <- bindArgsToRegs [NonVoid bndr]
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = idPrimRep v == idPrimRep bndr
389
-- Remaining (StgApp v []) PrimAlt cases: the representations are known
-- to be incompatible (the guard above failed), so the program is ill-typed.
-- Generate code that evaluates the scrutinee and then loops forever;
-- see Note [ticket #3132] above.
cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = -- fail at run-time, not compile-time
    do { dflags <- getDynFlags
       ; mb_cc <- maybeSaveCostCentre True
       ; _ <- withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       -- A self-branching label: an infinite loop rather than falling
       -- through into ill-typed code.
       ; l <- newLabelC
       ; emitLabel l
       ; emit (mkBranch l)
       ; return AssignedDirectly
       }
402 {-
403 case seq# a s of v
404 (# s', a' #) -> e
405
406 ==>
407
408 case a of v
409 (# s', a' #) -> e
410
411 (taking advantage of the fact that the return convention for (# State#, a #)
412 is the same as the return convention for just 'a')
413 -}
414
-- seq# in a scrutinee position: compile as a case on the plain variable,
-- exploiting that (# State#, a #) and plain 'a' share a return convention.
cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- handle seq#, same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts
418
-- The general case: decide between Plan A (GC check in the alternatives)
-- and Plan B (absorb it upstream), compile the scrutinee with a sequel
-- that assigns the result registers, then compile the alternatives.
-- See Note [Compiling case expressions].
cgCase scrut bndr alt_type alts
  = -- the general case
    do { dflags <- getDynFlags
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg dflags) ret_bndrs
             simple_scrut = isSimpleScrut scrut alt_type
             -- GC in the alternatives unless the scrutinee is simple AND
             -- (there is one alternative, or upstream already allocates).
             do_gc  | not simple_scrut = True
                    | isSingleton alts = False
                    | up_hp_usg > 0    = False
                    | otherwise        = True
               -- cf Note [Compiling case expressions]
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       -- if do_gc then our sequel will be ReturnTo
       --   - generate code for the sequel now
       --   - pass info about the sequel to cgAlts for use in the heap check
       -- else sequel will be AssignTo

       ; ret_kind <- withSequel (AssignTo alt_regs False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }
445
446
447 -----------------
-- | Save the current cost centre, but only when the scrutinee is not
-- simple (a simple scrutinee cannot change the cost centre).
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut =
    if simple_scrut
       then return Nothing
       else saveCurrentCostCentre
452
453
454 -----------------
isSimpleScrut :: StgExpr -> AltType -> Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
-- when it does, you'll deeply mess up allocation
isSimpleScrut scrut alt_ty =
  case (scrut, alt_ty) of
    (StgOpApp op _ _, _)     -> isSimpleOp op
    (StgLit _,        _)     -> True   -- case 1# of { 0# -> ..; ... }
    (StgApp _ [], PrimAlt _) -> True   -- case x# of { 0# -> ..; ... }
    _                        -> False
464
isSimpleOp :: StgOp -> Bool
-- True iff the op cannot block or allocate
isSimpleOp op =
  case op of
    StgFCallOp (CCall (CCallSpec _ _ safety)) _ -> not (playSafe safety)
    StgPrimOp prim_op                           -> not (primOpOutOfLine prim_op)
    StgPrimCallOp _                             -> False
470
471 -----------------
chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
-- These are the binders of a case that are assigned
-- by the evaluation of the scrutinee
-- Only non-void ones come back
chooseReturnBndrs bndr (PrimAlt _) _alts
  = nonVoidIds [bndr]

chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)]
  = nonVoidIds ids      -- 'bndr' is not assigned!

chooseReturnBndrs bndr (AlgAlt _) _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs bndr PolyAlt _alts
  = nonVoidIds [bndr]   -- Only 'bndr' is assigned

chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
        -- UbxTupAlt has only one alternative
490
491 -------------------------------------
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
       -> FCode ReturnKind
-- At this point the result of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them

-- Literal alternatives: compile each RHS, then dispatch with a literal
-- switch on the binder's register.
cgAlts gc_plan bndr (PrimAlt _) alts
  = do  { dflags <- getDynFlags

        ; tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let bndr_reg = CmmLocal (idToReg dflags bndr)
              (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

              tagged_cmms' = [(lit,code)
                             | (LitAlt lit, code) <- tagged_cmms]
        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
        ; return AssignedDirectly }

-- Algebraic alternatives: switch on the constructor tag, taken either
-- from the pointer's tag bits (small families) or from the info table.
cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { dflags <- getDynFlags

        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

        ; let fam_sz   = tyConFamilySize tycon
              bndr_reg = CmmLocal (idToReg dflags bndr)

                    -- Is the constructor tag in the node reg?
        ; if isSmallFamily dflags fam_sz
          then do
                let     -- Yes, bndr_reg has constr. tag in ls bits
                   tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
                   branches' = [(tag+1,branch) | (tag,branch) <- branches]
                        -- +1 because tag bits run 1..fam_sz, while
                        -- dataConTagZ is zero-based
                emitSwitch tag_expr branches' mb_deflt 1 fam_sz
                return AssignedDirectly

           else -- No, get tag from info table
                do dflags <- getDynFlags
                   let -- Note that ptr _always_ has tag 1
                       -- when the family size is big enough
                       untagged_ptr = cmmRegOffB bndr_reg (-1)
                       tag_expr = getConstrTag dflags (untagged_ptr)
                   emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
                   return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative
545
546
547 -- Note [alg-alt heap check]
548 --
549 -- In an algebraic case with more than one alternative, we will have
550 -- code like
551 --
552 -- L0:
553 -- x = R1
554 -- goto L1
555 -- L1:
556 -- if (x & 7 >= 2) then goto L2 else goto L3
557 -- L2:
558 -- Hp = Hp + 16
559 -- if (Hp > HpLim) then goto L4
560 -- ...
561 -- L4:
562 -- call gc() returns to L5
563 -- L5:
564 -- x = R1
565 -- goto L1
566
567 -------------------
-- | Compile the RHSs of an algebraic case, separating the optional
-- DEFAULT graph from the (zero-based constructor tag, graph) branches.
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
             -> FCode ( Maybe CmmAGraph
                      , [(ConTagZ, CmmAGraph)] )
cgAlgAltRhss gc_plan bndr alts = do
    alt_cmms <- cgAltRhss gc_plan bndr alts

    -- DEFAULT is always first, if present
    let deflt = case alt_cmms of
                  (DEFAULT, rhs) : _ -> Just rhs
                  _                  -> Nothing

        tagged_branches =
          [ (dataConTagZ con, cmm) | (DataAlt con, cmm) <- alt_cmms ]

    return (deflt, tagged_branches)
585
586
587 -------------------
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
          -> FCode [(AltCon, CmmAGraph)]
-- Compile every alternative's RHS to a Cmm graph, paired with its AltCon.
-- Each alternative gets its own (optional) heap check, and binds its
-- constructor arguments from base_reg before the RHS runs.
cgAltRhss gc_plan bndr alts = do
  dflags <- getDynFlags
  let
    base_reg = idToReg dflags bndr
    cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)
    cg_alt (con, bndrs, _uses, rhs)
      = getCodeR $
        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg bndrs
           ; _ <- cgExpr rhs
           ; return con }
  -- forkAlts compiles each alternative in its own forked code state.
  forkAlts (map cg_alt alts)
602
-- | Wrap an alternative's code in the heap check demanded by the GcPlan.
maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck plan code =
  case plan of
    (NoGcInAlts, _)                      -> code
    (GcInAlts regs, AssignedDirectly)    -> altHeapCheck regs code
    (GcInAlts regs, ReturnedTo lret off) -> altHeapCheckReturnsTo regs lret off code
609
610 -----------------------------------------------------------------------------
611 -- Tail calls
612 -----------------------------------------------------------------------------
613
cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
-- Saturated constructor application: unboxed tuples return their
-- components directly; boxed constructors are allocated and a pointer
-- to the new closure is returned.
cgConApp con stg_args
  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   --  Boxed constructors; allocate and return
  = ASSERT( stg_args `lengthIs` dataConRepRepArity con )
    do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
                                     currentCCS con stg_args
                -- The first "con" says that the name bound to this
                -- closure is "con", which is a bit of a fudge, but
                -- it only affects profiling (hence the False)

        ; emit =<< fcode_init
        ; emitReturn [idInfoToAmode idinfo] }
631
632
cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
-- Apply an Id to arguments: a void Id with no args returns nothing;
-- a let-no-escape Id becomes a jump to its join point; anything else
-- is a tail call.
cgIdApp fun_id [] | isVoidId fun_id = emitReturn []
cgIdApp fun_id args
  = do  { fun_info <- getCgIdInfo fun_id
        ; case maybeLetNoEscape fun_info of
            Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args
            Nothing -> cgTailCall (cg_id fun_info) fun_info args }
  -- NB. use (cg_id fun_info) instead of fun_id, because the former
  -- may be externalised for -split-objs.
  -- See StgCmm.maybeExternaliseId.
643
-- | Tail-call a let-no-escape join point: assign the argument registers
-- and branch straight to its block, discarding the current sequel.
cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ReturnKind
cgLneJump blk_id lne_regs args = do
    adjustHpBackwards        -- always do this before a tail-call
    arg_cmms <- getNonVoidArgAmodes args
    emitMultiAssign lne_regs arg_cmms
    emit (mkBranch blk_id)
    return AssignedDirectly
651
cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ReturnKind
-- Tail-call an ordinary (non-join-point) function: getCallMethod decides,
-- from the function's lambda-form info and the argument count, how the
-- call should be compiled.
cgTailCall fun_id fun_info args = do
    dflags <- getDynFlags
    case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of

            -- A value in WHNF, so we can just return it.
        ReturnIt -> emitReturn [fun]    -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> ASSERT( null args )  -- Discarding arguments
                   emitEnter fun

        SlowCall -> do      -- A slow function call via the RTS apply routines
                { tickySlowCall lf_info args
                ; emitComment $ mkFastString "slowCall"
                ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
                { tickyDirectCall arity args
                ; if node_points dflags
                     -- When the closure pointer must be in the node
                     -- register, pass the function as an extra first arg.
                     then directCall NativeNodeCall   lbl arity (fun_arg:args)
                     else directCall NativeDirectCall lbl arity args }

        JumpToIt {} -> panic "cgTailCall"       -- ???

  where
    fun_arg     = StgVarArg fun_id
    fun_name    = idName            fun_id
    fun         = idInfoToAmode     fun_info
    lf_info     = cgIdInfoLF        fun_info
    node_points dflags = nodeMustPointToIt dflags lf_info
683
684
-- | Enter a closure to evaluate it.  How we enter depends on the sequel:
-- for a Return sequel we jump to the closure's entry code; for an
-- AssignTo sequel we emit a tag test so that already-evaluated values
-- skip the call entirely.
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { dflags <- getDynFlags
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return _ -> do
        { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
        ; emit $ mkJump dflags NativeNodeCall entry
                        [cmmUntag dflags fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lcall else goto Lret
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
       { lret <- newLabelC
       ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
       ; lcall <- newLabelC
       ; updfr_off <- getUpdFrameOff
       ; let area = Young lret
       ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
                                          [fun] updfr_off []
         -- refer to fun via nodeReg after the copyout, to avoid having
         -- both live simultaneously; this sometimes enables fun to be
         -- inlined in the RHS of the R1 assignment.
       ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
             the_call = toCall entry (Just lret) updfr_off off outArgs regs
       ; emit $
           copyout <*>
           mkCbranch (cmmIsTagged dflags (CmmReg nodeReg)) lret lcall <*>
           outOfLine lcall the_call <*>
           mkLabel lret <*>
           copyin
       ; return (ReturnedTo lret off)
       }
  }
753 }