Comparison primops return Int# (Fixes #6135)
[ghc.git] / compiler / codeGen / StgCmmExpr.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Stg to C-- code generation: expressions
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmExpr ( cgExpr ) where
10
11 #define FAST_STRING_NOT_NEEDED
12 #include "HsVersions.h"
13
14 import {-# SOURCE #-} StgCmmBind ( cgBind )
15
16 import StgCmmMonad
17 import StgCmmHeap
18 import StgCmmEnv
19 import StgCmmCon
20 import StgCmmProf
21 import StgCmmLayout
22 import StgCmmPrim
23 import StgCmmHpc
24 import StgCmmTicky
25 import StgCmmUtils
26 import StgCmmClosure
27
28 import StgSyn
29
30 import MkGraph
31 import BlockId
32 import Cmm
33 import CmmInfo
34 import CoreSyn
35 import DataCon
36 import ForeignCall
37 import Id
38 import PrimOp
39 import TyCon
40 import Type
41 import CostCentre ( CostCentreStack, currentCCS )
42 import Maybes
43 import Util
44 import FastString
45 import Outputable
46
47 import Control.Monad (when,void)
48
49 ------------------------------------------------------------------------
50 -- cgExpr: the main function
51 ------------------------------------------------------------------------
52
cgExpr :: StgExpr -> FCode ReturnKind
-- Translate one STG expression to Cmm, dispatching on its shape.
-- The ReturnKind says how the result reaches the continuation.

cgExpr (StgApp fun args) = cgIdApp fun args

{- seq# a s ==> a -}
-- seq# evaluates its first argument; generate the same code as an
-- application of 'a' to no arguments (the state token is dropped).
cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
  cgIdApp a []

cgExpr (StgOpApp op args ty)      = cgOpApp op args ty
cgExpr (StgConApp con args)       = cgConApp con args
cgExpr (StgSCC cc tick push expr) = do { emitSetCCC cc tick push; cgExpr expr }
cgExpr (StgTick m n expr) = do dflags <- getDynFlags
                               emit (mkTickBox dflags m n)
                               cgExpr expr
cgExpr (StgLit lit)       = do cmm_lit <- cgLit lit
                               emitReturn [CmmLit cmm_lit]

cgExpr (StgLet binds expr) = do { cgBind binds; cgExpr expr }

-- Let-no-escape (join point): compile the bindings so their bodies
-- branch to 'join_id' when done, then compile the body expression and
-- place the join label after it.  See the comment on cgLneBinds below.
cgExpr (StgLetNoEscape _ _ binds expr) =
  do { u <- newUnique
     ; let join_id = mkBlockId u
     ; cgLneBinds join_id binds
     ; r <- cgExpr expr
     ; emitLabel join_id
     ; return r }

cgExpr (StgCase expr _live_vars _save_vars bndr _srt alt_type alts) =
  cgCase expr bndr alt_type alts

cgExpr (StgLam {}) = panic "cgExpr: StgLam"  -- not expected at this stage
83
84 ------------------------------------------------------------------------
85 -- Let no escape
86 ------------------------------------------------------------------------
87
88 {- Generating code for a let-no-escape binding, aka join point is very
89 very similar to what we do for a case expression. The duality is
90 between
91 let-no-escape x = b
92 in e
93 and
94 case e of ... -> b
95
96 That is, the RHS of 'x' (ie 'b') will execute *later*, just like
97 the alternative of the case; it needs to be compiled in an environment
98 in which all volatile bindings are forgotten, and the free vars are
99 bound only to stable things like stack locations.. The 'e' part will
100 execute *next*, just like the scrutinee of a case. -}
101
-------------------------
-- | Compile the bindings of a let-no-escape.  Each RHS is emitted
-- out of line (via the FCode action returned by cgLetNoEscapeRhs),
-- and the binder is added to the environment so that applications of
-- it become jumps to the join point.
cgLneBinds :: BlockId -> StgBinding -> FCode ()
cgLneBinds join_id (StgNonRec bndr rhs)
  = do  { local_cc <- saveCurrentCostCentre
                -- See Note [Saving the current cost centre]
        ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
        ; fcode
        ; addBindC (cg_id info) info }

cgLneBinds join_id (StgRec pairs)
  = do  { local_cc <- saveCurrentCostCentre
        -- For recursive bindings, all the binders must be in scope
        -- before any of the RHS code is emitted; hence bind first,
        -- then run the deferred code actions.
        ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
        ; let (infos, fcodes) = unzip r
        ; addBindsC infos
        ; sequence_ fcodes
        }
118
-------------------------
cgLetNoEscapeRhs
    :: BlockId          -- ^ join point for successor of let-no-escape
    -> Maybe LocalReg   -- ^ saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeRhs join_id local_cc bndr rhs =
  do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
     -- 'info' is built with lneIdInfo (see cgLetNoEscapeClosure), so
     -- maybeLetNoEscape is necessarily a Just here.
     ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
     -- Emit the RHS out of line, with a branch to the join point
     -- appended so control falls through to the successor of the
     -- whole let-no-escape.
     ; let code = do { body <- getCode rhs_code
                     ; emitOutOfLine bid (body <*> mkBranch join_id) }
     ; return (info, code)
     }
134
-- | Dispatch on the RHS form of a let-no-escape binding.
cgLetNoEscapeRhsBody
    :: Maybe LocalReg   -- ^ saved cost centre
    -> Id
    -> StgRhs
    -> FCode (CgIdInfo, FCode ())
cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure cc _bi _ _upd _ args body)
  = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
  = cgLetNoEscapeClosure bndr local_cc cc [] (StgConApp con args)
        -- For a constructor RHS we want to generate a single chunk of
        -- code which can be jumped to from many places, which will
        -- return the constructor. It's easy; just behave as if it
        -- was an StgRhsClosure with a ConApp inside!
148
-------------------------
cgLetNoEscapeClosure
    :: Id                  -- ^ binder
    -> Maybe LocalReg      -- ^ slot for saved current cost centre
    -> CostCentreStack     -- XXX: *** NOT USED *** why not?
    -> [NonVoid Id]        -- ^ args (as in \ args -> body)
    -> StgExpr             -- ^ body (as in above)
    -> FCode (CgIdInfo, FCode ())

cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
  = do dflags <- getDynFlags
       return ( lneIdInfo dflags bndr args
              , code )
  where
    -- The RHS is compiled in a forked context: restore the cost centre
    -- saved at the binding site, bind the argument registers, then run
    -- the body under a no-escape heap check.
    code = forkProc $ do {
             ; withNewTickyCounterLNE (idName bndr) args $ do
             ; restoreCurrentCostCentre cc_slot
             ; arg_regs <- bindArgsToRegs args
             ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
168
169
170 ------------------------------------------------------------------------
171 -- Case expressions
172 ------------------------------------------------------------------------
173
174 {- Note [Compiling case expressions]
175 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176 It is quite interesting to decide whether to put a heap-check at the
177 start of each alternative. Of course we certainly have to do so if
178 the case forces an evaluation, or if there is a primitive op which can
179 trigger GC.
180
181 A more interesting situation is this (a Plan-B situation)
182
183 !P!;
184 ...P...
185 case x# of
186 0# -> !Q!; ...Q...
187 default -> !R!; ...R...
188
189 where !x! indicates a possible heap-check point. The heap checks
190 in the alternatives *can* be omitted, in which case the topmost
191 heapcheck will take their worst case into account.
192
193 In favour of omitting !Q!, !R!:
194
195 - *May* save a heap overflow test,
196 if ...P... allocates anything.
197
198 - We can use relative addressing from a single Hp to
199 get at all the closures so allocated.
200
201 - No need to save volatile vars etc across heap checks
202 in !Q!, !R!
203
204 Against omitting !Q!, !R!
205
206 - May put a heap-check into the inner loop. Suppose
207 the main loop is P -> R -> P -> R...
208 Q is the loop exit, and only it does allocation.
209 This only hurts us if P does no allocation. If P allocates,
210 then there is a heap check in the inner loop anyway.
211
212 - May do more allocation than reqd. This sometimes bites us
213 badly. For example, nfib (ha!) allocates about 30\% more space if the
214 worst-casing is done, because many many calls to nfib are leaf calls
215 which don't need to allocate anything.
216
217 We can un-allocate, but that costs an instruction
218
219 Neither problem hurts us if there is only one alternative.
220
221 Suppose the inner loop is P->R->P->R etc. Then here is
222 how many heap checks we get in the *inner loop* under various
223 conditions
224
225          Alloc   Heap check in branches (!Q!, !R!)?
226 P Q R yes no (absorb to !P!)
227 --------------------------------------
228 n n n 0 0
229 n y n 0 1
230 n . y 1 1
231 y . y 2 1
232 y . n 1 1
233
234 Best choices: absorb heap checks from Q and R into !P! iff
235 a) P itself does some allocation
236 or
237 b) P does allocation, or there is exactly one alternative
238
239 We adopt (b) because that is more likely to put the heap check at the
240 entry to a function, when not many things are live. After a bunch of
241 single-branch cases, we may have lots of things live
242
243 Hence: two basic plans for
244
245 case e of r { alts }
246
247 ------ Plan A: the general case ---------
248
249 ...save current cost centre...
250
251 ...code for e,
252 with sequel (SetLocals r)
253
254 ...restore current cost centre...
255 ...code for alts...
256 ...alts do their own heap checks
257
258 ------ Plan B: special case when ---------
259 (i) e does not allocate or call GC
260 (ii) either upstream code performs allocation
261 or there is just one alternative
262
263 Then heap allocation in the (single) case branch
264 is absorbed by the upstream check.
265 Very common example: primops on unboxed values
266
267 ...code for e,
268 with sequel (SetLocals r)...
269
270 ...code for alts...
271 ...no heap check...
272 -}
273
274
275
-------------------------------------
-- | Where the heap check for the case alternatives goes; see
-- Note [Compiling case expressions].
data GcPlan
  = GcInAlts            -- Put a GC check at the start the case alternatives,
       [LocalReg]       -- which binds these registers
  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a
                        -- primitive op which does no GC.  Absorb the allocation
                        -- of the case alternative(s) into the upstream check
283
-------------------------------------
cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind

-- Special case: scrutinising a primop applied at an enumeration type.
-- The primop's result tag is switched on directly, avoiding a round
-- trip through a constructor closure; see Note [case on bool].
cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
  | isEnumerationTyCon tycon -- Note [case on bool]
  = do { tag_expr <- do_enum_primop op args

       -- If the binder is not dead, convert the tag to a constructor
       -- and assign it.
       ; when (not (isDeadBinder bndr)) $ do
            { dflags <- getDynFlags
            ; tmp_reg <- bindArgToReg (NonVoid bndr)
            ; emitAssign (CmmLocal tmp_reg)
                         (tagToClosure dflags tycon tag_expr) }

       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                              (NonVoid bndr) alts
       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
       ; return AssignedDirectly
       }
  where
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
    do_enum_primop TagToEnumOp [arg]  -- No code!
      = getArgAmode (NonVoid arg)
    do_enum_primop primop args
      = do dflags <- getDynFlags
           tmp <- newTemp (bWord dflags)
           cgPrimOp [tmp] primop args
           return (CmmReg (CmmLocal tmp))
313
314 {-
315 Note [case on bool]
316
317 This special case handles code like
318
319 case a <# b of
320 True ->
321 False ->
322
323 If we let the ordinary case code handle it, we'll get something like
324
325 tmp1 = a < b
326 tmp2 = Bool_closure_tbl[tmp1]
327 if (tmp2 & 7 != 0) then ... // normal tagged case
328
329 but this junk won't optimise away. What we really want is just an
330 inline comparison:
331
332 if (a < b) then ...
333
334 So we add a special case to generate
335
336 tmp1 = a < b
337 if (tmp1 == 0) then ...
338
339 and later optimisations will further improve this.
340
341 Now that #6135 has been resolved it should be possible to remove that
342 special case. The idea behind this special case and pre-6135 implementation
343 of Bool-returning primops was that tagToEnum# was added implicitly in the
344 codegen and then optimized away. Now the call to tagToEnum# is explicit
345 in the source code, which allows to optimize it away at the earlier stages
346 of compilation (i.e. at the Core level).
347 -}
348
349
350 -- Note [ticket #3132]: we might be looking at a case of a lifted Id
351 -- that was cast to an unlifted type. The Id will always be bottom,
352 -- but we don't want the code generator to fall over here. If we
353 -- just emit an assignment here, the assignment will be
354 -- type-incorrect Cmm. Hence, we emit the usual enter/return code,
355 -- (and because bottom must be untagged, it will be entered and the
356 -- program will crash).
357 -- The Sequel is a type-correct assignment, albeit bogus.
358 -- The (dead) continuation loops; it would be better to invoke some kind
359 -- of panic function here.
360 --
361 -- However, we also want to allow an assignment to be generated
362 -- in the case when the types are compatible, because this allows
363 -- some slightly-dodgy but occasionally-useful casts to be used,
364 -- such as in RtClosureInspect where we cast an HValue to a MutVar#
365 -- so we can print out the contents of the MutVar#. If we generate
366 -- code that enters the HValue, then we'll get a runtime panic, because
367 -- the HValue really is a MutVar#. The types are compatible though,
368 -- so we can just generate an assignment.
-- Unlifted (or rep-compatible; see Note [ticket #3132] above) variable
-- scrutinee at a PrimAlt: a plain assignment to the case binder suffices,
-- no evaluation is needed.
cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
  | isUnLiftedType (idType v)
  || reps_compatible
  = -- assignment suffices for unlifted types
    do { dflags <- getDynFlags
       ; when (not reps_compatible) $
           panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
       ; v_info <- getCgIdInfo v
       ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr))) (idInfoToAmode v_info)
       ; _ <- bindArgsToRegs [NonVoid bndr]
       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = idPrimRep v == idPrimRep bndr

-- Fallback: a lifted Id scrutinised at a PrimAlt whose reps do not
-- match (e.g. a bottoming value cast to an unlifted type).  Generate
-- the usual enter/return code so it fails at run-time, not
-- compile-time; the continuation after it is dead code.
cgCase scrut@(StgApp v []) _ (PrimAlt _) _
  = -- fail at run-time, not compile-time
    do { dflags <- getDynFlags
       ; mb_cc <- maybeSaveCostCentre True
       ; _ <- withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; emitComment $ mkFastString "should be unreachable code"
       ; l <- newLabelC
       ; emitLabel l
       ; emit (mkBranch l)      -- the (dead) continuation loops
       ; return AssignedDirectly
       }
395 {-
396 case seq# a s of v
397 (# s', a' #) -> e
398
399 ==>
400
401 case a of v
402 (# s', a' #) -> e
403
404 (taking advantage of the fact that the return convention for (# State#, a #)
405 is the same as the return convention for just 'a')
406 -}
407
cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
  = -- handle seq#, same return convention as vanilla 'a'.
    cgCase (StgApp a []) bndr alt_type alts

cgCase scrut bndr alt_type alts
  = -- the general case
    do { dflags <- getDynFlags
       ; up_hp_usg <- getVirtHp        -- Upstream heap usage
       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
             alt_regs  = map (idToReg dflags) ret_bndrs
             simple_scrut = isSimpleScrut scrut alt_type
             -- Decide between Plan A (GC checks in the alternatives)
             -- and Plan B (absorb them upstream); see
             -- Note [Compiling case expressions]
             do_gc  | not simple_scrut = True
                    | isSingleton alts = False
                    | up_hp_usg > 0    = False
                    | otherwise        = True
             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

       ; mb_cc <- maybeSaveCostCentre simple_scrut

       -- if do_gc then our sequel will be ReturnTo
       --   - generate code for the sequel now
       --   - pass info about the sequel to cgAlts for use in the heap check
       -- else sequel will be AssignTo

       ; ret_kind <- withSequel (AssignTo alt_regs False) (cgExpr scrut)
       ; restoreCurrentCostCentre mb_cc
       ; _ <- bindArgsToRegs ret_bndrs
       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
       }
438
439
-----------------
-- | Save the current cost centre to a fresh local register, unless the
-- scrutinee is simple, in which case nothing needs saving.
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
maybeSaveCostCentre simple_scrut =
  if simple_scrut
    then return Nothing
    else saveCurrentCostCentre
445
446
-----------------
isSimpleScrut :: StgExpr -> AltType -> Bool
-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
-- when it does, you'll deeply mess up allocation
isSimpleScrut scrut alt_ty =
  case scrut of
    StgOpApp op _ _ -> isSimpleOp op
    StgLit _        -> True             -- case 1# of { 0# -> ..; ... }
    StgApp _ []     -> case alt_ty of
                         PrimAlt _ -> True   -- case x# of { 0# -> ..; ... }
                         _         -> False
    _               -> False
457
isSimpleOp :: StgOp -> Bool
-- True iff the op cannot block or allocate
isSimpleOp op =
  case op of
    StgFCallOp (CCall (CCallSpec _ _ safe)) _ -> not (playSafe safe)
    StgPrimOp p                               -> not (primOpOutOfLine p)
    StgPrimCallOp _                           -> False
463
-----------------
chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [NonVoid Id]
-- These are the binders of a case that are assigned
-- by the evaluation of the scrutinee
-- Only non-void ones come back
chooseReturnBndrs bndr alt_type alts =
  case alt_type of
    PrimAlt _ -> nonVoidIds [bndr]
    AlgAlt _  -> nonVoidIds [bndr]       -- Only 'bndr' is assigned
    PolyAlt   -> nonVoidIds [bndr]       -- Only 'bndr' is assigned
    UbxTupAlt _
      | [(_, ids, _, _)] <- alts -> nonVoidIds ids  -- 'bndr' is not assigned!
      | otherwise                -> panic "chooseReturnBndrs"
                                    -- UbxTupAlt has only one alternative
483
-------------------------------------
-- | Compile the alternatives of a case and the code that selects
-- between them, according to the alternative kind.
cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
       -> FCode ReturnKind
-- At this point the result of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)

cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
  = maybeAltHeapCheck gc_plan (cgExpr rhs)
        -- Here bndrs are *already* in scope, so don't rebind them

cgAlts gc_plan bndr (PrimAlt _) alts
  = do  { dflags <- getDynFlags

        ; tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let bndr_reg = CmmLocal (idToReg dflags bndr)
              (DEFAULT,deflt) = head tagged_cmms
                -- PrimAlts always have a DEFAULT case
                -- and it always comes first

              tagged_cmms' = [(lit,code)
                             | (LitAlt lit, code) <- tagged_cmms]
        -- Select between literal alternatives with a literal switch,
        -- falling back to the DEFAULT code.
        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
        ; return AssignedDirectly }

cgAlts gc_plan bndr (AlgAlt tycon) alts
  = do  { dflags <- getDynFlags

        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

        ; let fam_sz   = tyConFamilySize tycon
              bndr_reg = CmmLocal (idToReg dflags bndr)

                    -- Is the constructor tag in the node reg?
        ; if isSmallFamily dflags fam_sz
          then do
                let -- Yes, bndr_reg has constr. tag in ls bits
                    tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
                    branches' = [(tag+1,branch) | (tag,branch) <- branches]
                    -- tags from the pointer are 1-based, so shift the
                    -- zero-based constructor tags up by one
                emitSwitch tag_expr branches' mb_deflt 1 fam_sz
                return AssignedDirectly

          else -- No, get tag from info table
                do dflags <- getDynFlags
                   let -- Note that ptr _always_ has tag 1
                       -- when the family size is big enough
                       untagged_ptr = cmmRegOffB bndr_reg (-1)
                       tag_expr = getConstrTag dflags (untagged_ptr)
                   emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
                   return AssignedDirectly }

cgAlts _ _ _ _ = panic "cgAlts"
        -- UbxTupAlt and PolyAlt have only one alternative
538
539
540 -- Note [alg-alt heap check]
541 --
542 -- In an algebraic case with more than one alternative, we will have
543 -- code like
544 --
545 -- L0:
546 -- x = R1
547 -- goto L1
548 -- L1:
549 -- if (x & 7 >= 2) then goto L2 else goto L3
550 -- L2:
551 -- Hp = Hp + 16
552 -- if (Hp > HpLim) then goto L4
553 -- ...
554 -- L4:
555 -- call gc() returns to L5
556 -- L5:
557 -- x = R1
558 -- goto L1
559
-------------------
-- | Compile the right-hand sides of an algebraic case.  Returns the
-- DEFAULT branch, if there is one, paired with the (zero-based
-- constructor tag, code) pairs for the constructor alternatives.
cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
             -> FCode ( Maybe CmmAGraph
                      , [(ConTagZ, CmmAGraph)] )
cgAlgAltRhss gc_plan bndr alts = do
  tagged_cmms <- cgAltRhss gc_plan bndr alts
  let -- DEFAULT is always first, if present
      mb_deflt = case tagged_cmms of
                   (DEFAULT, rhs) : _ -> Just rhs
                   _other             -> Nothing
      branches = [ (dataConTagZ con, cmm)
                 | (DataAlt con, cmm) <- tagged_cmms ]
  return (mb_deflt, branches)
578
579
-------------------
-- | Compile every alternative's right-hand side, pairing each with its
-- AltCon.  Each alternative binds its constructor arguments out of
-- 'base_reg' and is wrapped in a heap check as dictated by the GC plan.
cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
          -> FCode [(AltCon, CmmAGraph)]
cgAltRhss gc_plan bndr alts = do
  dflags <- getDynFlags
  let
    base_reg = idToReg dflags bndr
    cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)
    cg_alt (con, bndrs, _uses, rhs)
      = getCodeR $
        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg bndrs
           ; _ <- cgExpr rhs
           ; return con }
  -- forkAlts compiles each alternative in its own forked context
  forkAlts (map cg_alt alts)
595
-- | Wrap alternative code in a heap check when the GC plan requires
-- one; the kind of check depends on how the scrutinee returned.
maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (plan, ret_kind) code =
  case plan of
    NoGcInAlts    -> code
    GcInAlts regs ->
      case ret_kind of
        AssignedDirectly    -> altHeapCheck regs code
        ReturnedTo lret off -> altHeapCheckReturnsTo regs lret off code
602
603 -----------------------------------------------------------------------------
604 -- Tail calls
605 -----------------------------------------------------------------------------
606
-- | Compile a saturated constructor application in tail position.
cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
cgConApp con stg_args
  | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
  = do { arg_exprs <- getNonVoidArgAmodes stg_args
       ; tickyUnboxedTupleReturn (length arg_exprs)
       ; emitReturn arg_exprs }

  | otherwise   --  Boxed constructors; allocate and return
  = ASSERT( stg_args `lengthIs` dataConRepRepArity con )
    do  { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
                                     currentCCS con stg_args
                -- The first "con" says that the name bound to this
                -- closure is "con", which is a bit of a fudge, but
                -- it only affects profiling (hence the False)

        ; emit =<< fcode_init
        ; emitReturn [idInfoToAmode idinfo] }
624
625
-- | Compile the application of an identifier: either a jump to a
-- let-no-escape join point, or a tail call.
cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id [] | isVoidId fun_id = emitReturn []
cgIdApp fun_id args
  = do  { fun_info <- getCgIdInfo fun_id
        ; case maybeLetNoEscape fun_info of
            Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args
            Nothing -> cgTailCall (cg_id fun_info) fun_info args }
  -- NB. use (cg_id fun_info) instead of fun_id, because the former
  -- may be externalised for -split-objs.
  -- See StgCmm.maybeExternaliseId.
636
-- | Jump to a let-no-escape join point: assign the arguments to the
-- join point's registers and branch to its block.
cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ReturnKind
cgLneJump blk_id lne_regs args  -- Join point; discard sequel
  = do  { adjustHpBackwards -- always do this before a tail-call
        ; cmm_args <- getNonVoidArgAmodes args
        ; emitMultiAssign lne_regs cmm_args
        ; emit (mkBranch blk_id)
        ; return AssignedDirectly }
644
-- | Compile a tail call, choosing the calling convention according to
-- what getCallMethod says about the function and its arity.
cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ReturnKind
cgTailCall fun_id fun_info args = do
    dflags <- getDynFlags
    case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of

            -- A value in WHNF, so we can just return it.
        ReturnIt -> emitReturn [fun]    -- ToDo: does ReturnIt guarantee tagged?

        EnterIt -> ASSERT( null args )  -- Discarding arguments
                   emitEnter fun

        SlowCall -> do      -- A slow function call via the RTS apply routines
                { tickySlowCall lf_info args
                ; emitComment $ mkFastString "slowCall"
                ; slowCall fun args }

        -- A direct function call (possibly with some left-over arguments)
        DirectEntry lbl arity -> do
                { tickyDirectCall arity args
                  -- when the closure must be pointed to by the node
                  -- register, the function itself goes in as first arg
                ; if node_points dflags
                     then directCall NativeNodeCall   lbl arity (fun_arg:args)
                     else directCall NativeDirectCall lbl arity args }

        JumpToIt {} -> panic "cgTailCall"       -- ???

  where
    fun_arg     = StgVarArg fun_id
    fun_name    = idName            fun_id
    fun         = idInfoToAmode     fun_info
    lf_info     = cgIdInfoLF        fun_info
    node_points dflags = nodeMustPointToIt dflags lf_info
676
677
-- | Enter (evaluate) a closure.  How we do it depends on the sequel:
-- for a plain return we jump straight into the closure's entry code;
-- when the result feeds a case, we emit a tag-test so that an
-- already-evaluated (tagged) closure is not entered at all.
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
  { dflags <- getDynFlags
  ; adjustHpBackwards
  ; sequel <- getSequel
  ; updfr_off <- getUpdFrameOff
  ; case sequel of
      -- For a return, we have the option of generating a tag-test or
      -- not.  If the value is tagged, we can return directly, which
      -- is quicker than entering the value.  This is a code
      -- size/speed trade-off: when optimising for speed rather than
      -- size we could generate the tag test.
      --
      -- Right now, we do what the old codegen did, and omit the tag
      -- test, just generating an enter.
      Return _ -> do
        { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
        ; emit $ mkJump dflags NativeNodeCall entry
                        [cmmUntag dflags fun] updfr_off
        ; return AssignedDirectly
        }

      -- The result will be scrutinised in the sequel.  This is where
      -- we generate a tag-test to avoid entering the closure if
      -- possible.
      --
      -- The generated code will be something like this:
      --
      --    R1 = fun  -- copyout
      --    if (fun & 7 != 0) goto Lcall else goto Lret
      --  Lcall:
      --    call [fun] returns to Lret
      --  Lret:
      --    fun' = R1  -- copyin
      --    ...
      --
      -- Note in particular that the label Lret is used as a
      -- destination by both the tag-test and the call.  This is
      -- because Lret will necessarily be a proc-point, and we want to
      -- ensure that we generate only one proc-point for this
      -- sequence.
      --
      -- Furthermore, we tell the caller that we generated a native
      -- return continuation by returning (ReturnedTo Lret off), so
      -- that the continuation can be reused by the heap-check failure
      -- code in the enclosing case expression.
      --
      AssignTo res_regs _ -> do
        { lret <- newLabelC
        ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
        ; lcall <- newLabelC
        ; updfr_off <- getUpdFrameOff
        ; let area = Young lret
        ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
                                                      [fun] updfr_off []
          -- refer to fun via nodeReg after the copyout, to avoid having
          -- both live simultaneously; this sometimes enables fun to be
          -- inlined in the RHS of the R1 assignment.
        ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
              the_call = toCall entry (Just lret) updfr_off off outArgs regs
        ; emit $
            copyout <*>
            mkCbranch (cmmIsTagged dflags (CmmReg nodeReg)) lret lcall <*>
            outOfLine lcall the_call <*>
            mkLabel lret <*>
            copyin
        ; return (ReturnedTo lret off)
        }
  }