Fix #11407.
[ghc.git] / compiler / cmm / CmmNode.hs
1 {-# LANGUAGE CPP #-}
2 {-# LANGUAGE ExplicitForAll #-}
3 {-# LANGUAGE FlexibleContexts #-}
4 {-# LANGUAGE FlexibleInstances #-}
5 {-# LANGUAGE GADTs #-}
6 {-# LANGUAGE MultiParamTypeClasses #-}
7 {-# LANGUAGE StandaloneDeriving #-}
8 {-# LANGUAGE UndecidableInstances #-}
9
10 -- CmmNode type for representation using Hoopl graphs.
11
12 module CmmNode (
13 CmmNode(..), CmmFormal, CmmActual, CmmTickish,
14 UpdFrameOffset, Convention(..),
15 ForeignConvention(..), ForeignTarget(..), foreignTargetHints,
16 CmmReturnInfo(..),
17 mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf,
18 mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors,
19
20 -- * Tick scopes
21 CmmTickScope(..), isTickSubScope, combineTickScopes,
22 ) where
23
24 import CodeGen.Platform
25 import CmmExpr
26 import CmmSwitch
27 import DynFlags
28 import FastString
29 import ForeignCall
30 import Outputable
31 import SMRep
32 import CoreSyn (Tickish)
33 import qualified Unique as U
34
35 import Compiler.Hoopl
36 import Data.Maybe
37 import Data.List (tails,sort)
38 import Prelude hiding (succ)
39
40
41 ------------------------
42 -- CmmNode
43
44 #define ULabel {-# UNPACK #-} !Label
45
-- | A node in a Cmm control-flow graph. The @e@/@x@ (entry/exit) shape
-- indices follow the Hoopl convention: 'C' (closed) or 'O' (open) at
-- each end. A node closed on entry starts a block; a node closed on
-- exit ends one.
data CmmNode e x where
  -- Block entry point: the block's label plus the tick scope covering
  -- the whole block. See Note [CmmTick scoping details].
  CmmEntry :: ULabel -> CmmTickScope -> CmmNode C O

  -- A free-form comment, carried through for debug output only.
  CmmComment :: FastString -> CmmNode O O

  -- Tick annotation, covering Cmm code in our tick scope. We only
  -- expect non-code @Tickish@ at this point (e.g. @SourceNote@).
  -- See Note [CmmTick scoping details]
  CmmTick :: !CmmTickish -> CmmNode O O

  -- Unwind pseudo-instruction, encoding stack unwinding
  -- instructions for a debugger. This describes how to reconstruct
  -- the "old" value of a register if we want to navigate the stack
  -- up one frame. Having unwind information for @Sp@ will allow the
  -- debugger to "walk" the stack.
  CmmUnwind :: !GlobalReg -> !CmmExpr -> CmmNode O O

  CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O
    -- Assign to register

  CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O
    -- Assign to memory location.  Size is
    -- given by cmmExprType of the rhs.

  CmmUnsafeForeignCall ::       -- An unsafe foreign call;
                                -- see Note [Foreign calls]
                                -- Like a "fat machine instruction"; can occur
                                -- in the middle of a block
      ForeignTarget ->          -- call target
      [CmmFormal] ->            -- zero or more results
      [CmmActual] ->            -- zero or more arguments
      CmmNode O O
      -- Semantics: clobbers any GlobalRegs for which callerSaves r == True
      -- See Note [Unsafe foreign calls clobber caller-save registers]
      --
      -- Invariant: the arguments and the ForeignTarget must not
      -- mention any registers for which CodeGen.Platform.callerSaves
      -- is True. See Note [Register Parameter Passing].

  CmmBranch :: ULabel -> CmmNode O C
                                -- Goto another block in the same procedure

  CmmCondBranch :: {            -- conditional branch
      cml_pred :: CmmExpr,
      cml_true, cml_false :: ULabel,
      cml_likely :: Maybe Bool  -- likely result of the conditional,
                                -- if known
  } -> CmmNode O C

  CmmSwitch
    :: CmmExpr       -- Scrutinee, of some integral type
    -> SwitchTargets -- Cases. See [Note SwitchTargets]
    -> CmmNode O C

  CmmCall :: {                -- A native call or tail call
      cml_target :: CmmExpr,  -- never a CmmPrim to a CallishMachOp!

      cml_cont :: Maybe Label,
          -- Label of continuation (Nothing for return or tail call)
          --
          -- Note [Continuation BlockId]: these BlockIds are called
          -- Continuation BlockIds, and are the only BlockIds that can
          -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
          -- (CmmStackSlot (Young b) _).

      cml_args_regs :: [GlobalReg],
          -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed
          -- to the call.  This is essential information for the
          -- native code generator's register allocator; without
          -- knowing which GlobalRegs are live it has to assume that
          -- they are all live.  This list should only include
          -- GlobalRegs that are mapped to real machine registers on
          -- the target platform.

      cml_args :: ByteOff,
          -- Byte offset, from the *old* end of the Area associated with
          -- the Label (if cml_cont = Nothing, then Old area), of
          -- youngest outgoing arg.  Set the stack pointer to this before
          -- transferring control.
          -- (NB: an update frame might also have been stored in the Old
          --  area, but it'll be in an older part than the args.)

      cml_ret_args :: ByteOff,
          -- For calls *only*, the byte offset for youngest returned value
          -- This is really needed at the *return* point rather than here
          -- at the call, but in practice it's convenient to record it here.

      cml_ret_off :: ByteOff
          -- For calls *only*, the byte offset of the base of the frame that
          -- must be described by the info table for the return point.
          -- The older words are an update frame, which has its own
          -- info-table and layout information

          -- From a liveness point of view, the stack words older than
          -- cml_ret_off are treated as live, even if the sequel of
          -- the call goes into a loop.
  } -> CmmNode O C

  CmmForeignCall :: {           -- A safe foreign call; see Note [Foreign calls]
                                -- Always the last node of a block
      tgt      :: ForeignTarget,   -- call target and convention
      res      :: [CmmFormal],     -- zero or more results
      args     :: [CmmActual],     -- zero or more arguments; see Note [Register parameter passing]
      succ     :: ULabel,          -- Label of continuation
      ret_args :: ByteOff,         -- same as cml_ret_args
      ret_off  :: ByteOff,         -- same as cml_ret_off
      intrbl   :: Bool             -- whether or not the call is interruptible
  } -> CmmNode O C
154
155 {- Note [Foreign calls]
156 ~~~~~~~~~~~~~~~~~~~~~~~
157 A CmmUnsafeForeignCall is used for *unsafe* foreign calls;
158 a CmmForeignCall call is used for *safe* foreign calls.
159
160 Unsafe ones are mostly easy: think of them as a "fat machine
161 instruction". In particular, they do *not* kill all live registers,
162 just the registers they return to (there was a bit of code in GHC that
163 conservatively assumed otherwise.) However, see [Register parameter passing].
164
165 Safe ones are trickier. A safe foreign call
166 r = f(x)
167 ultimately expands to
168 push "return address" -- Never used to return to;
                           -- just points to an info table
170 save registers into TSO
171 call suspendThread
172 r = f(x) -- Make the call
173 call resumeThread
174 restore registers
175 pop "return address"
176 We cannot "lower" a safe foreign call to this sequence of Cmms, because
177 after we've saved Sp all the Cmm optimiser's assumptions are broken.
178
179 Note that a safe foreign call needs an info table.
180
181 So Safe Foreign Calls must remain as last nodes until the stack is
182 made manifest in CmmLayoutStack, where they are lowered into the above
183 sequence.
184 -}
185
186 {- Note [Unsafe foreign calls clobber caller-save registers]
187 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
188
189 A foreign call is defined to clobber any GlobalRegs that are mapped to
190 caller-saves machine registers (according to the prevailing C ABI).
191 StgCmmUtils.callerSaves tells you which GlobalRegs are caller-saves.
192
193 This is a design choice that makes it easier to generate code later.
194 We could instead choose to say that foreign calls do *not* clobber
195 caller-saves regs, but then we would have to figure out which regs
196 were live across the call later and insert some saves/restores.
197
198 Furthermore when we generate code we never have any GlobalRegs live
199 across a call, because they are always copied-in to LocalRegs and
200 copied-out again before making a call/jump. So all we have to do is
201 avoid any code motion that would make a caller-saves GlobalReg live
202 across a foreign call during subsequent optimisations.
203 -}
204
205 {- Note [Register parameter passing]
206 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
207 On certain architectures, some registers are utilized for parameter
208 passing in the C calling convention. For example, in x86-64 Linux
209 convention, rdi, rsi, rdx and rcx (as well as r8 and r9) may be used for
210 argument passing. These are registers R3-R6, which our generated
211 code may also be using; as a result, it's necessary to save these
212 values before doing a foreign call. This is done during initial
213 code generation in callerSaveVolatileRegs in StgCmmUtils.hs. However,
214 one result of doing this is that the contents of these registers
215 may mysteriously change if referenced inside the arguments. This
216 is dangerous, so you'll need to disable inlining much in the same
way as is done in cmm/CmmOpt.hs currently.  We should fix this!
218 -}
219
220 ---------------------------------------------
221 -- Eq instance of CmmNode
222
223 deriving instance Eq (CmmNode e x)
224
225 ----------------------------------------------
226 -- Hoopl instances of CmmNode
227
-- | Hoopl shape instance: a closed-on-entry node knows its own label,
-- and a closed-on-exit node knows the labels it may transfer control to.
instance NonLocal CmmNode where
  entryLabel (CmmEntry l _) = l

  successors (CmmBranch l) = [l]
  successors (CmmCondBranch {cml_true=t, cml_false=f}) = [f, t] -- meets layout constraint
  successors (CmmSwitch _ ids) = switchTargetsToList ids
  successors (CmmCall {cml_cont=l}) = maybeToList l  -- no continuation => no successors
  successors (CmmForeignCall {succ=l}) = [l]
236
237
238 --------------------------------------------------
239 -- Various helper types
240
-- | An actual argument of a call: an arbitrary Cmm expression.
type CmmActual = CmmExpr

-- | A formal result of a call: the local register to bind it to.
type CmmFormal = LocalReg

-- | Stack offset (in bytes) associated with an update frame.
type UpdFrameOffset = ByteOff
245
-- | A convention maps a list of values (function arguments or return
-- values) to registers or stack locations.
data Convention
  = NativeDirectCall
       -- ^ top-level Haskell functions use @NativeDirectCall@, which
       -- maps arguments to registers starting with R2, according to
       -- how many registers are available on the platform. This
       -- convention ignores R1, because for a top-level function call
       -- the function closure is implicit, and doesn't need to be passed.
  | NativeNodeCall
       -- ^ non-top-level Haskell functions, which pass the address of
       -- the function closure in R1 (regardless of whether R1 is a
       -- real register or not), and the rest of the arguments in
       -- registers or on the stack.
  | NativeReturn
       -- ^ a native return.  The convention for returns depends on
       -- how many values are returned: for just one value returned,
       -- the appropriate register is used (R1, F1, etc.), regardless
       -- of whether it is a real register or not.  For multiple
       -- values returned, they are mapped to registers or the stack.
  | Slow
       -- ^ Slow entry points: all args pushed on the stack
  | GC
       -- ^ Entry to the garbage collector: uses the node reg!
       -- (TODO: I don't think we need this --SDM)
  deriving( Eq )
272
-- | The calling convention of a foreign call, plus hints about its
-- arguments and result, and whether the call can return at all.
data ForeignConvention
  = ForeignConvention
        CCallConv               -- Which foreign-call convention
        [ForeignHint]           -- Extra info about the args
        [ForeignHint]           -- Extra info about the result
        CmmReturnInfo
  deriving Eq
280
-- | Whether a foreign call can return control to its caller.
data CmmReturnInfo
  = CmmMayReturn
  | CmmNeverReturns
  deriving ( Eq )
285
data ForeignTarget        -- The target of a foreign call
  = ForeignTarget         -- A foreign procedure
        CmmExpr                  -- Its address
        ForeignConvention        -- Its calling convention
  | PrimTarget            -- A possibly-side-effecting machine operation
        CallishMachOp            -- Which one
  deriving Eq
293
-- | Extract (result hints, argument hints) for a foreign-call target.
-- Both lists are padded with 'NoHint' to infinite length, so callers
-- may freely zip them against result/argument lists of any size.
foreignTargetHints :: ForeignTarget -> ([ForeignHint], [ForeignHint])
foreignTargetHints target = (pad res_hints, pad arg_hints)
  where
    pad hints = hints ++ repeat NoHint
    (res_hints, arg_hints) =
      case target of
        PrimTarget op -> callishMachOpHints op
        ForeignTarget _ (ForeignConvention _ arghs reshs _) -> (reshs, arghs)
304
305 --------------------------------------------------
306 -- Instances of register and slot users / definers
307
-- | Fold over the LocalRegs *read* by a node. Assignment targets
-- (the lhs of 'CmmAssign', foreign-call result registers) are
-- definitions, not uses, and so are not visited here.
-- NOTE(review): 'CmmUnwind' expressions fall through to the default
-- case and are not folded -- presumably because unwind info is
-- debug-only and must not keep registers alive; confirm.
instance UserOfRegs LocalReg (CmmNode e x) where
  foldRegsUsed dflags f z n = case n of
        CmmAssign _ expr -> fold f z expr
        CmmStore addr rval -> fold f (fold f z addr) rval
        CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
        CmmCondBranch expr _ _ _ -> fold f z expr
        CmmSwitch expr _ -> fold f z expr
        CmmCall {cml_target=tgt} -> fold f z tgt
        CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
        _ -> z
    where -- Monomorphised local helper; fixes the register type so the
          -- instance dispatch inside each case alternative is unambiguous.
          fold :: forall a b.
                  UserOfRegs LocalReg a =>
                  (b -> LocalReg -> b) -> b -> a -> b
          fold f z n = foldRegsUsed dflags f z n
322
-- | Fold over the GlobalRegs *read* by a node. Differs from the
-- LocalReg instance in the 'CmmCall' case: the registers listed in
-- 'cml_args_regs' count as uses in addition to the call target.
instance UserOfRegs GlobalReg (CmmNode e x) where
  foldRegsUsed dflags f z n = case n of
        CmmAssign _ expr -> fold f z expr
        CmmStore addr rval -> fold f (fold f z addr) rval
        CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
        CmmCondBranch expr _ _ _ -> fold f z expr
        CmmSwitch expr _ -> fold f z expr
        CmmCall {cml_target=tgt, cml_args_regs=args} -> fold f (fold f z args) tgt
        CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
        _ -> z
    where -- Monomorphised local helper; fixes the register type so the
          -- instance dispatch inside each case alternative is unambiguous.
          fold :: forall a b.
                  UserOfRegs GlobalReg a =>
                  (b -> GlobalReg -> b) -> b -> a -> b
          fold f z n = foldRegsUsed dflags f z n
337
-- | Only a 'ForeignTarget'\'s address expression can mention registers;
-- a 'PrimTarget' carries no expressions at all.
instance (Ord r, UserOfRegs r CmmExpr) => UserOfRegs r ForeignTarget where
  -- The (Ord r) in the context is necessary here
  -- See Note [Recursive superclasses] in TcInstDcls
  foldRegsUsed _      _ z (PrimTarget _)      = z
  foldRegsUsed dflags f z (ForeignTarget e _) = foldRegsUsed dflags f z e
343
-- | Fold over the LocalRegs *written* by a node: assignment targets
-- and the result registers of foreign calls.
instance DefinerOfRegs LocalReg (CmmNode e x) where
  foldRegsDefd dflags f z n = case n of
        CmmAssign lhs _ -> fold f z lhs
        CmmUnsafeForeignCall _ fs _ -> fold f z fs
        CmmForeignCall {res=res} -> fold f z res
        _ -> z
    where -- Monomorphised local helper (cf. the UserOfRegs instances).
          fold :: forall a b.
                  DefinerOfRegs LocalReg a =>
                  (b -> LocalReg -> b) -> b -> a -> b
          fold f z n = foldRegsDefd dflags f z n
354
-- | Fold over the GlobalRegs *clobbered* by a node. Native calls and
-- safe foreign calls clobber every active STG register; unsafe foreign
-- calls clobber only the caller-saved ones (and nothing at all when
-- the convention says the call never returns).
instance DefinerOfRegs GlobalReg (CmmNode e x) where
  foldRegsDefd dflags f z n = case n of
        CmmAssign lhs _ -> fold f z lhs
        CmmUnsafeForeignCall tgt _ _ -> fold f z (foreignTargetRegs tgt)
        CmmCall        {} -> fold f z activeRegs
        CmmForeignCall {} -> fold f z activeRegs
                      -- See Note [Safe foreign calls clobber STG registers]
        _ -> z
    where -- Monomorphised local helper (cf. the UserOfRegs instances).
          fold :: forall a b.
                  DefinerOfRegs GlobalReg a =>
                  (b -> GlobalReg -> b) -> b -> a -> b
          fold f z n = foldRegsDefd dflags f z n

          platform = targetPlatform dflags
          activeRegs = activeStgRegs platform
          activeCallerSavesRegs = filter (callerSaves platform) activeRegs

          -- A call that never returns clobbers nothing observable.
          foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = []
          foreignTargetRegs _ = activeCallerSavesRegs
374
375 -- Note [Safe foreign calls clobber STG registers]
376 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
377 --
378 -- During stack layout phase every safe foreign call is expanded into a block
379 -- that contains unsafe foreign call (instead of safe foreign call) and ends
380 -- with a normal call (See Note [Foreign calls]). This means that we must
381 -- treat safe foreign call as if it was a normal call (because eventually it
382 -- will be). This is important if we try to run sinking pass before stack
383 -- layout phase. Consider this example of what might go wrong (this is cmm
384 -- code from stablename001 test). Here is code after common block elimination
385 -- (before stack layout):
386 --
387 -- c1q6:
388 -- _s1pf::P64 = R1;
389 -- _c1q8::I64 = performMajorGC;
390 -- I64[(young<c1q9> + 8)] = c1q9;
391 -- foreign call "ccall" arg hints: [] result hints: [] (_c1q8::I64)(...)
392 -- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
393 -- c1q9:
394 -- I64[(young<c1qb> + 8)] = c1qb;
395 -- R1 = _s1pc::P64;
396 -- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
397 --
398 -- If we run sinking pass now (still before stack layout) we will get this:
399 --
400 -- c1q6:
401 -- I64[(young<c1q9> + 8)] = c1q9;
402 -- foreign call "ccall" arg hints: [] result hints: [] performMajorGC(...)
403 -- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
404 -- c1q9:
405 -- I64[(young<c1qb> + 8)] = c1qb;
406 -- _s1pf::P64 = R1; <------ _s1pf sunk past safe foreign call
407 -- R1 = _s1pc::P64;
408 -- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
409 --
410 -- Notice that _s1pf was sunk past a foreign call. When we run stack layout
411 -- safe call to performMajorGC will be turned into:
412 --
413 -- c1q6:
414 -- _s1pc::P64 = P64[Sp + 8];
415 -- I64[Sp - 8] = c1q9;
416 -- Sp = Sp - 8;
417 -- I64[I64[CurrentTSO + 24] + 16] = Sp;
418 -- P64[CurrentNursery + 8] = Hp + 8;
419 -- (_u1qI::I64) = call "ccall" arg hints: [PtrHint,]
420 -- result hints: [PtrHint] suspendThread(BaseReg, 0);
421 -- call "ccall" arg hints: [] result hints: [] performMajorGC();
422 -- (_u1qJ::I64) = call "ccall" arg hints: [PtrHint]
423 -- result hints: [PtrHint] resumeThread(_u1qI::I64);
424 -- BaseReg = _u1qJ::I64;
425 -- _u1qK::P64 = CurrentTSO;
426 -- _u1qL::P64 = I64[_u1qK::P64 + 24];
427 -- Sp = I64[_u1qL::P64 + 16];
428 -- SpLim = _u1qL::P64 + 192;
429 -- HpAlloc = 0;
430 -- Hp = I64[CurrentNursery + 8] - 8;
431 -- HpLim = I64[CurrentNursery] + (%MO_SS_Conv_W32_W64(I32[CurrentNursery + 48]) * 4096 - 1);
432 -- call (I64[Sp])() returns to c1q9, args: 8, res: 8, upd: 8;
433 -- c1q9:
434 -- I64[(young<c1qb> + 8)] = c1qb;
435 -- _s1pf::P64 = R1; <------ INCORRECT!
436 -- R1 = _s1pc::P64;
437 -- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
438 --
439 -- Notice that c1q6 now ends with a call. Sinking _s1pf::P64 = R1 past that
440 -- call is clearly incorrect. This is what would happen if we assumed that
441 -- safe foreign call has the same semantics as unsafe foreign call. To prevent
-- this we need to treat a safe foreign call as if it was a normal call.
443
444 -----------------------------------
445 -- mapping Expr in CmmNode
446
-- | Rewrite the address expression inside a foreign-call target;
-- 'PrimTarget's carry no expressions and are returned untouched.
mapForeignTarget :: (CmmExpr -> CmmExpr) -> ForeignTarget -> ForeignTarget
mapForeignTarget f tgt = case tgt of
  ForeignTarget e conv -> ForeignTarget (f e) conv
  PrimTarget {}        -> tgt
450
-- | Take a transformer on expressions and apply it recursively:
-- sub-expressions are rewritten first, then the rewritten parent is
-- passed to the transformer itself (bottom-up rewriting).
wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr
wrapRecExp f = go
  where
    go (CmmMachOp op es) = f (CmmMachOp op (map go es))
    go (CmmLoad addr ty) = f (CmmLoad (go addr) ty)
    go leaf              = f leaf
458
-- | Apply a function to every top-level CmmExpr in a node.
-- Non-recursive: sub-expressions are not visited (see 'mapExpDeep').
mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExp _ f@(CmmEntry{})                          = f
mapExp _ m@(CmmComment _)                        = m
mapExp _ m@(CmmTick _)                           = m
mapExp f   (CmmUnwind r e)                       = CmmUnwind r (f e)
mapExp f   (CmmAssign r e)                       = CmmAssign r (f e)
mapExp f   (CmmStore addr e)                     = CmmStore (f addr) (f e)
mapExp f   (CmmUnsafeForeignCall tgt fs as)      = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as)
mapExp _ l@(CmmBranch _)                         = l
mapExp f   (CmmCondBranch e ti fi l)             = CmmCondBranch (f e) ti fi l
mapExp f   (CmmSwitch e ids)                     = CmmSwitch (f e) ids
mapExp f n@CmmCall {cml_target=tgt}              = n{cml_target = f tgt}
mapExp f   (CmmForeignCall tgt fs as succ ret_args updfr intrbl) = CmmForeignCall (mapForeignTarget f tgt) fs (map f as) succ ret_args updfr intrbl
472
-- | Apply an expression transformer to every expression in a node,
-- recursively: children are rewritten before their parents.
mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExpDeep f node = mapExp (wrapRecExp f) node
475
476 ------------------------------------------------------------------------
477 -- mapping Expr in CmmNode, but not performing allocation if no changes
478
-- | Partially rewrite the address inside a foreign-call target.
-- Nothing means "unchanged", allowing the caller to share the
-- original target; 'PrimTarget's never change.
mapForeignTargetM :: (CmmExpr -> Maybe CmmExpr) -> ForeignTarget -> Maybe ForeignTarget
mapForeignTargetM f tgt = case tgt of
  ForeignTarget e conv -> fmap (\e' -> ForeignTarget e' conv) (f e)
  PrimTarget {}        -> Nothing
482
wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr)
-- (wrapRecExpM f e) first recursively applies itself to sub-expressions of e
-- then gives f a chance to rewrite the resulting expression.
-- Nothing means "no rewrite happened", so callers can share the original.
-- NOTE(review): when sub-expressions DID change (mapListM returns Just)
-- but f returns Nothing on the rebuilt parent, the overall result is
-- Nothing and the sub-rewrite appears to be discarded -- confirm this is
-- the intended contract for f.
wrapRecExpM f n@(CmmMachOp op es) = maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es)
wrapRecExpM f n@(CmmLoad addr ty) = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr)
wrapRecExpM f e                   = f e
489
-- | Apply a partial rewrite to every top-level CmmExpr in a node.
-- Nothing means "nothing changed", so the caller can keep (share) the
-- original node instead of allocating a fresh one.
mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
mapExpM _ (CmmEntry{})              = Nothing
mapExpM _ (CmmComment _)            = Nothing
mapExpM _ (CmmTick _)               = Nothing
mapExpM f (CmmUnwind r e)           = CmmUnwind r `fmap` f e
mapExpM f (CmmAssign r e)           = CmmAssign r `fmap` f e
-- The two-element lambda pattern is safe: mapListM always returns a
-- list of the same length as its input.
mapExpM f (CmmStore addr e)         = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e]
mapExpM _ (CmmBranch _)             = Nothing
mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e
mapExpM f (CmmSwitch e tbl)         = (\x -> CmmSwitch x tbl)         `fmap` f e
mapExpM f (CmmCall tgt mb_id r o i s) = (\x -> CmmCall x mb_id r o i s) `fmap` f tgt
-- For the call forms: if the target changed we must rebuild the node
-- anyway, so the argument list is updated unconditionally (mapListJ);
-- only when the target is unchanged do we bother tracking whether the
-- arguments changed (mapListM).
mapExpM f (CmmUnsafeForeignCall tgt fs as)
    = case mapForeignTargetM f tgt of
        Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as))
        Nothing   -> (\xs -> CmmUnsafeForeignCall tgt fs xs) `fmap` mapListM f as
mapExpM f (CmmForeignCall tgt fs as succ ret_args updfr intrbl)
    = case mapForeignTargetM f tgt of
        Just tgt' -> Just (CmmForeignCall tgt' fs (mapListJ f as) succ ret_args updfr intrbl)
        Nothing   -> (\xs -> CmmForeignCall tgt fs xs succ ret_args updfr intrbl) `fmap` mapListM f as
509
-- share as much as possible
-- | Apply a partial update to each element. Nothing means "no element
-- changed", so the caller can keep the original list unchanged.
mapListM :: (a -> Maybe a) -> [a] -> Maybe [a]
mapListM f xs = case mapListT f xs of
                  (True,  xs') -> Just xs'
                  (False, _  ) -> Nothing
514
-- | Like 'mapListM', but always yields the (possibly updated) list,
-- sharing the original cells wherever nothing changed.
mapListJ :: (a -> Maybe a) -> [a] -> [a]
mapListJ f = snd . mapListT f
517
-- | Apply a partial update @f@ to every element of a list. Returns a
-- flag saying whether anything changed at all, together with the
-- updated list. Any unchanged suffix of the list is shared with the
-- input (the original cons cells are reused, not rebuilt), which is
-- the whole point of the Maybe-based rewriting interface above.
--
-- Rewritten from a foldr over @zip3 (tails xs) xs (map f xs)@: the
-- direct recursion below is equivalent (including the tail-sharing
-- behaviour) but clearer and avoids building three intermediate lists.
mapListT :: (a -> Maybe a) -> [a] -> (Bool, [a])
mapListT f = go
  where
    go []         = (False, [])
    go l@(x : xs) =
      case (f x, go xs) of
        (Just x', (_,     xs')) -> (True,  x' : xs')
        (Nothing, (True,  xs')) -> (True,  x  : xs')
        (Nothing, (False, _  )) -> (False, l)        -- share original suffix
524
-- | Recursive version of 'mapExpM': the partial rewrite is applied
-- bottom-up to every sub-expression; Nothing means nothing changed.
mapExpDeepM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
mapExpDeepM f node = mapExpM (wrapRecExpM f) node
527
528 -----------------------------------
529 -- folding Expr in CmmNode
530
-- | Fold over the address expression of a foreign-call target, if it
-- has one; 'PrimTarget's contribute nothing.
foldExpForeignTarget :: (CmmExpr -> z -> z) -> ForeignTarget -> z -> z
foldExpForeignTarget f tgt z = case tgt of
  ForeignTarget e _ -> f e z
  PrimTarget _      -> z
534
-- | Take a folder on expressions and apply it recursively.
-- Specifically, @wrapRecExpf f e z@ folds @f@ over @e@ itself and then
-- over every sub-expression of 'CmmMachOp' and 'CmmLoad' nodes,
-- delegating all other expression forms directly to @f@.
wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z
wrapRecExpf f = go
  where
    go e@(CmmMachOp _ es) z = foldr go (f e z) es
    go e@(CmmLoad addr _) z = go addr (f e z)
    go e                  z = f e z
542
-- | Fold a function over every top-level CmmExpr in a node.
-- Non-recursive: sub-expressions are not visited (see 'foldExpDeep').
foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
foldExp _ (CmmEntry {}) z                         = z
foldExp _ (CmmComment {}) z                       = z
foldExp _ (CmmTick {}) z                          = z
foldExp f (CmmUnwind _ e) z                       = f e z
foldExp f (CmmAssign _ e) z                       = f e z
foldExp f (CmmStore addr e) z                     = f addr $ f e z
foldExp f (CmmUnsafeForeignCall t _ as) z         = foldr f (foldExpForeignTarget f t z) as
foldExp _ (CmmBranch _) z                         = z
foldExp f (CmmCondBranch e _ _ _) z               = f e z
foldExp f (CmmSwitch e _) z                       = f e z
foldExp f (CmmCall {cml_target=tgt}) z            = f tgt z
foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args
556
-- | Recursive version of 'foldExp': also folds over all
-- sub-expressions of each expression in the node.
foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
foldExpDeep f node z = foldExp (wrapRecExpf f) node z
559
560 -- -----------------------------------------------------------------------------
561
-- | Rewrite every successor label of a block-closing node. Nodes whose
-- successors live outside the node itself (e.g. 'CmmCall' via its
-- continuation field) are returned unchanged.
mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C
mapSuccessors f node = case node of
  CmmBranch bid           -> CmmBranch (f bid)
  CmmCondBranch p yes no l -> CmmCondBranch p (f yes) (f no) l
  CmmSwitch e ids         -> CmmSwitch e (mapSwitchTargets f ids)
  _                       -> node
567
568 -- -----------------------------------------------------------------------------
569
-- | Tickish in Cmm context (annotations only). Per the 'CmmTick'
-- constructor, only non-code ticks (e.g. @SourceNote@) are expected.
type CmmTickish = Tickish ()
572
-- | Tick scope identifier, allowing us to reason about what
-- annotations in a Cmm block should scope over. We especially take
-- care to allow optimisations to reorganise blocks without losing
-- tick association in the process.
data CmmTickScope
  = GlobalScope
    -- ^ The global scope is the "root" of the scope graph. Every
    -- scope is a sub-scope of the global scope. It doesn't make sense
    -- to add ticks to this scope. On the other hand, this means that
    -- setting this scope on a block means no ticks apply to it.

  | SubScope U.Unique CmmTickScope
    -- ^ Constructs a new sub-scope to an existing scope. This allows
    -- us to translate Core-style scoping rules (see @tickishScoped@)
    -- into the Cmm world. Suppose the following code:
    --
    --   tick<1> case ... of
    --             A -> tick<2> ...
    --             B -> tick<3> ...
    --
    -- We want the top-level tick annotation to apply to blocks
    -- generated for the A and B alternatives. We can achieve that by
    -- generating tick<1> into a block with scope a, while the code
    -- for alternatives A and B gets generated into sub-scopes a/b and
    -- a/c respectively.

  | CombinedScope CmmTickScope CmmTickScope
    -- ^ A combined scope scopes over everything that the two given
    -- scopes cover. It is therefore a sub-scope of either scope. This
    -- is required for optimisations. Consider common block elimination:
    --
    --   A -> tick<2> case ... of
    --          C -> [common]
    --   B -> tick<3> case ... of
    --          D -> [common]
    --
    -- We will generate code for the C and D alternatives, and figure
    -- out afterwards that it's actually common code. Scoping rules
    -- dictate that the resulting common block needs to be covered by
    -- both tick<2> and tick<3>, therefore we need to construct a
    -- scope that is a child to *both* scope. Now we can do that - if
    -- we assign the scopes a/c and b/d to the common-ed up blocks,
    -- the new block could have a combined tick scope a/c+b/d, which
    -- both tick<2> and tick<3> apply to.
617
618 -- Note [CmmTick scoping details]:
619 --
620 -- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the
621 -- same block. Note that as a result of this, optimisations making
622 -- tick scopes more specific can *reduce* the amount of code a tick
623 -- scopes over. Fixing this would require a separate @CmmTickScope@
624 -- field for @CmmTick@. Right now we do not do this simply because I
625 -- couldn't find an example where it actually mattered -- multiple
626 -- blocks within the same scope generally jump to each other, which
627 -- prevents common block elimination from happening in the first
628 -- place. But this is no strong reason, so if Cmm optimisations become
629 -- more involved in future this might have to be revisited.
630
-- | Output all scope paths. Each path lists the uniques from the
-- innermost sub-scope outwards (the global scope contributes nothing);
-- a combined scope contributes the paths of both halves.
scopeToPaths :: CmmTickScope -> [[U.Unique]]
scopeToPaths scope = case scope of
  GlobalScope         -> [[]]
  SubScope u s        -> [ u : path | path <- scopeToPaths s ]
  CombinedScope s1 s2 -> scopeToPaths s1 ++ scopeToPaths s2
636
-- | Returns the head uniques of the scopes. This is based on the
-- assumption that the @Unique@ of @SubScope@ identifies the
-- underlying super-scope. Used for efficient equality and comparison,
-- see below.
scopeUniques :: CmmTickScope -> [U.Unique]
scopeUniques scope = case scope of
  GlobalScope         -> []
  SubScope u _        -> [u]
  CombinedScope s1 s2 -> scopeUniques s1 ++ scopeUniques s2
645
-- Equality and order is based on the head uniques defined above. We
-- take care to short-cut the (extremely) common cases.
-- NB: clause order matters. A SubScope is never equal to a
-- CombinedScope, even if their sorted head uniques would coincide;
-- the general sorted-uniques comparison is only reached when both
-- sides are CombinedScopes.
instance Eq CmmTickScope where
  GlobalScope    == GlobalScope     = True
  GlobalScope    == _               = False
  _              == GlobalScope     = False
  (SubScope u _) == (SubScope u' _) = u == u'
  (SubScope _ _) == _               = False
  _              == (SubScope _ _)  = False
  scope          == scope'          = sort (scopeUniques scope) ==
                                      sort (scopeUniques scope')
-- Ordering follows the same head-unique scheme: GlobalScope sorts
-- first, two SubScopes compare by their uniques, and everything else
-- falls through to comparing sorted unique lists.
instance Ord CmmTickScope where
  compare GlobalScope GlobalScope = EQ
  compare GlobalScope _           = LT
  compare _ GlobalScope           = GT
  compare (SubScope u _) (SubScope u' _) = compare u u'
  compare scope scope' = compare (sort $ scopeUniques scope)
                                 (sort $ scopeUniques scope')
664
instance Outputable CmmTickScope where
  ppr GlobalScope     = text "global"
  ppr (SubScope us GlobalScope)
                      = ppr us
  ppr (SubScope us s) = ppr s <> char '/' <> ppr us
  -- Reached only for CombinedScope (all other constructors are matched
  -- above): print every path, slash-separated, joined with '+'.
  -- Paths are reversed so they read outermost-first.
  ppr combined        = parens $ hcat $ punctuate (char '+') $
                        map (hcat . punctuate (char '/') . map ppr . reverse) $
                        scopeToPaths combined
673
-- | Checks whether the first tick scope is a sub-scope of the second.
-- True if the two scopes are equal.
isTickSubScope :: CmmTickScope -> CmmTickScope -> Bool
isTickSubScope = cmp
  where -- Everything is a sub-scope of the global scope, and the global
        -- scope is a sub-scope of nothing else.
        cmp _ GlobalScope                     = True
        cmp GlobalScope _                     = False
        -- A combined scope is below s' only if BOTH halves are; being
        -- below either half of a combined scope suffices on the right.
        cmp (CombinedScope s1 s2) s'          = cmp s1 s' && cmp s2 s'
        cmp s (CombinedScope s1' s2')         = cmp s s1' || cmp s s2'
        -- Matching head uniques mean the same scope (see scopeUniques);
        -- otherwise walk up the parent chain of the candidate sub-scope.
        cmp (SubScope u s) s'@(SubScope u' _) = u == u' || cmp s s'
683
-- | Combine two tick scopes. The result is a sub-scope of both
-- arguments; when one is already a sub-scope of the other we simply
-- return it instead of allocating a 'CombinedScope'.
combineTickScopes :: CmmTickScope -> CmmTickScope -> CmmTickScope
combineTickScopes s1 s2
  | isTickSubScope s1 s2 = s1
  | isTickSubScope s2 s1 = s2
  | otherwise            = CombinedScope s1 s2