{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ExplicitForAll #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE UndecidableInstances #-}

-- CmmNode type for representation using Hoopl graphs.

module CmmNode (
     CmmNode(..), CmmFormal, CmmActual, CmmTickish,
     UpdFrameOffset, Convention(..),
     ForeignConvention(..), ForeignTarget(..), foreignTargetHints,
     CmmReturnInfo(..),
     mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf,
     mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors,

     -- * Tick scopes
     CmmTickScope(..), isTickSubScope, combineTickScopes,
  ) where

import CodeGen.Platform
import CmmExpr
import CmmSwitch
import DynFlags
import FastString
import ForeignCall
import Outputable
import SMRep
import CoreSyn (Tickish)
import qualified Unique as U

import Compiler.Hoopl
import Data.Maybe
import Data.List (tails,sortBy)
import Prelude hiding (succ)
import Unique (nonDetCmpUnique)
import Util


------------------------
-- CmmNode

#define ULabel {-# UNPACK #-} !Label

data CmmNode e x where
  CmmEntry :: ULabel -> CmmTickScope -> CmmNode C O

  CmmComment :: FastString -> CmmNode O O

  -- Tick annotation, covering Cmm code in our tick scope. We only
  -- expect non-code @Tickish@ at this point (e.g. @SourceNote@).
  -- See Note [CmmTick scoping details]
  CmmTick :: !CmmTickish -> CmmNode O O

  -- Unwind pseudo-instruction, encoding stack unwinding
  -- instructions for a debugger. This describes how to reconstruct
  -- the "old" value of a register if we want to navigate the stack
  -- up one frame. Having unwind information for @Sp@ will allow the
  -- debugger to "walk" the stack.
  CmmUnwind :: !GlobalReg -> !CmmExpr -> CmmNode O O

  CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O
    -- Assign to register

  CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O
    -- Assign to memory location. Size is
    -- given by cmmExprType of the rhs.

  CmmUnsafeForeignCall ::       -- An unsafe foreign call;
                                -- see Note [Foreign calls]
                                -- Like a "fat machine instruction"; can occur
                                -- in the middle of a block
      ForeignTarget ->          -- call target
      [CmmFormal] ->            -- zero or more results
      [CmmActual] ->            -- zero or more arguments
      CmmNode O O
      -- Semantics: clobbers any GlobalRegs for which callerSaves r == True
      -- See Note [Unsafe foreign calls clobber caller-save registers]
      --
      -- Invariant: the arguments and the ForeignTarget must not
      -- mention any registers for which CodeGen.Platform.callerSaves
      -- is True. See Note [Register parameter passing].

  CmmBranch :: ULabel -> CmmNode O C
      -- Goto another block in the same procedure

  CmmCondBranch :: {            -- conditional branch
      cml_pred :: CmmExpr,
      cml_true, cml_false :: ULabel,
      cml_likely :: Maybe Bool  -- likely result of the conditional,
                                -- if known
  } -> CmmNode O C

  CmmSwitch
    :: CmmExpr       -- Scrutinee, of some integral type
    -> SwitchTargets -- Cases. See Note [SwitchTargets]
    -> CmmNode O C

  CmmCall :: {                -- A native call or tail call
      cml_target :: CmmExpr,  -- never a CmmPrim to a CallishMachOp!

      cml_cont :: Maybe Label,
          -- Label of continuation (Nothing for return or tail call)
          --
          -- Note [Continuation BlockId]: these BlockIds are called
          -- Continuation BlockIds, and are the only BlockIds that can
          -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
          -- (CmmStackSlot (Young b) _).

      cml_args_regs :: [GlobalReg],
          -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed
          -- to the call. This is essential information for the
          -- native code generator's register allocator; without
          -- knowing which GlobalRegs are live it has to assume that
          -- they are all live. This list should only include
          -- GlobalRegs that are mapped to real machine registers on
          -- the target platform.

      cml_args :: ByteOff,
          -- Byte offset, from the *old* end of the Area associated with
          -- the Label (if cml_cont = Nothing, then Old area), of
          -- youngest outgoing arg. Set the stack pointer to this before
          -- transferring control.
          -- (NB: an update frame might also have been stored in the Old
          --  area, but it'll be in an older part than the args.)

      cml_ret_args :: ByteOff,
          -- For calls *only*, the byte offset for youngest returned value.
          -- This is really needed at the *return* point rather than here
          -- at the call, but in practice it's convenient to record it here.

      cml_ret_off :: ByteOff
          -- For calls *only*, the byte offset of the base of the frame that
          -- must be described by the info table for the return point.
          -- The older words are an update frame, which has its own
          -- info table and layout information.

          -- From a liveness point of view, the stack words older than
          -- cml_ret_off are treated as live, even if the sequel of
          -- the call goes into a loop.
  } -> CmmNode O C

  CmmForeignCall :: {           -- A safe foreign call; see Note [Foreign calls]
                                -- Always the last node of a block
      tgt    :: ForeignTarget,  -- call target and convention
      res    :: [CmmFormal],    -- zero or more results
      args   :: [CmmActual],    -- zero or more arguments; see Note [Register parameter passing]
      succ   :: ULabel,         -- Label of continuation
      ret_args :: ByteOff,      -- same as cml_ret_args
      ret_off :: ByteOff,       -- same as cml_ret_off
      intrbl :: Bool            -- whether or not the call is interruptible
  } -> CmmNode O C

{- Note [Foreign calls]
~~~~~~~~~~~~~~~~~~~~~~~
A CmmUnsafeForeignCall is used for *unsafe* foreign calls;
a CmmForeignCall is used for *safe* foreign calls.

Unsafe ones are mostly easy: think of them as a "fat machine
instruction". In particular, they do *not* kill all live registers,
just the registers they return to (there was a bit of code in GHC that
conservatively assumed otherwise). However, see Note [Register parameter passing].

Safe ones are trickier. A safe foreign call
     r = f(x)
ultimately expands to
     push "return address"     -- Never used to return to;
                               -- just points to an info table
     save registers into TSO
     call suspendThread
     r = f(x)                  -- Make the call
     call resumeThread
     restore registers
     pop "return address"
We cannot "lower" a safe foreign call to this sequence of Cmms, because
after we've saved Sp all the Cmm optimiser's assumptions are broken.

Note that a safe foreign call needs an info table.

So Safe Foreign Calls must remain as last nodes until the stack is
made manifest in CmmLayoutStack, where they are lowered into the above
sequence.
-}

{- Note [Unsafe foreign calls clobber caller-save registers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A foreign call is defined to clobber any GlobalRegs that are mapped to
caller-saves machine registers (according to the prevailing C ABI).
StgCmmUtils.callerSaves tells you which GlobalRegs are caller-saves.

This is a design choice that makes it easier to generate code later.
We could instead choose to say that foreign calls do *not* clobber
caller-saves regs, but then we would have to figure out which regs
were live across the call later and insert some saves/restores.

Furthermore when we generate code we never have any GlobalRegs live
across a call, because they are always copied-in to LocalRegs and
copied-out again before making a call/jump. So all we have to do is
avoid any code motion that would make a caller-saves GlobalReg live
across a foreign call during subsequent optimisations.
-}

{- Note [Register parameter passing]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
On certain architectures, some registers are utilized for parameter
passing in the C calling convention. For example, in the x86-64 Linux
convention, rdi, rsi, rdx and rcx (as well as r8 and r9) may be used
for argument passing. These are registers R3-R6, which our generated
code may also be using; as a result, it's necessary to save these
values before doing a foreign call. This is done during initial
code generation in callerSaveVolatileRegs in StgCmmUtils.hs. However,
one result of doing this is that the contents of these registers
may mysteriously change if referenced inside the arguments. This
is dangerous, so you'll need to disable inlining much in the same
way as is currently done in cmm/CmmOpt.hs. We should fix this!
-}
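
-- As an illustration of the invariant on CmmUnsafeForeignCall (a sketch
-- in Cmm-style pseudocode; the concrete register mapping is
-- platform-dependent): an argument living in a caller-saved GlobalReg
-- must be copied into a LocalReg before the call, rather than mentioned
-- directly in the arguments:
--
--   _arg::I64 = R3;                 -- copy out of the GlobalReg first
--   foreign "ccall" f(_arg::I64);   -- OK: the call no longer mentions R3
--
-- whereas foreign "ccall" f(R3) would read R3 after the register-saving
-- code has potentially clobbered it.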

---------------------------------------------
-- Eq instance of CmmNode

deriving instance Eq (CmmNode e x)

----------------------------------------------
-- Hoopl instances of CmmNode

instance NonLocal CmmNode where
  entryLabel (CmmEntry l _) = l

  successors (CmmBranch l) = [l]
  successors (CmmCondBranch {cml_true=t, cml_false=f}) = [f, t] -- meets layout constraint
  successors (CmmSwitch _ ids) = switchTargetsToList ids
  successors (CmmCall {cml_cont=l}) = maybeToList l
  successors (CmmForeignCall {succ=l}) = [l]


--------------------------------------------------
-- Various helper types

type CmmActual = CmmExpr
type CmmFormal = LocalReg

type UpdFrameOffset = ByteOff

-- | A convention maps a list of values (function arguments or return
-- values) to registers or stack locations.
data Convention
  = NativeDirectCall
       -- ^ top-level Haskell functions use @NativeDirectCall@, which
       -- maps arguments to registers starting with R2, according to
       -- how many registers are available on the platform. This
       -- convention ignores R1, because for a top-level function call
       -- the function closure is implicit, and doesn't need to be passed.
  | NativeNodeCall
       -- ^ non-top-level Haskell functions, which pass the address of
       -- the function closure in R1 (regardless of whether R1 is a
       -- real register or not), and the rest of the arguments in
       -- registers or on the stack.
  | NativeReturn
       -- ^ a native return. The convention for returns depends on
       -- how many values are returned: for just one value returned,
       -- the appropriate register is used (R1, F1, etc.), regardless
       -- of whether it is a real register or not. For multiple
       -- values returned, they are mapped to registers or the stack.
  | Slow
       -- ^ Slow entry points: all args pushed on the stack
  | GC
       -- ^ Entry to the garbage collector: uses the node reg!
       -- (TODO: I don't think we need this --SDM)
  deriving( Eq )

data ForeignConvention
  = ForeignConvention
        CCallConv               -- Which foreign-call convention
        [ForeignHint]           -- Extra info about the args
        [ForeignHint]           -- Extra info about the result
        CmmReturnInfo
  deriving Eq

data CmmReturnInfo
  = CmmMayReturn
  | CmmNeverReturns
  deriving ( Eq )

data ForeignTarget          -- The target of a foreign call
  = ForeignTarget           -- A foreign procedure
        CmmExpr                 -- Its address
        ForeignConvention       -- Its calling convention
  | PrimTarget              -- A possibly-side-effecting machine operation
        CallishMachOp           -- Which one
  deriving Eq

foreignTargetHints :: ForeignTarget -> ([ForeignHint], [ForeignHint])
foreignTargetHints target
  = ( res_hints ++ repeat NoHint
    , arg_hints ++ repeat NoHint )
  where
    (res_hints, arg_hints) =
      case target of
        PrimTarget op -> callishMachOpHints op
        ForeignTarget _ (ForeignConvention _ arg_hints res_hints _) ->
            (res_hints, arg_hints)
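
-- Both hint lists above are padded with an infinite tail of @NoHint@,
-- so they can be zipped against argument or result lists of any length.
-- A minimal usage sketch (the names 'as' and 'hinted_as' are just for
-- illustration, not part of this module):
--
--   let (_res_hints, arg_hints) = foreignTargetHints tgt
--       hinted_as               = zip as arg_hints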

--------------------------------------------------
-- Instances of register and slot users / definers

instance UserOfRegs LocalReg (CmmNode e x) where
  foldRegsUsed dflags f !z n = case n of
    CmmAssign _ expr -> fold f z expr
    CmmStore addr rval -> fold f (fold f z addr) rval
    CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
    CmmCondBranch expr _ _ _ -> fold f z expr
    CmmSwitch expr _ -> fold f z expr
    CmmCall {cml_target=tgt} -> fold f z tgt
    CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
    _ -> z
    where fold :: forall a b. UserOfRegs LocalReg a
               => (b -> LocalReg -> b) -> b -> a -> b
          fold f z n = foldRegsUsed dflags f z n

instance UserOfRegs GlobalReg (CmmNode e x) where
  foldRegsUsed dflags f !z n = case n of
    CmmAssign _ expr -> fold f z expr
    CmmStore addr rval -> fold f (fold f z addr) rval
    CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
    CmmCondBranch expr _ _ _ -> fold f z expr
    CmmSwitch expr _ -> fold f z expr
    CmmCall {cml_target=tgt, cml_args_regs=args} -> fold f (fold f z args) tgt
    CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
    _ -> z
    where fold :: forall a b. UserOfRegs GlobalReg a
               => (b -> GlobalReg -> b) -> b -> a -> b
          fold f z n = foldRegsUsed dflags f z n

instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r ForeignTarget where
  -- The (Ord r) in the context is necessary here
  -- See Note [Recursive superclasses] in TcInstDcls
  foldRegsUsed _      _ !z (PrimTarget _)      = z
  foldRegsUsed dflags f !z (ForeignTarget e _) = foldRegsUsed dflags f z e

instance DefinerOfRegs LocalReg (CmmNode e x) where
  foldRegsDefd dflags f !z n = case n of
    CmmAssign lhs _ -> fold f z lhs
    CmmUnsafeForeignCall _ fs _ -> fold f z fs
    CmmForeignCall {res=res} -> fold f z res
    _ -> z
    where fold :: forall a b. DefinerOfRegs LocalReg a
               => (b -> LocalReg -> b) -> b -> a -> b
          fold f z n = foldRegsDefd dflags f z n

instance DefinerOfRegs GlobalReg (CmmNode e x) where
  foldRegsDefd dflags f !z n = case n of
    CmmAssign lhs _ -> fold f z lhs
    CmmUnsafeForeignCall tgt _ _ -> fold f z (foreignTargetRegs tgt)
    CmmCall {} -> fold f z activeRegs
    CmmForeignCall {} -> fold f z activeRegs
      -- See Note [Safe foreign calls clobber STG registers]
    _ -> z
    where fold :: forall a b. DefinerOfRegs GlobalReg a
               => (b -> GlobalReg -> b) -> b -> a -> b
          fold f z n = foldRegsDefd dflags f z n

          platform = targetPlatform dflags
          activeRegs = activeStgRegs platform
          activeCallerSavesRegs = filter (callerSaves platform) activeRegs

          foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = []
          foreignTargetRegs _ = activeCallerSavesRegs

-- Note [Safe foreign calls clobber STG registers]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- During the stack layout phase every safe foreign call is expanded into
-- a block that contains an unsafe foreign call (instead of a safe one)
-- and ends with a normal call (see Note [Foreign calls]). This means that
-- we must treat a safe foreign call as if it were a normal call (because
-- eventually it will be). This is important if we try to run the sinking
-- pass before the stack layout phase. Consider this example of what might
-- go wrong (this is Cmm code from the stablename001 test). Here is the
-- code after common block elimination (before stack layout):
--
--   c1q6:
--       _s1pf::P64 = R1;
--       _c1q8::I64 = performMajorGC;
--       I64[(young<c1q9> + 8)] = c1q9;
--       foreign call "ccall" arg hints:  []  result hints:  [] (_c1q8::I64)(...)
--                    returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
--   c1q9:
--       I64[(young<c1qb> + 8)] = c1qb;
--       R1 = _s1pc::P64;
--       call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
--
-- If we run the sinking pass now (still before stack layout) we will get this:
--
--   c1q6:
--       I64[(young<c1q9> + 8)] = c1q9;
--       foreign call "ccall" arg hints:  []  result hints:  [] performMajorGC(...)
--                    returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
--   c1q9:
--       I64[(young<c1qb> + 8)] = c1qb;
--       _s1pf::P64 = R1;         <------ _s1pf sunk past safe foreign call
--       R1 = _s1pc::P64;
--       call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
--
-- Notice that _s1pf was sunk past a foreign call. When we run stack layout
-- the safe call to performMajorGC will be turned into:
--
--   c1q6:
--       _s1pc::P64 = P64[Sp + 8];
--       I64[Sp - 8] = c1q9;
--       Sp = Sp - 8;
--       I64[I64[CurrentTSO + 24] + 16] = Sp;
--       P64[CurrentNursery + 8] = Hp + 8;
--       (_u1qI::I64) = call "ccall" arg hints:  [PtrHint,]
--                           result hints:  [PtrHint] suspendThread(BaseReg, 0);
--       call "ccall" arg hints:  []  result hints:  [] performMajorGC();
--       (_u1qJ::I64) = call "ccall" arg hints:  [PtrHint]
--                           result hints:  [PtrHint] resumeThread(_u1qI::I64);
--       BaseReg = _u1qJ::I64;
--       _u1qK::P64 = CurrentTSO;
--       _u1qL::P64 = I64[_u1qK::P64 + 24];
--       Sp = I64[_u1qL::P64 + 16];
--       SpLim = _u1qL::P64 + 192;
--       HpAlloc = 0;
--       Hp = I64[CurrentNursery + 8] - 8;
--       HpLim = I64[CurrentNursery] + (%MO_SS_Conv_W32_W64(I32[CurrentNursery + 48]) * 4096 - 1);
--       call (I64[Sp])() returns to c1q9, args: 8, res: 8, upd: 8;
--   c1q9:
--       I64[(young<c1qb> + 8)] = c1qb;
--       _s1pf::P64 = R1;         <------ INCORRECT!
--       R1 = _s1pc::P64;
--       call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
--
-- Notice that c1q6 now ends with a call. Sinking _s1pf::P64 = R1 past that
-- call is clearly incorrect. This is what would happen if we assumed that
-- a safe foreign call had the same semantics as an unsafe one. To prevent
-- this we need to treat a safe foreign call as if it were a normal call.

-----------------------------------
-- mapping Expr in CmmNode

mapForeignTarget :: (CmmExpr -> CmmExpr) -> ForeignTarget -> ForeignTarget
mapForeignTarget exp (ForeignTarget e c) = ForeignTarget (exp e) c
mapForeignTarget _   m@(PrimTarget _)    = m

wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr
-- Take a transformer on expressions and apply it recursively.
-- (wrapRecExp f e) first recursively applies itself to sub-expressions of e
-- then uses f to rewrite the resulting expression
wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map (wrapRecExp f) es)
wrapRecExp f (CmmLoad addr ty) = f (CmmLoad (wrapRecExp f addr) ty)
wrapRecExp f e                 = f e

mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExp _ f@(CmmEntry{})                   = f
mapExp _ m@(CmmComment _)                 = m
mapExp _ m@(CmmTick _)                    = m
mapExp f (CmmUnwind r e)                  = CmmUnwind r (f e)
mapExp f (CmmAssign r e)                  = CmmAssign r (f e)
mapExp f (CmmStore addr e)                = CmmStore (f addr) (f e)
mapExp f (CmmUnsafeForeignCall tgt fs as) = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as)
mapExp _ l@(CmmBranch _)                  = l
mapExp f (CmmCondBranch e ti fi l)        = CmmCondBranch (f e) ti fi l
mapExp f (CmmSwitch e ids)                = CmmSwitch (f e) ids
mapExp f n@CmmCall {cml_target=tgt}       = n{cml_target = f tgt}
mapExp f (CmmForeignCall tgt fs as succ ret_args updfr intrbl) = CmmForeignCall (mapForeignTarget f tgt) fs (map f as) succ ret_args updfr intrbl

mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExpDeep f = mapExp $ wrapRecExp f
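
-- A minimal sketch (illustration only, not part of this module): to
-- rewrite every integer literal occurring anywhere inside a node,
-- including under CmmMachOp and CmmLoad, use mapExpDeep; with plain
-- mapExp the rewrite would only see each node's top-level expressions:
--
--   bumpLits :: CmmNode e x -> CmmNode e x
--   bumpLits = mapExpDeep bump
--     where bump (CmmLit (CmmInt i w)) = CmmLit (CmmInt (i + 1) w)
--           bump e                     = e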

------------------------------------------------------------------------
-- mapping Expr in CmmNode, but not performing allocation if no changes

mapForeignTargetM :: (CmmExpr -> Maybe CmmExpr) -> ForeignTarget -> Maybe ForeignTarget
mapForeignTargetM f (ForeignTarget e c) = (\x -> ForeignTarget x c) `fmap` f e
mapForeignTargetM _ (PrimTarget _)      = Nothing

wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr)
-- (wrapRecExpM f e) first recursively applies itself to sub-expressions of e
-- then gives f a chance to rewrite the resulting expression
wrapRecExpM f n@(CmmMachOp op es) = maybe (f n) (f . CmmMachOp op)    (mapListM (wrapRecExpM f) es)
wrapRecExpM f n@(CmmLoad addr ty) = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr)
wrapRecExpM f e                   = f e

mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
mapExpM _ (CmmEntry{})              = Nothing
mapExpM _ (CmmComment _)            = Nothing
mapExpM _ (CmmTick _)               = Nothing
mapExpM f (CmmUnwind r e)           = CmmUnwind r `fmap` f e
mapExpM f (CmmAssign r e)           = CmmAssign r `fmap` f e
mapExpM f (CmmStore addr e)         = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e]
mapExpM _ (CmmBranch _)             = Nothing
mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e
mapExpM f (CmmSwitch e tbl)         = (\x -> CmmSwitch x tbl)         `fmap` f e
mapExpM f (CmmCall tgt mb_id r o i s) = (\x -> CmmCall x mb_id r o i s) `fmap` f tgt
mapExpM f (CmmUnsafeForeignCall tgt fs as)
    = case mapForeignTargetM f tgt of
        Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as))
        Nothing   -> (\xs -> CmmUnsafeForeignCall tgt fs xs) `fmap` mapListM f as
mapExpM f (CmmForeignCall tgt fs as succ ret_args updfr intrbl)
    = case mapForeignTargetM f tgt of
        Just tgt' -> Just (CmmForeignCall tgt' fs (mapListJ f as) succ ret_args updfr intrbl)
        Nothing   -> (\xs -> CmmForeignCall tgt fs xs succ ret_args updfr intrbl) `fmap` mapListM f as

-- share as much as possible
mapListM :: (a -> Maybe a) -> [a] -> Maybe [a]
mapListM f xs = let (b, r) = mapListT f xs
                in if b then Just r else Nothing

mapListJ :: (a -> Maybe a) -> [a] -> [a]
mapListJ f xs = snd (mapListT f xs)

mapListT :: (a -> Maybe a) -> [a] -> (Bool, [a])
mapListT f xs = foldr g (False, []) (zip3 (tails xs) xs (map f xs))
    where g (_,   y, Nothing) (True, ys)  = (True, y:ys)
          g (_,   _, Just y)  (True, ys)  = (True, y:ys)
          g (ys', _, Nothing) (False, _)  = (False, ys')
          g (_,   _, Just y)  (False, ys) = (True, y:ys)
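
-- For example (a sketch, not part of this module): if 'f' rewrites only
-- the second element of a three-element list, then
--
--   mapListM f [a, b, c]  ==  Just [a, b', c]
--
-- where the unchanged suffix [c] is shared with the input list (that is
-- what the 'tails' bookkeeping above buys us), and
--
--   mapListM f xs == Nothing
--
-- whenever f returns Nothing for every element, so callers can keep the
-- original 'xs' without allocating a copy.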

mapExpDeepM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
mapExpDeepM f = mapExpM $ wrapRecExpM f

-----------------------------------
-- folding Expr in CmmNode

foldExpForeignTarget :: (CmmExpr -> z -> z) -> ForeignTarget -> z -> z
foldExpForeignTarget exp (ForeignTarget e _) z = exp e z
foldExpForeignTarget _   (PrimTarget _)      z = z

-- Take a folder on expressions and apply it recursively.
-- Specifically (wrapRecExpf f e z) deals with CmmMachOp and CmmLoad
-- itself, delegating all the other CmmExpr forms to 'f'.
wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z
wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es
wrapRecExpf f e@(CmmLoad addr _) z = wrapRecExpf f addr (f e z)
wrapRecExpf f e                  z = f e z

foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
foldExp _ (CmmEntry {}) z                         = z
foldExp _ (CmmComment {}) z                       = z
foldExp _ (CmmTick {}) z                          = z
foldExp f (CmmUnwind _ e) z                       = f e z
foldExp f (CmmAssign _ e) z                       = f e z
foldExp f (CmmStore addr e) z                     = f addr $ f e z
foldExp f (CmmUnsafeForeignCall t _ as) z         = foldr f (foldExpForeignTarget f t z) as
foldExp _ (CmmBranch _) z                         = z
foldExp f (CmmCondBranch e _ _ _) z               = f e z
foldExp f (CmmSwitch e _) z                       = f e z
foldExp f (CmmCall {cml_target=tgt}) z            = f tgt z
foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args

foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
foldExpDeep f = foldExp (wrapRecExpf f)
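
-- A minimal sketch (illustration only, not part of this module):
-- collect every literal mentioned anywhere in a node, including inside
-- sub-expressions, by folding with wrapRecExpf via foldExpDeep:
--
--   nodeLits :: CmmNode e x -> [CmmLit]
--   nodeLits n = foldExpDeep addLit n []
--     where addLit (CmmLit l) ls = l : ls
--           addLit _          ls = ls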

-- -----------------------------------------------------------------------------

mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C
mapSuccessors f (CmmBranch bid)         = CmmBranch (f bid)
mapSuccessors f (CmmCondBranch p y n l) = CmmCondBranch p (f y) (f n) l
mapSuccessors f (CmmSwitch e ids)       = CmmSwitch e (mapSwitchTargets f ids)
mapSuccessors _ n = n
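
-- A minimal sketch (illustration only, not part of this module):
-- redirect every outgoing edge that points at 'from' so that it points
-- at 'to' instead:
--
--   redirect :: Label -> Label -> CmmNode O C -> CmmNode O C
--   redirect from to = mapSuccessors (\l -> if l == from then to else l)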

-- -----------------------------------------------------------------------------

-- | Tickish in Cmm context (annotations only)
type CmmTickish = Tickish ()

-- | Tick scope identifier, allowing us to reason about what
-- annotations in a Cmm block should scope over. We especially take
-- care to allow optimisations to reorganise blocks without losing
-- tick association in the process.
data CmmTickScope
  = GlobalScope
    -- ^ The global scope is the "root" of the scope graph. Every
    -- scope is a sub-scope of the global scope. It doesn't make sense
    -- to add ticks to this scope. On the other hand, this means that
    -- setting this scope on a block means no ticks apply to it.

  | SubScope !U.Unique CmmTickScope
    -- ^ Constructs a new sub-scope to an existing scope. This allows
    -- us to translate Core-style scoping rules (see @tickishScoped@)
    -- into the Cmm world. Consider the following code:
    --
    --   tick<1> case ... of
    --             A -> tick<2> ...
    --             B -> tick<3> ...
    --
    -- We want the top-level tick annotation to apply to blocks
    -- generated for the A and B alternatives. We can achieve that by
    -- generating tick<1> into a block with scope a, while the code
    -- for alternatives A and B gets generated into sub-scopes a/b and
    -- a/c respectively.

  | CombinedScope CmmTickScope CmmTickScope
    -- ^ A combined scope scopes over everything that the two given
    -- scopes cover. It is therefore a sub-scope of either scope. This
    -- is required for optimisations. Consider common block elimination:
    --
    --   A -> tick<2> case ... of
    --                  C -> [common]
    --   B -> tick<3> case ... of
    --                  D -> [common]
    --
    -- We will generate code for the C and D alternatives, and figure
    -- out afterwards that it's actually common code. Scoping rules
    -- dictate that the resulting common block needs to be covered by
    -- both tick<2> and tick<3>, therefore we need to construct a
    -- scope that is a child of *both* scopes. Now we can do that: if
    -- we assign the scopes a/c and b/d to the common-ed up blocks,
    -- the new block could have a combined tick scope a/c+b/d, which
    -- both tick<2> and tick<3> apply to.

-- Note [CmmTick scoping details]:
--
-- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the
-- same block. Note that as a result of this, optimisations making
-- tick scopes more specific can *reduce* the amount of code a tick
-- scopes over. Fixing this would require a separate @CmmTickScope@
-- field for @CmmTick@. Right now we do not do this simply because I
-- couldn't find an example where it actually mattered -- multiple
-- blocks within the same scope generally jump to each other, which
-- prevents common block elimination from happening in the first
-- place. But this is not a strong reason, so if Cmm optimisations
-- become more involved in future this might have to be revisited.

-- | Output all scope paths.
scopeToPaths :: CmmTickScope -> [[U.Unique]]
scopeToPaths GlobalScope           = [[]]
scopeToPaths (SubScope u s)        = map (u:) (scopeToPaths s)
scopeToPaths (CombinedScope s1 s2) = scopeToPaths s1 ++ scopeToPaths s2
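
-- For example (with made-up uniques a, b, c, d; illustration only):
--
--   scopeToPaths (CombinedScope (SubScope c (SubScope a GlobalScope))
--                               (SubScope d (SubScope b GlobalScope)))
--     == [[c,a], [d,b]]
--
-- which the Outputable instance below renders (after reversing each
-- path) as (a/c+b/d).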

-- | Returns the head uniques of the scopes. This is based on the
-- assumption that the @Unique@ of @SubScope@ identifies the
-- underlying super-scope. Used for efficient equality and comparison,
-- see below.
scopeUniques :: CmmTickScope -> [U.Unique]
scopeUniques GlobalScope           = []
scopeUniques (SubScope u _)        = [u]
scopeUniques (CombinedScope s1 s2) = scopeUniques s1 ++ scopeUniques s2

-- Equality and order are based on the head uniques defined above. We
-- take care to short-cut the (extremely) common cases.
instance Eq CmmTickScope where
  GlobalScope    == GlobalScope     = True
  GlobalScope    == _               = False
  _              == GlobalScope     = False
  (SubScope u _) == (SubScope u' _) = u == u'
  (SubScope _ _) == _               = False
  _              == (SubScope _ _)  = False
  scope          == scope'          =
    sortBy nonDetCmpUnique (scopeUniques scope) ==
    sortBy nonDetCmpUnique (scopeUniques scope')
    -- This is still deterministic because
    -- the order is the same for equal lists

-- This is non-deterministic but we do not currently support deterministic
-- code-generation. See Note [Unique Determinism and code generation]
-- See Note [No Ord for Unique]
instance Ord CmmTickScope where
  compare GlobalScope GlobalScope = EQ
  compare GlobalScope _ = LT
  compare _ GlobalScope = GT
  compare (SubScope u _) (SubScope u' _) = nonDetCmpUnique u u'
  compare scope scope' = cmpList nonDetCmpUnique
     (sortBy nonDetCmpUnique $ scopeUniques scope)
     (sortBy nonDetCmpUnique $ scopeUniques scope')

instance Outputable CmmTickScope where
  ppr GlobalScope     = text "global"
  ppr (SubScope us GlobalScope)
                      = ppr us
  ppr (SubScope us s) = ppr s <> char '/' <> ppr us
  ppr combined        = parens $ hcat $ punctuate (char '+') $
                        map (hcat . punctuate (char '/') . map ppr . reverse) $
                        scopeToPaths combined

-- | Checks whether the first tick scope is a sub-scope of the second;
-- also true if the two scopes are equal.
isTickSubScope :: CmmTickScope -> CmmTickScope -> Bool
isTickSubScope = cmp
  where cmp _ GlobalScope                     = True
        cmp GlobalScope _                     = False
        cmp (CombinedScope s1 s2) s'          = cmp s1 s' && cmp s2 s'
        cmp s (CombinedScope s1' s2')         = cmp s s1' || cmp s s2'
        cmp (SubScope u s) s'@(SubScope u' _) = u == u' || cmp s s'

-- | Combine two tick scopes. The new scope should be a sub-scope of
-- both parameters. We simplify automatically if one tick scope is
-- already a sub-scope of the other.
combineTickScopes :: CmmTickScope -> CmmTickScope -> CmmTickScope
combineTickScopes s1 s2
  | s1 `isTickSubScope` s2 = s1
  | s2 `isTickSubScope` s1 = s2
  | otherwise              = CombinedScope s1 s2
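
-- For example (with made-up uniques a and b; illustration only):
--
--   combineTickScopes (SubScope a GlobalScope)
--                     (SubScope b (SubScope a GlobalScope))
--     == SubScope b (SubScope a GlobalScope)
--
-- because the second scope is already a sub-scope of the first; only
-- two unrelated scopes give rise to a CombinedScope.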