Add new mbmi and mbmi2 compiler flags
[ghc.git] / compiler / cmm / CmmNode.hs
1 {-# LANGUAGE BangPatterns #-}
2 {-# LANGUAGE CPP #-}
3 {-# LANGUAGE ExplicitForAll #-}
4 {-# LANGUAGE FlexibleContexts #-}
5 {-# LANGUAGE FlexibleInstances #-}
6 {-# LANGUAGE GADTs #-}
7 {-# LANGUAGE MultiParamTypeClasses #-}
8 {-# LANGUAGE StandaloneDeriving #-}
9 {-# LANGUAGE UndecidableInstances #-}
10
11 -- CmmNode type for representation using Hoopl graphs.
12
13 module CmmNode (
14 CmmNode(..), CmmFormal, CmmActual, CmmTickish,
15 UpdFrameOffset, Convention(..),
16 ForeignConvention(..), ForeignTarget(..), foreignTargetHints,
17 CmmReturnInfo(..),
18 mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf,
19 mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors,
20
21 -- * Tick scopes
22 CmmTickScope(..), isTickSubScope, combineTickScopes,
23 ) where
24
25 import GhcPrelude hiding (succ)
26
27 import CodeGen.Platform
28 import CmmExpr
29 import CmmSwitch
30 import DynFlags
31 import FastString
32 import ForeignCall
33 import Outputable
34 import SMRep
35 import CoreSyn (Tickish)
36 import qualified Unique as U
37
38 import Hoopl.Block
39 import Hoopl.Graph
40 import Hoopl.Label
41 import Data.Maybe
42 import Data.List (tails,sortBy)
43 import Unique (nonDetCmpUnique)
44 import Util
45
46
47 ------------------------
48 -- CmmNode
49
#define ULabel {-# UNPACK #-} !Label

-- | A node in a Cmm control-flow graph, in the Hoopl representation.
-- The type indices @e@ and @x@ give the node's shape: 'C'losed or
-- 'O'pen on entry and exit respectively.  Entry nodes are @C O@,
-- middle nodes @O O@, and block-terminating nodes @O C@.
data CmmNode e x where
  -- Block entry point: the block's label together with its tick scope.
  CmmEntry :: ULabel -> CmmTickScope -> CmmNode C O

  -- Debugging/pretty-printing annotation; no runtime effect.
  CmmComment :: FastString -> CmmNode O O

  -- Tick annotation, covering Cmm code in our tick scope. We only
  -- expect non-code @Tickish@ at this point (e.g. @SourceNote@).
  -- See Note [CmmTick scoping details]
  CmmTick :: !CmmTickish -> CmmNode O O

  -- Unwind pseudo-instruction, encoding stack unwinding
  -- instructions for a debugger. This describes how to reconstruct
  -- the "old" value of a register if we want to navigate the stack
  -- up one frame. Having unwind information for @Sp@ will allow the
  -- debugger to "walk" the stack.
  --
  -- See Note [What is this unwinding business?] in Debug
  CmmUnwind :: [(GlobalReg, Maybe CmmExpr)] -> CmmNode O O

  CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O
    -- Assign to register

  CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O
    -- Assign to memory location.  Size is
    -- given by cmmExprType of the rhs.

  CmmUnsafeForeignCall ::       -- An unsafe foreign call;
                                -- see Note [Foreign calls]
                                -- Like a "fat machine instruction"; can occur
                                -- in the middle of a block
      ForeignTarget ->          -- call target
      [CmmFormal] ->            -- zero or more results
      [CmmActual] ->            -- zero or more arguments
      CmmNode O O
      -- Semantics: clobbers any GlobalRegs for which callerSaves r == True
      -- See Note [Unsafe foreign calls clobber caller-save registers]
      --
      -- Invariant: the arguments and the ForeignTarget must not
      -- mention any registers for which CodeGen.Platform.callerSaves
      -- is True. See Note [Register Parameter Passing].

  CmmBranch :: ULabel -> CmmNode O C
    -- Goto another block in the same procedure

  CmmCondBranch :: {            -- conditional branch
      cml_pred :: CmmExpr,
      cml_true, cml_false :: ULabel,
      cml_likely :: Maybe Bool  -- likely result of the conditional,
                                -- if known
  } -> CmmNode O C

  CmmSwitch
    :: CmmExpr       -- Scrutinee, of some integral type
    -> SwitchTargets -- Cases. See [Note SwitchTargets]
    -> CmmNode O C

  CmmCall :: {                -- A native call or tail call
      cml_target :: CmmExpr,  -- never a CmmPrim to a CallishMachOp!

      cml_cont :: Maybe Label,
          -- Label of continuation (Nothing for return or tail call)
          --
          -- Note [Continuation BlockId]: these BlockIds are called
          -- Continuation BlockIds, and are the only BlockIds that can
          -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
          -- (CmmStackSlot (Young b) _).

      cml_args_regs :: [GlobalReg],
          -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed
          -- to the call.  This is essential information for the
          -- native code generator's register allocator; without
          -- knowing which GlobalRegs are live it has to assume that
          -- they are all live.  This list should only include
          -- GlobalRegs that are mapped to real machine registers on
          -- the target platform.

      cml_args :: ByteOff,
          -- Byte offset, from the *old* end of the Area associated with
          -- the Label (if cml_cont = Nothing, then Old area), of
          -- youngest outgoing arg.  Set the stack pointer to this before
          -- transferring control.
          -- (NB: an update frame might also have been stored in the Old
          --      area, but it'll be in an older part than the args.)

      cml_ret_args :: ByteOff,
          -- For calls *only*, the byte offset for youngest returned value
          -- This is really needed at the *return* point rather than here
          -- at the call, but in practice it's convenient to record it here.

      cml_ret_off :: ByteOff
          -- For calls *only*, the byte offset of the base of the frame that
          -- must be described by the info table for the return point.
          -- The older words are an update frame, which has its own
          -- info-table and layout information

          -- From a liveness point of view, the stack words older than
          -- cml_ret_off are treated as live, even if the sequel of
          -- the call goes into a loop.
  } -> CmmNode O C

  CmmForeignCall :: {           -- A safe foreign call; see Note [Foreign calls]
                                -- Always the last node of a block
      tgt   :: ForeignTarget,   -- call target and convention
      res   :: [CmmFormal],     -- zero or more results
      args  :: [CmmActual],     -- zero or more arguments; see Note [Register parameter passing]
      succ  :: ULabel,          -- Label of continuation
      ret_args :: ByteOff,      -- same as cml_ret_args
      ret_off :: ByteOff,       -- same as cml_ret_off
      intrbl:: Bool             -- whether or not the call is interruptible
  } -> CmmNode O C
162
163 {- Note [Foreign calls]
164 ~~~~~~~~~~~~~~~~~~~~~~~
165 A CmmUnsafeForeignCall is used for *unsafe* foreign calls;
166 a CmmForeignCall call is used for *safe* foreign calls.
167
168 Unsafe ones are mostly easy: think of them as a "fat machine
169 instruction". In particular, they do *not* kill all live registers,
170 just the registers they return to (there was a bit of code in GHC that
171 conservatively assumed otherwise.) However, see [Register parameter passing].
172
173 Safe ones are trickier. A safe foreign call
174 r = f(x)
175 ultimately expands to
176 push "return address" -- Never used to return to;
                                     -- just points to an info table
178 save registers into TSO
179 call suspendThread
180 r = f(x) -- Make the call
181 call resumeThread
182 restore registers
183 pop "return address"
184 We cannot "lower" a safe foreign call to this sequence of Cmms, because
185 after we've saved Sp all the Cmm optimiser's assumptions are broken.
186
187 Note that a safe foreign call needs an info table.
188
189 So Safe Foreign Calls must remain as last nodes until the stack is
190 made manifest in CmmLayoutStack, where they are lowered into the above
191 sequence.
192 -}
193
194 {- Note [Unsafe foreign calls clobber caller-save registers]
195 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196
197 A foreign call is defined to clobber any GlobalRegs that are mapped to
198 caller-saves machine registers (according to the prevailing C ABI).
199 StgCmmUtils.callerSaves tells you which GlobalRegs are caller-saves.
200
201 This is a design choice that makes it easier to generate code later.
202 We could instead choose to say that foreign calls do *not* clobber
203 caller-saves regs, but then we would have to figure out which regs
204 were live across the call later and insert some saves/restores.
205
206 Furthermore when we generate code we never have any GlobalRegs live
207 across a call, because they are always copied-in to LocalRegs and
208 copied-out again before making a call/jump. So all we have to do is
209 avoid any code motion that would make a caller-saves GlobalReg live
210 across a foreign call during subsequent optimisations.
211 -}
212
213 {- Note [Register parameter passing]
214 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
215 On certain architectures, some registers are utilized for parameter
216 passing in the C calling convention. For example, in x86-64 Linux
217 convention, rdi, rsi, rdx and rcx (as well as r8 and r9) may be used for
218 argument passing. These are registers R3-R6, which our generated
219 code may also be using; as a result, it's necessary to save these
220 values before doing a foreign call. This is done during initial
221 code generation in callerSaveVolatileRegs in StgCmmUtils.hs. However,
222 one result of doing this is that the contents of these registers
223 may mysteriously change if referenced inside the arguments. This
224 is dangerous, so you'll need to disable inlining much in the same
way as is done in cmm/CmmOpt.hs currently. We should fix this!
226 -}
227
---------------------------------------------
-- Eq instance of CmmNode

-- Structural equality; needs StandaloneDeriving because CmmNode is a GADT.
deriving instance Eq (CmmNode e x)
232
----------------------------------------------
-- Hoopl instances of CmmNode

instance NonLocal CmmNode where
  -- Total despite the single clause: CmmEntry is the only
  -- constructor whose shape is (C O).
  entryLabel (CmmEntry l _) = l

  -- Outgoing control-flow edges of each block-closing node.
  successors (CmmBranch l) = [l]
  successors (CmmCondBranch {cml_true=t, cml_false=f}) = [f, t] -- meets layout constraint
  successors (CmmSwitch _ ids) = switchTargetsToList ids
  successors (CmmCall {cml_cont=l}) = maybeToList l
  successors (CmmForeignCall {succ=l}) = [l]
244
245
--------------------------------------------------
-- Various helper types

-- | An actual argument of a call: an arbitrary Cmm expression.
type CmmActual = CmmExpr
-- | A formal result of a call: the local register it binds.
type CmmFormal = LocalReg

-- | Byte offset of an update frame (see uses of cml_ret_off above).
type UpdFrameOffset = ByteOff
253
-- | A convention maps a list of values (function arguments or return
-- values) to registers or stack locations.
data Convention
  = NativeDirectCall
       -- ^ top-level Haskell functions use @NativeDirectCall@, which
       -- maps arguments to registers starting with R2, according to
       -- how many registers are available on the platform.  This
       -- convention ignores R1, because for a top-level function call
       -- the function closure is implicit, and doesn't need to be passed.
  | NativeNodeCall
       -- ^ non-top-level Haskell functions, which pass the address of
       -- the function closure in R1 (regardless of whether R1 is a
       -- real register or not), and the rest of the arguments in
       -- registers or on the stack.
  | NativeReturn
       -- ^ a native return.  The convention for returns depends on
       -- how many values are returned: for just one value returned,
       -- the appropriate register is used (R1, F1, etc.), regardless
       -- of whether it is a real register or not.  For multiple
       -- values returned, they are mapped to registers or the stack.
  | Slow
       -- ^ Slow entry points: all args pushed on the stack
  | GC
       -- ^ Entry to the garbage collector: uses the node reg!
       -- (TODO: I don't think we need this --SDM)
  deriving( Eq )
280
-- | The calling convention of a foreign call, plus hint information
-- for its arguments and results.
data ForeignConvention
  = ForeignConvention
        CCallConv               -- Which foreign-call convention
        [ForeignHint]           -- Extra info about the args
        [ForeignHint]           -- Extra info about the result
        CmmReturnInfo
  deriving Eq
288
-- | Whether a foreign call can return to its call site.
data CmmReturnInfo
  = CmmMayReturn
  | CmmNeverReturns   -- ^ control never comes back (see foreignTargetRegs below)
  deriving ( Eq )
293
data ForeignTarget        -- The target of a foreign call
  = ForeignTarget         -- A foreign procedure
        CmmExpr                  -- Its address
        ForeignConvention        -- Its calling convention
  | PrimTarget            -- A possibly-side-effecting machine operation
        CallishMachOp            -- Which one
  deriving Eq
301
-- | The hints of a foreign-call target, returned as
-- (result hints, argument hints).  Each list is padded with an
-- infinite tail of 'NoHint' so callers may zip it against any
-- number of results/arguments.
foreignTargetHints :: ForeignTarget -> ([ForeignHint], [ForeignHint])
foreignTargetHints target = (res_hints ++ repeat NoHint, arg_hints ++ repeat NoHint)
  where
    (res_hints, arg_hints) =
      case target of
        PrimTarget op -> callishMachOpHints op
        -- ForeignConvention stores argument hints first, result
        -- hints second; flip into (results, arguments) order here.
        ForeignTarget _ (ForeignConvention _ arg_hs res_hs _) -> (res_hs, arg_hs)
312
313 --------------------------------------------------
314 -- Instances of register and slot users / definers
315
-- Local registers *read* by a node.  Only expressions stored in the
-- node are folded over; formal results are definitions, not uses
-- (see the DefinerOfRegs instances below).
instance UserOfRegs LocalReg (CmmNode e x) where
  foldRegsUsed dflags f !z n = case n of
    CmmAssign _ expr -> fold f z expr
    CmmStore addr rval -> fold f (fold f z addr) rval
    CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
    CmmCondBranch expr _ _ _ -> fold f z expr
    CmmSwitch expr _ -> fold f z expr
    CmmCall {cml_target=tgt} -> fold f z tgt
    CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
    _ -> z
    -- 'fold' merely pins the register type to LocalReg so the
    -- overloaded foldRegsUsed resolves uniformly in every branch.
    where fold :: forall a b. UserOfRegs LocalReg a
               => (b -> LocalReg -> b) -> b -> a -> b
          fold f z n = foldRegsUsed dflags f z n
329
-- Global (STG) registers *read* by a node.  Same shape as the
-- LocalReg instance above, except that CmmCall additionally reads
-- its argument registers (cml_args_regs).
instance UserOfRegs GlobalReg (CmmNode e x) where
  foldRegsUsed dflags f !z n = case n of
    CmmAssign _ expr -> fold f z expr
    CmmStore addr rval -> fold f (fold f z addr) rval
    CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
    CmmCondBranch expr _ _ _ -> fold f z expr
    CmmSwitch expr _ -> fold f z expr
    CmmCall {cml_target=tgt, cml_args_regs=args} -> fold f (fold f z args) tgt
    CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
    _ -> z
    -- 'fold' pins the register type to GlobalReg (cf. instance above).
    where fold :: forall a b. UserOfRegs GlobalReg a
               => (b -> GlobalReg -> b) -> b -> a -> b
          fold f z n = foldRegsUsed dflags f z n
343
instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r ForeignTarget where
  -- The (Ord r) in the context is necessary here
  -- See Note [Recursive superclasses] in TcInstDcls
  --
  -- A PrimTarget names a machine operation and holds no expressions,
  -- so it uses no registers.
  foldRegsUsed _ _ !z (PrimTarget _) = z
  foldRegsUsed dflags f !z (ForeignTarget e _) = foldRegsUsed dflags f z e
349
-- Local registers *written* by a node: assignment targets and the
-- formal results of foreign calls.
instance DefinerOfRegs LocalReg (CmmNode e x) where
  foldRegsDefd dflags f !z n = case n of
    CmmAssign lhs _ -> fold f z lhs
    CmmUnsafeForeignCall _ fs _ -> fold f z fs
    CmmForeignCall {res=res} -> fold f z res
    _ -> z
    -- 'fold' pins the register type to LocalReg (cf. UserOfRegs above).
    where fold :: forall a b. DefinerOfRegs LocalReg a
               => (b -> LocalReg -> b) -> b -> a -> b
          fold f z n = foldRegsDefd dflags f z n
359
-- Global (STG) registers *clobbered* by a node.  An assignment
-- defines its target; an unsafe foreign call clobbers the active
-- caller-saves registers (none, if it never returns); native calls
-- and safe foreign calls clobber *all* active STG registers.
instance DefinerOfRegs GlobalReg (CmmNode e x) where
  foldRegsDefd dflags f !z n = case n of
    CmmAssign lhs _ -> fold f z lhs
    CmmUnsafeForeignCall tgt _ _ -> fold f z (foreignTargetRegs tgt)
    CmmCall {} -> fold f z activeRegs
    CmmForeignCall {} -> fold f z activeRegs
      -- See Note [Safe foreign calls clobber STG registers]
    _ -> z
    -- 'fold' pins the register type to GlobalReg (cf. instances above).
    where fold :: forall a b. DefinerOfRegs GlobalReg a
               => (b -> GlobalReg -> b) -> b -> a -> b
          fold f z n = foldRegsDefd dflags f z n

          platform = targetPlatform dflags
          -- STG registers mapped to real machine registers on this platform.
          activeRegs = activeStgRegs platform
          activeCallerSavesRegs = filter (callerSaves platform) activeRegs

          -- A call that never returns clobbers nothing observable.
          foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = []
          foreignTargetRegs _ = activeCallerSavesRegs
378
379 -- Note [Safe foreign calls clobber STG registers]
380 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
381 --
382 -- During stack layout phase every safe foreign call is expanded into a block
383 -- that contains unsafe foreign call (instead of safe foreign call) and ends
384 -- with a normal call (See Note [Foreign calls]). This means that we must
385 -- treat safe foreign call as if it was a normal call (because eventually it
386 -- will be). This is important if we try to run sinking pass before stack
387 -- layout phase. Consider this example of what might go wrong (this is cmm
388 -- code from stablename001 test). Here is code after common block elimination
389 -- (before stack layout):
390 --
391 -- c1q6:
392 -- _s1pf::P64 = R1;
393 -- _c1q8::I64 = performMajorGC;
394 -- I64[(young<c1q9> + 8)] = c1q9;
395 -- foreign call "ccall" arg hints: [] result hints: [] (_c1q8::I64)(...)
396 -- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
397 -- c1q9:
398 -- I64[(young<c1qb> + 8)] = c1qb;
399 -- R1 = _s1pc::P64;
400 -- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
401 --
402 -- If we run sinking pass now (still before stack layout) we will get this:
403 --
404 -- c1q6:
405 -- I64[(young<c1q9> + 8)] = c1q9;
406 -- foreign call "ccall" arg hints: [] result hints: [] performMajorGC(...)
407 -- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
408 -- c1q9:
409 -- I64[(young<c1qb> + 8)] = c1qb;
410 -- _s1pf::P64 = R1; <------ _s1pf sunk past safe foreign call
411 -- R1 = _s1pc::P64;
412 -- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
413 --
414 -- Notice that _s1pf was sunk past a foreign call. When we run stack layout
415 -- safe call to performMajorGC will be turned into:
416 --
417 -- c1q6:
418 -- _s1pc::P64 = P64[Sp + 8];
419 -- I64[Sp - 8] = c1q9;
420 -- Sp = Sp - 8;
421 -- I64[I64[CurrentTSO + 24] + 16] = Sp;
422 -- P64[CurrentNursery + 8] = Hp + 8;
423 -- (_u1qI::I64) = call "ccall" arg hints: [PtrHint,]
424 -- result hints: [PtrHint] suspendThread(BaseReg, 0);
425 -- call "ccall" arg hints: [] result hints: [] performMajorGC();
426 -- (_u1qJ::I64) = call "ccall" arg hints: [PtrHint]
427 -- result hints: [PtrHint] resumeThread(_u1qI::I64);
428 -- BaseReg = _u1qJ::I64;
429 -- _u1qK::P64 = CurrentTSO;
430 -- _u1qL::P64 = I64[_u1qK::P64 + 24];
431 -- Sp = I64[_u1qL::P64 + 16];
432 -- SpLim = _u1qL::P64 + 192;
433 -- HpAlloc = 0;
434 -- Hp = I64[CurrentNursery + 8] - 8;
435 -- HpLim = I64[CurrentNursery] + (%MO_SS_Conv_W32_W64(I32[CurrentNursery + 48]) * 4096 - 1);
436 -- call (I64[Sp])() returns to c1q9, args: 8, res: 8, upd: 8;
437 -- c1q9:
438 -- I64[(young<c1qb> + 8)] = c1qb;
439 -- _s1pf::P64 = R1; <------ INCORRECT!
440 -- R1 = _s1pc::P64;
441 -- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
442 --
443 -- Notice that c1q6 now ends with a call. Sinking _s1pf::P64 = R1 past that
444 -- call is clearly incorrect. This is what would happen if we assumed that
445 -- safe foreign call has the same semantics as unsafe foreign call. To prevent
-- this we need to treat a safe foreign call as if it was a normal call.
447
448 -----------------------------------
449 -- mapping Expr in CmmNode
450
-- | Apply a transformation to the address expression of a foreign-call
-- target.  'PrimTarget's carry no expression and are returned unchanged.
mapForeignTarget :: (CmmExpr -> CmmExpr) -> ForeignTarget -> ForeignTarget
mapForeignTarget f tgt =
  case tgt of
    ForeignTarget addr conv -> ForeignTarget (f addr) conv
    PrimTarget {}           -> tgt
454
-- | Lift an expression transformer so it is applied bottom-up:
-- sub-expressions of 'CmmMachOp' and 'CmmLoad' are rewritten first,
-- then @f@ rewrites the reassembled expression (and every leaf).
wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr
wrapRecExp f = go
  where
    go (CmmMachOp op es) = f (CmmMachOp op (map go es))
    go (CmmLoad addr ty) = f (CmmLoad (go addr) ty)
    go e                 = f e
462
-- | Apply an expression transformer to every top-level expression
-- stored in a node.  Sub-expressions are NOT traversed; use
-- 'mapExpDeep' for a recursive rewrite.
mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExp _ f@(CmmEntry{})                          = f
mapExp _ m@(CmmComment _)                        = m
mapExp _ m@(CmmTick _)                           = m
-- The nested fmaps reach through the pair and the Maybe to each expr.
mapExp f (CmmUnwind regs)                        = CmmUnwind (map (fmap (fmap f)) regs)
mapExp f (CmmAssign r e)                         = CmmAssign r (f e)
mapExp f (CmmStore addr e)                       = CmmStore (f addr) (f e)
mapExp f (CmmUnsafeForeignCall tgt fs as)        = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as)
mapExp _ l@(CmmBranch _)                         = l
mapExp f (CmmCondBranch e ti fi l)               = CmmCondBranch (f e) ti fi l
mapExp f (CmmSwitch e ids)                       = CmmSwitch (f e) ids
-- Only the call target is an expression; cml_cont is a Label.
mapExp f n@CmmCall {cml_target=tgt}              = n{cml_target = f tgt}
mapExp f (CmmForeignCall tgt fs as succ ret_args updfr intrbl) = CmmForeignCall (mapForeignTarget f tgt) fs (map f as) succ ret_args updfr intrbl
476
-- | Like 'mapExp', but the transformer is applied recursively to
-- sub-expressions as well (via 'wrapRecExp').
mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExpDeep f node = mapExp (wrapRecExp f) node
479
480 ------------------------------------------------------------------------
481 -- mapping Expr in CmmNode, but not performing allocation if no changes
482
-- | Maybe-returning variant of 'mapForeignTarget': 'Nothing' means
-- "unchanged", letting callers share the original value.
mapForeignTargetM :: (CmmExpr -> Maybe CmmExpr) -> ForeignTarget -> Maybe ForeignTarget
mapForeignTargetM f tgt =
  case tgt of
    ForeignTarget addr conv -> do addr' <- f addr
                                  Just (ForeignTarget addr' conv)
    PrimTarget {}           -> Nothing
486
wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr)
-- (wrapRecExpM f e) first recursively applies itself to sub-expressions of e
-- then gives f a chance to rewrite the resulting expression.
-- Nothing means "unchanged", allowing the caller to share the input.
--
-- NOTE(review): when a sub-expression IS rewritten (mapListM returns
-- Just) but @f@ returns Nothing on the rebuilt node, the whole result
-- is Nothing and the sub-expression rewrite is dropped — confirm that
-- every @f@ passed in accounts for this.
wrapRecExpM f n@(CmmMachOp op es)  = maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es)
wrapRecExpM f n@(CmmLoad addr ty)  = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr)
wrapRecExpM f e                    = f e
493
-- | Like 'mapExp', but returns 'Nothing' when nothing changed, so
-- callers can keep sharing the original node instead of allocating
-- an identical copy.
mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
mapExpM _ (CmmEntry{})              = Nothing
mapExpM _ (CmmComment _)            = Nothing
mapExpM _ (CmmTick _)               = Nothing
-- NOTE(review): the outer mapM over the register list fails (yields
-- Nothing) as soon as any single entry's expression is unchanged, so
-- this rebuilds only when *every* expression is rewritten — confirm
-- this all-or-nothing behaviour is intended.
mapExpM f (CmmUnwind regs)          = CmmUnwind `fmap` mapM (\(r,e) -> mapM f e >>= \e' -> pure (r,e')) regs
mapExpM f (CmmAssign r e)           = CmmAssign r `fmap` f e
-- The irrefutable 2-element pattern is safe: mapListM preserves length.
mapExpM f (CmmStore addr e)         = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e]
mapExpM _ (CmmBranch _)             = Nothing
mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e
mapExpM f (CmmSwitch e tbl)         = (\x -> CmmSwitch x tbl) `fmap` f e
mapExpM f (CmmCall tgt mb_id r o i s) = (\x -> CmmCall x mb_id r o i s) `fmap` f tgt
mapExpM f (CmmUnsafeForeignCall tgt fs as)
    -- If the target changed we must rebuild anyway, so the args are
    -- rewritten unconditionally with mapListJ (keeping unchanged ones).
    = case mapForeignTargetM f tgt of
        Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as))
        Nothing   -> (\xs -> CmmUnsafeForeignCall tgt fs xs) `fmap` mapListM f as
mapExpM f (CmmForeignCall tgt fs as succ ret_args updfr intrbl)
    = case mapForeignTargetM f tgt of
        Just tgt' -> Just (CmmForeignCall tgt' fs (mapListJ f as) succ ret_args updfr intrbl)
        Nothing   -> (\xs -> CmmForeignCall tgt fs xs succ ret_args updfr intrbl) `fmap` mapListM f as
513
-- share as much as possible

-- | Apply a partial rewriting function across a list; 'Just' only if
-- at least one element was rewritten.  Unchanged suffixes of the
-- input are shared rather than rebuilt.
mapListM :: (a -> Maybe a) -> [a] -> Maybe [a]
mapListM f xs =
  case mapListT f xs of
    (True,  ys) -> Just ys
    (False, _ ) -> Nothing

-- | Like 'mapListM' but always produces the (possibly shared) result list.
mapListJ :: (a -> Maybe a) -> [a] -> [a]
mapListJ f = snd . mapListT f

-- | Worker for the two functions above.  The Bool records whether any
-- element changed; a wholly-unchanged suffix is returned as the
-- original list tail, maximising sharing.
mapListT :: (a -> Maybe a) -> [a] -> (Bool, [a])
mapListT f = go
  where
    go []          = (False, [])
    go ys@(y:rest) =
      case (f y, go rest) of
        (Just y', (_,     rest')) -> (True,  y' : rest')
        (Nothing, (True,  rest')) -> (True,  y  : rest')
        (Nothing, (False, _    )) -> (False, ys)
528
-- | Like 'mapExpM', but the rewriter is applied recursively to
-- sub-expressions as well (via 'wrapRecExpM').
mapExpDeepM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
mapExpDeepM f node = mapExpM (wrapRecExpM f) node
531
532 -----------------------------------
533 -- folding Expr in CmmNode
534
-- | Fold a function over the address expression of a foreign-call
-- target; 'PrimTarget's carry no expression, so the accumulator
-- passes through untouched.
foldExpForeignTarget :: (CmmExpr -> z -> z) -> ForeignTarget -> z -> z
foldExpForeignTarget f tgt z =
  case tgt of
    ForeignTarget addr _ -> f addr z
    PrimTarget {}        -> z
538
-- | Lift an expression folder to act recursively.  'CmmMachOp' and
-- 'CmmLoad' are traversed here — the compound node is handed to @f@
-- first, then its children — while every other form goes straight
-- to @f@.
wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z
wrapRecExpf f = go
  where
    go e@(CmmMachOp _ es) z = foldr go (f e z) es
    go e@(CmmLoad addr _) z = go addr (f e z)
    go e                  z = f e z
546
-- | Fold a function over every top-level expression stored in a node.
-- Sub-expressions are not visited; use 'foldExpDeep' for that.
foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
foldExp _ (CmmEntry {}) z                         = z
foldExp _ (CmmComment {}) z                       = z
foldExp _ (CmmTick {}) z                          = z
-- Entries whose expression is Nothing contribute nothing (maybe id f).
foldExp f (CmmUnwind xs) z                        = foldr (maybe id f) z (map snd xs)
foldExp f (CmmAssign _ e) z                       = f e z
foldExp f (CmmStore addr e) z                     = f addr $ f e z
foldExp f (CmmUnsafeForeignCall t _ as) z         = foldr f (foldExpForeignTarget f t z) as
foldExp _ (CmmBranch _) z                         = z
foldExp f (CmmCondBranch e _ _ _) z               = f e z
foldExp f (CmmSwitch e _) z                       = f e z
foldExp f (CmmCall {cml_target=tgt}) z            = f tgt z
foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args
560
-- | Like 'foldExp', but also folds over sub-expressions
-- (via 'wrapRecExpf').
foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
foldExpDeep f node z = foldExp (wrapRecExpf f) node z
563
564 -- -----------------------------------------------------------------------------
565
-- | Rewrite the successor labels of a block-closing node.  Note that
-- the fall-through clause leaves CmmCall's cml_cont and
-- CmmForeignCall's succ untouched, exactly as the original clauses do.
mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C
mapSuccessors f node =
  case node of
    CmmBranch bid         -> CmmBranch (f bid)
    CmmCondBranch p y n l -> CmmCondBranch p (f y) (f n) l
    CmmSwitch e ids       -> CmmSwitch e (mapSwitchTargets f ids)
    _                     -> node
571
572 -- -----------------------------------------------------------------------------
573
-- | Tickish in Cmm context (annotations only).  Instantiated at @()@;
-- only non-code ticks such as @SourceNote@ are expected here (see the
-- comment on 'CmmTick' above).
type CmmTickish = Tickish ()
576
-- | Tick scope identifier, allowing us to reason about what
-- annotations in a Cmm block should scope over. We especially take
-- care to allow optimisations to reorganise blocks without losing
-- tick association in the process.
data CmmTickScope
  = GlobalScope
    -- ^ The global scope is the "root" of the scope graph. Every
    -- scope is a sub-scope of the global scope. It doesn't make sense
    -- to add ticks to this scope. On the other hand, this means that
    -- setting this scope on a block means no ticks apply to it.

  | SubScope !U.Unique CmmTickScope
    -- ^ Constructs a new sub-scope to an existing scope. This allows
    -- us to translate Core-style scoping rules (see @tickishScoped@)
    -- into the Cmm world. Suppose the following code:
    --
    --   tick<1> case ... of
    --             A -> tick<2> ...
    --             B -> tick<3> ...
    --
    -- We want the top-level tick annotation to apply to blocks
    -- generated for the A and B alternatives. We can achieve that by
    -- generating tick<1> into a block with scope a, while the code
    -- for alternatives A and B gets generated into sub-scopes a/b and
    -- a/c respectively.

  | CombinedScope CmmTickScope CmmTickScope
    -- ^ A combined scope scopes over everything that the two given
    -- scopes cover. It is therefore a sub-scope of either scope. This
    -- is required for optimisations. Consider common block elimination:
    --
    --   A -> tick<2> case ... of
    --          C -> [common]
    --   B -> tick<3> case ... of
    --          D -> [common]
    --
    -- We will generate code for the C and D alternatives, and figure
    -- out afterwards that it's actually common code. Scoping rules
    -- dictate that the resulting common block needs to be covered by
    -- both tick<2> and tick<3>, therefore we need to construct a
    -- scope that is a child of *both* scopes. Now we can do that - if
    -- we assign the scopes a/c and b/d to the common-ed up blocks,
    -- the new block could have a combined tick scope a/c+b/d, which
    -- both tick<2> and tick<3> apply to.
621
622 -- Note [CmmTick scoping details]:
623 --
624 -- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the
625 -- same block. Note that as a result of this, optimisations making
626 -- tick scopes more specific can *reduce* the amount of code a tick
627 -- scopes over. Fixing this would require a separate @CmmTickScope@
628 -- field for @CmmTick@. Right now we do not do this simply because I
629 -- couldn't find an example where it actually mattered -- multiple
630 -- blocks within the same scope generally jump to each other, which
631 -- prevents common block elimination from happening in the first
632 -- place. But this is no strong reason, so if Cmm optimisations become
633 -- more involved in future this might have to be revisited.
634
-- | Enumerate every path from a scope up to the global root, one list
-- per alternative in the scope tree, innermost unique first.
scopeToPaths :: CmmTickScope -> [[U.Unique]]
scopeToPaths scope =
  case scope of
    GlobalScope         -> [[]]
    SubScope u s        -> [ u : path | path <- scopeToPaths s ]
    CombinedScope s1 s2 -> scopeToPaths s1 ++ scopeToPaths s2
640
-- | Returns the head uniques of the scopes. This is based on the
-- assumption that the @Unique@ of @SubScope@ identifies the
-- underlying super-scope. Used for efficient equality and comparison,
-- see below.
scopeUniques :: CmmTickScope -> [U.Unique]
scopeUniques scope =
  case scope of
    GlobalScope         -> []
    SubScope u _        -> [u]
    CombinedScope s1 s2 -> scopeUniques s1 ++ scopeUniques s2
649
-- Equality and order is based on the head uniques defined above. We
-- take care to short-cut the (extremely) common cases.
instance Eq CmmTickScope where
  GlobalScope    == GlobalScope     = True
  GlobalScope    == _               = False
  _              == GlobalScope     = False
  -- Sound by the head-unique assumption documented on scopeUniques:
  -- equal head uniques imply equal parent scopes.
  (SubScope u _) == (SubScope u' _) = u == u'
  (SubScope _ _) == _               = False
  _              == (SubScope _ _)  = False
  -- General case (at least one CombinedScope): compare the sorted
  -- multisets of head uniques.
  scope == scope' =
    sortBy nonDetCmpUnique (scopeUniques scope) ==
    sortBy nonDetCmpUnique (scopeUniques scope')
    -- This is still deterministic because
    -- the order is the same for equal lists
664
-- This is non-deterministic but we do not currently support deterministic
-- code-generation. See Note [Unique Determinism and code generation]
-- See Note [No Ord for Unique]
instance Ord CmmTickScope where
  compare GlobalScope GlobalScope = EQ
  compare GlobalScope _           = LT
  compare _ GlobalScope           = GT
  compare (SubScope u _) (SubScope u' _) = nonDetCmpUnique u u'
  -- General case (at least one CombinedScope): lexicographic
  -- comparison of the sorted head-unique lists.
  compare scope scope' = cmpList nonDetCmpUnique
     (sortBy nonDetCmpUnique $ scopeUniques scope)
     (sortBy nonDetCmpUnique $ scopeUniques scope')
676
instance Outputable CmmTickScope where
  ppr GlobalScope     = text "global"
  -- Direct child of the root: print just the unique.
  ppr (SubScope us GlobalScope)
                      = ppr us
  ppr (SubScope us s) = ppr s <> char '/' <> ppr us
  -- Remaining case is CombinedScope: print every path, slash-separated
  -- root-first (hence the reverse), joined with '+'.
  ppr combined        = parens $ hcat $ punctuate (char '+') $
                        map (hcat . punctuate (char '/') . map ppr . reverse) $
                        scopeToPaths combined
685
-- | Checks whether the first tick scope is a sub-scope of the second;
-- true also when the two scopes are equal.
isTickSubScope :: CmmTickScope -> CmmTickScope -> Bool
isTickSubScope = go
  where
    -- Everything is inside the global scope, and the global scope is
    -- inside nothing else; a combined child must sit inside both of
    -- its parts, while a combined parent needs to contain only one.
    go _                   GlobalScope              = True
    go GlobalScope         _                        = False
    go (CombinedScope a b) parent                   = go a parent && go b parent
    go child               (CombinedScope a' b')    = go child a' || go child b'
    go (SubScope u s)      parent@(SubScope u' _)   = u == u' || go s parent
695
-- | Combine two tick scopes.  The result is a sub-scope of both
-- arguments; we simplify automatically when one scope is already a
-- sub-scope of the other.
combineTickScopes :: CmmTickScope -> CmmTickScope -> CmmTickScope
combineTickScopes s1 s2
  | isTickSubScope s1 s2 = s1
  | isTickSubScope s2 s1 = s2
  | otherwise            = CombinedScope s1 s2