base: Reintroduce fusion for scanr
[ghc.git] / compiler / cmm / MkGraph.hs
1 {-# LANGUAGE BangPatterns, GADTs #-}
2
3 module MkGraph
4 ( CmmAGraph, CmmAGraphScoped, CgStmt(..)
5 , (<*>), catAGraphs
6 , mkLabel, mkMiddle, mkLast, outOfLine
7 , lgraphOfAGraph, labelAGraph
8
9 , stackStubExpr
10 , mkNop, mkAssign, mkStore
11 , mkUnsafeCall, mkFinalCall, mkCallReturnsTo
12 , mkJumpReturnsTo
13 , mkJump, mkJumpExtra
14 , mkRawJump
15 , mkCbranch, mkSwitch
16 , mkReturn, mkComment, mkCallEntry, mkBranch
17 , mkUnwind
18 , copyInOflow, copyOutOflow
19 , noExtraStack
20 , toCall, Transfer(..)
21 )
22 where
23
24 import GhcPrelude hiding ( (<*>) ) -- avoid importing (<*>)
25
26 import BlockId
27 import Cmm
28 import CmmCallConv
29 import CmmSwitch (SwitchTargets)
30
31 import Hoopl.Block
32 import Hoopl.Graph
33 import Hoopl.Label
34 import DynFlags
35 import FastString
36 import ForeignCall
37 import OrdList
38 import SMRep (ByteOff)
39 import UniqSupply
40 import Util
41 import Panic
42
43
44 -----------------------------------------------------------------------------
45 -- Building Graphs
46
47
-- | CmmAGraph is a chunk of code consisting of:
--
--   * ordinary statements (assignments, stores etc.)
--   * jumps
--   * labels
--   * out-of-line labelled blocks
--
-- The semantics is that control falls through labels and out-of-line
-- blocks.  Everything after a jump up to the next label is by
-- definition unreachable code, and will be discarded.
--
-- Two CmmAGraphs can be stuck together with <*>, with the meaning that
-- control flows from the first to the second.
--
-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends)
-- by providing a label for the entry point and a tick scope; see
-- 'labelAGraph'.
type CmmAGraph = OrdList CgStmt

-- | Unlabeled graph paired with the tick scope its code lives in.
type CmmAGraphScoped = (CmmAGraph, CmmTickScope)
68
-- | One element of a 'CmmAGraph'.  See the comment on 'CmmAGraph' for
-- the fall-through semantics of labels and forks.
data CgStmt
  = CgLabel BlockId CmmTickScope
      -- ^ a label; control falls through into the labelled code
  | CgStmt (CmmNode O O)
      -- ^ an ordinary (open/open) statement
  | CgLast (CmmNode O C)
      -- ^ a jump; closes the current basic block
  | CgFork BlockId CmmAGraph CmmTickScope
      -- ^ an out-of-line labelled block; flattened separately, control
      -- continues with the statements that follow the fork
74
-- | Close an AGraph into a proper 'CmmGraph' by supplying the entry
-- label and tick scope; the body holds one 'Block' per basic block
-- discovered by 'flatten'.
flattenCmmAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph
flattenCmmAGraph id (stmts_t, tscope) =
    CmmGraph { g_entry = id,
               g_graph = GMany NothingO body NothingO }
  where
  body = foldr addBlock emptyBody $ flatten id stmts_t tscope []
81
--
-- flatten: given an entry label and a CmmAGraph, make a list of blocks.
--
-- NB. avoid the quadratic-append trap by passing in the tail of the
-- list.  This is important for Very Long Functions (e.g. in T783).
--
flatten :: Label -> CmmAGraph -> CmmTickScope -> [Block CmmNode C C]
        -> [Block CmmNode C C]
flatten id g tscope blocks
  = flatten1 (fromOL g) block' blocks
  -- the bang forces the open entry block eagerly rather than carrying
  -- a thunk through the recursion
  where !block' = blockJoinHead (CmmEntry id tscope) emptyBlock
--
-- flatten0: we are outside a block at this point: any code before
-- the first label is unreachable, so just drop it.
--
flatten0 :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C]
flatten0 [] blocks = blocks

-- a label opens a new block; collect its statements with flatten1
flatten0 (CgLabel id tscope : stmts) blocks
  = flatten1 stmts block blocks
  where !block = blockJoinHead (CmmEntry id tscope) emptyBlock

-- a fork is flattened on its own; the remaining statements are still
-- outside any block
flatten0 (CgFork fork_id stmts_t tscope : rest) blocks
  = flatten fork_id stmts_t tscope $ flatten0 rest blocks

-- middle and last nodes outside a block are unreachable: drop them
flatten0 (CgLast _ : stmts) blocks = flatten0 stmts blocks
flatten0 (CgStmt _ : stmts) blocks = flatten0 stmts blocks
109
--
-- flatten1: we have a partial block, collect statements until the
-- next last node to make a block, then call flatten0 to get the rest
-- of the blocks
--
flatten1 :: [CgStmt] -> Block CmmNode C O
         -> [Block CmmNode C C] -> [Block CmmNode C C]

-- The current block falls through to the end of a function or fork:
-- this code should not be reachable, but it may be referenced by
-- other code that is not reachable.  We'll remove it later with
-- dead-code analysis, but for now we have to keep the graph
-- well-formed, so we terminate the block with a branch to the
-- beginning of the current block.
flatten1 [] block blocks
  = blockJoinTail block (CmmBranch (entryLabel block)) : blocks

-- a last node closes the current block; anything following it up to
-- the next label is handled (i.e. dropped) by flatten0
flatten1 (CgLast stmt : stmts) block blocks
  = block' : flatten0 stmts blocks
  where !block' = blockJoinTail block stmt

-- an ordinary statement is snoc'd onto the open block
flatten1 (CgStmt stmt : stmts) block blocks
  = flatten1 stmts block' blocks
  where !block' = blockSnoc block stmt

-- a fork is flattened separately; the current open block continues
-- with the statements after the fork
flatten1 (CgFork fork_id stmts_t tscope : rest) block blocks
  = flatten fork_id stmts_t tscope $ flatten1 rest block blocks

-- a label here means that we should start a new block, and the
-- current block should fall through to the new block.
flatten1 (CgLabel id tscp : stmts) block blocks
  = blockJoinTail block (CmmBranch id) :
    flatten1 stmts (blockJoinHead (CmmEntry id tscp) emptyBlock) blocks
143
144
145
146 ---------- AGraph manipulation
147
-- | Sequential composition of AGraphs: control flows from the first
-- into the second.  (This shadows the Applicative @&lt;*&gt;@, hence the
-- @hiding@ clause on the GhcPrelude import.)
(<*>) :: CmmAGraph -> CmmAGraph -> CmmAGraph
g1 <*> g2 = g1 `appOL` g2
150
-- | Sequential composition of a whole list of AGraphs.
catAGraphs :: [CmmAGraph] -> CmmAGraph
catAGraphs ags = concatOL ags
153
-- | creates a sequence "goto id; id:" as an AGraph: under the
-- flattening semantics a label both terminates the current block
-- (with a branch to @id@) and starts a new one
mkLabel :: BlockId -> CmmTickScope -> CmmAGraph
mkLabel bid scp = unitOL (CgLabel bid scp)
157
-- | creates an open AGraph from a given node
mkMiddle :: CmmNode O O -> CmmAGraph
mkMiddle = unitOL . CgStmt
161
-- | creates a closed AGraph from a given node
mkLast :: CmmNode O C -> CmmAGraph
mkLast = unitOL . CgLast
165
-- | A labelled code block; should end in a last node
outOfLine :: BlockId -> CmmAGraphScoped -> CmmAGraph
outOfLine lbl (stmts, scope) = unitOL (CgFork lbl stmts scope)
169
-- | allocate a fresh label for the entry point
lgraphOfAGraph :: CmmAGraphScoped -> UniqSM CmmGraph
lgraphOfAGraph g = do
  entry_id <- mkBlockId <$> getUniqueM
  return (labelAGraph entry_id g)
175
-- | use the given BlockId as the label of the entry point
labelAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph
labelAGraph = flattenCmmAGraph
179
---------- No-ops

-- | The empty AGraph: generates no code.
mkNop :: CmmAGraph
mkNop = nilOL
183
-- | A comment node; emitted only in debug compilers, a no-op otherwise.
mkComment :: FastString -> CmmAGraph
mkComment fs =
  -- SDM: generating all those comments takes time, this saved about 4% for me
  if debugIsOn then mkMiddle (CmmComment fs) else nilOL
189
---------- Assignment and store

-- | Assign an expression to a register.  A self-assignment (@r := r@)
-- is elided entirely rather than emitting a useless node.
mkAssign :: CmmReg -> CmmExpr -> CmmAGraph
mkAssign l (CmmReg r) | l == r = mkNop
mkAssign l r = mkMiddle $ CmmAssign l r
194
-- | Store a value at the given address.
mkStore :: CmmExpr -> CmmExpr -> CmmAGraph
mkStore addr val = mkMiddle (CmmStore addr val)
197
---------- Control transfer

-- | Jump to the code at @e@, passing @actuals@ according to the given
-- calling convention; outgoing arguments go in the 'Old' area.
mkJump :: DynFlags -> Convention -> CmmExpr
       -> [CmmExpr]
       -> UpdFrameOffset
       -> CmmAGraph
mkJump dflags conv e actuals updfr_off =
  lastWithArgs dflags Jump Old conv actuals updfr_off $
    toCall e Nothing updfr_off 0
206
-- | A jump where the caller says what the live GlobalRegs are.  Used
-- for low-level hand-written Cmm.  No actual arguments are passed;
-- the supplied @vols@ replace the register list the copy-out would
-- otherwise have produced.
mkRawJump :: DynFlags -> CmmExpr -> UpdFrameOffset -> [GlobalReg]
          -> CmmAGraph
mkRawJump dflags e updfr_off vols =
  lastWithArgs dflags Jump Old NativeNodeCall [] updfr_off $
    \arg_space _ -> toCall e Nothing updfr_off 0 arg_space vols
214
215
-- | Like 'mkJump', but also placing the given extra arguments in
-- stack slots (see 'lastWithArgsAndExtraStack').
mkJumpExtra :: DynFlags -> Convention -> CmmExpr -> [CmmExpr]
            -> UpdFrameOffset -> [CmmExpr]
            -> CmmAGraph
mkJumpExtra dflags conv e actuals updfr_off extra_stack =
  lastWithArgsAndExtraStack dflags Jump Old conv actuals updfr_off extra_stack $
    toCall e Nothing updfr_off 0
222
-- | Conditional branch: to the first label when the condition holds,
-- otherwise to the second, with an optional likeliness hint.
mkCbranch :: CmmExpr -> BlockId -> BlockId -> Maybe Bool -> CmmAGraph
mkCbranch cond ifso ifnot likely =
  mkLast $ CmmCondBranch cond ifso ifnot likely
226
-- | Multi-way branch driven by the scrutinee expression.
mkSwitch :: CmmExpr -> SwitchTargets -> CmmAGraph
mkSwitch scrut targets = mkLast (CmmSwitch scrut targets)
229
-- | Return to the continuation at @e@ with the given actual results,
-- using the 'NativeReturn' convention.
mkReturn :: DynFlags -> CmmExpr -> [CmmExpr] -> UpdFrameOffset
         -> CmmAGraph
mkReturn dflags e actuals updfr_off =
  lastWithArgs dflags Ret Old NativeReturn actuals updfr_off $
    toCall e Nothing updfr_off 0
235
-- | Unconditional branch to the given block.
mkBranch :: BlockId -> CmmAGraph
mkBranch = mkLast . CmmBranch
238
-- | A final (tail) call.  NB: the 'CCallConv' argument is currently
-- ignored; the call is always made with 'NativeDirectCall'.
mkFinalCall :: DynFlags
            -> CmmExpr -> CCallConv -> [CmmExpr] -> UpdFrameOffset
            -> CmmAGraph
mkFinalCall dflags f _ actuals updfr_off =
  lastWithArgs dflags Call Old NativeDirectCall actuals updfr_off $
    toCall f Nothing updfr_off 0
245
-- | A call that returns to the given continuation label, storing the
-- return address in a young stack area and reserving @ret_off@ bytes
-- for the results.
mkCallReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr]
                -> BlockId        -- ^ label the call returns to
                -> ByteOff        -- ^ stack space reserved for results
                -> UpdFrameOffset
                -> [CmmExpr]      -- ^ extra stack arguments
                -> CmmAGraph
-- The body was previously wrapped in a redundant 'do'; a
-- single-expression 'do' desugars to the expression itself, so it was
-- harmless but misleading in this non-monadic function.
mkCallReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off extra_stack =
  lastWithArgsAndExtraStack dflags Call (Young ret_lbl) callConv actuals
     updfr_off extra_stack $
       toCall f (Just ret_lbl) updfr_off ret_off
256
-- | Like 'mkCallReturnsTo', but does not push the return address (it
-- is assumed to be already on the stack).
mkJumpReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr]
                -> BlockId        -- ^ label the call returns to
                -> ByteOff        -- ^ stack space reserved for results
                -> UpdFrameOffset
                -> CmmAGraph
-- The body was previously wrapped in a redundant 'do' (pure function,
-- single expression); removed for clarity.
mkJumpReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off =
  lastWithArgs dflags JumpRet (Young ret_lbl) callConv actuals updfr_off $
    toCall f (Just ret_lbl) updfr_off ret_off
267
-- | An unsafe foreign call, with the given results and arguments.
mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
mkUnsafeCall target results args =
  mkMiddle (CmmUnsafeForeignCall target results args)
270
-- | Construct a 'CmmUnwind' node for the given register and unwinding
-- expression.
mkUnwind :: GlobalReg -> CmmExpr -> CmmAGraph
mkUnwind reg expr = mkMiddle $ CmmUnwind [(reg, Just expr)]
275
276 --------------------------------------------------------------------------
277
278
279
280
281 -- Why are we inserting extra blocks that simply branch to the successors?
282 -- Because in addition to the branch instruction, @mkBranch@ will insert
283 -- a necessary adjustment to the stack pointer.
284
285
-- For debugging purposes, we can stub out dead stack slots:
stackStubExpr :: Width -> CmmExpr
stackStubExpr = CmmLit . CmmInt 0
289
-- When we copy in parameters, we usually want to put overflow
-- parameters on the stack, but sometimes we want to pass the
-- variables in their spill slots.  Therefore, for copying arguments
-- and results, we provide different functions to pass the arguments
-- in an overflow area and to pass them in spill slots.
copyInOflow :: DynFlags -> Convention -> Area
            -> [CmmFormal]   -- ^ formal parameters
            -> [CmmFormal]   -- ^ extra stack parameters
            -> (Int, [GlobalReg], CmmAGraph)
            -- ^ (stack bytes used, registers read, the copy-in code)

copyInOflow dflags conv area formals extra_stk
  = (offset, gregs, catAGraphs $ map mkMiddle nodes)
  where (offset, gregs, nodes) = copyIn dflags conv area formals extra_stk
303
-- Return the number of bytes used for copying arguments, as well as the
-- instructions to copy the arguments.
copyIn :: DynFlags -> Convention -> Area
       -> [CmmFormal]   -- formal parameters
       -> [CmmFormal]   -- extra stack parameters
       -> (ByteOff, [GlobalReg], [CmmNode O O])
copyIn dflags conv area formals extra_stk
  = (stk_size, [r | (_, RegisterParam r) <- args], map ci (stk_args ++ args))
  where
    -- See Note [Width of parameters]: sub-word values arrive extended
    -- to word width and are narrowed back here with MO_XX_Conv.
    ci (reg, RegisterParam r@(VanillaReg {})) =
        let local = CmmLocal reg
            global = CmmReg (CmmGlobal r)
            width = cmmRegWidth dflags local
            expr
                | width == wordWidth dflags = global
                | width < wordWidth dflags =
                    CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [global]
                | otherwise = panic "Parameter width greater than word width"

        in CmmAssign local expr

    -- Non VanillaRegs
    ci (reg, RegisterParam r) =
        CmmAssign (CmmLocal reg) (CmmReg (CmmGlobal r))

    -- Sub-word bits-typed stack parameters were stored at word width
    -- (see Note [Width of parameters]): load at word width, then narrow.
    ci (reg, StackParam off)
      | isBitsType $ localRegType reg
      , typeWidth (localRegType reg) < wordWidth dflags =
          let
            stack_slot = (CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth dflags))
            local = CmmLocal reg
            width = cmmRegWidth dflags local
            expr  = CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [stack_slot]
          in CmmAssign local expr

      | otherwise =
          CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty)
      where ty = localRegType reg

    init_offset = widthInBytes (wordWidth dflags) -- infotable

    (stk_off, stk_args) = assignStack dflags init_offset localRegType extra_stk

    (stk_size, args) = assignArgumentsPos dflags stk_off conv
                                          localRegType formals
350
-- Factoring out the common parts of the copyout functions yielded something
-- more complicated:

-- | The kind of control transfer a copy-out feeds into; determines
-- whether a return address must be stored (see 'copyOutOflow').
data Transfer = Call | JumpRet | Jump | Ret deriving Eq
355
copyOutOflow :: DynFlags -> Convention -> Transfer -> Area -> [CmmExpr]
             -> UpdFrameOffset
             -> [CmmExpr] -- extra stack args
             -> (Int, [GlobalReg], CmmAGraph)
             -- ^ (stack bytes used, registers written, the copy-out code)

-- Generate code to move the actual parameters into the locations
-- required by the calling convention.  This includes a store for the
-- return address.
--
-- The argument layout function ignores the pointer to the info table,
-- so we slot that in here.  When copying-out to a young area, we set
-- the info table for return and adjust the offsets of the other
-- parameters.  If this is a call instruction, we adjust the offsets
-- of the other parameters.
copyOutOflow dflags conv transfer area actuals updfr_off extra_stack_stuff
  = (stk_size, regs, graph)
  where
    (regs, graph) = foldr co ([], mkNop) (setRA ++ args ++ stack_params)

    -- See Note [Width of parameters]: sub-word values are widened to
    -- word width before being placed in a vanilla register.
    co (v, RegisterParam r@(VanillaReg {})) (rs, ms) =
        let width = cmmExprWidth dflags v
            value
                | width == wordWidth dflags = v
                | width < wordWidth dflags =
                    CmmMachOp (MO_XX_Conv width (wordWidth dflags)) [v]
                | otherwise = panic "Parameter width greater than word width"

        in (r:rs, mkAssign (CmmGlobal r) value <*> ms)

    -- Non VanillaRegs
    co (v, RegisterParam r) (rs, ms) =
        (r:rs, mkAssign (CmmGlobal r) v <*> ms)

    -- See Note [Width of parameters]
    co (v, StackParam off) (rs, ms)
      = (rs, mkStore (CmmStackSlot area off) (value v) <*> ms)

    width v = cmmExprWidth dflags v
    -- widen sub-word bits-typed values before storing to a stack slot
    value v
      | isBitsType $ cmmExprType dflags v
      , width v < wordWidth dflags =
          CmmMachOp (MO_XX_Conv (width v) (wordWidth dflags)) [v]
      | otherwise = v

    -- setRA: store for the return address (Young area + Call only);
    -- init_offset: where the other stack parameters start
    (setRA, init_offset) =
      case area of
            Young id ->  -- Generate a store instruction for
                         -- the return address if making a call
              case transfer of
                 Call ->
                   ([(CmmLit (CmmBlock id), StackParam init_offset)],
                    widthInBytes (wordWidth dflags))
                 JumpRet ->
                   ([],
                    widthInBytes (wordWidth dflags))
                 _other ->
                   ([], 0)
            Old -> ([], updfr_off)

    (extra_stack_off, stack_params) =
       assignStack dflags init_offset (cmmExprType dflags) extra_stack_stuff

    args :: [(CmmExpr, ParamLocation)]   -- The argument and where to put it
    (stk_size, args) = assignArgumentsPos dflags extra_stack_off conv
                                          (cmmExprType dflags) actuals
422
423
424 -- Note [Width of parameters]
425 --
426 -- Consider passing a small (< word width) primitive like Int8# to a function.
427 -- It's actually non-trivial to do this without extending/narrowing:
428 -- * Global registers are considered to have native word width (i.e., 64-bits on
429 -- x86-64), so CmmLint would complain if we assigned an 8-bit parameter to a
430 -- global register.
431 -- * Same problem exists with LLVM IR.
432 -- * Lowering gets harder since on x86-32 not every register exposes its lower
433 -- 8 bits (e.g., for %eax we can use %al, but there isn't a corresponding
434 -- 8-bit register for %edi). So we would either need to extend/narrow anyway,
435 -- or complicate the calling convention.
436 -- * Passing a small integer in a stack slot, which has native word width,
437 -- requires extending to word width when writing to the stack and narrowing
438 -- when reading off the stack (see #16258).
439 -- So instead, we always extend every parameter smaller than native word width
440 -- in copyOutOflow and then truncate it back to the expected width in copyIn.
441 -- Note that we do this in cmm using MO_XX_Conv to avoid requiring
442 -- zero-/sign-extending - it's up to a backend to handle this in a most
443 -- efficient way (e.g., a simple register move or a smaller size store).
444 -- This convention (of ignoring the upper bits) is different from some C ABIs,
445 -- e.g. all PowerPC ELF ABIs, that require sign or zero extending parameters.
446 --
447 -- There was some discussion about this on this PR:
448 -- https://github.com/ghc-proposals/ghc-proposals/pull/74
449
450
-- | Copy-in code for a function entry point; parameters arrive in
-- the 'Old' area.
mkCallEntry :: DynFlags -> Convention -> [CmmFormal] -> [CmmFormal]
            -> (Int, [GlobalReg], CmmAGraph)
mkCallEntry dflags conv = copyInOflow dflags conv Old
455
-- | As 'lastWithArgsAndExtraStack', with no extra stack arguments.
lastWithArgs :: DynFlags -> Transfer -> Area -> Convention -> [CmmExpr]
             -> UpdFrameOffset
             -> (ByteOff -> [GlobalReg] -> CmmAGraph)
             -> CmmAGraph
lastWithArgs dflags transfer area conv actuals updfr_off =
  lastWithArgsAndExtraStack dflags transfer area conv actuals
                            updfr_off noExtraStack
463
-- | Copy out the arguments (via 'copyOutOflow'), then emit the last
-- node built by the continuation from the resulting argument-area
-- size and live registers.
lastWithArgsAndExtraStack :: DynFlags
                          -> Transfer -> Area -> Convention -> [CmmExpr]
                          -> UpdFrameOffset -> [CmmExpr]
                          -> (ByteOff -> [GlobalReg] -> CmmAGraph)
                          -- ^ builds the last node from (arg bytes, live regs)
                          -> CmmAGraph
lastWithArgsAndExtraStack dflags transfer area conv actuals updfr_off
                          extra_stack last =
  copies <*> last outArgs regs
  where
    (outArgs, regs, copies) = copyOutOflow dflags conv transfer area actuals
                                           updfr_off extra_stack
475
476
-- | No extra stack arguments.
noExtraStack :: [CmmExpr]
noExtraStack = []
479
-- | Build a 'CmmCall' last node.  NB: the parameters here take
-- result space before argument space, but the 'CmmCall' constructor
-- is applied with @arg_space@ before @res_space@.
toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff
       -> ByteOff -> [GlobalReg]
       -> CmmAGraph
toCall e cont updfr_off res_space arg_space regs =
  mkLast $ CmmCall e cont regs arg_space res_space updfr_off