Generalize CmmUnwind and pass unwind information through NCG
[ghc.git] / compiler / nativeGen / AsmCodeGen.hs
1 -- -----------------------------------------------------------------------------
2 --
3 -- (c) The University of Glasgow 1993-2004
4 --
5 -- This is the top-level module in the native code generator.
6 --
7 -- -----------------------------------------------------------------------------
8
9 {-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, UnboxedTuples #-}
10
11 module AsmCodeGen (
12 -- * Module entry point
13 nativeCodeGen
14
15 -- * Test-only exports: see trac #12744
16 -- used by testGraphNoSpills, which needs to access
17 -- the register allocator intermediate data structures
18 -- cmmNativeGen emits
19 , cmmNativeGen
20 , NcgImpl(..)
21 , x86NcgImpl
22 ) where
23
24 #include "HsVersions.h"
25 #include "nativeGen/NCG.h"
26
27
28 import qualified X86.CodeGen
29 import qualified X86.Regs
30 import qualified X86.Instr
31 import qualified X86.Ppr
32
33 import qualified SPARC.CodeGen
34 import qualified SPARC.Regs
35 import qualified SPARC.Instr
36 import qualified SPARC.Ppr
37 import qualified SPARC.ShortcutJump
38 import qualified SPARC.CodeGen.Expand
39
40 import qualified PPC.CodeGen
41 import qualified PPC.Regs
42 import qualified PPC.RegInfo
43 import qualified PPC.Instr
44 import qualified PPC.Ppr
45
46 import RegAlloc.Liveness
47 import qualified RegAlloc.Linear.Main as Linear
48
49 import qualified GraphColor as Color
50 import qualified RegAlloc.Graph.Main as Color
51 import qualified RegAlloc.Graph.Stats as Color
52 import qualified RegAlloc.Graph.TrivColorable as Color
53
54 import TargetReg
55 import Platform
56 import Config
57 import Instruction
58 import PIC
59 import Reg
60 import NCGMonad
61 import Dwarf
62 import Debug
63
64 import BlockId
65 import CgUtils ( fixStgRegisters )
66 import Cmm
67 import CmmUtils
68 import Hoopl
69 import CmmOpt ( cmmMachOpFold )
70 import PprCmm
71 import CLabel
72
73 import UniqFM
74 import UniqSupply
75 import DynFlags
76 import Util
77 import Unique
78
79 import BasicTypes ( Alignment )
80 import Digraph
81 import qualified Pretty
82 import BufWrite
83 import Outputable
84 import FastString
85 import UniqSet
86 import ErrUtils
87 import Module
88 import Stream (Stream)
89 import qualified Stream
90
91 -- DEBUGGING ONLY
92 --import OrdList
93
94 import Data.List
95 import Data.Maybe
96 import Data.Ord ( comparing )
97 import Control.Exception
98 import Control.Monad
99 import System.IO
100
101 {-
102 The native-code generator has machine-independent and
103 machine-dependent modules.
104
105 This module ("AsmCodeGen") is the top-level machine-independent
106 module. Before entering machine-dependent land, we do some
107 machine-independent optimisations (defined below) on the
108 'CmmStmts's.
109
110 We convert to the machine-specific 'Instr' datatype with
111 'cmmCodeGen', assuming an infinite supply of registers. We then use
112 a machine-independent register allocator ('regAlloc') to rejoin
113 reality. Obviously, 'regAlloc' has machine-specific helper
114 functions (see about "RegAllocInfo" below).
115
116 Finally, we order the basic blocks of the function so as to minimise
117 the number of jumps between blocks, by utilising fallthrough wherever
118 possible.
119
120 The machine-dependent bits break down as follows:
121
122 * ["MachRegs"] Everything about the target platform's machine
123 registers (and immediate operands, and addresses, which tend to
124 intermingle/interact with registers).
125
126 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
127 have a module of its own), plus a miscellany of other things
128 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
129
130 * ["MachCodeGen"] is where 'Cmm' stuff turns into
131 machine instructions.
132
133 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
134 a 'SDoc').
135
136 * ["RegAllocInfo"] In the register allocator, we manipulate
137 'MRegsState's, which are 'BitSet's, one bit per machine register.
138 When we want to say something about a specific machine register
139 (e.g., ``it gets clobbered by this instruction''), we set/unset
140 its bit. Obviously, we do this 'BitSet' thing for efficiency
141 reasons.
142
143 The 'RegAllocInfo' module collects together the machine-specific
144 info needed to do register allocation.
145
146 * ["RegisterAlloc"] The (machine-independent) register allocator.
147 -}
148
149 -- -----------------------------------------------------------------------------
150 -- Top-level of the native codegen
151
152 data NcgImpl statics instr jumpDest = NcgImpl {
153 cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
154 generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
155 getJumpDestBlockId :: jumpDest -> Maybe BlockId,
156 canShortcut :: instr -> Maybe jumpDest,
157 shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics,
158 shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
159 pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc,
160 maxSpillSlots :: Int,
161 allocatableRegs :: [RealReg],
162 ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
163 ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
164 ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr),
165 ncgMakeFarBranches :: LabelMap CmmStatics
166 -> [NatBasicBlock instr] -> [NatBasicBlock instr],
167 extractUnwindPoints :: [instr] -> [UnwindPoint]
168 -- ^ given the instruction sequence of a block, produce a list of
169 -- the block's 'UnwindPoint's
170 -- See Note [What is this unwinding business?] in Debug
171 -- and Note [Unwinding information in the NCG] in this module.
172 }
173
174 --------------------
175 nativeCodeGen :: DynFlags -> Module -> ModLocation -> Handle -> UniqSupply
176 -> Stream IO RawCmmGroup ()
177 -> IO UniqSupply
178 nativeCodeGen dflags this_mod modLoc h us cmms
179 = let platform = targetPlatform dflags
180 nCG' :: (Outputable statics, Outputable instr, Instruction instr)
181 => NcgImpl statics instr jumpDest -> IO UniqSupply
182 nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
183 in case platformArch platform of
184 ArchX86 -> nCG' (x86NcgImpl dflags)
185 ArchX86_64 -> nCG' (x86_64NcgImpl dflags)
186 ArchPPC -> nCG' (ppcNcgImpl dflags)
187 ArchSPARC -> nCG' (sparcNcgImpl dflags)
188 ArchSPARC64 -> panic "nativeCodeGen: No NCG for SPARC64"
189 ArchARM {} -> panic "nativeCodeGen: No NCG for ARM"
190 ArchARM64 -> panic "nativeCodeGen: No NCG for ARM64"
191 ArchPPC_64 _ -> nCG' (ppcNcgImpl dflags)
192 ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
193 ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
194 ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
195 ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
196 ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
197
198 x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
199 x86NcgImpl dflags
200 = (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge }
201
202 x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
203 x86_64NcgImpl dflags
204 = NcgImpl {
205 cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
206 ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
207 ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
208 ,canShortcut = X86.Instr.canShortcut
209 ,shortcutStatics = X86.Instr.shortcutStatics
210 ,shortcutJump = X86.Instr.shortcutJump
211 ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
212 ,maxSpillSlots = X86.Instr.maxSpillSlots dflags
213 ,allocatableRegs = X86.Regs.allocatableRegs platform
214 ,ncg_x86fp_kludge = id
215 ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
216 ,ncgExpandTop = id
217 ,ncgMakeFarBranches = const id
218 ,extractUnwindPoints = X86.CodeGen.extractUnwindPoints
219 }
220 where platform = targetPlatform dflags
221
222 ppcNcgImpl :: DynFlags -> NcgImpl CmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
223 ppcNcgImpl dflags
224 = NcgImpl {
225 cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
226 ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
227 ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
228 ,canShortcut = PPC.RegInfo.canShortcut
229 ,shortcutStatics = PPC.RegInfo.shortcutStatics
230 ,shortcutJump = PPC.RegInfo.shortcutJump
231 ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
232 ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
233 ,allocatableRegs = PPC.Regs.allocatableRegs platform
234 ,ncg_x86fp_kludge = id
235 ,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform
236 ,ncgExpandTop = id
237 ,ncgMakeFarBranches = PPC.Instr.makeFarBranches
238 ,extractUnwindPoints = const []
239 }
240 where platform = targetPlatform dflags
241
242 sparcNcgImpl :: DynFlags -> NcgImpl CmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
243 sparcNcgImpl dflags
244 = NcgImpl {
245 cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
246 ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
247 ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
248 ,canShortcut = SPARC.ShortcutJump.canShortcut
249 ,shortcutStatics = SPARC.ShortcutJump.shortcutStatics
250 ,shortcutJump = SPARC.ShortcutJump.shortcutJump
251 ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
252 ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
253 ,allocatableRegs = SPARC.Regs.allocatableRegs
254 ,ncg_x86fp_kludge = id
255 ,ncgAllocMoreStack = noAllocMoreStack
256 ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
257 ,ncgMakeFarBranches = const id
258 ,extractUnwindPoints = const []
259 }
260
261 --
262 -- Allocating more stack space for spilling is currently only
263 -- supported for the linear register allocator on x86/x86_64, the rest
264 -- default to the panic below. To support allocating extra stack on
265 -- more platforms provide a definition of ncgAllocMoreStack.
266 --
267 noAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr)
268 noAllocMoreStack amount _
269 = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
270 ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
271 ++ " is a known limitation in the linear allocator.\n"
272 ++ "\n"
273 ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
274 ++ " You can still file a bug report if you like.\n"
275
276
277 -- | Data accumulated during code generation. Mostly about statistics,
278 -- but also collects debug data for DWARF generation.
279 data NativeGenAcc statics instr
280 = NGS { ngs_imports :: ![[CLabel]]
281 , ngs_natives :: ![[NatCmmDecl statics instr]]
282 -- ^ Native code generated, for statistics. This might
283 -- hold a lot of data, so it is important to clear this
284 -- field as early as possible if it isn't actually
285 -- required.
286 , ngs_colorStats :: ![[Color.RegAllocStats statics instr]]
287 , ngs_linearStats :: ![[Linear.RegAllocStats]]
288 , ngs_labels :: ![Label]
289 , ngs_debug :: ![DebugBlock]
290 , ngs_dwarfFiles :: !DwarfFiles
291 , ngs_unwinds :: !(LabelMap [UnwindPoint])
292 -- ^ see Note [Unwinding information in the NCG]
293 -- and Note [What is this unwinding business?] in Debug.
294 }
295
296 {-
297 Note [Unwinding information in the NCG]
298 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
299
300 Unwind information is a type of metadata which allows a debugging tool
301 to reconstruct the values of machine registers at the time a procedure was
302 entered. For the most part, the production of unwind information is handled by
303 the Cmm stage, where it is represented by CmmUnwind nodes.
304
305 Unfortunately, the Cmm stage doesn't know everything necessary to produce
306 accurate unwinding information. For instance, the x86-64 calling convention
307 requires that the stack pointer be aligned to 16 bytes, which in turn means that
308 GHC must sometimes add padding to $sp prior to performing a foreign call. When
309 this happens unwind information must be updated accordingly.
310 For this reason, we make the NCG backends responsible for producing
311 unwinding tables (with the extractUnwindPoints function in NcgImpl).
312
313 We accumulate the produced unwind tables over CmmGroups in the ngs_unwinds
314 field of NativeGenAcc. This is a label map which contains an entry for each
315 procedure, containing a list of unwinding points (e.g. a label and an associated
316 unwinding table).
317
318 See also Note [What is this unwinding business?] in Debug.
319 -}
320
321 nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
322 => DynFlags
323 -> Module -> ModLocation
324 -> NcgImpl statics instr jumpDest
325 -> Handle
326 -> UniqSupply
327 -> Stream IO RawCmmGroup ()
328 -> IO UniqSupply
329 nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
330 = do
331 -- BufHandle is a performance hack. We could hide it inside
332 -- Pretty if it weren't for the fact that we do lots of little
333 -- printDocs here (in order to do codegen in constant space).
334 bufh <- newBufHandle h
335 let ngs0 = NGS [] [] [] [] [] [] emptyUFM mapEmpty
336 (ngs, us') <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us
337 cmms ngs0
338 finishNativeGen dflags modLoc bufh us' ngs
339
340 finishNativeGen :: Instruction instr
341 => DynFlags
342 -> ModLocation
343 -> BufHandle
344 -> UniqSupply
345 -> NativeGenAcc statics instr
346 -> IO UniqSupply
347 finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs
348 = do
349 -- Write debug data and finish
350 let emitDw = debugLevel dflags > 0 && not (gopt Opt_SplitObjs dflags)
351 us' <- if not emitDw then return us else do
352 (dwarf, us') <- dwarfGen dflags modLoc us (ngs_debug ngs)
353 emitNativeCode dflags bufh dwarf
354 return us'
355 bFlush bufh
356
357 -- dump global NCG stats for graph coloring allocator
358 let stats = concat (ngs_colorStats ngs)
359 when (not (null stats)) $ do
360
361 -- build the global register conflict graph
362 let graphGlobal
363 = foldl Color.union Color.initGraph
364 $ [ Color.raGraph stat
365 | stat@Color.RegAllocStatsStart{} <- stats]
366
367 dump_stats (Color.pprStats stats graphGlobal)
368
369 let platform = targetPlatform dflags
370 dumpIfSet_dyn dflags
371 Opt_D_dump_asm_conflicts "Register conflict graph"
372 $ Color.dotGraph
373 (targetRegDotColor platform)
374 (Color.trivColorable platform
375 (targetVirtualRegSqueeze platform)
376 (targetRealRegSqueeze platform))
377 $ graphGlobal
378
379
380 -- dump global NCG stats for linear allocator
381 let linearStats = concat (ngs_linearStats ngs)
382 when (not (null linearStats)) $
383 dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
384
385 -- write out the imports
386 printSDocLn Pretty.LeftMode dflags h (mkCodeStyle AsmStyle)
387 $ makeImportsDoc dflags (concat (ngs_imports ngs))
388 return us'
389 where
390 dump_stats = dumpSDoc dflags alwaysQualify Opt_D_dump_asm_stats "NCG stats"
391
392 cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr)
393 => DynFlags
394 -> Module -> ModLocation
395 -> NcgImpl statics instr jumpDest
396 -> BufHandle
397 -> UniqSupply
398 -> Stream IO RawCmmGroup ()
399 -> NativeGenAcc statics instr
400 -> IO (NativeGenAcc statics instr, UniqSupply)
401
402 cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs
403 = do r <- Stream.runStream cmm_stream
404 case r of
405 Left () ->
406 return (ngs { ngs_imports = reverse $ ngs_imports ngs
407 , ngs_natives = reverse $ ngs_natives ngs
408 , ngs_colorStats = reverse $ ngs_colorStats ngs
409 , ngs_linearStats = reverse $ ngs_linearStats ngs
410 },
411 us)
412 Right (cmms, cmm_stream') -> do
413
414 -- Generate debug information
415 let debugFlag = debugLevel dflags > 0
416 !ndbgs | debugFlag = cmmDebugGen modLoc cmms
417 | otherwise = []
418 dbgMap = debugToMap ndbgs
419
420 -- Insert split marker, generate native code
421 let splitObjs = gopt Opt_SplitObjs dflags
422 split_marker = CmmProc mapEmpty mkSplitMarkerLabel [] $
423 ofBlockList (panic "split_marker_entry") []
424 cmms' | splitObjs = split_marker : cmms
425 | otherwise = cmms
426 (ngs',us') <- cmmNativeGens dflags this_mod modLoc ncgImpl h
427 dbgMap us cmms' ngs 0
428
429 -- Link native code information into debug blocks
430 -- See Note [What is this unwinding business?] in Debug.
431 let !ldbgs = cmmDebugLink (ngs_labels ngs') (ngs_unwinds ngs') ndbgs
432 dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos"
433 (vcat $ map ppr ldbgs)
434
435 -- Emit & clear DWARF information when generating split
436 -- object files, as we need it to land in the same object file
437 -- When using split sections, note that we do not split the debug
438 -- info but emit all the info at once in finishNativeGen.
439 (ngs'', us'') <-
440 if debugFlag && splitObjs
441 then do (dwarf, us'') <- dwarfGen dflags modLoc us ldbgs
442 emitNativeCode dflags h dwarf
443 return (ngs' { ngs_debug = []
444 , ngs_dwarfFiles = emptyUFM
445 , ngs_labels = [] },
446 us'')
447 else return (ngs' { ngs_debug = ngs_debug ngs' ++ ldbgs
448 , ngs_labels = [] },
449 us')
450
451 cmmNativeGenStream dflags this_mod modLoc ncgImpl h us''
452 cmm_stream' ngs''
453
454 -- | Do native code generation on all these cmms.
455 --
456 cmmNativeGens :: forall statics instr jumpDest.
457 (Outputable statics, Outputable instr, Instruction instr)
458 => DynFlags
459 -> Module -> ModLocation
460 -> NcgImpl statics instr jumpDest
461 -> BufHandle
462 -> LabelMap DebugBlock
463 -> UniqSupply
464 -> [RawCmmDecl]
465 -> NativeGenAcc statics instr
466 -> Int
467 -> IO (NativeGenAcc statics instr, UniqSupply)
468
469 cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap = go
470 where
471 go :: UniqSupply -> [RawCmmDecl]
472 -> NativeGenAcc statics instr -> Int
473 -> IO (NativeGenAcc statics instr, UniqSupply)
474
475 go us [] ngs !_ =
476 return (ngs, us)
477
478 go us (cmm : cmms) ngs count = do
479 let fileIds = ngs_dwarfFiles ngs
480 (us', fileIds', native, imports, colorStats, linearStats, unwinds)
481 <- {-# SCC "cmmNativeGen" #-}
482 cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap
483 cmm count
484
485 -- Generate .file directives for every new file that has been
486 -- used. Note that it is important that we generate these in
487 -- ascending order, as Clang's 3.6 assembler complains.
488 let newFileIds = sortBy (comparing snd) $
489 nonDetEltsUFM $ fileIds' `minusUFM` fileIds
490 -- See Note [Unique Determinism and code generation]
491 pprDecl (f,n) = text "\t.file " <> ppr n <+>
492 doubleQuotes (ftext f)
493
494 emitNativeCode dflags h $ vcat $
495 map pprDecl newFileIds ++
496 map (pprNatCmmDecl ncgImpl) native
497
498 -- force evaluation all this stuff to avoid space leaks
499 {-# SCC "seqString" #-} evaluate $ seqString (showSDoc dflags $ vcat $ map ppr imports)
500
501 let !labels' = if debugLevel dflags > 0
502 then cmmDebugLabels isMetaInstr native else []
503 !natives' = if dopt Opt_D_dump_asm_stats dflags
504 then native : ngs_natives ngs else []
505
506 mCon = maybe id (:)
507 ngs' = ngs{ ngs_imports = imports : ngs_imports ngs
508 , ngs_natives = natives'
509 , ngs_colorStats = colorStats `mCon` ngs_colorStats ngs
510 , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
511 , ngs_labels = ngs_labels ngs ++ labels'
512 , ngs_dwarfFiles = fileIds'
513 , ngs_unwinds = ngs_unwinds ngs `mapUnion` unwinds
514 }
515 go us' cmms ngs' (count + 1)
516
517 seqString [] = ()
518 seqString (x:xs) = x `seq` seqString xs
519
520
521 emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO ()
522 emitNativeCode dflags h sdoc = do
523
524 {-# SCC "pprNativeCode" #-} bufLeftRenderSDoc dflags h
525 (mkCodeStyle AsmStyle) sdoc
526
527 -- dump native code
528 dumpIfSet_dyn dflags
529 Opt_D_dump_asm "Asm code"
530 sdoc
531
532 -- | Complete native code generation phase for a single top-level chunk of Cmm.
533 -- Dumping the output of each stage along the way.
534 -- Global conflict graph and NGC stats
535 cmmNativeGen
536 :: (Outputable statics, Outputable instr, Instruction instr)
537 => DynFlags
538 -> Module -> ModLocation
539 -> NcgImpl statics instr jumpDest
540 -> UniqSupply
541 -> DwarfFiles
542 -> LabelMap DebugBlock
543 -> RawCmmDecl -- ^ the cmm to generate code for
544 -> Int -- ^ sequence number of this top thing
545 -> IO ( UniqSupply
546 , DwarfFiles
547 , [NatCmmDecl statics instr] -- native code
548 , [CLabel] -- things imported by this cmm
549 , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
550 , Maybe [Linear.RegAllocStats] -- stats for the linear register allocators
551 , LabelMap [UnwindPoint] -- unwinding information for blocks
552 )
553
554 cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
555 = do
556 let platform = targetPlatform dflags
557
558 -- rewrite assignments to global regs
559 let fixed_cmm =
560 {-# SCC "fixStgRegisters" #-}
561 fixStgRegisters dflags cmm
562
563 -- cmm to cmm optimisations
564 let (opt_cmm, imports) =
565 {-# SCC "cmmToCmm" #-}
566 cmmToCmm dflags this_mod fixed_cmm
567
568 dumpIfSet_dyn dflags
569 Opt_D_dump_opt_cmm "Optimised Cmm"
570 (pprCmmGroup [opt_cmm])
571
572 -- generate native code from cmm
573 let ((native, lastMinuteImports, fileIds'), usGen) =
574 {-# SCC "genMachCode" #-}
575 initUs us $ genMachCode dflags this_mod modLoc
576 (cmmTopCodeGen ncgImpl)
577 fileIds dbgMap opt_cmm
578
579 dumpIfSet_dyn dflags
580 Opt_D_dump_asm_native "Native code"
581 (vcat $ map (pprNatCmmDecl ncgImpl) native)
582
583 -- tag instructions with register liveness information
584 let (withLiveness, usLive) =
585 {-# SCC "regLiveness" #-}
586 initUs usGen
587 $ mapM (regLiveness platform)
588 $ map natCmmTopToLive native
589
590 dumpIfSet_dyn dflags
591 Opt_D_dump_asm_liveness "Liveness annotations added"
592 (vcat $ map ppr withLiveness)
593
594 -- allocate registers
595 (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
596 if ( gopt Opt_RegsGraph dflags
597 || gopt Opt_RegsIterative dflags )
598 then do
599 -- the regs usable for allocation
600 let (alloc_regs :: UniqFM (UniqSet RealReg))
601 = foldr (\r -> plusUFM_C unionUniqSets
602 $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
603 emptyUFM
604 $ allocatableRegs ncgImpl
605
606 -- do the graph coloring register allocation
607 let ((alloced, regAllocStats), usAlloc)
608 = {-# SCC "RegAlloc-color" #-}
609 initUs usLive
610 $ Color.regAlloc
611 dflags
612 alloc_regs
613 (mkUniqSet [0 .. maxSpillSlots ncgImpl])
614 withLiveness
615
616 -- dump out what happened during register allocation
617 dumpIfSet_dyn dflags
618 Opt_D_dump_asm_regalloc "Registers allocated"
619 (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
620
621 dumpIfSet_dyn dflags
622 Opt_D_dump_asm_regalloc_stages "Build/spill stages"
623 (vcat $ map (\(stage, stats)
624 -> text "# --------------------------"
625 $$ text "# cmm " <> int count <> text " Stage " <> int stage
626 $$ ppr stats)
627 $ zip [0..] regAllocStats)
628
629 let mPprStats =
630 if dopt Opt_D_dump_asm_stats dflags
631 then Just regAllocStats else Nothing
632
633 -- force evaluation of the Maybe to avoid space leak
634 mPprStats `seq` return ()
635
636 return ( alloced, usAlloc
637 , mPprStats
638 , Nothing)
639
640 else do
641 -- do linear register allocation
642 let reg_alloc proc = do
643 (alloced, maybe_more_stack, ra_stats) <-
644 Linear.regAlloc dflags proc
645 case maybe_more_stack of
646 Nothing -> return ( alloced, ra_stats )
647 Just amount -> do
648 alloced' <- ncgAllocMoreStack ncgImpl amount alloced
649 return (alloced', ra_stats )
650
651 let ((alloced, regAllocStats), usAlloc)
652 = {-# SCC "RegAlloc-linear" #-}
653 initUs usLive
654 $ liftM unzip
655 $ mapM reg_alloc withLiveness
656
657 dumpIfSet_dyn dflags
658 Opt_D_dump_asm_regalloc "Registers allocated"
659 (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
660
661 let mPprStats =
662 if dopt Opt_D_dump_asm_stats dflags
663 then Just (catMaybes regAllocStats) else Nothing
664
665 -- force evaluation of the Maybe to avoid space leak
666 mPprStats `seq` return ()
667
668 return ( alloced, usAlloc
669 , Nothing
670 , mPprStats)
671
672 ---- x86fp_kludge. This pass inserts ffree instructions to clear
673 ---- the FPU stack on x86. The x86 ABI requires that the FPU stack
674 ---- is clear, and library functions can return odd results if it
675 ---- isn't.
676 ----
677 ---- NB. must happen before shortcutBranches, because that
678 ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
679 let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
680
681 ---- generate jump tables
682 let tabled =
683 {-# SCC "generateJumpTables" #-}
684 generateJumpTables ncgImpl kludged
685
686 ---- shortcut branches
687 let shorted =
688 {-# SCC "shortcutBranches" #-}
689 shortcutBranches dflags ncgImpl tabled
690
691 ---- sequence blocks
692 let sequenced =
693 {-# SCC "sequenceBlocks" #-}
694 map (sequenceTop ncgImpl) shorted
695
696 ---- expansion of SPARC synthetic instrs
697 let expanded =
698 {-# SCC "sparc_expand" #-}
699 ncgExpandTop ncgImpl sequenced
700
701 dumpIfSet_dyn dflags
702 Opt_D_dump_asm_expanded "Synthetic instructions expanded"
703 (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
704
705 -- generate unwinding information from cmm
706 let unwinds :: BlockMap [UnwindPoint]
707 unwinds =
708 {-# SCC "unwindingInfo" #-}
709 foldl' addUnwind mapEmpty expanded
710 where
711 addUnwind acc proc =
712 acc `mapUnion` computeUnwinding dflags ncgImpl proc
713
714 return ( usAlloc
715 , fileIds'
716 , expanded
717 , lastMinuteImports ++ imports
718 , ppr_raStatsColor
719 , ppr_raStatsLinear
720 , unwinds )
721
722
723 x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
724 x86fp_kludge top@(CmmData _ _) = top
725 x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
726 CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
727
728 -- | Compute unwinding tables for the blocks of a procedure
729 computeUnwinding :: Instruction instr
730 => DynFlags -> NcgImpl statics instr jumpDest
731 -> NatCmmDecl statics instr
732 -- ^ the native code generated for the procedure
733 -> LabelMap [UnwindPoint]
734 -- ^ unwinding tables for all points of all blocks of the
735 -- procedure
736 computeUnwinding dflags _ _
737 | debugLevel dflags == 0 = mapEmpty
738 computeUnwinding _ _ (CmmData _ _) = mapEmpty
739 computeUnwinding _ ncgImpl (CmmProc _ _ _ (ListGraph blks)) =
740 -- In general we would need to push unwinding information down the
741 -- block-level call-graph to ensure that we fully account for all
742 -- relevant register writes within a procedure.
743 --
744 -- However, the only unwinding information that we care about in GHC is for
745 -- Sp. The fact that CmmLayoutStack already ensures that we have unwind
746 -- information at the beginning of every block means that there is no need
747 -- to perform this sort of push-down.
748 mapFromList [ (blk_lbl, extractUnwindPoints ncgImpl instrs)
749 | BasicBlock blk_lbl instrs <- blks ]
750
751 -- | Build a doc for all the imports.
752 --
753 makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
754 makeImportsDoc dflags imports
755 = dyld_stubs imports
756 $$
757 -- On recent versions of Darwin, the linker supports
758 -- dead-stripping of code and data on a per-symbol basis.
759 -- There's a hack to make this work in PprMach.pprNatCmmDecl.
760 (if platformHasSubsectionsViaSymbols platform
761 then text ".subsections_via_symbols"
762 else Outputable.empty)
763 $$
764 -- On recent GNU ELF systems one can mark an object file
765 -- as not requiring an executable stack. If all objects
766 -- linked into a program have this note then the program
767 -- will not use an executable stack, which is good for
768 -- security. GHC generated code does not need an executable
769 -- stack so add the note in:
770 (if platformHasGnuNonexecStack platform
771 then text ".section .note.GNU-stack,\"\",@progbits"
772 else Outputable.empty)
773 $$
774 -- And just because every other compiler does, let's stick in
775 -- an identifier directive: .ident "GHC x.y.z"
776 (if platformHasIdentDirective platform
777 then let compilerIdent = text "GHC" <+> text cProjectVersion
778 in text ".ident" <+> doubleQuotes compilerIdent
779 else Outputable.empty)
780
781 where
782 platform = targetPlatform dflags
783 arch = platformArch platform
784 os = platformOS platform
785
786 -- Generate "symbol stubs" for all external symbols that might
787 -- come from a dynamic library.
788 dyld_stubs :: [CLabel] -> SDoc
789 {- dyld_stubs imps = vcat $ map pprDyldSymbolStub $
790 map head $ group $ sort imps-}
791 -- (Hack) sometimes two Labels pretty-print the same, but have
792 -- different uniques; so we compare their text versions...
793 dyld_stubs imps
794 | needImportedSymbols dflags arch os
795 = vcat $
796 (pprGotDeclaration dflags arch os :) $
797 map ( pprImportedSymbol dflags platform . fst . head) $
798 groupBy (\(_,a) (_,b) -> a == b) $
799 sortBy (\(_,a) (_,b) -> compare a b) $
800 map doPpr $
801 imps
802 | otherwise
803 = Outputable.empty
804
805 doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel platform lbl) astyle)
806 astyle = mkCodeStyle AsmStyle
807
808
809 -- -----------------------------------------------------------------------------
810 -- Sequencing the basic blocks
811
812 -- Cmm BasicBlocks are self-contained entities: they always end in a
813 -- jump, either non-local or to another basic block in the same proc.
814 -- In this phase, we attempt to place the basic blocks in a sequence
815 -- such that as many of the local jumps as possible turn into
816 -- fallthroughs.
817
818 sequenceTop
819 :: Instruction instr
820 => NcgImpl statics instr jumpDest -> NatCmmDecl statics instr -> NatCmmDecl statics instr
821
822 sequenceTop _ top@(CmmData _ _) = top
823 sequenceTop ncgImpl (CmmProc info lbl live (ListGraph blocks)) =
824 CmmProc info lbl live (ListGraph $ ncgMakeFarBranches ncgImpl info $ sequenceBlocks info blocks)
825
826 -- The algorithm is very simple (and stupid): we make a graph out of
827 -- the blocks where there is an edge from one block to another iff the
828 -- first block ends by jumping to the second. Then we topologically
829 -- sort this graph. Then traverse the list: for each block, we first
830 -- output the block, then if it has an out edge, we move the
831 -- destination of the out edge to the front of the list, and continue.
832
833 -- FYI, the classic layout for basic blocks uses postorder DFS; this
834 -- algorithm is implemented in Hoopl.
835
836 sequenceBlocks
837 :: Instruction instr
838 => LabelMap i
839 -> [NatBasicBlock instr]
840 -> [NatBasicBlock instr]
841
842 sequenceBlocks _ [] = []
843 sequenceBlocks infos (entry:blocks) =
844 seqBlocks infos (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
845 -- the first block is the entry point ==> it must remain at the start.
846
847
848 sccBlocks
849 :: Instruction instr
850 => [NatBasicBlock instr]
851 -> [SCC ( NatBasicBlock instr
852 , BlockId
853 , [BlockId])]
854
855 sccBlocks blocks = stronglyConnCompFromEdgedVerticesUniqR (map mkNode blocks)
856
857 -- we're only interested in the last instruction of
858 -- the block, and only if it has a single destination.
859 getOutEdges
860 :: Instruction instr
861 => [instr] -> [BlockId]
862
863 getOutEdges instrs
864 = case jumpDestsOfInstr (last instrs) of
865 [one] -> [one]
866 _many -> []
867
868 mkNode :: (Instruction t)
869 => GenBasicBlock t
870 -> (GenBasicBlock t, BlockId, [BlockId])
871 mkNode block@(BasicBlock id instrs) = (block, id, getOutEdges instrs)
872
873 seqBlocks :: LabelMap i -> [(GenBasicBlock t1, BlockId, [BlockId])]
874 -> [GenBasicBlock t1]
875 seqBlocks infos blocks = placeNext pullable0 todo0
876 where
877 -- pullable: Blocks that are not yet placed
878 -- todo: Original order of blocks, to be followed if we have no good
879 -- reason not to;
880 -- may include blocks that have already been placed, but then
881 -- these are not in pullable
882 pullable0 = listToUFM [ (i,(b,n)) | (b,i,n) <- blocks ]
883 todo0 = [i | (_,i,_) <- blocks ]
884
885 placeNext _ [] = []
886 placeNext pullable (i:rest)
887 | Just (block, pullable') <- lookupDeleteUFM pullable i
888 = place pullable' rest block
889 | otherwise
890 -- We already placed this block, so ignore
891 = placeNext pullable rest
892
893 place pullable todo (block,[])
894 = block : placeNext pullable todo
895 place pullable todo (block@(BasicBlock id instrs),[next])
896 | mapMember next infos
897 = block : placeNext pullable todo
898 | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next
899 = BasicBlock id (init instrs) : place pullable' todo nextBlock
900 | otherwise
901 = block : placeNext pullable todo
902 place _ _ (_,tooManyNextNodes)
903 = pprPanic "seqBlocks" (ppr tooManyNextNodes)
904
905
906 lookupDeleteUFM :: Uniquable key => UniqFM elt -> key -> Maybe (elt, UniqFM elt)
907 lookupDeleteUFM m k = do -- Maybe monad
908 v <- lookupUFM m k
909 return (v, delFromUFM m k)
910
911 -- -----------------------------------------------------------------------------
912 -- Generate jump tables
913
914 -- Analyzes all native code and generates data sections for all jump
915 -- table instructions.
916 generateJumpTables
917 :: NcgImpl statics instr jumpDest
918 -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
919 generateJumpTables ncgImpl xs = concatMap f xs
920 where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
921 f p = [p]
922 g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
923
924 -- -----------------------------------------------------------------------------
925 -- Shortcut branches
926
927 shortcutBranches
928 :: DynFlags
929 -> NcgImpl statics instr jumpDest
930 -> [NatCmmDecl statics instr]
931 -> [NatCmmDecl statics instr]
932
933 shortcutBranches dflags ncgImpl tops
934 | optLevel dflags < 1 = tops -- only with -O or higher
935 | otherwise = map (apply_mapping ncgImpl mapping) tops'
936 where
937 (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
938 mapping = foldr plusUFM emptyUFM mappings
939
940 build_mapping :: NcgImpl statics instr jumpDest
941 -> GenCmmDecl d (LabelMap t) (ListGraph instr)
942 -> (GenCmmDecl d (LabelMap t) (ListGraph instr), UniqFM jumpDest)
943 build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
944 build_mapping _ (CmmProc info lbl live (ListGraph []))
945 = (CmmProc info lbl live (ListGraph []), emptyUFM)
946 build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
947 = (CmmProc info lbl live (ListGraph (head:others)), mapping)
948 -- drop the shorted blocks, but don't ever drop the first one,
949 -- because it is pointed to by a global label.
950 where
951 -- find all the blocks that just consist of a jump that can be
952 -- shorted.
953 -- Don't completely eliminate loops here -- that can leave a dangling jump!
954 (_, shortcut_blocks, others) =
955 foldl split (setEmpty :: LabelSet, [], []) blocks
956 split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
957 | Just jd <- canShortcut ncgImpl insn,
958 Just dest <- getJumpDestBlockId ncgImpl jd,
959 not (has_info id),
960 (setMember dest s) || dest == id -- loop checks
961 = (s, shortcut_blocks, b : others)
962 split (s, shortcut_blocks, others) (BasicBlock id [insn])
963 | Just dest <- canShortcut ncgImpl insn,
964 not (has_info id)
965 = (setInsert id s, (id,dest) : shortcut_blocks, others)
966 split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
967
968 -- do not eliminate blocks that have an info table
969 has_info l = mapMember l info
970
971 -- build a mapping from BlockId to JumpDest for shorting branches
972 mapping = foldl add emptyUFM shortcut_blocks
973 add ufm (id,dest) = addToUFM ufm id dest
974
975 apply_mapping :: NcgImpl statics instr jumpDest
976 -> UniqFM jumpDest
977 -> GenCmmDecl statics h (ListGraph instr)
978 -> GenCmmDecl statics h (ListGraph instr)
979 apply_mapping ncgImpl ufm (CmmData sec statics)
980 = CmmData sec (shortcutStatics ncgImpl (lookupUFM ufm) statics)
981 apply_mapping ncgImpl ufm (CmmProc info lbl live (ListGraph blocks))
982 = CmmProc info lbl live (ListGraph $ map short_bb blocks)
983 where
984 short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
985 short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i
986 -- shortcutJump should apply the mapping repeatedly,
987 -- just in case we can short multiple branches.
988
989 -- -----------------------------------------------------------------------------
990 -- Instruction selection
991
992 -- Native code instruction selection for a chunk of stix code. For
993 -- this part of the computation, we switch from the UniqSM monad to
994 -- the NatM monad. The latter carries not only a Unique, but also an
995 -- Int denoting the current C stack pointer offset in the generated
996 -- code; this is needed for creating correct spill offsets on
997 -- architectures which don't offer, or for which it would be
998 -- prohibitively expensive to employ, a frame pointer register. Viz,
999 -- x86.
1000
1001 -- The offset is measured in bytes, and indicates the difference
1002 -- between the current (simulated) C stack-ptr and the value it was at
1003 -- the beginning of the block. For stacks which grow down, this value
1004 -- should be either zero or negative.
1005
1006 -- Along with the stack pointer offset, we also carry along a LabelMap of
1007 -- DebugBlocks, which we read to generate .location directives.
1008 --
1009 -- Switching between the two monads whilst carrying along the same
1010 -- Unique supply breaks abstraction. Is that bad?
1011
1012 genMachCode
1013 :: DynFlags
1014 -> Module -> ModLocation
1015 -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
1016 -> DwarfFiles
1017 -> LabelMap DebugBlock
1018 -> RawCmmDecl
1019 -> UniqSM
1020 ( [NatCmmDecl statics instr]
1021 , [CLabel]
1022 , DwarfFiles)
1023
1024 genMachCode dflags this_mod modLoc cmmTopCodeGen fileIds dbgMap cmm_top
1025 = do { initial_us <- getUniqueSupplyM
1026 ; let initial_st = mkNatM_State initial_us 0 dflags this_mod
1027 modLoc fileIds dbgMap
1028 (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
1029 final_delta = natm_delta final_st
1030 final_imports = natm_imports final_st
1031 ; if final_delta == 0
1032 then return (new_tops, final_imports, natm_fileid final_st)
1033 else pprPanic "genMachCode: nonzero final delta" (int final_delta)
1034 }
1035
1036 -- -----------------------------------------------------------------------------
1037 -- Generic Cmm optimiser
1038
1039 {-
1040 Here we do:
1041
1042 (a) Constant folding
1043 (c) Position independent code and dynamic linking
1044 (i) introduce the appropriate indirections
1045 and position independent refs
1046 (ii) compile a list of imported symbols
1047 (d) Some arch-specific optimizations
1048
1049 (a) will be moving to the new Hoopl pipeline, however, (c) and
1050 (d) are only needed by the native backend and will continue to live
1051 here.
1052
1053 Ideas for other things we could do (put these in Hoopl please!):
1054
1055 - shortcut jumps-to-jumps
1056 - simple CSE: if an expr is assigned to a temp, then replace later occs of
1057 that expr with the temp, until the expr is no longer valid (can push through
1058 temp assignments, and certain assigns to mem...)
1059 -}
1060
1061 cmmToCmm :: DynFlags -> Module -> RawCmmDecl -> (RawCmmDecl, [CLabel])
1062 cmmToCmm _ _ top@(CmmData _ _) = (top, [])
1063 cmmToCmm dflags this_mod (CmmProc info lbl live graph)
1064 = runCmmOpt dflags this_mod $
1065 do blocks' <- mapM cmmBlockConFold (toBlockList graph)
1066 return $ CmmProc info lbl live (ofBlockList (g_entry graph) blocks')
1067
1068 newtype CmmOptM a = CmmOptM (DynFlags -> Module -> [CLabel] -> (# a, [CLabel] #))
1069
1070 instance Functor CmmOptM where
1071 fmap = liftM
1072
1073 instance Applicative CmmOptM where
1074 pure x = CmmOptM $ \_ _ imports -> (# x, imports #)
1075 (<*>) = ap
1076
1077 instance Monad CmmOptM where
1078 (CmmOptM f) >>= g =
1079 CmmOptM $ \dflags this_mod imports ->
1080 case f dflags this_mod imports of
1081 (# x, imports' #) ->
1082 case g x of
1083 CmmOptM g' -> g' dflags this_mod imports'
1084
1085 instance CmmMakeDynamicReferenceM CmmOptM where
1086 addImport = addImportCmmOpt
1087 getThisModule = CmmOptM $ \_ this_mod imports -> (# this_mod, imports #)
1088
1089 addImportCmmOpt :: CLabel -> CmmOptM ()
1090 addImportCmmOpt lbl = CmmOptM $ \_ _ imports -> (# (), lbl:imports #)
1091
1092 instance HasDynFlags CmmOptM where
1093 getDynFlags = CmmOptM $ \dflags _ imports -> (# dflags, imports #)
1094
1095 runCmmOpt :: DynFlags -> Module -> CmmOptM a -> (a, [CLabel])
1096 runCmmOpt dflags this_mod (CmmOptM f) = case f dflags this_mod [] of
1097 (# result, imports #) -> (result, imports)
1098
1099 cmmBlockConFold :: CmmBlock -> CmmOptM CmmBlock
1100 cmmBlockConFold block = do
1101 let (entry, middle, last) = blockSplit block
1102 stmts = blockToList middle
1103 stmts' <- mapM cmmStmtConFold stmts
1104 last' <- cmmStmtConFold last
1105 return $ blockJoin entry (blockFromList stmts') last'
1106
1107 -- This does three optimizations, but they're very quick to check, so we don't
1108 -- bother turning them off even when the Hoopl code is active. Since
1109 -- this is on the old Cmm representation, we can't reuse the code either:
1110 -- * reg = reg --> nop
1111 -- * if 0 then jump --> nop
1112 -- * if 1 then jump --> jump
1113 -- We might be tempted to skip this step entirely of not Opt_PIC, but
1114 -- there is some PowerPC code for the non-PIC case, which would also
1115 -- have to be separated.
1116 cmmStmtConFold :: CmmNode e x -> CmmOptM (CmmNode e x)
1117 cmmStmtConFold stmt
1118 = case stmt of
1119 CmmAssign reg src
1120 -> do src' <- cmmExprConFold DataReference src
1121 return $ case src' of
1122 CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop")
1123 new_src -> CmmAssign reg new_src
1124
1125 CmmStore addr src
1126 -> do addr' <- cmmExprConFold DataReference addr
1127 src' <- cmmExprConFold DataReference src
1128 return $ CmmStore addr' src'
1129
1130 CmmCall { cml_target = addr }
1131 -> do addr' <- cmmExprConFold JumpReference addr
1132 return $ stmt { cml_target = addr' }
1133
1134 CmmUnsafeForeignCall target regs args
1135 -> do target' <- case target of
1136 ForeignTarget e conv -> do
1137 e' <- cmmExprConFold CallReference e
1138 return $ ForeignTarget e' conv
1139 PrimTarget _ ->
1140 return target
1141 args' <- mapM (cmmExprConFold DataReference) args
1142 return $ CmmUnsafeForeignCall target' regs args'
1143
1144 CmmCondBranch test true false likely
1145 -> do test' <- cmmExprConFold DataReference test
1146 return $ case test' of
1147 CmmLit (CmmInt 0 _) -> CmmBranch false
1148 CmmLit (CmmInt _ _) -> CmmBranch true
1149 _other -> CmmCondBranch test' true false likely
1150
1151 CmmSwitch expr ids
1152 -> do expr' <- cmmExprConFold DataReference expr
1153 return $ CmmSwitch expr' ids
1154
1155 other
1156 -> return other
1157
1158 cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
1159 cmmExprConFold referenceKind expr = do
1160 dflags <- getDynFlags
1161
1162 -- With -O1 and greater, the cmmSink pass does constant-folding, so
1163 -- we don't need to do it again here.
1164 let expr' = if optLevel dflags >= 1
1165 then expr
1166 else cmmExprCon dflags expr
1167
1168 cmmExprNative referenceKind expr'
1169
1170 cmmExprCon :: DynFlags -> CmmExpr -> CmmExpr
1171 cmmExprCon dflags (CmmLoad addr rep) = CmmLoad (cmmExprCon dflags addr) rep
1172 cmmExprCon dflags (CmmMachOp mop args)
1173 = cmmMachOpFold dflags mop (map (cmmExprCon dflags) args)
1174 cmmExprCon _ other = other
1175
1176 -- handles both PIC and non-PIC cases... a very strange mixture
1177 -- of things to do.
1178 cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
1179 cmmExprNative referenceKind expr = do
1180 dflags <- getDynFlags
1181 let platform = targetPlatform dflags
1182 arch = platformArch platform
1183 case expr of
1184 CmmLoad addr rep
1185 -> do addr' <- cmmExprNative DataReference addr
1186 return $ CmmLoad addr' rep
1187
1188 CmmMachOp mop args
1189 -> do args' <- mapM (cmmExprNative DataReference) args
1190 return $ CmmMachOp mop args'
1191
1192 CmmLit (CmmBlock id)
1193 -> cmmExprNative referenceKind (CmmLit (CmmLabel (infoTblLbl id)))
1194 -- we must convert block Ids to CLabels here, because we
1195 -- might have to do the PIC transformation. Hence we must
1196 -- not modify BlockIds beyond this point.
1197
1198 CmmLit (CmmLabel lbl)
1199 -> do
1200 cmmMakeDynamicReference dflags referenceKind lbl
1201 CmmLit (CmmLabelOff lbl off)
1202 -> do
1203 dynRef <- cmmMakeDynamicReference dflags referenceKind lbl
1204 -- need to optimize here, since it's late
1205 return $ cmmMachOpFold dflags (MO_Add (wordWidth dflags)) [
1206 dynRef,
1207 (CmmLit $ CmmInt (fromIntegral off) (wordWidth dflags))
1208 ]
1209
1210 -- On powerpc (non-PIC), it's easier to jump directly to a label than
1211 -- to use the register table, so we replace these registers
1212 -- with the corresponding labels:
1213 CmmReg (CmmGlobal EagerBlackholeInfo)
1214 | arch == ArchPPC && not (gopt Opt_PIC dflags)
1215 -> cmmExprNative referenceKind $
1216 CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_EAGER_BLACKHOLE_info")))
1217 CmmReg (CmmGlobal GCEnter1)
1218 | arch == ArchPPC && not (gopt Opt_PIC dflags)
1219 -> cmmExprNative referenceKind $
1220 CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_enter_1")))
1221 CmmReg (CmmGlobal GCFun)
1222 | arch == ArchPPC && not (gopt Opt_PIC dflags)
1223 -> cmmExprNative referenceKind $
1224 CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_fun")))
1225
1226 other
1227 -> return other