Omit Typeable from the "naturally coherent" list
[ghc.git] / compiler / nativeGen / AsmCodeGen.hs
1 -- -----------------------------------------------------------------------------
2 --
3 -- (c) The University of Glasgow 1993-2004
4 --
5 -- This is the top-level module in the native code generator.
6 --
7 -- -----------------------------------------------------------------------------
8
9 {-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, UnboxedTuples #-}
10
11 module AsmCodeGen (
12 -- * Module entry point
13 nativeCodeGen
14
15 -- * Test-only exports: see trac #12744
16 -- used by testGraphNoSpills, which needs to access
17 -- the register allocator intermediate data structures
18 -- cmmNativeGen emits
19 , cmmNativeGen
20 , NcgImpl(..)
21 , x86NcgImpl
22 ) where
23
24 #include "HsVersions.h"
25 #include "nativeGen/NCG.h"
26
27
28 import GhcPrelude
29
30 import qualified X86.CodeGen
31 import qualified X86.Regs
32 import qualified X86.Instr
33 import qualified X86.Ppr
34
35 import qualified SPARC.CodeGen
36 import qualified SPARC.Regs
37 import qualified SPARC.Instr
38 import qualified SPARC.Ppr
39 import qualified SPARC.ShortcutJump
40 import qualified SPARC.CodeGen.Expand
41
42 import qualified PPC.CodeGen
43 import qualified PPC.Regs
44 import qualified PPC.RegInfo
45 import qualified PPC.Instr
46 import qualified PPC.Ppr
47
48 import RegAlloc.Liveness
49 import qualified RegAlloc.Linear.Main as Linear
50
51 import qualified GraphColor as Color
52 import qualified RegAlloc.Graph.Main as Color
53 import qualified RegAlloc.Graph.Stats as Color
54 import qualified RegAlloc.Graph.TrivColorable as Color
55
56 import AsmUtils
57 import TargetReg
58 import Platform
59 import Config
60 import Instruction
61 import PIC
62 import Reg
63 import NCGMonad
64 import Dwarf
65 import Debug
66
67 import BlockId
68 import CgUtils ( fixStgRegisters )
69 import Cmm
70 import CmmUtils
71 import Hoopl.Collections
72 import Hoopl.Label
73 import Hoopl.Block
74 import CmmOpt ( cmmMachOpFold )
75 import PprCmm
76 import CLabel
77
78 import UniqFM
79 import UniqSupply
80 import DynFlags
81 import Util
82 import Unique
83
84 import BasicTypes ( Alignment )
85 import Digraph
86 import qualified Pretty
87 import BufWrite
88 import Outputable
89 import FastString
90 import UniqSet
91 import ErrUtils
92 import Module
93 import Stream (Stream)
94 import qualified Stream
95
96 -- DEBUGGING ONLY
97 --import OrdList
98
99 import Data.List
100 import Data.Maybe
101 import Data.Ord ( comparing )
102 import Control.Exception
103 import Control.Monad
104 import System.IO
105
106 {-
107 The native-code generator has machine-independent and
108 machine-dependent modules.
109
110 This module ("AsmCodeGen") is the top-level machine-independent
111 module. Before entering machine-dependent land, we do some
112 machine-independent optimisations (defined below) on the
113 'CmmStmts's.
114
115 We convert to the machine-specific 'Instr' datatype with
116 'cmmCodeGen', assuming an infinite supply of registers. We then use
117 a machine-independent register allocator ('regAlloc') to rejoin
118 reality. Obviously, 'regAlloc' has machine-specific helper
119 functions (see about "RegAllocInfo" below).
120
121 Finally, we order the basic blocks of the function so as to minimise
122 the number of jumps between blocks, by utilising fallthrough wherever
123 possible.
124
125 The machine-dependent bits break down as follows:
126
127 * ["MachRegs"] Everything about the target platform's machine
128 registers (and immediate operands, and addresses, which tend to
129 intermingle/interact with registers).
130
131 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
132 have a module of its own), plus a miscellany of other things
133 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
134
135 * ["MachCodeGen"] is where 'Cmm' stuff turns into
136 machine instructions.
137
138 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
139 a 'SDoc').
140
141 * ["RegAllocInfo"] In the register allocator, we manipulate
142 'MRegsState's, which are 'BitSet's, one bit per machine register.
143 When we want to say something about a specific machine register
144 (e.g., ``it gets clobbered by this instruction''), we set/unset
145 its bit. Obviously, we do this 'BitSet' thing for efficiency
146 reasons.
147
148 The 'RegAllocInfo' module collects together the machine-specific
149 info needed to do register allocation.
150
151 * ["RegisterAlloc"] The (machine-independent) register allocator.
152 -}
153
154 -- -----------------------------------------------------------------------------
155 -- Top-level of the native codegen
156
157 data NcgImpl statics instr jumpDest = NcgImpl {
158 cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
159 generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
160 getJumpDestBlockId :: jumpDest -> Maybe BlockId,
161 canShortcut :: instr -> Maybe jumpDest,
162 shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics,
163 shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
164 pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc,
165 maxSpillSlots :: Int,
166 allocatableRegs :: [RealReg],
167 ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
168 ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
169 ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr),
170 ncgMakeFarBranches :: LabelMap CmmStatics
171 -> [NatBasicBlock instr] -> [NatBasicBlock instr],
172 extractUnwindPoints :: [instr] -> [UnwindPoint]
173 -- ^ given the instruction sequence of a block, produce a list of
174 -- the block's 'UnwindPoint's
175 -- See Note [What is this unwinding business?] in Debug
176 -- and Note [Unwinding information in the NCG] in this module.
177 }
178
179 --------------------
180 nativeCodeGen :: DynFlags -> Module -> ModLocation -> Handle -> UniqSupply
181 -> Stream IO RawCmmGroup ()
182 -> IO UniqSupply
183 nativeCodeGen dflags this_mod modLoc h us cmms
184 = let platform = targetPlatform dflags
185 nCG' :: (Outputable statics, Outputable instr, Instruction instr)
186 => NcgImpl statics instr jumpDest -> IO UniqSupply
187 nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
188 in case platformArch platform of
189 ArchX86 -> nCG' (x86NcgImpl dflags)
190 ArchX86_64 -> nCG' (x86_64NcgImpl dflags)
191 ArchPPC -> nCG' (ppcNcgImpl dflags)
192 ArchSPARC -> nCG' (sparcNcgImpl dflags)
193 ArchSPARC64 -> panic "nativeCodeGen: No NCG for SPARC64"
194 ArchARM {} -> panic "nativeCodeGen: No NCG for ARM"
195 ArchARM64 -> panic "nativeCodeGen: No NCG for ARM64"
196 ArchPPC_64 _ -> nCG' (ppcNcgImpl dflags)
197 ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
198 ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
199 ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
200 ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
201 ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
202
203 x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
204 x86NcgImpl dflags
205 = (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge }
206
207 x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
208 x86_64NcgImpl dflags
209 = NcgImpl {
210 cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
211 ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
212 ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
213 ,canShortcut = X86.Instr.canShortcut
214 ,shortcutStatics = X86.Instr.shortcutStatics
215 ,shortcutJump = X86.Instr.shortcutJump
216 ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
217 ,maxSpillSlots = X86.Instr.maxSpillSlots dflags
218 ,allocatableRegs = X86.Regs.allocatableRegs platform
219 ,ncg_x86fp_kludge = id
220 ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
221 ,ncgExpandTop = id
222 ,ncgMakeFarBranches = const id
223 ,extractUnwindPoints = X86.CodeGen.extractUnwindPoints
224 }
225 where platform = targetPlatform dflags
226
227 ppcNcgImpl :: DynFlags -> NcgImpl CmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
228 ppcNcgImpl dflags
229 = NcgImpl {
230 cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
231 ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
232 ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
233 ,canShortcut = PPC.RegInfo.canShortcut
234 ,shortcutStatics = PPC.RegInfo.shortcutStatics
235 ,shortcutJump = PPC.RegInfo.shortcutJump
236 ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
237 ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
238 ,allocatableRegs = PPC.Regs.allocatableRegs platform
239 ,ncg_x86fp_kludge = id
240 ,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform
241 ,ncgExpandTop = id
242 ,ncgMakeFarBranches = PPC.Instr.makeFarBranches
243 ,extractUnwindPoints = const []
244 }
245 where platform = targetPlatform dflags
246
247 sparcNcgImpl :: DynFlags -> NcgImpl CmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
248 sparcNcgImpl dflags
249 = NcgImpl {
250 cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
251 ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
252 ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
253 ,canShortcut = SPARC.ShortcutJump.canShortcut
254 ,shortcutStatics = SPARC.ShortcutJump.shortcutStatics
255 ,shortcutJump = SPARC.ShortcutJump.shortcutJump
256 ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
257 ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
258 ,allocatableRegs = SPARC.Regs.allocatableRegs
259 ,ncg_x86fp_kludge = id
260 ,ncgAllocMoreStack = noAllocMoreStack
261 ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
262 ,ncgMakeFarBranches = const id
263 ,extractUnwindPoints = const []
264 }
265
266 --
267 -- Allocating more stack space for spilling is currently only
268 -- supported for the linear register allocator on x86/x86_64, the rest
269 -- default to the panic below. To support allocating extra stack on
270 -- more platforms provide a definition of ncgAllocMoreStack.
271 --
272 noAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr)
273 noAllocMoreStack amount _
274 = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
275 ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
276 ++ " is a known limitation in the linear allocator.\n"
277 ++ "\n"
278 ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
279 ++ " You can still file a bug report if you like.\n"
280
281
282 -- | Data accumulated during code generation. Mostly about statistics,
283 -- but also collects debug data for DWARF generation.
284 data NativeGenAcc statics instr
285 = NGS { ngs_imports :: ![[CLabel]]
286 , ngs_natives :: ![[NatCmmDecl statics instr]]
287 -- ^ Native code generated, for statistics. This might
288 -- hold a lot of data, so it is important to clear this
289 -- field as early as possible if it isn't actually
290 -- required.
291 , ngs_colorStats :: ![[Color.RegAllocStats statics instr]]
292 , ngs_linearStats :: ![[Linear.RegAllocStats]]
293 , ngs_labels :: ![Label]
294 , ngs_debug :: ![DebugBlock]
295 , ngs_dwarfFiles :: !DwarfFiles
296 , ngs_unwinds :: !(LabelMap [UnwindPoint])
297 -- ^ see Note [Unwinding information in the NCG]
298 -- and Note [What is this unwinding business?] in Debug.
299 }
300
301 {-
302 Note [Unwinding information in the NCG]
303 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
304
305 Unwind information is a type of metadata which allows a debugging tool
306 to reconstruct the values of machine registers at the time a procedure was
307 entered. For the most part, the production of unwind information is handled by
308 the Cmm stage, where it is represented by CmmUnwind nodes.
309
310 Unfortunately, the Cmm stage doesn't know everything necessary to produce
311 accurate unwinding information. For instance, the x86-64 calling convention
312 requires that the stack pointer be aligned to 16 bytes, which in turn means that
313 GHC must sometimes add padding to $sp prior to performing a foreign call. When
314 this happens unwind information must be updated accordingly.
315 For this reason, we make the NCG backends responsible for producing
316 unwinding tables (with the extractUnwindPoints function in NcgImpl).
317
318 We accumulate the produced unwind tables over CmmGroups in the ngs_unwinds
319 field of NativeGenAcc. This is a label map which contains an entry for each
320 procedure, containing a list of unwinding points (e.g. a label and an associated
321 unwinding table).
322
323 See also Note [What is this unwinding business?] in Debug.
324 -}
325
326 nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
327 => DynFlags
328 -> Module -> ModLocation
329 -> NcgImpl statics instr jumpDest
330 -> Handle
331 -> UniqSupply
332 -> Stream IO RawCmmGroup ()
333 -> IO UniqSupply
334 nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
335 = do
336 -- BufHandle is a performance hack. We could hide it inside
337 -- Pretty if it weren't for the fact that we do lots of little
338 -- printDocs here (in order to do codegen in constant space).
339 bufh <- newBufHandle h
340 let ngs0 = NGS [] [] [] [] [] [] emptyUFM mapEmpty
341 (ngs, us') <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us
342 cmms ngs0
343 finishNativeGen dflags modLoc bufh us' ngs
344
345 finishNativeGen :: Instruction instr
346 => DynFlags
347 -> ModLocation
348 -> BufHandle
349 -> UniqSupply
350 -> NativeGenAcc statics instr
351 -> IO UniqSupply
352 finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs
353 = do
354 -- Write debug data and finish
355 let emitDw = debugLevel dflags > 0 && not (gopt Opt_SplitObjs dflags)
356 us' <- if not emitDw then return us else do
357 (dwarf, us') <- dwarfGen dflags modLoc us (ngs_debug ngs)
358 emitNativeCode dflags bufh dwarf
359 return us'
360 bFlush bufh
361
362 -- dump global NCG stats for graph coloring allocator
363 let stats = concat (ngs_colorStats ngs)
364 when (not (null stats)) $ do
365
366 -- build the global register conflict graph
367 let graphGlobal
368 = foldl Color.union Color.initGraph
369 $ [ Color.raGraph stat
370 | stat@Color.RegAllocStatsStart{} <- stats]
371
372 dump_stats (Color.pprStats stats graphGlobal)
373
374 let platform = targetPlatform dflags
375 dumpIfSet_dyn dflags
376 Opt_D_dump_asm_conflicts "Register conflict graph"
377 $ Color.dotGraph
378 (targetRegDotColor platform)
379 (Color.trivColorable platform
380 (targetVirtualRegSqueeze platform)
381 (targetRealRegSqueeze platform))
382 $ graphGlobal
383
384
385 -- dump global NCG stats for linear allocator
386 let linearStats = concat (ngs_linearStats ngs)
387 when (not (null linearStats)) $
388 dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
389
390 -- write out the imports
391 printSDocLn Pretty.LeftMode dflags h (mkCodeStyle AsmStyle)
392 $ makeImportsDoc dflags (concat (ngs_imports ngs))
393 return us'
394 where
395 dump_stats = dumpSDoc dflags alwaysQualify Opt_D_dump_asm_stats "NCG stats"
396
397 cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr)
398 => DynFlags
399 -> Module -> ModLocation
400 -> NcgImpl statics instr jumpDest
401 -> BufHandle
402 -> UniqSupply
403 -> Stream IO RawCmmGroup ()
404 -> NativeGenAcc statics instr
405 -> IO (NativeGenAcc statics instr, UniqSupply)
406
407 cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs
408 = do r <- Stream.runStream cmm_stream
409 case r of
410 Left () ->
411 return (ngs { ngs_imports = reverse $ ngs_imports ngs
412 , ngs_natives = reverse $ ngs_natives ngs
413 , ngs_colorStats = reverse $ ngs_colorStats ngs
414 , ngs_linearStats = reverse $ ngs_linearStats ngs
415 },
416 us)
417 Right (cmms, cmm_stream') -> do
418
419 -- Generate debug information
420 let debugFlag = debugLevel dflags > 0
421 !ndbgs | debugFlag = cmmDebugGen modLoc cmms
422 | otherwise = []
423 dbgMap = debugToMap ndbgs
424
425 -- Insert split marker, generate native code
426 let splitObjs = gopt Opt_SplitObjs dflags
427 split_marker = CmmProc mapEmpty mkSplitMarkerLabel [] $
428 ofBlockList (panic "split_marker_entry") []
429 cmms' | splitObjs = split_marker : cmms
430 | otherwise = cmms
431 (ngs',us') <- cmmNativeGens dflags this_mod modLoc ncgImpl h
432 dbgMap us cmms' ngs 0
433
434 -- Link native code information into debug blocks
435 -- See Note [What is this unwinding business?] in Debug.
436 let !ldbgs = cmmDebugLink (ngs_labels ngs') (ngs_unwinds ngs') ndbgs
437 dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos"
438 (vcat $ map ppr ldbgs)
439
440 -- Emit & clear DWARF information when generating split
441 -- object files, as we need it to land in the same object file
442 -- When using split sections, note that we do not split the debug
443 -- info but emit all the info at once in finishNativeGen.
444 (ngs'', us'') <-
445 if debugFlag && splitObjs
446 then do (dwarf, us'') <- dwarfGen dflags modLoc us ldbgs
447 emitNativeCode dflags h dwarf
448 return (ngs' { ngs_debug = []
449 , ngs_dwarfFiles = emptyUFM
450 , ngs_labels = [] },
451 us'')
452 else return (ngs' { ngs_debug = ngs_debug ngs' ++ ldbgs
453 , ngs_labels = [] },
454 us')
455
456 cmmNativeGenStream dflags this_mod modLoc ncgImpl h us''
457 cmm_stream' ngs''
458
459 -- | Do native code generation on all these cmms.
460 --
461 cmmNativeGens :: forall statics instr jumpDest.
462 (Outputable statics, Outputable instr, Instruction instr)
463 => DynFlags
464 -> Module -> ModLocation
465 -> NcgImpl statics instr jumpDest
466 -> BufHandle
467 -> LabelMap DebugBlock
468 -> UniqSupply
469 -> [RawCmmDecl]
470 -> NativeGenAcc statics instr
471 -> Int
472 -> IO (NativeGenAcc statics instr, UniqSupply)
473
474 cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap = go
475 where
476 go :: UniqSupply -> [RawCmmDecl]
477 -> NativeGenAcc statics instr -> Int
478 -> IO (NativeGenAcc statics instr, UniqSupply)
479
480 go us [] ngs !_ =
481 return (ngs, us)
482
483 go us (cmm : cmms) ngs count = do
484 let fileIds = ngs_dwarfFiles ngs
485 (us', fileIds', native, imports, colorStats, linearStats, unwinds)
486 <- {-# SCC "cmmNativeGen" #-}
487 cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap
488 cmm count
489
490 -- Generate .file directives for every new file that has been
491 -- used. Note that it is important that we generate these in
492 -- ascending order, as Clang's 3.6 assembler complains.
493 let newFileIds = sortBy (comparing snd) $
494 nonDetEltsUFM $ fileIds' `minusUFM` fileIds
495 -- See Note [Unique Determinism and code generation]
496 pprDecl (f,n) = text "\t.file " <> ppr n <+>
497 doubleQuotes (ftext f)
498
499 emitNativeCode dflags h $ vcat $
500 map pprDecl newFileIds ++
501 map (pprNatCmmDecl ncgImpl) native
502
503 -- force evaluation all this stuff to avoid space leaks
504 {-# SCC "seqString" #-} evaluate $ seqString (showSDoc dflags $ vcat $ map ppr imports)
505
506 let !labels' = if debugLevel dflags > 0
507 then cmmDebugLabels isMetaInstr native else []
508 !natives' = if dopt Opt_D_dump_asm_stats dflags
509 then native : ngs_natives ngs else []
510
511 mCon = maybe id (:)
512 ngs' = ngs{ ngs_imports = imports : ngs_imports ngs
513 , ngs_natives = natives'
514 , ngs_colorStats = colorStats `mCon` ngs_colorStats ngs
515 , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
516 , ngs_labels = ngs_labels ngs ++ labels'
517 , ngs_dwarfFiles = fileIds'
518 , ngs_unwinds = ngs_unwinds ngs `mapUnion` unwinds
519 }
520 go us' cmms ngs' (count + 1)
521
522 seqString [] = ()
523 seqString (x:xs) = x `seq` seqString xs
524
525
526 emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO ()
527 emitNativeCode dflags h sdoc = do
528
529 {-# SCC "pprNativeCode" #-} bufLeftRenderSDoc dflags h
530 (mkCodeStyle AsmStyle) sdoc
531
532 -- dump native code
533 dumpIfSet_dyn dflags
534 Opt_D_dump_asm "Asm code"
535 sdoc
536
537 -- | Complete native code generation phase for a single top-level chunk of Cmm.
538 -- Dumping the output of each stage along the way.
539 -- Global conflict graph and NGC stats
540 cmmNativeGen
541 :: (Outputable statics, Outputable instr, Instruction instr)
542 => DynFlags
543 -> Module -> ModLocation
544 -> NcgImpl statics instr jumpDest
545 -> UniqSupply
546 -> DwarfFiles
547 -> LabelMap DebugBlock
548 -> RawCmmDecl -- ^ the cmm to generate code for
549 -> Int -- ^ sequence number of this top thing
550 -> IO ( UniqSupply
551 , DwarfFiles
552 , [NatCmmDecl statics instr] -- native code
553 , [CLabel] -- things imported by this cmm
554 , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
555 , Maybe [Linear.RegAllocStats] -- stats for the linear register allocators
556 , LabelMap [UnwindPoint] -- unwinding information for blocks
557 )
558
559 cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
560 = do
561 let platform = targetPlatform dflags
562
563 -- rewrite assignments to global regs
564 let fixed_cmm =
565 {-# SCC "fixStgRegisters" #-}
566 fixStgRegisters dflags cmm
567
568 -- cmm to cmm optimisations
569 let (opt_cmm, imports) =
570 {-# SCC "cmmToCmm" #-}
571 cmmToCmm dflags this_mod fixed_cmm
572
573 dumpIfSet_dyn dflags
574 Opt_D_dump_opt_cmm "Optimised Cmm"
575 (pprCmmGroup [opt_cmm])
576
577 -- generate native code from cmm
578 let ((native, lastMinuteImports, fileIds'), usGen) =
579 {-# SCC "genMachCode" #-}
580 initUs us $ genMachCode dflags this_mod modLoc
581 (cmmTopCodeGen ncgImpl)
582 fileIds dbgMap opt_cmm
583
584 dumpIfSet_dyn dflags
585 Opt_D_dump_asm_native "Native code"
586 (vcat $ map (pprNatCmmDecl ncgImpl) native)
587
588 -- tag instructions with register liveness information
589 let (withLiveness, usLive) =
590 {-# SCC "regLiveness" #-}
591 initUs usGen
592 $ mapM (regLiveness platform)
593 $ map natCmmTopToLive native
594
595 dumpIfSet_dyn dflags
596 Opt_D_dump_asm_liveness "Liveness annotations added"
597 (vcat $ map ppr withLiveness)
598
599 -- allocate registers
600 (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
601 if ( gopt Opt_RegsGraph dflags
602 || gopt Opt_RegsIterative dflags )
603 then do
604 -- the regs usable for allocation
605 let (alloc_regs :: UniqFM (UniqSet RealReg))
606 = foldr (\r -> plusUFM_C unionUniqSets
607 $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
608 emptyUFM
609 $ allocatableRegs ncgImpl
610
611 -- do the graph coloring register allocation
612 let ((alloced, regAllocStats), usAlloc)
613 = {-# SCC "RegAlloc-color" #-}
614 initUs usLive
615 $ Color.regAlloc
616 dflags
617 alloc_regs
618 (mkUniqSet [0 .. maxSpillSlots ncgImpl])
619 withLiveness
620
621 -- dump out what happened during register allocation
622 dumpIfSet_dyn dflags
623 Opt_D_dump_asm_regalloc "Registers allocated"
624 (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
625
626 dumpIfSet_dyn dflags
627 Opt_D_dump_asm_regalloc_stages "Build/spill stages"
628 (vcat $ map (\(stage, stats)
629 -> text "# --------------------------"
630 $$ text "# cmm " <> int count <> text " Stage " <> int stage
631 $$ ppr stats)
632 $ zip [0..] regAllocStats)
633
634 let mPprStats =
635 if dopt Opt_D_dump_asm_stats dflags
636 then Just regAllocStats else Nothing
637
638 -- force evaluation of the Maybe to avoid space leak
639 mPprStats `seq` return ()
640
641 return ( alloced, usAlloc
642 , mPprStats
643 , Nothing)
644
645 else do
646 -- do linear register allocation
647 let reg_alloc proc = do
648 (alloced, maybe_more_stack, ra_stats) <-
649 Linear.regAlloc dflags proc
650 case maybe_more_stack of
651 Nothing -> return ( alloced, ra_stats )
652 Just amount -> do
653 alloced' <- ncgAllocMoreStack ncgImpl amount alloced
654 return (alloced', ra_stats )
655
656 let ((alloced, regAllocStats), usAlloc)
657 = {-# SCC "RegAlloc-linear" #-}
658 initUs usLive
659 $ liftM unzip
660 $ mapM reg_alloc withLiveness
661
662 dumpIfSet_dyn dflags
663 Opt_D_dump_asm_regalloc "Registers allocated"
664 (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
665
666 let mPprStats =
667 if dopt Opt_D_dump_asm_stats dflags
668 then Just (catMaybes regAllocStats) else Nothing
669
670 -- force evaluation of the Maybe to avoid space leak
671 mPprStats `seq` return ()
672
673 return ( alloced, usAlloc
674 , Nothing
675 , mPprStats)
676
677 ---- x86fp_kludge. This pass inserts ffree instructions to clear
678 ---- the FPU stack on x86. The x86 ABI requires that the FPU stack
679 ---- is clear, and library functions can return odd results if it
680 ---- isn't.
681 ----
682 ---- NB. must happen before shortcutBranches, because that
683 ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
684 let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
685
686 ---- generate jump tables
687 let tabled =
688 {-# SCC "generateJumpTables" #-}
689 generateJumpTables ncgImpl kludged
690
691 ---- shortcut branches
692 let shorted =
693 {-# SCC "shortcutBranches" #-}
694 shortcutBranches dflags ncgImpl tabled
695
696 ---- sequence blocks
697 let sequenced =
698 {-# SCC "sequenceBlocks" #-}
699 map (sequenceTop ncgImpl) shorted
700
701 ---- expansion of SPARC synthetic instrs
702 let expanded =
703 {-# SCC "sparc_expand" #-}
704 ncgExpandTop ncgImpl sequenced
705
706 dumpIfSet_dyn dflags
707 Opt_D_dump_asm_expanded "Synthetic instructions expanded"
708 (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
709
710 -- generate unwinding information from cmm
711 let unwinds :: BlockMap [UnwindPoint]
712 unwinds =
713 {-# SCC "unwindingInfo" #-}
714 foldl' addUnwind mapEmpty expanded
715 where
716 addUnwind acc proc =
717 acc `mapUnion` computeUnwinding dflags ncgImpl proc
718
719 return ( usAlloc
720 , fileIds'
721 , expanded
722 , lastMinuteImports ++ imports
723 , ppr_raStatsColor
724 , ppr_raStatsLinear
725 , unwinds )
726
727
728 x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
729 x86fp_kludge top@(CmmData _ _) = top
730 x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
731 CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
732
733 -- | Compute unwinding tables for the blocks of a procedure
734 computeUnwinding :: Instruction instr
735 => DynFlags -> NcgImpl statics instr jumpDest
736 -> NatCmmDecl statics instr
737 -- ^ the native code generated for the procedure
738 -> LabelMap [UnwindPoint]
739 -- ^ unwinding tables for all points of all blocks of the
740 -- procedure
741 computeUnwinding dflags _ _
742 | debugLevel dflags == 0 = mapEmpty
743 computeUnwinding _ _ (CmmData _ _) = mapEmpty
744 computeUnwinding _ ncgImpl (CmmProc _ _ _ (ListGraph blks)) =
745 -- In general we would need to push unwinding information down the
746 -- block-level call-graph to ensure that we fully account for all
747 -- relevant register writes within a procedure.
748 --
749 -- However, the only unwinding information that we care about in GHC is for
750 -- Sp. The fact that CmmLayoutStack already ensures that we have unwind
751 -- information at the beginning of every block means that there is no need
752 -- to perform this sort of push-down.
753 mapFromList [ (blk_lbl, extractUnwindPoints ncgImpl instrs)
754 | BasicBlock blk_lbl instrs <- blks ]
755
756 -- | Build a doc for all the imports.
757 --
758 makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
759 makeImportsDoc dflags imports
760 = dyld_stubs imports
761 $$
762 -- On recent versions of Darwin, the linker supports
763 -- dead-stripping of code and data on a per-symbol basis.
764 -- There's a hack to make this work in PprMach.pprNatCmmDecl.
765 (if platformHasSubsectionsViaSymbols platform
766 then text ".subsections_via_symbols"
767 else Outputable.empty)
768 $$
769 -- On recent GNU ELF systems one can mark an object file
770 -- as not requiring an executable stack. If all objects
771 -- linked into a program have this note then the program
772 -- will not use an executable stack, which is good for
773 -- security. GHC generated code does not need an executable
774 -- stack so add the note in:
775 (if platformHasGnuNonexecStack platform
776 then text ".section .note.GNU-stack,\"\"," <> sectionType "progbits"
777 else Outputable.empty)
778 $$
779 -- And just because every other compiler does, let's stick in
780 -- an identifier directive: .ident "GHC x.y.z"
781 (if platformHasIdentDirective platform
782 then let compilerIdent = text "GHC" <+> text cProjectVersion
783 in text ".ident" <+> doubleQuotes compilerIdent
784 else Outputable.empty)
785
786 where
787 platform = targetPlatform dflags
788 arch = platformArch platform
789 os = platformOS platform
790
791 -- Generate "symbol stubs" for all external symbols that might
792 -- come from a dynamic library.
793 dyld_stubs :: [CLabel] -> SDoc
794 {- dyld_stubs imps = vcat $ map pprDyldSymbolStub $
795 map head $ group $ sort imps-}
796 -- (Hack) sometimes two Labels pretty-print the same, but have
797 -- different uniques; so we compare their text versions...
798 dyld_stubs imps
799 | needImportedSymbols dflags arch os
800 = vcat $
801 (pprGotDeclaration dflags arch os :) $
802 map ( pprImportedSymbol dflags platform . fst . head) $
803 groupBy (\(_,a) (_,b) -> a == b) $
804 sortBy (\(_,a) (_,b) -> compare a b) $
805 map doPpr $
806 imps
807 | otherwise
808 = Outputable.empty
809
810 doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel platform lbl) astyle)
811 astyle = mkCodeStyle AsmStyle
812
813
814 -- -----------------------------------------------------------------------------
815 -- Sequencing the basic blocks
816
817 -- Cmm BasicBlocks are self-contained entities: they always end in a
818 -- jump, either non-local or to another basic block in the same proc.
819 -- In this phase, we attempt to place the basic blocks in a sequence
820 -- such that as many of the local jumps as possible turn into
821 -- fallthroughs.
822
823 sequenceTop
824 :: Instruction instr
825 => NcgImpl statics instr jumpDest -> NatCmmDecl statics instr -> NatCmmDecl statics instr
826
827 sequenceTop _ top@(CmmData _ _) = top
828 sequenceTop ncgImpl (CmmProc info lbl live (ListGraph blocks)) =
829 CmmProc info lbl live (ListGraph $ ncgMakeFarBranches ncgImpl info $ sequenceBlocks info blocks)
830
831 -- The algorithm is very simple (and stupid): we make a graph out of
832 -- the blocks where there is an edge from one block to another iff the
833 -- first block ends by jumping to the second. Then we topologically
834 -- sort this graph. Then traverse the list: for each block, we first
835 -- output the block, then if it has an out edge, we move the
836 -- destination of the out edge to the front of the list, and continue.
837
838 -- FYI, the classic layout for basic blocks uses postorder DFS; this
839 -- algorithm is implemented in Hoopl.
840
841 sequenceBlocks
842 :: Instruction instr
843 => LabelMap i
844 -> [NatBasicBlock instr]
845 -> [NatBasicBlock instr]
846
847 sequenceBlocks _ [] = []
848 sequenceBlocks infos (entry:blocks) =
849 seqBlocks infos (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
850 -- the first block is the entry point ==> it must remain at the start.
851
852
853 sccBlocks
854 :: Instruction instr
855 => [NatBasicBlock instr]
856 -> [SCC (Node BlockId (NatBasicBlock instr))]
857
858 sccBlocks blocks = stronglyConnCompFromEdgedVerticesUniqR (map mkNode blocks)
859
860 -- we're only interested in the last instruction of
861 -- the block, and only if it has a single destination.
862 getOutEdges
863 :: Instruction instr
864 => [instr] -> [BlockId]
865
866 getOutEdges instrs
867 = case jumpDestsOfInstr (last instrs) of
868 [one] -> [one]
869 _many -> []
870
871 mkNode :: (Instruction t)
872 => GenBasicBlock t
873 -> Node BlockId (GenBasicBlock t)
874 mkNode block@(BasicBlock id instrs) = DigraphNode block id (getOutEdges instrs)
875
876 seqBlocks :: LabelMap i -> [Node BlockId (GenBasicBlock t1)]
877 -> [GenBasicBlock t1]
878 seqBlocks infos blocks = placeNext pullable0 todo0
879 where
880 -- pullable: Blocks that are not yet placed
881 -- todo: Original order of blocks, to be followed if we have no good
882 -- reason not to;
883 -- may include blocks that have already been placed, but then
884 -- these are not in pullable
885 pullable0 = listToUFM [ (i,(b,n)) | DigraphNode b i n <- blocks ]
886 todo0 = map node_key blocks
887
888 placeNext _ [] = []
889 placeNext pullable (i:rest)
890 | Just (block, pullable') <- lookupDeleteUFM pullable i
891 = place pullable' rest block
892 | otherwise
893 -- We already placed this block, so ignore
894 = placeNext pullable rest
895
896 place pullable todo (block,[])
897 = block : placeNext pullable todo
898 place pullable todo (block@(BasicBlock id instrs),[next])
899 | mapMember next infos
900 = block : placeNext pullable todo
901 | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next
902 = BasicBlock id (init instrs) : place pullable' todo nextBlock
903 | otherwise
904 = block : placeNext pullable todo
905 place _ _ (_,tooManyNextNodes)
906 = pprPanic "seqBlocks" (ppr tooManyNextNodes)
907
908
909 lookupDeleteUFM :: Uniquable key => UniqFM elt -> key -> Maybe (elt, UniqFM elt)
910 lookupDeleteUFM m k = do -- Maybe monad
911 v <- lookupUFM m k
912 return (v, delFromUFM m k)
913
914 -- -----------------------------------------------------------------------------
915 -- Generate jump tables
916
917 -- Analyzes all native code and generates data sections for all jump
918 -- table instructions.
919 generateJumpTables
920 :: NcgImpl statics instr jumpDest
921 -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
922 generateJumpTables ncgImpl xs = concatMap f xs
923 where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
924 f p = [p]
925 g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
926
927 -- -----------------------------------------------------------------------------
928 -- Shortcut branches
929
930 shortcutBranches
931 :: DynFlags
932 -> NcgImpl statics instr jumpDest
933 -> [NatCmmDecl statics instr]
934 -> [NatCmmDecl statics instr]
935
936 shortcutBranches dflags ncgImpl tops
937 | optLevel dflags < 1 = tops -- only with -O or higher
938 | otherwise = map (apply_mapping ncgImpl mapping) tops'
939 where
940 (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
941 mapping = foldr plusUFM emptyUFM mappings
942
943 build_mapping :: NcgImpl statics instr jumpDest
944 -> GenCmmDecl d (LabelMap t) (ListGraph instr)
945 -> (GenCmmDecl d (LabelMap t) (ListGraph instr), UniqFM jumpDest)
946 build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
947 build_mapping _ (CmmProc info lbl live (ListGraph []))
948 = (CmmProc info lbl live (ListGraph []), emptyUFM)
949 build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
950 = (CmmProc info lbl live (ListGraph (head:others)), mapping)
951 -- drop the shorted blocks, but don't ever drop the first one,
952 -- because it is pointed to by a global label.
953 where
954 -- find all the blocks that just consist of a jump that can be
955 -- shorted.
956 -- Don't completely eliminate loops here -- that can leave a dangling jump!
957 (_, shortcut_blocks, others) =
958 foldl split (setEmpty :: LabelSet, [], []) blocks
959 split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
960 | Just jd <- canShortcut ncgImpl insn,
961 Just dest <- getJumpDestBlockId ncgImpl jd,
962 not (has_info id),
963 (setMember dest s) || dest == id -- loop checks
964 = (s, shortcut_blocks, b : others)
965 split (s, shortcut_blocks, others) (BasicBlock id [insn])
966 | Just dest <- canShortcut ncgImpl insn,
967 not (has_info id)
968 = (setInsert id s, (id,dest) : shortcut_blocks, others)
969 split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
970
971 -- do not eliminate blocks that have an info table
972 has_info l = mapMember l info
973
974 -- build a mapping from BlockId to JumpDest for shorting branches
975 mapping = foldl add emptyUFM shortcut_blocks
976 add ufm (id,dest) = addToUFM ufm id dest
977
978 apply_mapping :: NcgImpl statics instr jumpDest
979 -> UniqFM jumpDest
980 -> GenCmmDecl statics h (ListGraph instr)
981 -> GenCmmDecl statics h (ListGraph instr)
982 apply_mapping ncgImpl ufm (CmmData sec statics)
983 = CmmData sec (shortcutStatics ncgImpl (lookupUFM ufm) statics)
984 apply_mapping ncgImpl ufm (CmmProc info lbl live (ListGraph blocks))
985 = CmmProc info lbl live (ListGraph $ map short_bb blocks)
986 where
987 short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
988 short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i
989 -- shortcutJump should apply the mapping repeatedly,
990 -- just in case we can short multiple branches.
991
992 -- -----------------------------------------------------------------------------
993 -- Instruction selection
994
995 -- Native code instruction selection for a chunk of stix code. For
996 -- this part of the computation, we switch from the UniqSM monad to
997 -- the NatM monad. The latter carries not only a Unique, but also an
998 -- Int denoting the current C stack pointer offset in the generated
999 -- code; this is needed for creating correct spill offsets on
1000 -- architectures which don't offer, or for which it would be
1001 -- prohibitively expensive to employ, a frame pointer register. Viz,
1002 -- x86.
1003
1004 -- The offset is measured in bytes, and indicates the difference
1005 -- between the current (simulated) C stack-ptr and the value it was at
1006 -- the beginning of the block. For stacks which grow down, this value
1007 -- should be either zero or negative.
1008
1009 -- Along with the stack pointer offset, we also carry along a LabelMap of
1010 -- DebugBlocks, which we read to generate .location directives.
1011 --
1012 -- Switching between the two monads whilst carrying along the same
1013 -- Unique supply breaks abstraction. Is that bad?
1014
1015 genMachCode
1016 :: DynFlags
1017 -> Module -> ModLocation
1018 -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
1019 -> DwarfFiles
1020 -> LabelMap DebugBlock
1021 -> RawCmmDecl
1022 -> UniqSM
1023 ( [NatCmmDecl statics instr]
1024 , [CLabel]
1025 , DwarfFiles)
1026
1027 genMachCode dflags this_mod modLoc cmmTopCodeGen fileIds dbgMap cmm_top
1028 = do { initial_us <- getUniqueSupplyM
1029 ; let initial_st = mkNatM_State initial_us 0 dflags this_mod
1030 modLoc fileIds dbgMap
1031 (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
1032 final_delta = natm_delta final_st
1033 final_imports = natm_imports final_st
1034 ; if final_delta == 0
1035 then return (new_tops, final_imports, natm_fileid final_st)
1036 else pprPanic "genMachCode: nonzero final delta" (int final_delta)
1037 }
1038
1039 -- -----------------------------------------------------------------------------
1040 -- Generic Cmm optimiser
1041
1042 {-
1043 Here we do:
1044
1045 (a) Constant folding
1046 (c) Position independent code and dynamic linking
1047 (i) introduce the appropriate indirections
1048 and position independent refs
1049 (ii) compile a list of imported symbols
1050 (d) Some arch-specific optimizations
1051
1052 (a) will be moving to the new Hoopl pipeline, however, (c) and
1053 (d) are only needed by the native backend and will continue to live
1054 here.
1055
1056 Ideas for other things we could do (put these in Hoopl please!):
1057
1058 - shortcut jumps-to-jumps
1059 - simple CSE: if an expr is assigned to a temp, then replace later occs of
1060 that expr with the temp, until the expr is no longer valid (can push through
1061 temp assignments, and certain assigns to mem...)
1062 -}
1063
1064 cmmToCmm :: DynFlags -> Module -> RawCmmDecl -> (RawCmmDecl, [CLabel])
1065 cmmToCmm _ _ top@(CmmData _ _) = (top, [])
1066 cmmToCmm dflags this_mod (CmmProc info lbl live graph)
1067 = runCmmOpt dflags this_mod $
1068 do blocks' <- mapM cmmBlockConFold (toBlockList graph)
1069 return $ CmmProc info lbl live (ofBlockList (g_entry graph) blocks')
1070
1071 newtype CmmOptM a = CmmOptM (DynFlags -> Module -> [CLabel] -> (# a, [CLabel] #))
1072
1073 instance Functor CmmOptM where
1074 fmap = liftM
1075
1076 instance Applicative CmmOptM where
1077 pure x = CmmOptM $ \_ _ imports -> (# x, imports #)
1078 (<*>) = ap
1079
1080 instance Monad CmmOptM where
1081 (CmmOptM f) >>= g =
1082 CmmOptM $ \dflags this_mod imports ->
1083 case f dflags this_mod imports of
1084 (# x, imports' #) ->
1085 case g x of
1086 CmmOptM g' -> g' dflags this_mod imports'
1087
1088 instance CmmMakeDynamicReferenceM CmmOptM where
1089 addImport = addImportCmmOpt
1090 getThisModule = CmmOptM $ \_ this_mod imports -> (# this_mod, imports #)
1091
1092 addImportCmmOpt :: CLabel -> CmmOptM ()
1093 addImportCmmOpt lbl = CmmOptM $ \_ _ imports -> (# (), lbl:imports #)
1094
1095 instance HasDynFlags CmmOptM where
1096 getDynFlags = CmmOptM $ \dflags _ imports -> (# dflags, imports #)
1097
1098 runCmmOpt :: DynFlags -> Module -> CmmOptM a -> (a, [CLabel])
1099 runCmmOpt dflags this_mod (CmmOptM f) = case f dflags this_mod [] of
1100 (# result, imports #) -> (result, imports)
1101
1102 cmmBlockConFold :: CmmBlock -> CmmOptM CmmBlock
1103 cmmBlockConFold block = do
1104 let (entry, middle, last) = blockSplit block
1105 stmts = blockToList middle
1106 stmts' <- mapM cmmStmtConFold stmts
1107 last' <- cmmStmtConFold last
1108 return $ blockJoin entry (blockFromList stmts') last'
1109
1110 -- This does three optimizations, but they're very quick to check, so we don't
1111 -- bother turning them off even when the Hoopl code is active. Since
1112 -- this is on the old Cmm representation, we can't reuse the code either:
1113 -- * reg = reg --> nop
1114 -- * if 0 then jump --> nop
1115 -- * if 1 then jump --> jump
1116 -- We might be tempted to skip this step entirely of not Opt_PIC, but
1117 -- there is some PowerPC code for the non-PIC case, which would also
1118 -- have to be separated.
1119 cmmStmtConFold :: CmmNode e x -> CmmOptM (CmmNode e x)
1120 cmmStmtConFold stmt
1121 = case stmt of
1122 CmmAssign reg src
1123 -> do src' <- cmmExprConFold DataReference src
1124 return $ case src' of
1125 CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop")
1126 new_src -> CmmAssign reg new_src
1127
1128 CmmStore addr src
1129 -> do addr' <- cmmExprConFold DataReference addr
1130 src' <- cmmExprConFold DataReference src
1131 return $ CmmStore addr' src'
1132
1133 CmmCall { cml_target = addr }
1134 -> do addr' <- cmmExprConFold JumpReference addr
1135 return $ stmt { cml_target = addr' }
1136
1137 CmmUnsafeForeignCall target regs args
1138 -> do target' <- case target of
1139 ForeignTarget e conv -> do
1140 e' <- cmmExprConFold CallReference e
1141 return $ ForeignTarget e' conv
1142 PrimTarget _ ->
1143 return target
1144 args' <- mapM (cmmExprConFold DataReference) args
1145 return $ CmmUnsafeForeignCall target' regs args'
1146
1147 CmmCondBranch test true false likely
1148 -> do test' <- cmmExprConFold DataReference test
1149 return $ case test' of
1150 CmmLit (CmmInt 0 _) -> CmmBranch false
1151 CmmLit (CmmInt _ _) -> CmmBranch true
1152 _other -> CmmCondBranch test' true false likely
1153
1154 CmmSwitch expr ids
1155 -> do expr' <- cmmExprConFold DataReference expr
1156 return $ CmmSwitch expr' ids
1157
1158 other
1159 -> return other
1160
1161 cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
1162 cmmExprConFold referenceKind expr = do
1163 dflags <- getDynFlags
1164
1165 -- With -O1 and greater, the cmmSink pass does constant-folding, so
1166 -- we don't need to do it again here.
1167 let expr' = if optLevel dflags >= 1
1168 then expr
1169 else cmmExprCon dflags expr
1170
1171 cmmExprNative referenceKind expr'
1172
1173 cmmExprCon :: DynFlags -> CmmExpr -> CmmExpr
1174 cmmExprCon dflags (CmmLoad addr rep) = CmmLoad (cmmExprCon dflags addr) rep
1175 cmmExprCon dflags (CmmMachOp mop args)
1176 = cmmMachOpFold dflags mop (map (cmmExprCon dflags) args)
1177 cmmExprCon _ other = other
1178
1179 -- handles both PIC and non-PIC cases... a very strange mixture
1180 -- of things to do.
1181 cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
1182 cmmExprNative referenceKind expr = do
1183 dflags <- getDynFlags
1184 let platform = targetPlatform dflags
1185 arch = platformArch platform
1186 case expr of
1187 CmmLoad addr rep
1188 -> do addr' <- cmmExprNative DataReference addr
1189 return $ CmmLoad addr' rep
1190
1191 CmmMachOp mop args
1192 -> do args' <- mapM (cmmExprNative DataReference) args
1193 return $ CmmMachOp mop args'
1194
1195 CmmLit (CmmBlock id)
1196 -> cmmExprNative referenceKind (CmmLit (CmmLabel (infoTblLbl id)))
1197 -- we must convert block Ids to CLabels here, because we
1198 -- might have to do the PIC transformation. Hence we must
1199 -- not modify BlockIds beyond this point.
1200
1201 CmmLit (CmmLabel lbl)
1202 -> do
1203 cmmMakeDynamicReference dflags referenceKind lbl
1204 CmmLit (CmmLabelOff lbl off)
1205 -> do
1206 dynRef <- cmmMakeDynamicReference dflags referenceKind lbl
1207 -- need to optimize here, since it's late
1208 return $ cmmMachOpFold dflags (MO_Add (wordWidth dflags)) [
1209 dynRef,
1210 (CmmLit $ CmmInt (fromIntegral off) (wordWidth dflags))
1211 ]
1212
1213 -- On powerpc (non-PIC), it's easier to jump directly to a label than
1214 -- to use the register table, so we replace these registers
1215 -- with the corresponding labels:
1216 CmmReg (CmmGlobal EagerBlackholeInfo)
1217 | arch == ArchPPC && not (positionIndependent dflags)
1218 -> cmmExprNative referenceKind $
1219 CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_EAGER_BLACKHOLE_info")))
1220 CmmReg (CmmGlobal GCEnter1)
1221 | arch == ArchPPC && not (positionIndependent dflags)
1222 -> cmmExprNative referenceKind $
1223 CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_enter_1")))
1224 CmmReg (CmmGlobal GCFun)
1225 | arch == ArchPPC && not (positionIndependent dflags)
1226 -> cmmExprNative referenceKind $
1227 CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_fun")))
1228
1229 other
1230 -> return other