Disable the SRT offset optimisation on MachO platforms
[ghc.git] / compiler / nativeGen / AsmCodeGen.hs
1 -- -----------------------------------------------------------------------------
2 --
3 -- (c) The University of Glasgow 1993-2004
4 --
5 -- This is the top-level module in the native code generator.
6 --
7 -- -----------------------------------------------------------------------------
8
9 {-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, UnboxedTuples #-}
10
11 module AsmCodeGen (
12 -- * Module entry point
13 nativeCodeGen
14
15 -- * Test-only exports: see trac #12744
16 -- used by testGraphNoSpills, which needs to access
17 -- the register allocator intermediate data structures
18 -- cmmNativeGen emits
19 , cmmNativeGen
20 , NcgImpl(..)
21 , x86NcgImpl
22 ) where
23
24 #include "HsVersions.h"
25 #include "nativeGen/NCG.h"
26
27
28 import GhcPrelude
29
30 import qualified X86.CodeGen
31 import qualified X86.Regs
32 import qualified X86.Instr
33 import qualified X86.Ppr
34
35 import qualified SPARC.CodeGen
36 import qualified SPARC.Regs
37 import qualified SPARC.Instr
38 import qualified SPARC.Ppr
39 import qualified SPARC.ShortcutJump
40 import qualified SPARC.CodeGen.Expand
41
42 import qualified PPC.CodeGen
43 import qualified PPC.Regs
44 import qualified PPC.RegInfo
45 import qualified PPC.Instr
46 import qualified PPC.Ppr
47
48 import RegAlloc.Liveness
49 import qualified RegAlloc.Linear.Main as Linear
50
51 import qualified GraphColor as Color
52 import qualified RegAlloc.Graph.Main as Color
53 import qualified RegAlloc.Graph.Stats as Color
54 import qualified RegAlloc.Graph.TrivColorable as Color
55
56 import AsmUtils
57 import TargetReg
58 import Platform
59 import Config
60 import Instruction
61 import PIC
62 import Reg
63 import NCGMonad
64 import Dwarf
65 import Debug
66
67 import BlockId
68 import CgUtils ( fixStgRegisters )
69 import Cmm
70 import CmmUtils
71 import Hoopl.Collections
72 import Hoopl.Label
73 import Hoopl.Block
74 import CmmOpt ( cmmMachOpFold )
75 import PprCmm
76 import CLabel
77
78 import UniqFM
79 import UniqSupply
80 import DynFlags
81 import Util
82 import Unique
83
84 import BasicTypes ( Alignment )
85 import Digraph
86 import qualified Pretty
87 import BufWrite
88 import Outputable
89 import FastString
90 import UniqSet
91 import ErrUtils
92 import Module
93 import Stream (Stream)
94 import qualified Stream
95
96 -- DEBUGGING ONLY
97 --import OrdList
98
99 import Data.List
100 import Data.Maybe
101 import Data.Ord ( comparing )
102 import Control.Exception
103 import Control.Monad
104 import System.IO
105
106 {-
107 The native-code generator has machine-independent and
108 machine-dependent modules.
109
110 This module ("AsmCodeGen") is the top-level machine-independent
111 module. Before entering machine-dependent land, we do some
112 machine-independent optimisations (defined below) on the
113 'CmmStmts's.
114
115 We convert to the machine-specific 'Instr' datatype with
116 'cmmCodeGen', assuming an infinite supply of registers. We then use
117 a machine-independent register allocator ('regAlloc') to rejoin
118 reality. Obviously, 'regAlloc' has machine-specific helper
119 functions (see about "RegAllocInfo" below).
120
121 Finally, we order the basic blocks of the function so as to minimise
122 the number of jumps between blocks, by utilising fallthrough wherever
123 possible.
124
125 The machine-dependent bits break down as follows:
126
127 * ["MachRegs"] Everything about the target platform's machine
128 registers (and immediate operands, and addresses, which tend to
129 intermingle/interact with registers).
130
131 * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
132 have a module of its own), plus a miscellany of other things
133 (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
134
135 * ["MachCodeGen"] is where 'Cmm' stuff turns into
136 machine instructions.
137
138 * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
139 a 'SDoc').
140
141 * ["RegAllocInfo"] In the register allocator, we manipulate
142 'MRegsState's, which are 'BitSet's, one bit per machine register.
143 When we want to say something about a specific machine register
144 (e.g., ``it gets clobbered by this instruction''), we set/unset
145 its bit. Obviously, we do this 'BitSet' thing for efficiency
146 reasons.
147
148 The 'RegAllocInfo' module collects together the machine-specific
149 info needed to do register allocation.
150
151 * ["RegisterAlloc"] The (machine-independent) register allocator.
152 -}
153
154 -- -----------------------------------------------------------------------------
155 -- Top-level of the native codegen
156
157 data NcgImpl statics instr jumpDest = NcgImpl {
158 cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
159 generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
160 getJumpDestBlockId :: jumpDest -> Maybe BlockId,
161 canShortcut :: instr -> Maybe jumpDest,
162 shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics,
163 shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
164 pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc,
165 maxSpillSlots :: Int,
166 allocatableRegs :: [RealReg],
167 ncg_x86fp_kludge :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
168 ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
169 ncgAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr),
170 ncgMakeFarBranches :: LabelMap CmmStatics
171 -> [NatBasicBlock instr] -> [NatBasicBlock instr],
172 extractUnwindPoints :: [instr] -> [UnwindPoint]
173 -- ^ given the instruction sequence of a block, produce a list of
174 -- the block's 'UnwindPoint's
175 -- See Note [What is this unwinding business?] in Debug
176 -- and Note [Unwinding information in the NCG] in this module.
177 }
178
179 --------------------
180 nativeCodeGen :: DynFlags -> Module -> ModLocation -> Handle -> UniqSupply
181 -> Stream IO RawCmmGroup ()
182 -> IO UniqSupply
183 nativeCodeGen dflags this_mod modLoc h us cmms
184 = let platform = targetPlatform dflags
185 nCG' :: (Outputable statics, Outputable instr, Instruction instr)
186 => NcgImpl statics instr jumpDest -> IO UniqSupply
187 nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
188 in case platformArch platform of
189 ArchX86 -> nCG' (x86NcgImpl dflags)
190 ArchX86_64 -> nCG' (x86_64NcgImpl dflags)
191 ArchPPC -> nCG' (ppcNcgImpl dflags)
192 ArchSPARC -> nCG' (sparcNcgImpl dflags)
193 ArchSPARC64 -> panic "nativeCodeGen: No NCG for SPARC64"
194 ArchARM {} -> panic "nativeCodeGen: No NCG for ARM"
195 ArchARM64 -> panic "nativeCodeGen: No NCG for ARM64"
196 ArchPPC_64 _ -> nCG' (ppcNcgImpl dflags)
197 ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
198 ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
199 ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
200 ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
201 ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
202
203 x86NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
204 x86NcgImpl dflags
205 = (x86_64NcgImpl dflags) { ncg_x86fp_kludge = map x86fp_kludge }
206
207 x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, CmmStatics) X86.Instr.Instr X86.Instr.JumpDest
208 x86_64NcgImpl dflags
209 = NcgImpl {
210 cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
211 ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
212 ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
213 ,canShortcut = X86.Instr.canShortcut
214 ,shortcutStatics = X86.Instr.shortcutStatics
215 ,shortcutJump = X86.Instr.shortcutJump
216 ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
217 ,maxSpillSlots = X86.Instr.maxSpillSlots dflags
218 ,allocatableRegs = X86.Regs.allocatableRegs platform
219 ,ncg_x86fp_kludge = id
220 ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
221 ,ncgExpandTop = id
222 ,ncgMakeFarBranches = const id
223 ,extractUnwindPoints = X86.CodeGen.extractUnwindPoints
224 }
225 where platform = targetPlatform dflags
226
227 ppcNcgImpl :: DynFlags -> NcgImpl CmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
228 ppcNcgImpl dflags
229 = NcgImpl {
230 cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
231 ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
232 ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
233 ,canShortcut = PPC.RegInfo.canShortcut
234 ,shortcutStatics = PPC.RegInfo.shortcutStatics
235 ,shortcutJump = PPC.RegInfo.shortcutJump
236 ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
237 ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
238 ,allocatableRegs = PPC.Regs.allocatableRegs platform
239 ,ncg_x86fp_kludge = id
240 ,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform
241 ,ncgExpandTop = id
242 ,ncgMakeFarBranches = PPC.Instr.makeFarBranches
243 ,extractUnwindPoints = const []
244 }
245 where platform = targetPlatform dflags
246
247 sparcNcgImpl :: DynFlags -> NcgImpl CmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
248 sparcNcgImpl dflags
249 = NcgImpl {
250 cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
251 ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
252 ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
253 ,canShortcut = SPARC.ShortcutJump.canShortcut
254 ,shortcutStatics = SPARC.ShortcutJump.shortcutStatics
255 ,shortcutJump = SPARC.ShortcutJump.shortcutJump
256 ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
257 ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
258 ,allocatableRegs = SPARC.Regs.allocatableRegs
259 ,ncg_x86fp_kludge = id
260 ,ncgAllocMoreStack = noAllocMoreStack
261 ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
262 ,ncgMakeFarBranches = const id
263 ,extractUnwindPoints = const []
264 }
265
266 --
267 -- Allocating more stack space for spilling is currently only
268 -- supported for the linear register allocator on x86/x86_64, the rest
269 -- default to the panic below. To support allocating extra stack on
270 -- more platforms provide a definition of ncgAllocMoreStack.
271 --
272 noAllocMoreStack :: Int -> NatCmmDecl statics instr -> UniqSM (NatCmmDecl statics instr)
273 noAllocMoreStack amount _
274 = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
275 ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
276 ++ " is a known limitation in the linear allocator.\n"
277 ++ "\n"
278 ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
279 ++ " You can still file a bug report if you like.\n"
280
281
282 -- | Data accumulated during code generation. Mostly about statistics,
283 -- but also collects debug data for DWARF generation.
284 data NativeGenAcc statics instr
285 = NGS { ngs_imports :: ![[CLabel]]
286 , ngs_natives :: ![[NatCmmDecl statics instr]]
287 -- ^ Native code generated, for statistics. This might
288 -- hold a lot of data, so it is important to clear this
289 -- field as early as possible if it isn't actually
290 -- required.
291 , ngs_colorStats :: ![[Color.RegAllocStats statics instr]]
292 , ngs_linearStats :: ![[Linear.RegAllocStats]]
293 , ngs_labels :: ![Label]
294 , ngs_debug :: ![DebugBlock]
295 , ngs_dwarfFiles :: !DwarfFiles
296 , ngs_unwinds :: !(LabelMap [UnwindPoint])
297 -- ^ see Note [Unwinding information in the NCG]
298 -- and Note [What is this unwinding business?] in Debug.
299 }
300
301 {-
302 Note [Unwinding information in the NCG]
303 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
304
305 Unwind information is a type of metadata which allows a debugging tool
306 to reconstruct the values of machine registers at the time a procedure was
307 entered. For the most part, the production of unwind information is handled by
308 the Cmm stage, where it is represented by CmmUnwind nodes.
309
310 Unfortunately, the Cmm stage doesn't know everything necessary to produce
311 accurate unwinding information. For instance, the x86-64 calling convention
312 requires that the stack pointer be aligned to 16 bytes, which in turn means that
313 GHC must sometimes add padding to $sp prior to performing a foreign call. When
314 this happens unwind information must be updated accordingly.
315 For this reason, we make the NCG backends responsible for producing
316 unwinding tables (with the extractUnwindPoints function in NcgImpl).
317
318 We accumulate the produced unwind tables over CmmGroups in the ngs_unwinds
319 field of NativeGenAcc. This is a label map which contains an entry for each
320 procedure, containing a list of unwinding points (e.g. a label and an associated
321 unwinding table).
322
323 See also Note [What is this unwinding business?] in Debug.
324 -}
325
326 nativeCodeGen' :: (Outputable statics, Outputable instr, Instruction instr)
327 => DynFlags
328 -> Module -> ModLocation
329 -> NcgImpl statics instr jumpDest
330 -> Handle
331 -> UniqSupply
332 -> Stream IO RawCmmGroup ()
333 -> IO UniqSupply
334 nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
335 = do
336 -- BufHandle is a performance hack. We could hide it inside
337 -- Pretty if it weren't for the fact that we do lots of little
338 -- printDocs here (in order to do codegen in constant space).
339 bufh <- newBufHandle h
340 let ngs0 = NGS [] [] [] [] [] [] emptyUFM mapEmpty
341 (ngs, us') <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us
342 cmms ngs0
343 finishNativeGen dflags modLoc bufh us' ngs
344
345 finishNativeGen :: Instruction instr
346 => DynFlags
347 -> ModLocation
348 -> BufHandle
349 -> UniqSupply
350 -> NativeGenAcc statics instr
351 -> IO UniqSupply
352 finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs
353 = do
354 -- Write debug data and finish
355 let emitDw = debugLevel dflags > 0 && not (gopt Opt_SplitObjs dflags)
356 us' <- if not emitDw then return us else do
357 (dwarf, us') <- dwarfGen dflags modLoc us (ngs_debug ngs)
358 emitNativeCode dflags bufh dwarf
359 return us'
360 bFlush bufh
361
362 -- dump global NCG stats for graph coloring allocator
363 let stats = concat (ngs_colorStats ngs)
364 when (not (null stats)) $ do
365
366 -- build the global register conflict graph
367 let graphGlobal
368 = foldl Color.union Color.initGraph
369 $ [ Color.raGraph stat
370 | stat@Color.RegAllocStatsStart{} <- stats]
371
372 dump_stats (Color.pprStats stats graphGlobal)
373
374 let platform = targetPlatform dflags
375 dumpIfSet_dyn dflags
376 Opt_D_dump_asm_conflicts "Register conflict graph"
377 $ Color.dotGraph
378 (targetRegDotColor platform)
379 (Color.trivColorable platform
380 (targetVirtualRegSqueeze platform)
381 (targetRealRegSqueeze platform))
382 $ graphGlobal
383
384
385 -- dump global NCG stats for linear allocator
386 let linearStats = concat (ngs_linearStats ngs)
387 when (not (null linearStats)) $
388 dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
389
390 -- write out the imports
391 printSDocLn Pretty.LeftMode dflags h (mkCodeStyle AsmStyle)
392 $ makeImportsDoc dflags (concat (ngs_imports ngs))
393 return us'
394 where
395 dump_stats = dumpSDoc dflags alwaysQualify Opt_D_dump_asm_stats "NCG stats"
396
397 cmmNativeGenStream :: (Outputable statics, Outputable instr, Instruction instr)
398 => DynFlags
399 -> Module -> ModLocation
400 -> NcgImpl statics instr jumpDest
401 -> BufHandle
402 -> UniqSupply
403 -> Stream IO RawCmmGroup ()
404 -> NativeGenAcc statics instr
405 -> IO (NativeGenAcc statics instr, UniqSupply)
406
407 cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs
408 = do r <- Stream.runStream cmm_stream
409 case r of
410 Left () ->
411 return (ngs { ngs_imports = reverse $ ngs_imports ngs
412 , ngs_natives = reverse $ ngs_natives ngs
413 , ngs_colorStats = reverse $ ngs_colorStats ngs
414 , ngs_linearStats = reverse $ ngs_linearStats ngs
415 },
416 us)
417 Right (cmms, cmm_stream') -> do
418
419 -- Generate debug information
420 let debugFlag = debugLevel dflags > 0
421 !ndbgs | debugFlag = cmmDebugGen modLoc cmms
422 | otherwise = []
423 dbgMap = debugToMap ndbgs
424
425 -- Insert split marker, generate native code
426 let splitObjs = gopt Opt_SplitObjs dflags
427 split_marker = CmmProc mapEmpty mkSplitMarkerLabel [] $
428 ofBlockList (panic "split_marker_entry") []
429 cmms' | splitObjs = split_marker : cmms
430 | otherwise = cmms
431 (ngs',us') <- cmmNativeGens dflags this_mod modLoc ncgImpl h
432 dbgMap us cmms' ngs 0
433
434 -- Link native code information into debug blocks
435 -- See Note [What is this unwinding business?] in Debug.
436 let !ldbgs = cmmDebugLink (ngs_labels ngs') (ngs_unwinds ngs') ndbgs
437 dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos"
438 (vcat $ map ppr ldbgs)
439
440 -- Emit & clear DWARF information when generating split
441 -- object files, as we need it to land in the same object file
442 -- When using split sections, note that we do not split the debug
443 -- info but emit all the info at once in finishNativeGen.
444 (ngs'', us'') <-
445 if debugFlag && splitObjs
446 then do (dwarf, us'') <- dwarfGen dflags modLoc us ldbgs
447 emitNativeCode dflags h dwarf
448 return (ngs' { ngs_debug = []
449 , ngs_dwarfFiles = emptyUFM
450 , ngs_labels = [] },
451 us'')
452 else return (ngs' { ngs_debug = ngs_debug ngs' ++ ldbgs
453 , ngs_labels = [] },
454 us')
455
456 cmmNativeGenStream dflags this_mod modLoc ncgImpl h us''
457 cmm_stream' ngs''
458
459 -- | Do native code generation on all these cmms.
460 --
461 cmmNativeGens :: forall statics instr jumpDest.
462 (Outputable statics, Outputable instr, Instruction instr)
463 => DynFlags
464 -> Module -> ModLocation
465 -> NcgImpl statics instr jumpDest
466 -> BufHandle
467 -> LabelMap DebugBlock
468 -> UniqSupply
469 -> [RawCmmDecl]
470 -> NativeGenAcc statics instr
471 -> Int
472 -> IO (NativeGenAcc statics instr, UniqSupply)
473
474 cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap = go
475 where
476 go :: UniqSupply -> [RawCmmDecl]
477 -> NativeGenAcc statics instr -> Int
478 -> IO (NativeGenAcc statics instr, UniqSupply)
479
480 go us [] ngs !_ =
481 return (ngs, us)
482
483 go us (cmm : cmms) ngs count = do
484 let fileIds = ngs_dwarfFiles ngs
485 (us', fileIds', native, imports, colorStats, linearStats, unwinds)
486 <- {-# SCC "cmmNativeGen" #-}
487 cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap
488 cmm count
489
490 -- Generate .file directives for every new file that has been
491 -- used. Note that it is important that we generate these in
492 -- ascending order, as Clang's 3.6 assembler complains.
493 let newFileIds = sortBy (comparing snd) $
494 nonDetEltsUFM $ fileIds' `minusUFM` fileIds
495 -- See Note [Unique Determinism and code generation]
496 pprDecl (f,n) = text "\t.file " <> ppr n <+>
497 doubleQuotes (ftext f)
498
499 emitNativeCode dflags h $ vcat $
500 map pprDecl newFileIds ++
501 map (pprNatCmmDecl ncgImpl) native
502
503 -- force evaluation all this stuff to avoid space leaks
504 {-# SCC "seqString" #-} evaluate $ seqString (showSDoc dflags $ vcat $ map ppr imports)
505
506 let !labels' = if debugLevel dflags > 0
507 then cmmDebugLabels isMetaInstr native else []
508 !natives' = if dopt Opt_D_dump_asm_stats dflags
509 then native : ngs_natives ngs else []
510
511 mCon = maybe id (:)
512 ngs' = ngs{ ngs_imports = imports : ngs_imports ngs
513 , ngs_natives = natives'
514 , ngs_colorStats = colorStats `mCon` ngs_colorStats ngs
515 , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
516 , ngs_labels = ngs_labels ngs ++ labels'
517 , ngs_dwarfFiles = fileIds'
518 , ngs_unwinds = ngs_unwinds ngs `mapUnion` unwinds
519 }
520 go us' cmms ngs' (count + 1)
521
522 seqString [] = ()
523 seqString (x:xs) = x `seq` seqString xs
524
525
526 emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO ()
527 emitNativeCode dflags h sdoc = do
528
529 {-# SCC "pprNativeCode" #-} bufLeftRenderSDoc dflags h
530 (mkCodeStyle AsmStyle) sdoc
531
532 -- dump native code
533 dumpIfSet_dyn dflags
534 Opt_D_dump_asm "Asm code"
535 sdoc
536
537 -- | Complete native code generation phase for a single top-level chunk of Cmm.
538 -- Dumping the output of each stage along the way.
539 -- Global conflict graph and NGC stats
540 cmmNativeGen
541 :: (Outputable statics, Outputable instr, Instruction instr)
542 => DynFlags
543 -> Module -> ModLocation
544 -> NcgImpl statics instr jumpDest
545 -> UniqSupply
546 -> DwarfFiles
547 -> LabelMap DebugBlock
548 -> RawCmmDecl -- ^ the cmm to generate code for
549 -> Int -- ^ sequence number of this top thing
550 -> IO ( UniqSupply
551 , DwarfFiles
552 , [NatCmmDecl statics instr] -- native code
553 , [CLabel] -- things imported by this cmm
554 , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
555 , Maybe [Linear.RegAllocStats] -- stats for the linear register allocators
556 , LabelMap [UnwindPoint] -- unwinding information for blocks
557 )
558
559 cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
560 = do
561 let platform = targetPlatform dflags
562
563 -- rewrite assignments to global regs
564 let fixed_cmm =
565 {-# SCC "fixStgRegisters" #-}
566 fixStgRegisters dflags cmm
567
568 -- cmm to cmm optimisations
569 let (opt_cmm, imports) =
570 {-# SCC "cmmToCmm" #-}
571 cmmToCmm dflags this_mod fixed_cmm
572
573 dumpIfSet_dyn dflags
574 Opt_D_dump_opt_cmm "Optimised Cmm"
575 (pprCmmGroup [opt_cmm])
576
577 -- generate native code from cmm
578 let ((native, lastMinuteImports, fileIds'), usGen) =
579 {-# SCC "genMachCode" #-}
580 initUs us $ genMachCode dflags this_mod modLoc
581 (cmmTopCodeGen ncgImpl)
582 fileIds dbgMap opt_cmm
583
584 dumpIfSet_dyn dflags
585 Opt_D_dump_asm_native "Native code"
586 (vcat $ map (pprNatCmmDecl ncgImpl) native)
587
588 -- tag instructions with register liveness information
589 let (withLiveness, usLive) =
590 {-# SCC "regLiveness" #-}
591 initUs usGen
592 $ mapM (regLiveness platform)
593 $ map natCmmTopToLive native
594
595 dumpIfSet_dyn dflags
596 Opt_D_dump_asm_liveness "Liveness annotations added"
597 (vcat $ map ppr withLiveness)
598
599 -- allocate registers
600 (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear) <-
601 if ( gopt Opt_RegsGraph dflags
602 || gopt Opt_RegsIterative dflags )
603 then do
604 -- the regs usable for allocation
605 let (alloc_regs :: UniqFM (UniqSet RealReg))
606 = foldr (\r -> plusUFM_C unionUniqSets
607 $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
608 emptyUFM
609 $ allocatableRegs ncgImpl
610
611 -- do the graph coloring register allocation
612 let ((alloced, regAllocStats), usAlloc)
613 = {-# SCC "RegAlloc-color" #-}
614 initUs usLive
615 $ Color.regAlloc
616 dflags
617 alloc_regs
618 (mkUniqSet [0 .. maxSpillSlots ncgImpl])
619 withLiveness
620
621 -- dump out what happened during register allocation
622 dumpIfSet_dyn dflags
623 Opt_D_dump_asm_regalloc "Registers allocated"
624 (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
625
626 dumpIfSet_dyn dflags
627 Opt_D_dump_asm_regalloc_stages "Build/spill stages"
628 (vcat $ map (\(stage, stats)
629 -> text "# --------------------------"
630 $$ text "# cmm " <> int count <> text " Stage " <> int stage
631 $$ ppr stats)
632 $ zip [0..] regAllocStats)
633
634 let mPprStats =
635 if dopt Opt_D_dump_asm_stats dflags
636 then Just regAllocStats else Nothing
637
638 -- force evaluation of the Maybe to avoid space leak
639 mPprStats `seq` return ()
640
641 return ( alloced, usAlloc
642 , mPprStats
643 , Nothing)
644
645 else do
646 -- do linear register allocation
647 let reg_alloc proc = do
648 (alloced, maybe_more_stack, ra_stats) <-
649 Linear.regAlloc dflags proc
650 case maybe_more_stack of
651 Nothing -> return ( alloced, ra_stats )
652 Just amount -> do
653 alloced' <- ncgAllocMoreStack ncgImpl amount alloced
654 return (alloced', ra_stats )
655
656 let ((alloced, regAllocStats), usAlloc)
657 = {-# SCC "RegAlloc-linear" #-}
658 initUs usLive
659 $ liftM unzip
660 $ mapM reg_alloc withLiveness
661
662 dumpIfSet_dyn dflags
663 Opt_D_dump_asm_regalloc "Registers allocated"
664 (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
665
666 let mPprStats =
667 if dopt Opt_D_dump_asm_stats dflags
668 then Just (catMaybes regAllocStats) else Nothing
669
670 -- force evaluation of the Maybe to avoid space leak
671 mPprStats `seq` return ()
672
673 return ( alloced, usAlloc
674 , Nothing
675 , mPprStats)
676
677 ---- x86fp_kludge. This pass inserts ffree instructions to clear
678 ---- the FPU stack on x86. The x86 ABI requires that the FPU stack
679 ---- is clear, and library functions can return odd results if it
680 ---- isn't.
681 ----
682 ---- NB. must happen before shortcutBranches, because that
683 ---- generates JXX_GBLs which we can't fix up in x86fp_kludge.
684 let kludged = {-# SCC "x86fp_kludge" #-} ncg_x86fp_kludge ncgImpl alloced
685
686 ---- generate jump tables
687 let tabled =
688 {-# SCC "generateJumpTables" #-}
689 generateJumpTables ncgImpl kludged
690
691 ---- shortcut branches
692 let shorted =
693 {-# SCC "shortcutBranches" #-}
694 shortcutBranches dflags ncgImpl tabled
695
696 ---- sequence blocks
697 let sequenced =
698 {-# SCC "sequenceBlocks" #-}
699 map (sequenceTop ncgImpl) shorted
700
701 ---- expansion of SPARC synthetic instrs
702 let expanded =
703 {-# SCC "sparc_expand" #-}
704 ncgExpandTop ncgImpl sequenced
705
706 dumpIfSet_dyn dflags
707 Opt_D_dump_asm_expanded "Synthetic instructions expanded"
708 (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
709
710 -- generate unwinding information from cmm
711 let unwinds :: BlockMap [UnwindPoint]
712 unwinds =
713 {-# SCC "unwindingInfo" #-}
714 foldl' addUnwind mapEmpty expanded
715 where
716 addUnwind acc proc =
717 acc `mapUnion` computeUnwinding dflags ncgImpl proc
718
719 return ( usAlloc
720 , fileIds'
721 , expanded
722 , lastMinuteImports ++ imports
723 , ppr_raStatsColor
724 , ppr_raStatsLinear
725 , unwinds )
726
727
728 x86fp_kludge :: NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr -> NatCmmDecl (Alignment, CmmStatics) X86.Instr.Instr
729 x86fp_kludge top@(CmmData _ _) = top
730 x86fp_kludge (CmmProc info lbl live (ListGraph code)) =
731 CmmProc info lbl live (ListGraph $ X86.Instr.i386_insert_ffrees code)
732
733 -- | Compute unwinding tables for the blocks of a procedure
734 computeUnwinding :: Instruction instr
735 => DynFlags -> NcgImpl statics instr jumpDest
736 -> NatCmmDecl statics instr
737 -- ^ the native code generated for the procedure
738 -> LabelMap [UnwindPoint]
739 -- ^ unwinding tables for all points of all blocks of the
740 -- procedure
741 computeUnwinding dflags _ _
742 | debugLevel dflags == 0 = mapEmpty
743 computeUnwinding _ _ (CmmData _ _) = mapEmpty
744 computeUnwinding _ ncgImpl (CmmProc _ _ _ (ListGraph blks)) =
745 -- In general we would need to push unwinding information down the
746 -- block-level call-graph to ensure that we fully account for all
747 -- relevant register writes within a procedure.
748 --
749 -- However, the only unwinding information that we care about in GHC is for
750 -- Sp. The fact that CmmLayoutStack already ensures that we have unwind
751 -- information at the beginning of every block means that there is no need
752 -- to perform this sort of push-down.
753 mapFromList [ (blk_lbl, extractUnwindPoints ncgImpl instrs)
754 | BasicBlock blk_lbl instrs <- blks ]
755
756 -- | Build a doc for all the imports.
757 --
758 makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
759 makeImportsDoc dflags imports
760 = dyld_stubs imports
761 $$
762 -- On recent versions of Darwin, the linker supports
763 -- dead-stripping of code and data on a per-symbol basis.
764 -- There's a hack to make this work in PprMach.pprNatCmmDecl.
765 (if platformHasSubsectionsViaSymbols platform
766 then text ".subsections_via_symbols"
767 else Outputable.empty)
768 $$
769 -- On recent GNU ELF systems one can mark an object file
770 -- as not requiring an executable stack. If all objects
771 -- linked into a program have this note then the program
772 -- will not use an executable stack, which is good for
773 -- security. GHC generated code does not need an executable
774 -- stack so add the note in:
775 (if platformHasGnuNonexecStack platform
776 then text ".section .note.GNU-stack,\"\"," <> sectionType "progbits"
777 else Outputable.empty)
778 $$
779 -- And just because every other compiler does, let's stick in
780 -- an identifier directive: .ident "GHC x.y.z"
781 (if platformHasIdentDirective platform
782 then let compilerIdent = text "GHC" <+> text cProjectVersion
783 in text ".ident" <+> doubleQuotes compilerIdent
784 else Outputable.empty)
785
786 where
787 platform = targetPlatform dflags
788 arch = platformArch platform
789 os = platformOS platform
790
791 -- Generate "symbol stubs" for all external symbols that might
792 -- come from a dynamic library.
793 dyld_stubs :: [CLabel] -> SDoc
794 {- dyld_stubs imps = vcat $ map pprDyldSymbolStub $
795 map head $ group $ sort imps-}
796 -- (Hack) sometimes two Labels pretty-print the same, but have
797 -- different uniques; so we compare their text versions...
798 dyld_stubs imps
799 | needImportedSymbols dflags arch os
800 = vcat $
801 (pprGotDeclaration dflags arch os :) $
802 map ( pprImportedSymbol dflags platform . fst . head) $
803 groupBy (\(_,a) (_,b) -> a == b) $
804 sortBy (\(_,a) (_,b) -> compare a b) $
805 map doPpr $
806 imps
807 | otherwise
808 = Outputable.empty
809
810 doPpr lbl = (lbl, renderWithStyle dflags (pprCLabel platform lbl) astyle)
811 astyle = mkCodeStyle AsmStyle
812
813
814 -- -----------------------------------------------------------------------------
815 -- Sequencing the basic blocks
816
817 -- Cmm BasicBlocks are self-contained entities: they always end in a
818 -- jump, either non-local or to another basic block in the same proc.
819 -- In this phase, we attempt to place the basic blocks in a sequence
820 -- such that as many of the local jumps as possible turn into
821 -- fallthroughs.
822
823 sequenceTop
824 :: Instruction instr
825 => NcgImpl statics instr jumpDest -> NatCmmDecl statics instr -> NatCmmDecl statics instr
826
827 sequenceTop _ top@(CmmData _ _) = top
828 sequenceTop ncgImpl (CmmProc info lbl live (ListGraph blocks)) =
829 CmmProc info lbl live (ListGraph $ ncgMakeFarBranches ncgImpl info $ sequenceBlocks info blocks)
830
831 -- The algorithm is very simple (and stupid): we make a graph out of
832 -- the blocks where there is an edge from one block to another iff the
833 -- first block ends by jumping to the second. Then we topologically
834 -- sort this graph. Then traverse the list: for each block, we first
835 -- output the block, then if it has an out edge, we move the
836 -- destination of the out edge to the front of the list, and continue.
837
838 -- FYI, the classic layout for basic blocks uses postorder DFS; this
839 -- algorithm is implemented in Hoopl.
840
841 sequenceBlocks
842 :: Instruction instr
843 => LabelMap i
844 -> [NatBasicBlock instr]
845 -> [NatBasicBlock instr]
846
847 sequenceBlocks _ [] = []
848 sequenceBlocks infos (entry:blocks) =
849 seqBlocks infos (mkNode entry : reverse (flattenSCCs (sccBlocks blocks)))
850 -- the first block is the entry point ==> it must remain at the start.
851
852
853 sccBlocks
854 :: Instruction instr
855 => [NatBasicBlock instr]
856 -> [SCC (Node BlockId (NatBasicBlock instr))]
857
858 sccBlocks blocks = stronglyConnCompFromEdgedVerticesUniqR (map mkNode blocks)
859
860 -- we're only interested in the last instruction of
861 -- the block, and only if it has a single destination.
862 getOutEdges
863 :: Instruction instr
864 => [instr] -> [BlockId]
865
866 getOutEdges instrs
867 = case jumpDestsOfInstr (last instrs) of
868 [one] -> [one]
869 _many -> []
870
871 mkNode :: (Instruction t)
872 => GenBasicBlock t
873 -> Node BlockId (GenBasicBlock t)
874 mkNode block@(BasicBlock id instrs) = DigraphNode block id (getOutEdges instrs)
875
876 seqBlocks :: LabelMap i -> [Node BlockId (GenBasicBlock t1)]
877 -> [GenBasicBlock t1]
878 seqBlocks infos blocks = placeNext pullable0 todo0
879 where
880 -- pullable: Blocks that are not yet placed
881 -- todo: Original order of blocks, to be followed if we have no good
882 -- reason not to;
883 -- may include blocks that have already been placed, but then
884 -- these are not in pullable
885 pullable0 = listToUFM [ (i,(b,n)) | DigraphNode b i n <- blocks ]
886 todo0 = map node_key blocks
887
888 placeNext _ [] = []
889 placeNext pullable (i:rest)
890 | Just (block, pullable') <- lookupDeleteUFM pullable i
891 = place pullable' rest block
892 | otherwise
893 -- We already placed this block, so ignore
894 = placeNext pullable rest
895
896 place pullable todo (block,[])
897 = block : placeNext pullable todo
898 place pullable todo (block@(BasicBlock id instrs),[next])
899 | mapMember next infos
900 = block : placeNext pullable todo
901 | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next
902 = BasicBlock id (init instrs) : place pullable' todo nextBlock
903 | otherwise
904 = block : placeNext pullable todo
905 place _ _ (_,tooManyNextNodes)
906 = pprPanic "seqBlocks" (ppr tooManyNextNodes)
907
908
909 lookupDeleteUFM :: Uniquable key => UniqFM elt -> key -> Maybe (elt, UniqFM elt)
910 lookupDeleteUFM m k = do -- Maybe monad
911 v <- lookupUFM m k
912 return (v, delFromUFM m k)
913
914 -- -----------------------------------------------------------------------------
915 -- Generate jump tables
916
917 -- Analyzes all native code and generates data sections for all jump
918 -- table instructions.
919 generateJumpTables
920 :: NcgImpl statics instr jumpDest
921 -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
922 generateJumpTables ncgImpl xs = concatMap f xs
923 where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
924 f p = [p]
925 g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
926
927 -- -----------------------------------------------------------------------------
928 -- Shortcut branches
929
930 shortcutBranches
931 :: DynFlags
932 -> NcgImpl statics instr jumpDest
933 -> [NatCmmDecl statics instr]
934 -> [NatCmmDecl statics instr]
935
936 shortcutBranches dflags ncgImpl tops
937 | gopt Opt_AsmShortcutting dflags
938 = map (apply_mapping ncgImpl mapping) tops'
939 | otherwise
940 = tops
941 where
942 (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
943 mapping = plusUFMList mappings
944
945 build_mapping :: NcgImpl statics instr jumpDest
946 -> GenCmmDecl d (LabelMap t) (ListGraph instr)
947 -> (GenCmmDecl d (LabelMap t) (ListGraph instr), UniqFM jumpDest)
948 build_mapping _ top@(CmmData _ _) = (top, emptyUFM)
949 build_mapping _ (CmmProc info lbl live (ListGraph []))
950 = (CmmProc info lbl live (ListGraph []), emptyUFM)
951 build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
952 = (CmmProc info lbl live (ListGraph (head:others)), mapping)
953 -- drop the shorted blocks, but don't ever drop the first one,
954 -- because it is pointed to by a global label.
955 where
956 -- find all the blocks that just consist of a jump that can be
957 -- shorted.
958 -- Don't completely eliminate loops here -- that can leave a dangling jump!
959 (_, shortcut_blocks, others) =
960 foldl split (setEmpty :: LabelSet, [], []) blocks
961 split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
962 | Just jd <- canShortcut ncgImpl insn,
963 Just dest <- getJumpDestBlockId ncgImpl jd,
964 not (has_info id),
965 (setMember dest s) || dest == id -- loop checks
966 = (s, shortcut_blocks, b : others)
967 split (s, shortcut_blocks, others) (BasicBlock id [insn])
968 | Just dest <- canShortcut ncgImpl insn,
969 not (has_info id)
970 = (setInsert id s, (id,dest) : shortcut_blocks, others)
971 split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
972
973 -- do not eliminate blocks that have an info table
974 has_info l = mapMember l info
975
976 -- build a mapping from BlockId to JumpDest for shorting branches
977 mapping = foldl' add emptyUFM shortcut_blocks
978 add ufm (id,dest) = addToUFM ufm id dest
979
980 apply_mapping :: NcgImpl statics instr jumpDest
981 -> UniqFM jumpDest
982 -> GenCmmDecl statics h (ListGraph instr)
983 -> GenCmmDecl statics h (ListGraph instr)
984 apply_mapping ncgImpl ufm (CmmData sec statics)
985 = CmmData sec (shortcutStatics ncgImpl (lookupUFM ufm) statics)
986 apply_mapping ncgImpl ufm (CmmProc info lbl live (ListGraph blocks))
987 = CmmProc info lbl live (ListGraph $ map short_bb blocks)
988 where
989 short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
990 short_insn i = shortcutJump ncgImpl (lookupUFM ufm) i
991 -- shortcutJump should apply the mapping repeatedly,
992 -- just in case we can short multiple branches.
993
994 -- -----------------------------------------------------------------------------
995 -- Instruction selection
996
997 -- Native code instruction selection for a chunk of stix code. For
998 -- this part of the computation, we switch from the UniqSM monad to
999 -- the NatM monad. The latter carries not only a Unique, but also an
1000 -- Int denoting the current C stack pointer offset in the generated
1001 -- code; this is needed for creating correct spill offsets on
1002 -- architectures which don't offer, or for which it would be
1003 -- prohibitively expensive to employ, a frame pointer register. Viz,
1004 -- x86.
1005
1006 -- The offset is measured in bytes, and indicates the difference
1007 -- between the current (simulated) C stack-ptr and the value it was at
1008 -- the beginning of the block. For stacks which grow down, this value
1009 -- should be either zero or negative.
1010
1011 -- Along with the stack pointer offset, we also carry along a LabelMap of
1012 -- DebugBlocks, which we read to generate .location directives.
1013 --
1014 -- Switching between the two monads whilst carrying along the same
1015 -- Unique supply breaks abstraction. Is that bad?
1016
1017 genMachCode
1018 :: DynFlags
1019 -> Module -> ModLocation
1020 -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
1021 -> DwarfFiles
1022 -> LabelMap DebugBlock
1023 -> RawCmmDecl
1024 -> UniqSM
1025 ( [NatCmmDecl statics instr]
1026 , [CLabel]
1027 , DwarfFiles)
1028
1029 genMachCode dflags this_mod modLoc cmmTopCodeGen fileIds dbgMap cmm_top
1030 = do { initial_us <- getUniqueSupplyM
1031 ; let initial_st = mkNatM_State initial_us 0 dflags this_mod
1032 modLoc fileIds dbgMap
1033 (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
1034 final_delta = natm_delta final_st
1035 final_imports = natm_imports final_st
1036 ; if final_delta == 0
1037 then return (new_tops, final_imports, natm_fileid final_st)
1038 else pprPanic "genMachCode: nonzero final delta" (int final_delta)
1039 }
1040
1041 -- -----------------------------------------------------------------------------
1042 -- Generic Cmm optimiser
1043
1044 {-
1045 Here we do:
1046
1047 (a) Constant folding
1048 (c) Position independent code and dynamic linking
1049 (i) introduce the appropriate indirections
1050 and position independent refs
1051 (ii) compile a list of imported symbols
1052 (d) Some arch-specific optimizations
1053
1054 (a) will be moving to the new Hoopl pipeline, however, (c) and
1055 (d) are only needed by the native backend and will continue to live
1056 here.
1057
1058 Ideas for other things we could do (put these in Hoopl please!):
1059
1060 - shortcut jumps-to-jumps
1061 - simple CSE: if an expr is assigned to a temp, then replace later occs of
1062 that expr with the temp, until the expr is no longer valid (can push through
1063 temp assignments, and certain assigns to mem...)
1064 -}
1065
1066 cmmToCmm :: DynFlags -> Module -> RawCmmDecl -> (RawCmmDecl, [CLabel])
1067 cmmToCmm _ _ top@(CmmData _ _) = (top, [])
1068 cmmToCmm dflags this_mod (CmmProc info lbl live graph)
1069 = runCmmOpt dflags this_mod $
1070 do blocks' <- mapM cmmBlockConFold (toBlockList graph)
1071 return $ CmmProc info lbl live (ofBlockList (g_entry graph) blocks')
1072
1073 newtype CmmOptM a = CmmOptM (DynFlags -> Module -> [CLabel] -> (# a, [CLabel] #))
1074
1075 instance Functor CmmOptM where
1076 fmap = liftM
1077
1078 instance Applicative CmmOptM where
1079 pure x = CmmOptM $ \_ _ imports -> (# x, imports #)
1080 (<*>) = ap
1081
1082 instance Monad CmmOptM where
1083 (CmmOptM f) >>= g =
1084 CmmOptM $ \dflags this_mod imports ->
1085 case f dflags this_mod imports of
1086 (# x, imports' #) ->
1087 case g x of
1088 CmmOptM g' -> g' dflags this_mod imports'
1089
1090 instance CmmMakeDynamicReferenceM CmmOptM where
1091 addImport = addImportCmmOpt
1092 getThisModule = CmmOptM $ \_ this_mod imports -> (# this_mod, imports #)
1093
1094 addImportCmmOpt :: CLabel -> CmmOptM ()
1095 addImportCmmOpt lbl = CmmOptM $ \_ _ imports -> (# (), lbl:imports #)
1096
1097 instance HasDynFlags CmmOptM where
1098 getDynFlags = CmmOptM $ \dflags _ imports -> (# dflags, imports #)
1099
1100 runCmmOpt :: DynFlags -> Module -> CmmOptM a -> (a, [CLabel])
1101 runCmmOpt dflags this_mod (CmmOptM f) = case f dflags this_mod [] of
1102 (# result, imports #) -> (result, imports)
1103
1104 cmmBlockConFold :: CmmBlock -> CmmOptM CmmBlock
1105 cmmBlockConFold block = do
1106 let (entry, middle, last) = blockSplit block
1107 stmts = blockToList middle
1108 stmts' <- mapM cmmStmtConFold stmts
1109 last' <- cmmStmtConFold last
1110 return $ blockJoin entry (blockFromList stmts') last'
1111
1112 -- This does three optimizations, but they're very quick to check, so we don't
1113 -- bother turning them off even when the Hoopl code is active. Since
1114 -- this is on the old Cmm representation, we can't reuse the code either:
1115 -- * reg = reg --> nop
1116 -- * if 0 then jump --> nop
1117 -- * if 1 then jump --> jump
1118 -- We might be tempted to skip this step entirely of not Opt_PIC, but
1119 -- there is some PowerPC code for the non-PIC case, which would also
1120 -- have to be separated.
1121 cmmStmtConFold :: CmmNode e x -> CmmOptM (CmmNode e x)
1122 cmmStmtConFold stmt
1123 = case stmt of
1124 CmmAssign reg src
1125 -> do src' <- cmmExprConFold DataReference src
1126 return $ case src' of
1127 CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop")
1128 new_src -> CmmAssign reg new_src
1129
1130 CmmStore addr src
1131 -> do addr' <- cmmExprConFold DataReference addr
1132 src' <- cmmExprConFold DataReference src
1133 return $ CmmStore addr' src'
1134
1135 CmmCall { cml_target = addr }
1136 -> do addr' <- cmmExprConFold JumpReference addr
1137 return $ stmt { cml_target = addr' }
1138
1139 CmmUnsafeForeignCall target regs args
1140 -> do target' <- case target of
1141 ForeignTarget e conv -> do
1142 e' <- cmmExprConFold CallReference e
1143 return $ ForeignTarget e' conv
1144 PrimTarget _ ->
1145 return target
1146 args' <- mapM (cmmExprConFold DataReference) args
1147 return $ CmmUnsafeForeignCall target' regs args'
1148
1149 CmmCondBranch test true false likely
1150 -> do test' <- cmmExprConFold DataReference test
1151 return $ case test' of
1152 CmmLit (CmmInt 0 _) -> CmmBranch false
1153 CmmLit (CmmInt _ _) -> CmmBranch true
1154 _other -> CmmCondBranch test' true false likely
1155
1156 CmmSwitch expr ids
1157 -> do expr' <- cmmExprConFold DataReference expr
1158 return $ CmmSwitch expr' ids
1159
1160 other
1161 -> return other
1162
1163 cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
1164 cmmExprConFold referenceKind expr = do
1165 dflags <- getDynFlags
1166
1167 -- With -O1 and greater, the cmmSink pass does constant-folding, so
1168 -- we don't need to do it again here.
1169 let expr' = if optLevel dflags >= 1
1170 then expr
1171 else cmmExprCon dflags expr
1172
1173 cmmExprNative referenceKind expr'
1174
1175 cmmExprCon :: DynFlags -> CmmExpr -> CmmExpr
1176 cmmExprCon dflags (CmmLoad addr rep) = CmmLoad (cmmExprCon dflags addr) rep
1177 cmmExprCon dflags (CmmMachOp mop args)
1178 = cmmMachOpFold dflags mop (map (cmmExprCon dflags) args)
1179 cmmExprCon _ other = other
1180
1181 -- handles both PIC and non-PIC cases... a very strange mixture
1182 -- of things to do.
1183 cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
1184 cmmExprNative referenceKind expr = do
1185 dflags <- getDynFlags
1186 let platform = targetPlatform dflags
1187 arch = platformArch platform
1188 case expr of
1189 CmmLoad addr rep
1190 -> do addr' <- cmmExprNative DataReference addr
1191 return $ CmmLoad addr' rep
1192
1193 CmmMachOp mop args
1194 -> do args' <- mapM (cmmExprNative DataReference) args
1195 return $ CmmMachOp mop args'
1196
1197 CmmLit (CmmBlock id)
1198 -> cmmExprNative referenceKind (CmmLit (CmmLabel (infoTblLbl id)))
1199 -- we must convert block Ids to CLabels here, because we
1200 -- might have to do the PIC transformation. Hence we must
1201 -- not modify BlockIds beyond this point.
1202
1203 CmmLit (CmmLabel lbl)
1204 -> do
1205 cmmMakeDynamicReference dflags referenceKind lbl
1206 CmmLit (CmmLabelOff lbl off)
1207 -> do
1208 dynRef <- cmmMakeDynamicReference dflags referenceKind lbl
1209 -- need to optimize here, since it's late
1210 return $ cmmMachOpFold dflags (MO_Add (wordWidth dflags)) [
1211 dynRef,
1212 (CmmLit $ CmmInt (fromIntegral off) (wordWidth dflags))
1213 ]
1214
1215 -- On powerpc (non-PIC), it's easier to jump directly to a label than
1216 -- to use the register table, so we replace these registers
1217 -- with the corresponding labels:
1218 CmmReg (CmmGlobal EagerBlackholeInfo)
1219 | arch == ArchPPC && not (positionIndependent dflags)
1220 -> cmmExprNative referenceKind $
1221 CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_EAGER_BLACKHOLE_info")))
1222 CmmReg (CmmGlobal GCEnter1)
1223 | arch == ArchPPC && not (positionIndependent dflags)
1224 -> cmmExprNative referenceKind $
1225 CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_enter_1")))
1226 CmmReg (CmmGlobal GCFun)
1227 | arch == ArchPPC && not (positionIndependent dflags)
1228 -> cmmExprNative referenceKind $
1229 CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_fun")))
1230
1231 other
1232 -> return other