330d4fae104b949c9cf3102aa5cc4a1bd203b056
[ghc.git] / compiler / nativeGen / SPARC / CodeGen.hs
1 {-# LANGUAGE CPP #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Generating machine code (instruction selection)
6 --
7 -- (c) The University of Glasgow 1996-2013
8 --
9 -----------------------------------------------------------------------------
10
11 {-# LANGUAGE GADTs #-}
12 module SPARC.CodeGen (
13 cmmTopCodeGen,
14 generateJumpTableForInstr,
15 InstrBlock
16 )
17
18 where
19
20 #include "HsVersions.h"
21 #include "nativeGen/NCG.h"
22 #include "../includes/MachDeps.h"
23
24 -- NCG stuff:
25 import SPARC.Base
26 import SPARC.CodeGen.Sanity
27 import SPARC.CodeGen.Amode
28 import SPARC.CodeGen.CondCode
29 import SPARC.CodeGen.Gen64
30 import SPARC.CodeGen.Gen32
31 import SPARC.CodeGen.Base
32 import SPARC.Ppr ()
33 import SPARC.Instr
34 import SPARC.Imm
35 import SPARC.AddrMode
36 import SPARC.Regs
37 import SPARC.Stack
38 import Instruction
39 import Format
40 import NCGMonad
41
42 -- Our intermediate code:
43 import BlockId
44 import Cmm
45 import CmmUtils
46 import CmmSwitch
47 import Hoopl
48 import PIC
49 import Reg
50 import CLabel
51 import CPrim
52
53 -- The rest:
54 import BasicTypes
55 import DynFlags
56 import FastString
57 import OrdList
58 import Outputable
59 import Platform
60 import Unique
61
62 import Control.Monad ( mapAndUnzipM )
63
64 -- | Top level code generation
65 cmmTopCodeGen :: RawCmmDecl
66 -> NatM [NatCmmDecl CmmStatics Instr]
67
68 cmmTopCodeGen (CmmProc info lab live graph)
69 = do let blocks = toBlockListEntryFirst graph
70 (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
71
72 let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
73 let tops = proc : concat statics
74
75 return tops
76
77 cmmTopCodeGen (CmmData sec dat) = do
78 return [CmmData sec dat] -- no translation, we just use CmmStatic
79
80
81 -- | Do code generation on a single block of CMM code.
82 -- code generation may introduce new basic block boundaries, which
83 -- are indicated by the NEWBLOCK instruction. We must split up the
84 -- instruction stream into basic blocks again. Also, we extract
85 -- LDATAs here too.
86 basicBlockCodeGen :: CmmBlock
87 -> NatM ( [NatBasicBlock Instr]
88 , [NatCmmDecl CmmStatics Instr])
89
90 basicBlockCodeGen block = do
91 let (_, nodes, tail) = blockSplit block
92 id = entryLabel block
93 stmts = blockToList nodes
94 mid_instrs <- stmtsToInstrs stmts
95 tail_instrs <- stmtToInstrs tail
96 let instrs = mid_instrs `appOL` tail_instrs
97 let
98 (top,other_blocks,statics)
99 = foldrOL mkBlocks ([],[],[]) instrs
100
101 mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
102 = ([], BasicBlock id instrs : blocks, statics)
103
104 mkBlocks (LDATA sec dat) (instrs,blocks,statics)
105 = (instrs, blocks, CmmData sec dat:statics)
106
107 mkBlocks instr (instrs,blocks,statics)
108 = (instr:instrs, blocks, statics)
109
110 -- do intra-block sanity checking
111 blocksChecked
112 = map (checkBlock block)
113 $ BasicBlock id top : other_blocks
114
115 return (blocksChecked, statics)
116
117
118 -- | Convert some Cmm statements to SPARC instructions.
119 stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
120 stmtsToInstrs stmts
121 = do instrss <- mapM stmtToInstrs stmts
122 return (concatOL instrss)
123
124
125 stmtToInstrs :: CmmNode e x -> NatM InstrBlock
126 stmtToInstrs stmt = do
127 dflags <- getDynFlags
128 case stmt of
129 CmmComment s -> return (unitOL (COMMENT s))
130 CmmTick {} -> return nilOL
131 CmmUnwind {} -> return nilOL
132
133 CmmAssign reg src
134 | isFloatType ty -> assignReg_FltCode format reg src
135 | isWord64 ty -> assignReg_I64Code reg src
136 | otherwise -> assignReg_IntCode format reg src
137 where ty = cmmRegType dflags reg
138 format = cmmTypeFormat ty
139
140 CmmStore addr src
141 | isFloatType ty -> assignMem_FltCode format addr src
142 | isWord64 ty -> assignMem_I64Code addr src
143 | otherwise -> assignMem_IntCode format addr src
144 where ty = cmmExprType dflags src
145 format = cmmTypeFormat ty
146
147 CmmUnsafeForeignCall target result_regs args
148 -> genCCall target result_regs args
149
150 CmmBranch id -> genBranch id
151 CmmCondBranch arg true false _ -> do
152 b1 <- genCondJump true arg
153 b2 <- genBranch false
154 return (b1 `appOL` b2)
155 CmmSwitch arg ids -> do dflags <- getDynFlags
156 genSwitch dflags arg ids
157 CmmCall { cml_target = arg } -> genJump arg
158
159 _
160 -> panic "stmtToInstrs: statement should have been cps'd away"
161
162
163 {-
164 Now, given a tree (the argument to an CmmLoad) that references memory,
165 produce a suitable addressing mode.
166
167 A Rule of the Game (tm) for Amodes: use of the addr bit must
168 immediately follow use of the code part, since the code part puts
169 values in registers which the addr then refers to. So you can't put
170 anything in between, lest it overwrite some of those registers. If
171 you need to do some other computation between the code part and use of
172 the addr bit, first store the effective address from the amode in a
173 temporary, then do the other computation, and then use the temporary:
174
175 code
176 LEA amode, tmp
177 ... other computation ...
178 ... (tmp) ...
179 -}
180
181
182
183 -- | Convert a BlockId to some CmmStatic data
184 jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
185 jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
186 jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
187 where blockLabel = mkAsmTempLabel (getUnique blockid)
188
189
190
191 -- -----------------------------------------------------------------------------
192 -- Generating assignments
193
194 -- Assignments are really at the heart of the whole code generation
195 -- business. Almost all top-level nodes of any real importance are
196 -- assignments, which correspond to loads, stores, or register
197 -- transfers. If we're really lucky, some of the register transfers
198 -- will go away, because we can use the destination register to
199 -- complete the code generation for the right hand side. This only
200 -- fails when the right hand side is forced into a fixed register
201 -- (e.g. the result of a call).
202
203 assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
204 assignMem_IntCode pk addr src = do
205 (srcReg, code) <- getSomeReg src
206 Amode dstAddr addr_code <- getAmode addr
207 return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
208
209
210 assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
211 assignReg_IntCode _ reg src = do
212 dflags <- getDynFlags
213 r <- getRegister src
214 let dst = getRegisterReg (targetPlatform dflags) reg
215 return $ case r of
216 Any _ code -> code dst
217 Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
218
219
220
221 -- Floating point assignment to memory
222 assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
223 assignMem_FltCode pk addr src = do
224 dflags <- getDynFlags
225 Amode dst__2 code1 <- getAmode addr
226 (src__2, code2) <- getSomeReg src
227 tmp1 <- getNewRegNat pk
228 let
229 pk__2 = cmmExprType dflags src
230 code__2 = code1 `appOL` code2 `appOL`
231 if formatToWidth pk == typeWidth pk__2
232 then unitOL (ST pk src__2 dst__2)
233 else toOL [ FxTOy (cmmTypeFormat pk__2) pk src__2 tmp1
234 , ST pk tmp1 dst__2]
235 return code__2
236
237 -- Floating point assignment to a register/temporary
238 assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
239 assignReg_FltCode pk dstCmmReg srcCmmExpr = do
240 dflags <- getDynFlags
241 let platform = targetPlatform dflags
242 srcRegister <- getRegister srcCmmExpr
243 let dstReg = getRegisterReg platform dstCmmReg
244
245 return $ case srcRegister of
246 Any _ code -> code dstReg
247 Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
248
249
250
251
252 genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
253
254 genJump (CmmLit (CmmLabel lbl))
255 = return (toOL [CALL (Left target) 0 True, NOP])
256 where
257 target = ImmCLbl lbl
258
259 genJump tree
260 = do
261 (target, code) <- getSomeReg tree
262 return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
263
264 -- -----------------------------------------------------------------------------
265 -- Unconditional branches
266
267 genBranch :: BlockId -> NatM InstrBlock
268 genBranch = return . toOL . mkJumpInstr
269
270
271 -- -----------------------------------------------------------------------------
272 -- Conditional jumps
273
274 {-
275 Conditional jumps are always to local labels, so we can use branch
276 instructions. We peek at the arguments to decide what kind of
277 comparison to do.
278
279 SPARC: First, we have to ensure that the condition codes are set
280 according to the supplied comparison operation. We generate slightly
281 different code for floating point comparisons, because a floating
282 point operation cannot directly precede a @BF@. We assume the worst
283 and fill that slot with a @NOP@.
284
285 SPARC: Do not fill the delay slots here; you will confuse the register
286 allocator.
287 -}
288
289
290 genCondJump
291 :: BlockId -- the branch target
292 -> CmmExpr -- the condition on which to branch
293 -> NatM InstrBlock
294
295
296
297 genCondJump bid bool = do
298 CondCode is_float cond code <- getCondCode bool
299 return (
300 code `appOL`
301 toOL (
302 if is_float
303 then [NOP, BF cond False bid, NOP]
304 else [BI cond False bid, NOP]
305 )
306 )
307
308
309
310 -- -----------------------------------------------------------------------------
311 -- Generating a table-branch
312
313 genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
314 genSwitch dflags expr targets
315 | gopt Opt_PIC dflags
316 = error "MachCodeGen: sparc genSwitch PIC not finished\n"
317
318 | otherwise
319 = do (e_reg, e_code) <- getSomeReg (cmmOffset dflags expr offset)
320
321 base_reg <- getNewRegNat II32
322 offset_reg <- getNewRegNat II32
323 dst <- getNewRegNat II32
324
325 label <- getNewLabelNat
326
327 return $ e_code `appOL`
328 toOL
329 [ -- load base of jump table
330 SETHI (HI (ImmCLbl label)) base_reg
331 , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
332
333 -- the addrs in the table are 32 bits wide..
334 , SLL e_reg (RIImm $ ImmInt 2) offset_reg
335
336 -- load and jump to the destination
337 , LD II32 (AddrRegReg base_reg offset_reg) dst
338 , JMP_TBL (AddrRegImm dst (ImmInt 0)) ids label
339 , NOP ]
340 where (offset, ids) = switchTargetsToTable targets
341
342 generateJumpTableForInstr :: DynFlags -> Instr
343 -> Maybe (NatCmmDecl CmmStatics Instr)
344 generateJumpTableForInstr dflags (JMP_TBL _ ids label) =
345 let jumpTable = map (jumpTableEntry dflags) ids
346 in Just (CmmData ReadOnlyData (Statics label jumpTable))
347 generateJumpTableForInstr _ _ = Nothing
348
349
350
351 -- -----------------------------------------------------------------------------
352 -- Generating C calls
353
354 {-
355 Now the biggest nightmare---calls. Most of the nastiness is buried in
356 @get_arg@, which moves the arguments to the correct registers/stack
357 locations. Apart from that, the code is easy.
358
359 The SPARC calling convention is an absolute
360 nightmare. The first 6x32 bits of arguments are mapped into
361 %o0 through %o5, and the remaining arguments are dumped to the
362 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
363
364 If we have to put args on the stack, move %o6==%sp down by
365 the number of words to go on the stack, to ensure there's enough space.
366
367 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
368 16 words above the stack pointer is a word for the address of
369 a structure return value. I use this as a temporary location
370 for moving values from float to int regs. Certainly it isn't
371 safe to put anything in the 16 words starting at %sp, since
372 this area can get trashed at any time due to window overflows
373 caused by signal handlers.
374
375 A final complication (if the above isn't enough) is that
376 we can't blithely calculate the arguments one by one into
377 %o0 .. %o5. Consider the following nested calls:
378
379 fff a (fff b c)
380
381 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
382 the inner call will itself use %o0, which trashes the value put there
383 in preparation for the outer call. Upshot: we need to calculate the
384 args into temporary regs, and move those to arg regs or onto the
385 stack only immediately prior to the call proper. Sigh.
386 -}
387
388 genCCall
389 :: ForeignTarget -- function to call
390 -> [CmmFormal] -- where to put the result
391 -> [CmmActual] -- arguments (of mixed type)
392 -> NatM InstrBlock
393
394
395
396 -- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
397 -- are guaranteed to take place before writes afterwards (unlike on PowerPC).
398 -- Ref: Section 8.4 of the SPARC V9 Architecture manual.
399 --
400 -- In the SPARC case we don't need a barrier.
401 --
402 genCCall (PrimTarget MO_WriteBarrier) _ _
403 = return $ nilOL
404
405 genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
406 = return $ nilOL
407
408 genCCall target dest_regs args
409 = do -- work out the arguments, and assign them to integer regs
410 argcode_and_vregs <- mapM arg_to_int_vregs args
411 let (argcodes, vregss) = unzip argcode_and_vregs
412 let vregs = concat vregss
413
414 let n_argRegs = length allArgRegs
415 let n_argRegs_used = min (length vregs) n_argRegs
416
417
418 -- deal with static vs dynamic call targets
419 callinsns <- case target of
420 ForeignTarget (CmmLit (CmmLabel lbl)) _ ->
421 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
422
423 ForeignTarget expr _
424 -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
425 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
426
427 PrimTarget mop
428 -> do res <- outOfLineMachOp mop
429 lblOrMopExpr <- case res of
430 Left lbl -> do
431 return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
432
433 Right mopExpr -> do
434 (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
435 return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
436
437 return lblOrMopExpr
438
439 let argcode = concatOL argcodes
440
441 let (move_sp_down, move_sp_up)
442 = let diff = length vregs - n_argRegs
443 nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
444 in if nn <= 0
445 then (nilOL, nilOL)
446 else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
447
448 let transfer_code
449 = toOL (move_final vregs allArgRegs extraStackArgsHere)
450
451 dflags <- getDynFlags
452 return
453 $ argcode `appOL`
454 move_sp_down `appOL`
455 transfer_code `appOL`
456 callinsns `appOL`
457 unitOL NOP `appOL`
458 move_sp_up `appOL`
459 assign_code (targetPlatform dflags) dest_regs
460
461
462 -- | Generate code to calculate an argument, and move it into one
463 -- or two integer vregs.
464 arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
465 arg_to_int_vregs arg = do dflags <- getDynFlags
466 arg_to_int_vregs' dflags arg
467
468 arg_to_int_vregs' :: DynFlags -> CmmExpr -> NatM (OrdList Instr, [Reg])
469 arg_to_int_vregs' dflags arg
470
471 -- If the expr produces a 64 bit int, then we can just use iselExpr64
472 | isWord64 (cmmExprType dflags arg)
473 = do (ChildCode64 code r_lo) <- iselExpr64 arg
474 let r_hi = getHiVRegFromLo r_lo
475 return (code, [r_hi, r_lo])
476
477 | otherwise
478 = do (src, code) <- getSomeReg arg
479 let pk = cmmExprType dflags arg
480
481 case cmmTypeFormat pk of
482
483 -- Load a 64 bit float return value into two integer regs.
484 FF64 -> do
485 v1 <- getNewRegNat II32
486 v2 <- getNewRegNat II32
487
488 let code2 =
489 code `snocOL`
490 FMOV FF64 src f0 `snocOL`
491 ST FF32 f0 (spRel 16) `snocOL`
492 LD II32 (spRel 16) v1 `snocOL`
493 ST FF32 f1 (spRel 16) `snocOL`
494 LD II32 (spRel 16) v2
495
496 return (code2, [v1,v2])
497
498 -- Load a 32 bit float return value into an integer reg
499 FF32 -> do
500 v1 <- getNewRegNat II32
501
502 let code2 =
503 code `snocOL`
504 ST FF32 src (spRel 16) `snocOL`
505 LD II32 (spRel 16) v1
506
507 return (code2, [v1])
508
509 -- Move an integer return value into its destination reg.
510 _ -> do
511 v1 <- getNewRegNat II32
512
513 let code2 =
514 code `snocOL`
515 OR False g0 (RIReg src) v1
516
517 return (code2, [v1])
518
519
520 -- | Move args from the integer vregs into which they have been
521 -- marshalled, into %o0 .. %o5, and the rest onto the stack.
522 --
523 move_final :: [Reg] -> [Reg] -> Int -> [Instr]
524
525 -- all args done
526 move_final [] _ _
527 = []
528
529 -- out of aregs; move to stack
530 move_final (v:vs) [] offset
531 = ST II32 v (spRel offset)
532 : move_final vs [] (offset+1)
533
534 -- move into an arg (%o[0..5]) reg
535 move_final (v:vs) (a:az) offset
536 = OR False g0 (RIReg v) a
537 : move_final vs az offset
538
539
540 -- | Assign results returned from the call into their
541 -- destination regs.
542 --
543 assign_code :: Platform -> [LocalReg] -> OrdList Instr
544
545 assign_code _ [] = nilOL
546
547 assign_code platform [dest]
548 = let rep = localRegType dest
549 width = typeWidth rep
550 r_dest = getRegisterReg platform (CmmLocal dest)
551
552 result
553 | isFloatType rep
554 , W32 <- width
555 = unitOL $ FMOV FF32 (regSingle $ fReg 0) r_dest
556
557 | isFloatType rep
558 , W64 <- width
559 = unitOL $ FMOV FF64 (regSingle $ fReg 0) r_dest
560
561 | not $ isFloatType rep
562 , W32 <- width
563 = unitOL $ mkRegRegMoveInstr platform (regSingle $ oReg 0) r_dest
564
565 | not $ isFloatType rep
566 , W64 <- width
567 , r_dest_hi <- getHiVRegFromLo r_dest
568 = toOL [ mkRegRegMoveInstr platform (regSingle $ oReg 0) r_dest_hi
569 , mkRegRegMoveInstr platform (regSingle $ oReg 1) r_dest]
570
571 | otherwise
572 = panic "SPARC.CodeGen.GenCCall: no match"
573
574 in result
575
576 assign_code _ _
577 = panic "SPARC.CodeGen.GenCCall: no match"
578
579
580
581 -- | Generate a call to implement an out-of-line floating point operation
582 outOfLineMachOp
583 :: CallishMachOp
584 -> NatM (Either CLabel CmmExpr)
585
586 outOfLineMachOp mop
587 = do let functionName
588 = outOfLineMachOp_table mop
589
590 dflags <- getDynFlags
591 mopExpr <- cmmMakeDynamicReference dflags CallReference
592 $ mkForeignLabel functionName Nothing ForeignLabelInExternalPackage IsFunction
593
594 let mopLabelOrExpr
595 = case mopExpr of
596 CmmLit (CmmLabel lbl) -> Left lbl
597 _ -> Right mopExpr
598
599 return mopLabelOrExpr
600
601
602 -- | Decide what C function to use to implement a CallishMachOp
603 --
604 outOfLineMachOp_table
605 :: CallishMachOp
606 -> FastString
607
608 outOfLineMachOp_table mop
609 = case mop of
610 MO_F32_Exp -> fsLit "expf"
611 MO_F32_Log -> fsLit "logf"
612 MO_F32_Sqrt -> fsLit "sqrtf"
613 MO_F32_Pwr -> fsLit "powf"
614
615 MO_F32_Sin -> fsLit "sinf"
616 MO_F32_Cos -> fsLit "cosf"
617 MO_F32_Tan -> fsLit "tanf"
618
619 MO_F32_Asin -> fsLit "asinf"
620 MO_F32_Acos -> fsLit "acosf"
621 MO_F32_Atan -> fsLit "atanf"
622
623 MO_F32_Sinh -> fsLit "sinhf"
624 MO_F32_Cosh -> fsLit "coshf"
625 MO_F32_Tanh -> fsLit "tanhf"
626
627 MO_F64_Exp -> fsLit "exp"
628 MO_F64_Log -> fsLit "log"
629 MO_F64_Sqrt -> fsLit "sqrt"
630 MO_F64_Pwr -> fsLit "pow"
631
632 MO_F64_Sin -> fsLit "sin"
633 MO_F64_Cos -> fsLit "cos"
634 MO_F64_Tan -> fsLit "tan"
635
636 MO_F64_Asin -> fsLit "asin"
637 MO_F64_Acos -> fsLit "acos"
638 MO_F64_Atan -> fsLit "atan"
639
640 MO_F64_Sinh -> fsLit "sinh"
641 MO_F64_Cosh -> fsLit "cosh"
642 MO_F64_Tanh -> fsLit "tanh"
643
644 MO_UF_Conv w -> fsLit $ word2FloatLabel w
645
646 MO_Memcpy _ -> fsLit "memcpy"
647 MO_Memset _ -> fsLit "memset"
648 MO_Memmove _ -> fsLit "memmove"
649
650 MO_BSwap w -> fsLit $ bSwapLabel w
651 MO_PopCnt w -> fsLit $ popCntLabel w
652 MO_Clz w -> fsLit $ clzLabel w
653 MO_Ctz w -> fsLit $ ctzLabel w
654 MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
655 MO_Cmpxchg w -> fsLit $ cmpxchgLabel w
656 MO_AtomicRead w -> fsLit $ atomicReadLabel w
657 MO_AtomicWrite w -> fsLit $ atomicWriteLabel w
658
659 MO_S_QuotRem {} -> unsupported
660 MO_U_QuotRem {} -> unsupported
661 MO_U_QuotRem2 {} -> unsupported
662 MO_Add2 {} -> unsupported
663 MO_SubWordC {} -> unsupported
664 MO_AddIntC {} -> unsupported
665 MO_SubIntC {} -> unsupported
666 MO_U_Mul2 {} -> unsupported
667 MO_WriteBarrier -> unsupported
668 MO_Touch -> unsupported
669 (MO_Prefetch_Data _) -> unsupported
670 where unsupported = panic ("outOfLineCmmOp: " ++ show mop
671 ++ " not supported here")
672