Add a Word add-with-carry primop
[ghc.git] / compiler / nativeGen / SPARC / CodeGen.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Generating machine code (instruction selection)
4 --
5 -- (c) The University of Glasgow 1996-2004
6 --
7 -----------------------------------------------------------------------------
8
9 module SPARC.CodeGen (
10 cmmTopCodeGen,
11 generateJumpTableForInstr,
12 InstrBlock
13 )
14
15 where
16
17 #include "HsVersions.h"
18 #include "nativeGen/NCG.h"
19 #include "../includes/MachDeps.h"
20
21 -- NCG stuff:
22 import SPARC.Base
23 import SPARC.CodeGen.Sanity
24 import SPARC.CodeGen.Amode
25 import SPARC.CodeGen.CondCode
26 import SPARC.CodeGen.Gen64
27 import SPARC.CodeGen.Gen32
28 import SPARC.CodeGen.Base
29 import SPARC.Ppr ()
30 import SPARC.Instr
31 import SPARC.Imm
32 import SPARC.AddrMode
33 import SPARC.Regs
34 import SPARC.Stack
35 import Instruction
36 import Size
37 import NCGMonad
38
39 -- Our intermediate code:
40 import BlockId
41 import OldCmm
42 import PIC
43 import Reg
44 import CLabel
45 import CPrim
46
47 -- The rest:
48 import BasicTypes
49 import DynFlags
50 import FastString
51 import StaticFlags ( opt_PIC )
52 import OrdList
53 import Outputable
54 import Platform
55 import Unique
56
57 import Control.Monad ( mapAndUnzipM )
58
-- | Top level code generation: translate one raw Cmm declaration
--   into native SPARC declarations.
cmmTopCodeGen :: RawCmmDecl
              -> NatM [NatCmmDecl CmmStatics Instr]

cmmTopCodeGen (CmmProc info lab (ListGraph blocks))
 = do   dflags <- getDynFlags
        let platform = targetPlatform dflags

        -- Generate code for each basic block; each block may also
        -- produce some static data (e.g. jump tables).
        (nat_blocks, statics) <- mapAndUnzipM (basicBlockCodeGen platform) blocks

        let proc = CmmProc info lab (ListGraph (concat nat_blocks))
        return (proc : concat statics)

cmmTopCodeGen (CmmData sec dat)
 = return [CmmData sec dat]     -- no translation, we just use CmmStatic
76
77
-- | Do code generation on a single block of CMM code.
--      code generation may introduce new basic block boundaries, which
--      are indicated by the NEWBLOCK instruction. We must split up the
--      instruction stream into basic blocks again. Also, we extract
--      LDATAs here too.
basicBlockCodeGen :: Platform
                  -> CmmBasicBlock
                  -> NatM ( [NatBasicBlock Instr]
                          , [NatCmmDecl CmmStatics Instr])

basicBlockCodeGen platform cmm@(BasicBlock id stmts) = do
  instrs <- stmtsToInstrs stmts
  let
        -- Split the flat instruction stream back into basic blocks,
        -- peeling off LDATA pseudo-instructions as static data.
        -- 'top' collects the instructions belonging to the entry block.
        (top,other_blocks,statics)
                = foldrOL mkBlocks ([],[],[]) instrs

        -- A NEWBLOCK marker starts a fresh block; the instructions
        -- accumulated so far become the body of that new block.
        mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
          = ([], BasicBlock id instrs : blocks, statics)

        -- LDATA is not a real instruction; hoist it out as a CmmData
        -- top-level declaration.
        mkBlocks (LDATA sec dat) (instrs,blocks,statics)
          = (instrs, blocks, CmmData sec dat:statics)

        -- Ordinary instruction: prepend to the current block.
        mkBlocks instr (instrs,blocks,statics)
          = (instr:instrs, blocks, statics)

        -- do intra-block sanity checking
        blocksChecked
                = map (checkBlock platform cmm)
                $ BasicBlock id top : other_blocks

  return (blocksChecked, statics)
109
110
-- | Convert a list of Cmm statements to a single SPARC instruction stream.
stmtsToInstrs :: [CmmStmt] -> NatM InstrBlock
stmtsToInstrs stmts =
    mapM stmtToInstrs stmts >>= \chunks -> return (concatOL chunks)
116
117
-- | Convert a single Cmm statement to SPARC instructions.
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
    CmmNop         -> return nilOL
    CmmComment s   -> return (unitOL (COMMENT s))

    -- Register assignment: dispatch on the type of the destination.
    -- 64 bit integer values get special handling (assignReg_I64Code)
    -- because they don't fit in a single 32 bit SPARC register.
    CmmAssign reg src
      | isFloatType ty  -> assignReg_FltCode size reg src
      | isWord64 ty     -> assignReg_I64Code      reg src
      | otherwise       -> assignReg_IntCode size reg src
        where ty = cmmRegType reg
              size = cmmTypeSize ty

    -- Store to memory: same three-way dispatch, keyed on the type
    -- of the value being stored.
    CmmStore addr src
      | isFloatType ty  -> assignMem_FltCode size addr src
      | isWord64 ty     -> assignMem_I64Code      addr src
      | otherwise       -> assignMem_IntCode size addr src
        where ty = cmmExprType src
              size = cmmTypeSize ty

    CmmCall target result_regs args _
       -> genCCall target result_regs args

    CmmBranch id         -> genBranch id
    CmmCondBranch arg id -> genCondJump id arg
    CmmSwitch arg ids    -> genSwitch arg ids
    CmmJump arg _        -> genJump arg

    -- Returns are eliminated by the CPS conversion before we get here.
    CmmReturn
     -> panic "stmtToInstrs: return statement should have been cps'd away"
147
148
{-
Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.

A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to.  So you can't put
anything in between, lest it overwrite some of those registers.  If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:

    code
    LEA amode, tmp
    ... other computation ...
    ... (tmp) ...
-}
166
167
168
-- | Convert a BlockId to some CmmStatic data.
--   A missing target becomes a zero word placeholder.
jumpTableEntry :: Maybe BlockId -> CmmStatic
jumpTableEntry mBlockId =
    case mBlockId of
        Nothing      -> CmmStaticLit (CmmInt 0 wordWidth)
        Just blockid -> CmmStaticLit (CmmLabel (mkAsmTempLabel (getUnique blockid)))
174
175
176
177 -- -----------------------------------------------------------------------------
178 -- Generating assignments
179
180 -- Assignments are really at the heart of the whole code generation
181 -- business. Almost all top-level nodes of any real importance are
182 -- assignments, which correspond to loads, stores, or register
183 -- transfers. If we're really lucky, some of the register transfers
184 -- will go away, because we can use the destination register to
185 -- complete the code generation for the right hand side. This only
186 -- fails when the right hand side is forced into a fixed register
187 -- (e.g. the result of a call).
188
-- | Store an integer value to memory: compute the source into a
--   register, work out the destination addressing mode, then emit
--   a single store.
assignMem_IntCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_IntCode pk addr src = do
    (reg_src, code_src)     <- getSomeReg src
    Amode addr_dst code_dst <- getAmode addr
    let store = ST pk reg_src addr_dst
    return (code_src `appOL` code_dst `snocOL` store)
194
195
-- | Assign an integer value to a register.  If the source lands in a
--   fixed register, copy it across with an OR against %g0 (a move).
assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_IntCode _ reg src = do
    register <- getRegister src
    let dst = getRegisterReg reg
    case register of
        Any _ code         -> return (code dst)
        Fixed _ freg fcode -> return (fcode `snocOL` OR False g0 (RIReg freg) dst)
204
205
206
-- Floating point assignment to memory.  If the width of the source
-- expression differs from the store size we convert via a temporary
-- float register first.
assignMem_FltCode :: Size -> CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_FltCode pk addr src = do
    Amode dstAddr addrCode <- getAmode addr
    (srcReg, srcCode)      <- getSomeReg src
    tmp                    <- getNewRegNat pk
    let srcTy = cmmExprType src
        store
          | sizeToWidth pk == typeWidth srcTy
          = unitOL (ST pk srcReg dstAddr)
          | otherwise
          = toOL [ FxTOy (cmmTypeSize srcTy) pk srcReg tmp
                 , ST pk tmp dstAddr ]
    return (addrCode `appOL` srcCode `appOL` store)
221
-- Floating point assignment to a register/temporary.
assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_FltCode pk dstCmmReg srcCmmExpr = do
    register <- getRegister srcCmmExpr
    let dst = getRegisterReg dstCmmReg
    case register of
        Any _ mkCode        -> return (mkCode dst)
        Fixed _ src srcCode -> return (srcCode `snocOL` FMOV pk src dst)
231
232
233
234
-- | Unconditional jump to the given target expression.
--   The trailing NOP fills the branch delay slot.
genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock

-- Jump to a known label: emitted as a CALL so the (Left lbl) target
-- can be fixed up by the linker.
genJump (CmmLit (CmmLabel lbl)) =
    return (toOL [CALL (Left (ImmCLbl lbl)) 0 True, NOP])

-- Computed jump: evaluate the target into a register and JMP through it.
genJump tree = do
    (target, code) <- getSomeReg tree
    return (code `appOL` toOL [JMP (AddrRegReg target g0), NOP])
246
247 -- -----------------------------------------------------------------------------
248 -- Unconditional branches
249
-- | Unconditional branch to a local basic block.
genBranch :: BlockId -> NatM InstrBlock
genBranch bid = return (toOL (mkJumpInstr bid))
252
253
254 -- -----------------------------------------------------------------------------
255 -- Conditional jumps
256
257 {-
258 Conditional jumps are always to local labels, so we can use branch
259 instructions. We peek at the arguments to decide what kind of
260 comparison to do.
261
262 SPARC: First, we have to ensure that the condition codes are set
263 according to the supplied comparison operation. We generate slightly
264 different code for floating point comparisons, because a floating
265 point operation cannot directly precede a @BF@. We assume the worst
266 and fill that slot with a @NOP@.
267
268 SPARC: Do not fill the delay slots here; you will confuse the register
269 allocator.
270 -}
271
272
genCondJump
    :: BlockId      -- the branch target
    -> CmmExpr      -- the condition on which to branch
    -> NatM InstrBlock

genCondJump bid bool = do
    CondCode is_float cond code <- getCondCode bool
    -- A floating point compare cannot directly precede a BF, so we
    -- pad with a NOP first; both forms leave a NOP in the delay slot.
    let branch
          | is_float  = [NOP, BF cond False bid, NOP]
          | otherwise = [BI cond False bid, NOP]
    return (code `appOL` toOL branch)
290
291
292
293 -- -----------------------------------------------------------------------------
294 -- Generating a table-branch
295
-- | Generate a table branch for a CmmSwitch: index into a jump table
--   of 32-bit code addresses and jump through the loaded entry.
--   The table itself is emitted later by 'generateJumpTableForInstr'.
genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
genSwitch expr ids
  | opt_PIC
  = error "MachCodeGen: sparc genSwitch PIC not finished\n"

  | otherwise
  = do (e_reg, e_code) <- getSomeReg expr

       base_reg   <- getNewRegNat II32
       offset_reg <- getNewRegNat II32
       dst        <- getNewRegNat II32

       label      <- getNewLabelNat

       return $ e_code `appOL`
        toOL
                [ -- load base of jump table
                  -- SETHI/OR pair materialises the 32-bit table address.
                  SETHI (HI (ImmCLbl label)) base_reg
                , OR    False base_reg (RIImm $ LO $ ImmCLbl label) base_reg

                -- the addrs in the table are 32 bits wide..
                -- (shift left by 2 scales the index to a byte offset)
                , SLL   e_reg (RIImm $ ImmInt 2) offset_reg

                -- load and jump to the destination
                -- (NOP fills the delay slot of the jump)
                , LD      II32 (AddrRegReg base_reg offset_reg) dst
                , JMP_TBL (AddrRegImm dst (ImmInt 0)) ids label
                , NOP ]
323
-- | Emit the static jump table referenced by a JMP_TBL instruction,
--   one word per possible target.  Other instructions need no table.
generateJumpTableForInstr :: Instr -> Maybe (NatCmmDecl CmmStatics Instr)
generateJumpTableForInstr (JMP_TBL _ ids label)
  = Just (CmmData ReadOnlyData (Statics label (map jumpTableEntry ids)))
generateJumpTableForInstr _
  = Nothing
329
330
331
332 -- -----------------------------------------------------------------------------
333 -- Generating C calls
334
335 {-
336 Now the biggest nightmare---calls. Most of the nastiness is buried in
337 @get_arg@, which moves the arguments to the correct registers/stack
338 locations. Apart from that, the code is easy.
339
340 The SPARC calling convention is an absolute
341 nightmare. The first 6x32 bits of arguments are mapped into
342 %o0 through %o5, and the remaining arguments are dumped to the
343 stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
344
345 If we have to put args on the stack, move %o6==%sp down by
346 the number of words to go on the stack, to ensure there's enough space.
347
348 According to Fraser and Hanson's lcc book, page 478, fig 17.2,
349 16 words above the stack pointer is a word for the address of
350 a structure return value. I use this as a temporary location
351 for moving values from float to int regs. Certainly it isn't
352 safe to put anything in the 16 words starting at %sp, since
353 this area can get trashed at any time due to window overflows
354 caused by signal handlers.
355
356 A final complication (if the above isn't enough) is that
357 we can't blithely calculate the arguments one by one into
358 %o0 .. %o5. Consider the following nested calls:
359
360 fff a (fff b c)
361
362 Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
363 the inner call will itself use %o0, which trashes the value put there
364 in preparation for the outer call. Upshot: we need to calculate the
365 args into temporary regs, and move those to arg regs or onto the
366 stack only immediately prior to the call proper. Sigh.
367 -}
368
-- | Generate code for a C call: marshal arguments into virtual regs,
--   move them to %o0..%o5 / the stack just before the call, emit the
--   call itself, then move results into their destination regs.
genCCall
    :: CmmCallTarget            -- function to call
    -> [HintedCmmFormal]        -- where to put the result
    -> [HintedCmmActual]        -- arguments (of mixed type)
    -> NatM InstrBlock


-- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
-- are guaranteed to take place before writes afterwards (unlike on PowerPC).
-- Ref: Section 8.4 of the SPARC V9 Architecture manual.
--
-- In the SPARC case we don't need a barrier.
--
genCCall (CmmPrim (MO_WriteBarrier) _) _ _
 = do return nilOL

-- A CmmPrim that carries its own Cmm implementation: just expand it inline.
genCCall (CmmPrim _ (Just mkStmts)) results args
 = stmtsToInstrs (mkStmts results args)

genCCall target dest_regs argsAndHints
 = do
        -- need to remove alignment information
        -- (memcpy/memset/memmove carry an extra trailing alignment arg
        --  that the C functions don't take)
        let argsAndHints' | (CmmPrim mop _) <- target,
                            (mop == MO_Memcpy ||
                             mop == MO_Memset ||
                             mop == MO_Memmove)
                          = init argsAndHints

                          | otherwise
                          = argsAndHints

        -- strip hints from the arg regs
        let args :: [CmmExpr]
            args = map hintlessCmm argsAndHints'


        -- work out the arguments, and assign them to integer regs
        -- (each arg yields one or two 32-bit vregs; see arg_to_int_vregs)
        argcode_and_vregs <- mapM arg_to_int_vregs args
        let (argcodes, vregss) = unzip argcode_and_vregs
        let vregs = concat vregss

        let n_argRegs = length allArgRegs
        let n_argRegs_used = min (length vregs) n_argRegs


        -- deal with static vs dynamic call targets
        callinsns <- case target of
                CmmCallee (CmmLit (CmmLabel lbl)) _ ->
                        return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))

                -- NOTE(review): the [dyn_r] pattern assumes the target
                -- expression is word-sized (one vreg); a 64-bit target
                -- would make this match fail at runtime.
                CmmCallee expr _
                 -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
                       return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)

                -- Other prims are implemented by out-of-line C functions.
                CmmPrim mop _
                 -> do res <- outOfLineMachOp mop
                       lblOrMopExpr <- case res of
                               Left lbl -> do
                                       return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))

                               Right mopExpr -> do
                                       (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
                                       return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)

                       return lblOrMopExpr

        let argcode = concatOL argcodes

        -- If some args spill to the stack, move %sp down to make room,
        -- and back up afterwards.
        let (move_sp_down, move_sp_up)
                = let diff = length vregs - n_argRegs
                      nn   = if odd diff then diff + 1 else diff -- keep 8-byte alignment
                  in  if nn <= 0
                        then (nilOL, nilOL)
                        else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))

        -- Move the marshalled vregs into the real arg regs / stack slots
        -- only now, immediately before the call (see the nested-call
        -- discussion in the comment above).
        let transfer_code
                = toOL (move_final vregs allArgRegs extraStackArgsHere)

        dflags <- getDynFlags
        -- The NOP after the call fills its delay slot.
        return
         $      argcode                 `appOL`
                move_sp_down            `appOL`
                transfer_code           `appOL`
                callinsns               `appOL`
                unitOL NOP              `appOL`
                move_sp_up              `appOL`
                assign_code (targetPlatform dflags) dest_regs
457
458
-- | Generate code to calculate an argument, and move it into one
--      or two integer vregs.
--
--      64-bit values (ints and doubles) produce two 32-bit vregs,
--      high word first; everything else produces one.
arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
arg_to_int_vregs arg

        -- If the expr produces a 64 bit int, then we can just use iselExpr64
        | isWord64 (cmmExprType arg)
        = do    (ChildCode64 code r_lo) <- iselExpr64 arg
                let r_hi = getHiVRegFromLo r_lo
                return (code, [r_hi, r_lo])

        | otherwise
        = do    (src, code) <- getSomeReg arg
                let pk = cmmExprType arg

                case cmmTypeSize pk of

                 -- Load a 64 bit float return value into two integer regs.
                 -- Goes via the scratch word at [%sp+16*4]: copy the double
                 -- into %f0/%f1, then bounce each half through memory.
                 -- (See the calling-convention comment above genCCall for
                 -- why spRel 16 is a safe temporary.)
                 FF64 -> do
                        v1 <- getNewRegNat II32
                        v2 <- getNewRegNat II32

                        let code2 =
                                code                            `snocOL`
                                FMOV FF64 src f0                `snocOL`
                                ST   FF32  f0 (spRel 16)        `snocOL`
                                LD   II32  (spRel 16) v1        `snocOL`
                                ST   FF32  f1 (spRel 16)        `snocOL`
                                LD   II32  (spRel 16) v2

                        return (code2, [v1,v2])

                 -- Load a 32 bit float return value into an integer reg,
                 -- again bouncing through the scratch word on the stack.
                 FF32 -> do
                        v1 <- getNewRegNat II32

                        let code2 =
                                code `snocOL`
                                ST   FF32  src (spRel 16)       `snocOL`
                                LD   II32  (spRel 16) v1

                        return (code2, [v1])

                 -- Move an integer value into a fresh vreg
                 -- (OR against %g0 is the SPARC register-move idiom).
                 _ -> do
                        v1 <- getNewRegNat II32

                        let code2 =
                                code `snocOL`
                                OR False g0 (RIReg src) v1

                        return (code2, [v1])
511
512
-- | Move args from the integer vregs into which they have been
--   marshalled, into %o0 .. %o5, and the rest onto the stack.
--
move_final :: [Reg] -> [Reg] -> Int -> [Instr]
move_final vregs aregs offset =
    case (vregs, aregs) of
        -- all args done
        ([], _)
          -> []

        -- out of arg regs; spill the rest to the stack, one word each
        (v:vs, [])
          -> ST II32 v (spRel offset)
           : move_final vs [] (offset + 1)

        -- move into an arg (%o[0..5]) reg via an OR against %g0
        (v:vs, a:az)
          -> OR False g0 (RIReg v) a
           : move_final vs az offset
531
532
-- | Assign results returned from the call into their
--      destination regs.
--
--      Float results arrive in %f0 (fReg 0); integer results arrive in
--      %o0 (and %o1 for the low word of a 64-bit result).
--      Only zero or one result reg is supported.
assign_code :: Platform -> [CmmHinted LocalReg] -> OrdList Instr

assign_code _ [] = nilOL

assign_code platform [CmmHinted dest _hint]
 = let  rep     = localRegType dest
        width   = typeWidth rep
        r_dest  = getRegisterReg (CmmLocal dest)

        result
                | isFloatType rep
                , W32   <- width
                = unitOL $ FMOV FF32 (regSingle $ fReg 0) r_dest

                | isFloatType rep
                , W64   <- width
                = unitOL $ FMOV FF64 (regSingle $ fReg 0) r_dest

                | not $ isFloatType rep
                , W32   <- width
                = unitOL $ mkRegRegMoveInstr platform (regSingle $ oReg 0) r_dest

                -- 64-bit int result: high word from %o0, low word from %o1.
                | not $ isFloatType rep
                , W64           <- width
                , r_dest_hi     <- getHiVRegFromLo r_dest
                = toOL  [ mkRegRegMoveInstr platform (regSingle $ oReg 0) r_dest_hi
                        , mkRegRegMoveInstr platform (regSingle $ oReg 1) r_dest]

                | otherwise
                = panic "SPARC.CodeGen.GenCCall: no match"

   in   result

-- More than one result reg is not handled.
assign_code _ _
 = panic "SPARC.CodeGen.GenCCall: no match"
571
572
573
-- | Generate a call to implement an out-of-line floating point operation.
--   Returns either the label of the C function to call, or (when PIC
--   indirection is involved) an expression computing its address.
outOfLineMachOp
    :: CallishMachOp
    -> NatM (Either CLabel CmmExpr)

outOfLineMachOp mop = do
    dflags  <- getDynFlags
    mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference
             $ mkForeignLabel (outOfLineMachOp_table mop)
                              Nothing ForeignLabelInExternalPackage IsFunction
    case mopExpr of
        CmmLit (CmmLabel lbl) -> return (Left lbl)
        _                     -> return (Right mopExpr)
593
594
-- | Decide what C function to use to implement a CallishMachOp
--
--   These map onto the standard C math library (libm) names;
--   ops with no C implementation here are a panic.
outOfLineMachOp_table
    :: CallishMachOp
    -> FastString

outOfLineMachOp_table mop
 = case mop of
        -- 32-bit float ops: single-precision libm variants.
        MO_F32_Exp    -> fsLit "expf"
        MO_F32_Log    -> fsLit "logf"
        MO_F32_Sqrt   -> fsLit "sqrtf"
        MO_F32_Pwr    -> fsLit "powf"

        MO_F32_Sin    -> fsLit "sinf"
        MO_F32_Cos    -> fsLit "cosf"
        MO_F32_Tan    -> fsLit "tanf"

        MO_F32_Asin   -> fsLit "asinf"
        MO_F32_Acos   -> fsLit "acosf"
        MO_F32_Atan   -> fsLit "atanf"

        MO_F32_Sinh   -> fsLit "sinhf"
        MO_F32_Cosh   -> fsLit "coshf"
        MO_F32_Tanh   -> fsLit "tanhf"

        -- 64-bit float ops: double-precision libm variants.
        MO_F64_Exp    -> fsLit "exp"
        MO_F64_Log    -> fsLit "log"
        MO_F64_Sqrt   -> fsLit "sqrt"
        MO_F64_Pwr    -> fsLit "pow"

        MO_F64_Sin    -> fsLit "sin"
        MO_F64_Cos    -> fsLit "cos"
        MO_F64_Tan    -> fsLit "tan"

        MO_F64_Asin   -> fsLit "asin"
        MO_F64_Acos   -> fsLit "acos"
        MO_F64_Atan   -> fsLit "atan"

        MO_F64_Sinh   -> fsLit "sinh"
        MO_F64_Cosh   -> fsLit "cosh"
        MO_F64_Tanh   -> fsLit "tanh"

        -- Memory ops from the C library.
        MO_Memcpy     -> fsLit "memcpy"
        MO_Memset     -> fsLit "memset"
        MO_Memmove    -> fsLit "memmove"

        MO_PopCnt w   -> fsLit $ popCntLabel w

        -- No out-of-line implementation exists for these;
        -- they must be handled elsewhere (or not at all on SPARC).
        MO_S_QuotRem {}  -> unsupported
        MO_U_QuotRem {}  -> unsupported
        MO_Add2 {}       -> unsupported
        MO_WriteBarrier  -> unsupported
        MO_Touch         -> unsupported
    where unsupported = panic ("outOfLineCmmOp: " ++ show mop
                            ++ " not supported here")
650