LLVM: Implement atomic operations in terms of LLVM primitives
[ghc.git] / compiler / llvmGen / LlvmCodeGen / CodeGen.hs
1 {-# LANGUAGE CPP, GADTs #-}
2 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
3 -- ----------------------------------------------------------------------------
4 -- | Handle conversion of CmmProc to LLVM code.
5 --
6 module LlvmCodeGen.CodeGen ( genLlvmProc ) where
7
8 #include "HsVersions.h"
9
10 import Llvm
11 import LlvmCodeGen.Base
12 import LlvmCodeGen.Regs
13
14 import BlockId
15 import CodeGen.Platform ( activeStgRegs, callerSaves )
16 import CLabel
17 import Cmm
18 import PprCmm
19 import CmmUtils
20 import CmmSwitch
21 import Hoopl
22
23 import DynFlags
24 import FastString
25 import ForeignCall
26 import Outputable hiding (panic, pprPanic)
27 import qualified Outputable
28 import Platform
29 import OrdList
30 import UniqSupply
31 import Unique
32 import Util
33
34 import Control.Monad.Trans.Class
35 import Control.Monad.Trans.Writer
36
37 #if MIN_VERSION_base(4,8,0)
38 #else
39 import Data.Monoid ( Monoid, mappend, mempty )
40 #endif
41 import Data.List ( nub )
42 import Data.Maybe ( catMaybes )
43
-- | Whether a memory access must be atomic (lowered with LLVM's atomic
-- load/store forms, sequentially consistent).
type Atomic = Bool

-- | Accumulated LLVM statements; 'OrdList' gives cheap appends at both ends.
type LlvmStatements = OrdList LlvmStatement
46
47 -- -----------------------------------------------------------------------------
48 -- | Top-level of the LLVM proc Code generator
49 --
genLlvmProc :: RawCmmDecl -> LlvmM [LlvmCmmDecl]
genLlvmProc (CmmProc infos lbl live graph) = do
    -- Entry block first so it can receive the prologue in
    -- 'basicBlocksCodeGen'.
    let blocks = toBlockListEntryFirstFalseFallthrough graph
    (lmblocks, lmdata) <- basicBlocksCodeGen live blocks
    let info = mapLookup (g_entry graph) infos
        proc = CmmProc info lbl live (ListGraph lmblocks)
    -- The proc itself, followed by any global data it generated.
    return (proc:lmdata)

-- Only procedures are expected here; data declarations are handled elsewhere.
genLlvmProc _ = panic "genLlvmProc: case that shouldn't reach here!"
59
60 -- -----------------------------------------------------------------------------
61 -- * Block code generation
62 --
63
-- | Generate code for a list of blocks that make up a complete
-- procedure. The first block in the list is expected to be the entry
-- point and will get the prologue.
basicBlocksCodeGen :: LiveGlobalRegs -> [CmmBlock]
                   -> LlvmM ([LlvmBasicBlock], [LlvmCmmDecl])
basicBlocksCodeGen _    []                     = panic "no entry block!"
basicBlocksCodeGen live (entryBlock:cmmBlocks)
  = do (prologue, prologueTops) <- funPrologue live (entryBlock:cmmBlocks)

       -- Generate code
       (BasicBlock bid entry, entryTops) <- basicBlockCodeGen entryBlock
       (blocks, topss) <- fmap unzip $ mapM basicBlockCodeGen cmmBlocks

       -- Compose: splice the prologue statements in front of the entry
       -- block's own statements.
       -- NOTE: this 'entryBlock' deliberately shadows the parameter above.
       let entryBlock = BasicBlock bid (fromOL prologue ++ entry)
       return (entryBlock : blocks, prologueTops ++ entryTops ++ concat topss)
80
81
-- | Generate code for one block: compile the middle nodes and the block's
-- terminator, returning the finished LLVM basic block plus any top-level
-- data either produced.
basicBlockCodeGen :: CmmBlock -> LlvmM ( LlvmBasicBlock, [LlvmCmmDecl] )
basicBlockCodeGen block
  = do let (_, nodes, tail)  = blockSplit block
           id = entryLabel block
       (mid_instrs, top) <- stmtsToInstrs $ blockToList nodes
       (tail_instrs, top') <- stmtToInstrs tail
       let instrs = fromOL (mid_instrs `appOL` tail_instrs)
       return (BasicBlock id instrs, top' ++ top)
91
92 -- -----------------------------------------------------------------------------
93 -- * CmmNode code generation
94 --
95
-- A statement conversion return data.
--   * LlvmStatements: The compiled LLVM statements.
--   * LlvmCmmDecl: Any global data needed.
type StmtData = (LlvmStatements, [LlvmCmmDecl])
100
101
-- | Convert a list of CmmNode's to LlvmStatement's, concatenating the
-- statements and global data of each node in order.
stmtsToInstrs :: [CmmNode e x] -> LlvmM StmtData
stmtsToInstrs stmts
   = do (instrss, topss) <- fmap unzip $ mapM stmtToInstrs stmts
        return (concatOL instrss, concat topss)
107
108
-- | Convert a CmmStmt to a list of LlvmStatement's
stmtToInstrs :: CmmNode e x -> LlvmM StmtData
stmtToInstrs stmt = case stmt of

    -- Comments, ticks and unwind info carry no run-time semantics here.
    CmmComment _         -> return (nilOL, []) -- nuke comments
    CmmTick    _         -> return (nilOL, [])
    CmmUnwind  {}        -> return (nilOL, [])

    CmmAssign reg src    -> genAssign reg src
    CmmStore addr src    -> genStore addr src

    CmmBranch id         -> genBranch id
    CmmCondBranch arg true false _ -- TODO: likely annotation
                         -> genCondBranch arg true false
    CmmSwitch arg ids    -> genSwitch arg ids

    -- Foreign Call
    CmmUnsafeForeignCall target res args
        -> genCall target res args

    -- Tail call
    CmmCall { cml_target = arg,
              cml_args_regs = live } -> genJump arg live

    -- Anything else (e.g. safe foreign calls) should have been lowered
    -- before reaching the LLVM backend.
    _ -> panic "Llvm.CodeGen.stmtToInstrs"
134
-- | Wrapper function to declare an intrinsic function by function type.
--
-- Returns a constant global variable referring to the function, no
-- statements, and (only on first use) a top-level declaration for it.
getInstrinct2 :: LMString -> LlvmType -> LlvmM ExprData
getInstrinct2 fname fty@(LMFunction funSig) = do

    -- Constant global reference to the intrinsic.
    let fv = LMGlobalVar fname fty (funcLinkage funSig) Nothing Nothing Constant

    -- Emit the declaration only the first time this intrinsic is seen;
    -- later uses find it in the function environment.
    fn <- funLookup fname
    tops <- case fn of
      Just _  ->
        return []
      Nothing -> do
        funInsert fname fty
        return [CmmData Data [([],[fty])]]

    return (fv, nilOL, tops)

-- Use 'panic' like every other failure path in this module (was 'error'),
-- so the failure is reported as a GHC internal error.
getInstrinct2 _ _ = panic "getInstrinct2: Non-function type!"
152
-- | Declares an intrinsic function by return and parameter types.
-- Builds a C-calling-convention, fixed-arity function declaration and
-- defers to 'getInstrinct2'.
getInstrinct :: LMString -> LlvmType -> [LlvmType] -> LlvmM ExprData
getInstrinct fname retTy parTys =
    let funSig = LlvmFunctionDecl fname ExternallyVisible CC_Ccc retTy
                     FixedArgs (tysToParams parTys) Nothing
        fty = LMFunction funSig
    in getInstrinct2 fname fty
160
-- | Memory barrier instruction for LLVM >= 3.0.
-- Emits a non-singlethread, sequentially-consistent @fence@.
barrier :: LlvmM StmtData
barrier = return (unitOL fenceStmt, [])
  where
    fenceStmt = Fence False SyncSeqCst
166
-- | Foreign Calls
genCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
        -> LlvmM StmtData

-- Write barrier needs to be handled specially as it is implemented as an LLVM
-- intrinsic function.
genCall (PrimTarget MO_WriteBarrier) _ _ = do
    platform <- getLlvmPlatform
    -- On these architectures the write barrier is a no-op; otherwise emit
    -- a fence (see 'barrier').
    if platformArch platform `elem` [ArchX86, ArchX86_64, ArchSPARC]
       then return (nilOL, [])
       else barrier

-- MO_Touch exists only to keep a value alive; no code is generated.
genCall (PrimTarget MO_Touch) _ _
 = return (nilOL, [])

-- Unsigned int -> float conversion: a single 'uitofp' cast into a fresh
-- local, then a store into the destination register.
genCall (PrimTarget (MO_UF_Conv w)) [dst] [e] = do
    dstV <- getCmmReg (CmmLocal dst)
    let ty = cmmToLlvmType $ localRegType dst
        width = widthToLlvmFloat w
    castV <- mkLocalVar ty
    (ve, stmts, top) <- exprToVar e
    let stmt3 = Assignment castV $ Cast LM_Uitofp ve width
        stmt4 = Store castV dstV
    return (stmts `snocOL` stmt3 `snocOL` stmt4, top)

genCall (PrimTarget (MO_UF_Conv _)) [_] args =
    panic $ "genCall: Too many arguments to MO_UF_Conv. " ++
    "Can only handle 1, given" ++ show (length args) ++ "."

-- Handle prefetching data
genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
  | 0 <= localityInt && localityInt <= 3 = do
    -- llvm.prefetch takes (address, rw, locality, cache type).
    let argTy = [i8Ptr, i32, i32, i32]
        funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
                             CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing

    let (_, arg_hints) = foreignTargetHints t
    let args_hints' = zip args arg_hints
    (argVars, stmts1, top1) <- arg_vars args_hints' ([], nilOL, [])
    (fptr, stmts2, top2) <- getFunPtr funTy t
    (argVars', stmts3) <- castVars $ zip argVars argTy

    trash <- getTrashStmts
    -- Fixed trailing arguments: rw = 0 (read), requested locality,
    -- cache type = 1 (data cache).
    let argSuffix = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1]
        call = Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
        stmts = stmts1 `appOL` stmts2 `appOL` stmts3
                `appOL` trash `snocOL` call
    return (stmts, top1 ++ top2)
  | otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
216
-- Handle PopCnt, Clz, Ctz, and BSwap that need to only convert arg
-- and return types
genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
    genCallSimpleCast w t dsts args
genCall t@(PrimTarget (MO_Clz w)) dsts args =
    genCallSimpleCast w t dsts args
genCall t@(PrimTarget (MO_Ctz w)) dsts args =
    genCallSimpleCast w t dsts args
genCall t@(PrimTarget (MO_BSwap w)) dsts args =
    genCallSimpleCast w t dsts args

-- Atomic read-modify-write: lower directly to LLVM's 'atomicrmw'
-- instruction, storing the returned old value into @dst@.
genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = do
    (addrVar, stmts1, decls1) <- exprToVar addr
    (nVar, stmts2, decls2) <- exprToVar n
    let targetTy = widthToLlvmInt width
        ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
    (ptrVar, stmt3) <- doExpr (pLift targetTy) ptrExpr
    dstVar <- getCmmReg (CmmLocal dst)
    -- Map the Cmm atomic operation onto the LLVM one.
    let op = case amop of
               AMO_Add  -> LAO_Add
               AMO_Sub  -> LAO_Sub
               AMO_And  -> LAO_And
               AMO_Nand -> LAO_Nand
               AMO_Or   -> LAO_Or
               AMO_Xor  -> LAO_Xor
    (retVar, stmt4) <- doExpr targetTy $ AtomicRMW op ptrVar nVar SyncSeqCst
    let stmt5 = Store retVar dstVar
    let stmts = stmts1 `appOL` stmts2 `snocOL`
                stmt3 `snocOL` stmt4 `snocOL` stmt5
    return (stmts, decls1++decls2)

-- Atomic read: an ordinary load with the atomic flag set (see 'genLoad').
genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = do
    dstV <- getCmmReg (CmmLocal dst)
    (v1, stmts, top) <- genLoad True addr (localRegType dst)
    let stmt1 = Store v1 dstV
    return (stmts `snocOL` stmt1, top)

-- Compare-and-swap: lower to 'cmpxchg'. The instruction returns a
-- { old value, success } struct; the old value (field 0) goes to @dst@.
genCall (PrimTarget (MO_Cmpxchg _width)) [dst] [addr, old, new] = do
    (addrVar, stmts1, decls1) <- exprToVar addr
    (oldVar, stmts2, decls2) <- exprToVar old
    (newVar, stmts3, decls3) <- exprToVar new
    let targetTy = getVarType oldVar
        ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
    (ptrVar, stmt4) <- doExpr (pLift targetTy) ptrExpr
    dstVar <- getCmmReg (CmmLocal dst)
    (retVar, stmt5) <- doExpr (LMStructU [targetTy,i1])
                       $ CmpXChg ptrVar oldVar newVar SyncSeqCst SyncSeqCst
    (retVar', stmt6) <- doExpr targetTy $ ExtractV retVar 0
    let stmt7 = Store retVar' dstVar
        stmts = stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL`
                stmt4 `snocOL` stmt5 `snocOL` stmt6 `snocOL` stmt7
    return (stmts, decls1 ++ decls2 ++ decls3)

-- Atomic write: implemented as an atomic exchange ('atomicrmw xchg')
-- whose result is discarded.
genCall (PrimTarget (MO_AtomicWrite _width)) [] [addr, val] = do
    (addrVar, stmts1, decls1) <- exprToVar addr
    (valVar, stmts2, decls2) <- exprToVar val
    let ptrTy = pLift $ getVarType valVar
        ptrExpr = Cast LM_Inttoptr addrVar ptrTy
    (ptrVar, stmt3) <- doExpr ptrTy ptrExpr
    let stmts4 = unitOL $ Expr
                 $ AtomicRMW LAO_Xchg ptrVar valVar SyncSeqCst
        stmts = stmts1 `appOL` stmts2 `snocOL` stmt3 `appOL` stmts4
    return (stmts, decls1++decls2)
280
-- Handle memcpy function specifically since llvm's intrinsic version takes
-- some extra parameters.
genCall t@(PrimTarget op) [] args
 | Just align <- machOpMemcpyishAlign op = do
    dflags <- getDynFlags
    -- The intrinsics take an extra trailing (align, isvolatile) pair.
    let isVolTy = [i1]
        isVolVal = [mkIntLit i1 0]
        argTy | MO_Memset _ <- op = [i8Ptr, i8, llvmWord dflags, i32] ++ isVolTy
              | otherwise = [i8Ptr, i8Ptr, llvmWord dflags, i32] ++ isVolTy
        funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
                             CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing

    let (_, arg_hints) = foreignTargetHints t
    let args_hints = zip args arg_hints
    (argVars, stmts1, top1) <- arg_vars args_hints ([], nilOL, [])
    (fptr, stmts2, top2) <- getFunPtr funTy t
    (argVars', stmts3) <- castVars $ zip argVars argTy

    stmts4 <- getTrashStmts
    let alignVal = mkIntLit i32 align
        arguments = argVars' ++ (alignVal:isVolVal)
        call = Expr $ Call StdCall fptr arguments []
        stmts = stmts1 `appOL` stmts2 `appOL` stmts3
                `appOL` stmts4 `snocOL` call
    return (stmts, top1 ++ top2)

-- We handle MO_U_Mul2 by simply using a 'mul' instruction, but with operands
-- twice the width (we first zero-extend them), e.g., on 64-bit arch we will
-- generate 'mul' on 128-bit operands. Then we only need some plumbing to
-- extract the two 64-bit values out of 128-bit result.
genCall (PrimTarget (MO_U_Mul2 w)) [dstH, dstL] [lhs, rhs] = do
    let width = widthToLlvmInt w
        bitWidth = widthInBits w
        width2x = LMInt (bitWidth * 2)
    -- First zero-extend the operands ('mul' instruction requires the operands
    -- and the result to be of the same type). Note that we don't use 'castVars'
    -- because it tries to do LM_Sext.
    (lhsVar, stmts1, decls1) <- exprToVar lhs
    (rhsVar, stmts2, decls2) <- exprToVar rhs
    (lhsExt, stmt3) <- doExpr width2x $ Cast LM_Zext lhsVar width2x
    (rhsExt, stmt4) <- doExpr width2x $ Cast LM_Zext rhsVar width2x
    -- Do the actual multiplication (note that the result is also 2x width).
    (retV, stmt5) <- doExpr width2x $ LlvmOp LM_MO_Mul lhsExt rhsExt
    -- Extract the lower bits of the result into retL.
    (retL, stmt6) <- doExpr width $ Cast LM_Trunc retV width
    -- Now we right-shift the higher bits by width.
    let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
    (retShifted, stmt7) <- doExpr width2x $ LlvmOp LM_MO_LShr retV widthLlvmLit
    -- And extract them into retH.
    (retH, stmt8) <- doExpr width $ Cast LM_Trunc retShifted width
    dstRegL <- getCmmReg (CmmLocal dstL)
    dstRegH <- getCmmReg (CmmLocal dstH)
    let storeL = Store retL dstRegL
        storeH = Store retH dstRegH
        stmts = stmts1 `appOL` stmts2 `appOL`
                toOL [ stmt3 , stmt4, stmt5, stmt6, stmt7, stmt8, storeL, storeH ]
    return (stmts, decls1 ++ decls2)
338
-- MO_U_QuotRem2 is another case we handle by widening the registers to double
-- the width and use normal LLVM instructions (similarly to the MO_U_Mul2). The
-- main difference here is that we need to combine two words into one register
-- and then use both 'udiv' and 'urem' instructions to compute the result.
genCall (PrimTarget (MO_U_QuotRem2 w)) [dstQ, dstR] [lhsH, lhsL, rhs] = run $ do
    let width = widthToLlvmInt w
        bitWidth = widthInBits w
        width2x = LMInt (bitWidth * 2)
    -- First zero-extend all parameters to double width.
    let zeroExtend expr = do
          var <- liftExprData $ exprToVar expr
          doExprW width2x $ Cast LM_Zext var width2x
    lhsExtH <- zeroExtend lhsH
    lhsExtL <- zeroExtend lhsL
    rhsExt <- zeroExtend rhs
    -- Now we combine the first two parameters (that represent the high and low
    -- bits of the value). So first left-shift the high bits to their position
    -- and then bit-or them with the low bits.
    let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
    lhsExtHShifted <- doExprW width2x $ LlvmOp LM_MO_Shl lhsExtH widthLlvmLit
    lhsExt <- doExprW width2x $ LlvmOp LM_MO_Or lhsExtHShifted lhsExtL
    -- Finally, we can call 'udiv' and 'urem' to compute the results.
    retExtDiv <- doExprW width2x $ LlvmOp LM_MO_UDiv lhsExt rhsExt
    retExtRem <- doExprW width2x $ LlvmOp LM_MO_URem lhsExt rhsExt
    -- And since everything is in 2x width, we need to truncate the results and
    -- then return them.
    let narrow var = doExprW width $ Cast LM_Trunc var width
    retDiv <- narrow retExtDiv
    retRem <- narrow retExtRem
    dstRegQ <- lift $ getCmmReg (CmmLocal dstQ)
    dstRegR <- lift $ getCmmReg (CmmLocal dstR)
    statement $ Store retDiv dstRegQ
    statement $ Store retRem dstRegR
  where
    -- TODO(michalt): Consider extracting this and using in more places.
    -- Hopefully this should cut down on the noise of accumulating the
    -- statements and declarations.
    -- Run an expression, recording its statement in the Writer accumulator.
    doExprW :: LlvmType -> LlvmExpression -> WriterT LlvmAccum LlvmM LlvmVar
    doExprW a b = do
        (var, stmt) <- lift $ doExpr a b
        statement stmt
        return var
    -- Run the Writer action and unpack the accumulated statements/decls.
    run :: WriterT LlvmAccum LlvmM () -> LlvmM (LlvmStatements, [LlvmCmmDecl])
    run action = do
        LlvmAccum stmts decls <- execWriterT action
        return (stmts, decls)

-- Handle the MO_{Add,Sub}IntC separately. LLVM versions return a record from
-- which we need to extract the actual values.
genCall t@(PrimTarget (MO_AddIntC w)) [dstV, dstO] [lhs, rhs] =
    genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
genCall t@(PrimTarget (MO_SubIntC w)) [dstV, dstO] [lhs, rhs] =
    genCallWithOverflow t w [dstV, dstO] [lhs, rhs]

-- Similar to MO_{Add,Sub}IntC, but MO_Add2 expects the first element of the
-- return tuple to be the overflow bit and the second element to contain the
-- actual result of the addition. So we still use genCallWithOverflow but swap
-- the return registers.
genCall t@(PrimTarget (MO_Add2 w)) [dstO, dstV] [lhs, rhs] =
    genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
399
-- Handle all other foreign calls and prim ops.
genCall target res args = do

    dflags <- getDynFlags

    -- parameter types
    let arg_type (_, AddrHint) = i8Ptr
        -- cast pointers to i8*. Llvm equivalent of void*
        arg_type (expr, _) = cmmToLlvmType $ cmmExprType dflags expr

    -- ret type
    let ret_type [] = LMVoid
        ret_type [(_, AddrHint)] = i8Ptr
        ret_type [(reg, _)] = cmmToLlvmType $ localRegType reg
        ret_type t = panic $ "genCall: Too many return values! Can only handle"
                        ++ " 0 or 1, given " ++ show (length t) ++ "."

    -- extract Cmm call convention, and translate to LLVM call convention
    platform <- getLlvmPlatform
    let lmconv = case target of
            ForeignTarget _ (ForeignConvention conv _ _ _) ->
              case conv of
                 StdCallConv -> case platformArch platform of
                                ArchX86    -> CC_X86_Stdcc
                                ArchX86_64 -> CC_X86_Stdcc
                                _          -> CC_Ccc
                 CCallConv    -> CC_Ccc
                 CApiConv     -> CC_Ccc
                 PrimCallConv -> panic "LlvmCodeGen.CodeGen.genCall: PrimCallConv"
                 JavaScriptCallConv -> panic "LlvmCodeGen.CodeGen.genCall: JavaScriptCallConv"

            PrimTarget _ -> CC_Ccc

    {-
        CC_Ccc of the possibilities here are a worry with the use of a custom
        calling convention for passing STG args. In practice the more
        dangerous combinations (e.g StdCall + llvmGhcCC) don't occur.

        The native code generator only handles StdCall and CCallConv.
    -}

    -- call attributes
    let fnAttrs | never_returns = NoReturn : llvmStdFunAttrs
                | otherwise     = llvmStdFunAttrs

        never_returns = case target of
             ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns) -> True
             _ -> False

    -- fun type
    let (res_hints, arg_hints) = foreignTargetHints target
    let args_hints = zip args arg_hints
    let ress_hints = zip res res_hints
    let ccTy  = StdCall -- tail calls should be done through CmmJump
    let retTy = ret_type ress_hints
    let argTy = tysToParams $ map arg_type args_hints
    let funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
                             lmconv retTy FixedArgs argTy (llvmFunAlign dflags)


    (argVars, stmts1, top1) <- arg_vars args_hints ([], nilOL, [])
    (fptr, stmts2, top2) <- getFunPtr funTy target

    -- What (if anything) must follow the call: a return after a tail
    -- call, 'unreachable' after a no-return call, or nothing.
    let retStmt | ccTy == TailCall  = unitOL $ Return Nothing
                | never_returns     = unitOL $ Unreachable
                | otherwise         = nilOL

    stmts3 <- getTrashStmts
    let stmts = stmts1 `appOL` stmts2 `appOL` stmts3

    -- make the actual call
    case retTy of
        LMVoid -> do
            let s1 = Expr $ Call ccTy fptr argVars fnAttrs
            let allStmts = stmts `snocOL` s1 `appOL` retStmt
            return (allStmts, top1 ++ top2)

        _ -> do
            (v1, s1) <- doExpr retTy $ Call ccTy fptr argVars fnAttrs
            -- get the return register
            let ret_reg [reg] = reg
                ret_reg t = panic $ "genCall: Bad number of registers! Can only handle"
                                ++ " 1, given " ++ show (length t) ++ "."
            let creg = ret_reg res
            vreg <- getCmmReg (CmmLocal creg)
            let allStmts = stmts `snocOL` s1
            if retTy == pLower (getVarType vreg)
                then do
                    -- Result type matches the register: store directly.
                    let s2 = Store v1 vreg
                    return (allStmts `snocOL` s2 `appOL` retStmt,
                                top1 ++ top2)

                else do
                    -- Types differ: cast the result to the register's
                    -- pointed-to type before storing.
                    let ty = pLower $ getVarType vreg
                    let op = case ty of
                            vt | isPointer vt -> LM_Bitcast
                               | isInt     vt -> LM_Ptrtoint
                               | otherwise    ->
                                   panic $ "genCall: CmmReg bad match for"
                                           ++ " returned type!"

                    (v2, s2) <- doExpr ty $ Cast op v1 ty
                    let s3 = Store v2 vreg
                    return (allStmts `snocOL` s2 `snocOL` s3
                                `appOL` retStmt, top1 ++ top2)
504
-- | Generate a call to an LLVM intrinsic that performs arithmetic operation
-- with overflow bit (i.e., returns a struct containing the actual result of the
-- operation and an overflow bit). This function will also extract the overflow
-- bit and zero-extend it (all the corresponding Cmm PrimOps represent the
-- overflow "bit" as a usual Int# or Word#).
genCallWithOverflow
  :: ForeignTarget -> Width -> [CmmFormal] -> [CmmActual] -> LlvmM StmtData
genCallWithOverflow t@(PrimTarget op) w [dstV, dstO] [lhs, rhs] = do
    -- So far this was only tested for the following three CallishMachOps.
    MASSERT( (op `elem` [MO_Add2 w, MO_AddIntC w, MO_SubIntC w]) )
    let width = widthToLlvmInt w
    -- This will do most of the work of generating the call to the intrinsic and
    -- extracting the values from the struct.
    (value, overflowBit, (stmts, top)) <-
      genCallExtract t w (lhs, rhs) (width, i1)
    -- value is i<width>, but overflowBit is i1, so we need to cast (Cmm expects
    -- both to be i<width>)
    (overflow, zext) <- doExpr width $ Cast LM_Zext overflowBit width
    dstRegV <- getCmmReg (CmmLocal dstV)
    dstRegO <- getCmmReg (CmmLocal dstO)
    let storeV = Store value dstRegV
        storeO = Store overflow dstRegO
    return (stmts `snocOL` zext `snocOL` storeV `snocOL` storeO, top)
genCallWithOverflow _ _ _ _ =
    panic "genCallExtract: wrong ForeignTarget or number of arguments"
530
-- | A helper function for genCallWithOverflow that handles generating the call
-- to the LLVM intrinsic and extracting the result from the struct to LlvmVars.
genCallExtract
    :: ForeignTarget           -- ^ PrimOp
    -> Width                   -- ^ Width of the operands.
    -> (CmmActual, CmmActual)  -- ^ Actual arguments.
    -> (LlvmType, LlvmType)    -- ^ LLVM types of the returned struct.
    -> LlvmM (LlvmVar, LlvmVar, StmtData)
genCallExtract target@(PrimTarget op) w (argA, argB) (llvmTypeA, llvmTypeB) = do
    let width = widthToLlvmInt w
        argTy = [width, width]
        retTy = LMStructU [llvmTypeA, llvmTypeB]

    -- Process the arguments.
    let args_hints = zip [argA, argB] (snd $ foreignTargetHints target)
    (argsV1, args1, top1) <- arg_vars args_hints ([], nilOL, [])
    (argsV2, args2) <- castVars $ zip argsV1 argTy

    -- Get the function and make the call.
    fname <- cmmPrimOpFunctions op
    (fptr, _, top2) <- getInstrinct fname retTy argTy
    -- We use StdCall for primops. See also the last case of genCall.
    (retV, call) <- doExpr retTy $ Call StdCall fptr argsV2 []

    -- This will result in a two element struct, we need to use "extractvalue"
    -- to get them out of it.
    (res1, ext1) <- doExpr llvmTypeA (ExtractV retV 0)
    (res2, ext2) <- doExpr llvmTypeB (ExtractV retV 1)

    let stmts = args1 `appOL` args2 `snocOL` call `snocOL` ext1 `snocOL` ext2
        tops = top1 ++ top2
    return (res1, res2, (stmts, tops))

genCallExtract _ _ _ _ =
    panic "genCallExtract: unsupported ForeignTarget"
566
-- Handle simple function call that only need simple type casting, of the form:
--   truncate arg >>= \a -> call(a) >>= zext
--
-- since GHC only really has i32 and i64 types and things like Word8 are backed
-- by an i32 and just present a logical i8 range. So we must handle conversions
-- from i32 to i8 explicitly as LLVM is strict about types.
genCallSimpleCast :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
                  -> LlvmM StmtData
genCallSimpleCast w t@(PrimTarget op) [dst] args = do
    let width = widthToLlvmInt w
        dstTy = cmmToLlvmType $ localRegType dst

    fname <- cmmPrimOpFunctions op
    (fptr, _, top3) <- getInstrinct fname width [width]

    dstV <- getCmmReg (CmmLocal dst)

    let (_, arg_hints) = foreignTargetHints t
    let args_hints = zip args arg_hints
    (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
    -- Narrow/widen arguments to the intrinsic's width, call it, then cast
    -- the result back to the destination register's type.
    (argsV', stmts4) <- castVars $ zip argsV [width]
    (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
    ([retV'], stmts5) <- castVars [(retV,dstTy)]
    let s2 = Store retV' dstV

    let stmts = stmts2 `appOL` stmts4 `snocOL`
                s1 `appOL` stmts5 `snocOL` s2
    return (stmts, top2 ++ top3)
genCallSimpleCast _ _ dsts _ =
    panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts")
597
-- | Create a function pointer from a target.
getFunPtr :: (LMString -> LlvmType) -> ForeignTarget
          -> LlvmM ExprData
getFunPtr funTy targ = case targ of
    -- Statically known label: reference the function directly.
    ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
        name <- strCLabel_llvm lbl
        getHsFunc' name (funTy name)

    -- Dynamic target: evaluate the expression and cast it to a function
    -- pointer of the expected type.
    ForeignTarget expr _ -> do
        (v1, stmts, top) <- exprToVar expr
        dflags <- getDynFlags
        let fty = funTy $ fsLit "dynamic"
            cast = case getVarType v1 of
                ty | isPointer ty -> LM_Bitcast
                ty | isInt ty     -> LM_Inttoptr

                ty -> panic $ "genCall: Expr is of bad type for function"
                              ++ " call! (" ++ showSDoc dflags (ppr ty) ++ ")"

        (v2,s1) <- doExpr (pLift fty) $ Cast cast v1 (pLift fty)
        return (v2, stmts `snocOL` s1, top)

    -- Prim op: look up (and declare if needed) the implementing function.
    PrimTarget mop -> do
        name <- cmmPrimOpFunctions mop
        let fty = funTy name
        getInstrinct2 name fty
624
-- | Conversion of call arguments.
-- Accumulates (in order) the variables, statements and global data for a
-- list of hinted actual arguments.
arg_vars :: [(CmmActual, ForeignHint)]
         -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
         -> LlvmM ([LlvmVar], LlvmStatements, [LlvmCmmDecl])

arg_vars [] (vars, stmts, tops)
  = return (vars, stmts, tops)

-- Address-hinted arguments are cast to i8* first.
arg_vars ((e, AddrHint):rest) (vars, stmts, tops)
  = do (v1, stmts', top') <- exprToVar e
       dflags <- getDynFlags
       let op = case getVarType v1 of
               ty | isPointer ty -> LM_Bitcast
               ty | isInt ty     -> LM_Inttoptr

               a  -> panic $ "genCall: Can't cast llvmType to i8*! ("
                           ++ showSDoc dflags (ppr a) ++ ")"

       (v2, s1) <- doExpr i8Ptr $ Cast op v1 i8Ptr
       arg_vars rest (vars ++ [v2], stmts `appOL` stmts' `snocOL` s1,
                               tops ++ top')

-- Other arguments are passed through unchanged.
arg_vars ((e, _):rest) (vars, stmts, tops)
  = do (v1, stmts', top') <- exprToVar e
       arg_vars rest (vars ++ [v1], stmts `appOL` stmts', tops ++ top')
650
651
-- | Cast a collection of LLVM variables to specific types.
-- Applies 'castVar' pairwise and collects the resulting variables and
-- cast statements.
castVars :: [(LlvmVar, LlvmType)]
         -> LlvmM ([LlvmVar], LlvmStatements)
castVars pairs = do
    (castedVars, castStmts) <- fmap unzip $ mapM (uncurry castVar) pairs
    return (castedVars, toOL castStmts)
659
-- | Cast an LLVM variable to a specific type, panicing if it can't be done.
castVar :: LlvmVar -> LlvmType -> LlvmM (LlvmVar, LlvmStatement)
castVar v t | getVarType v == t
            -- Already the right type: nothing to do.
            = return (v, Nop)

            | otherwise
            = do dflags <- getDynFlags
                 -- Choose the cast by source/destination type pair. Note
                 -- that int widening uses LM_Sext (sign extension).
                 let op = case (getVarType v, t) of
                      (LMInt n, LMInt m)
                          -> if n < m then LM_Sext else LM_Trunc
                      (vt, _) | isFloat vt && isFloat t
                          -> if llvmWidthInBits dflags vt < llvmWidthInBits dflags t
                                then LM_Fpext else LM_Fptrunc
                      (vt, _) | isInt vt && isFloat t       -> LM_Sitofp
                      (vt, _) | isFloat vt && isInt t       -> LM_Fptosi
                      (vt, _) | isInt vt && isPointer t     -> LM_Inttoptr
                      (vt, _) | isPointer vt && isInt t     -> LM_Ptrtoint
                      (vt, _) | isPointer vt && isPointer t -> LM_Bitcast
                      (vt, _) | isVector vt && isVector t   -> LM_Bitcast

                      (vt, _) -> panic $ "castVars: Can't cast this type ("
                                  ++ showSDoc dflags (ppr vt) ++ ") to (" ++ showSDoc dflags (ppr t) ++ ")"
                 doExpr t $ Cast op v t
683
684
-- | Decide what C function to use to implement a CallishMachOp.
-- Returns either a libc/libm function name or an LLVM intrinsic name;
-- ops marked 'unsupported' are lowered elsewhere (or not at all).
cmmPrimOpFunctions :: CallishMachOp -> LlvmM LMString
cmmPrimOpFunctions mop = do

  dflags <- getDynFlags
  -- Type suffixes for the memcpy/memmove/memset intrinsic names.
  let intrinTy1 = "p0i8.p0i8." ++ showSDoc dflags (ppr $ llvmWord dflags)
      intrinTy2 = "p0i8." ++ showSDoc dflags (ppr $ llvmWord dflags)
      unsupported = panic ("cmmPrimOpFunctions: " ++ show mop
                        ++ " not supported here")

  return $ case mop of
    MO_F32_Exp    -> fsLit "expf"
    MO_F32_Log    -> fsLit "logf"
    MO_F32_Sqrt   -> fsLit "llvm.sqrt.f32"
    MO_F32_Pwr    -> fsLit "llvm.pow.f32"

    MO_F32_Sin    -> fsLit "llvm.sin.f32"
    MO_F32_Cos    -> fsLit "llvm.cos.f32"
    MO_F32_Tan    -> fsLit "tanf"

    MO_F32_Asin   -> fsLit "asinf"
    MO_F32_Acos   -> fsLit "acosf"
    MO_F32_Atan   -> fsLit "atanf"

    MO_F32_Sinh   -> fsLit "sinhf"
    MO_F32_Cosh   -> fsLit "coshf"
    MO_F32_Tanh   -> fsLit "tanhf"

    MO_F64_Exp    -> fsLit "exp"
    MO_F64_Log    -> fsLit "log"
    MO_F64_Sqrt   -> fsLit "llvm.sqrt.f64"
    MO_F64_Pwr    -> fsLit "llvm.pow.f64"

    MO_F64_Sin    -> fsLit "llvm.sin.f64"
    MO_F64_Cos    -> fsLit "llvm.cos.f64"
    MO_F64_Tan    -> fsLit "tan"

    MO_F64_Asin   -> fsLit "asin"
    MO_F64_Acos   -> fsLit "acos"
    MO_F64_Atan   -> fsLit "atan"

    MO_F64_Sinh   -> fsLit "sinh"
    MO_F64_Cosh   -> fsLit "cosh"
    MO_F64_Tanh   -> fsLit "tanh"

    MO_Memcpy _   -> fsLit $ "llvm.memcpy."  ++ intrinTy1
    MO_Memmove _  -> fsLit $ "llvm.memmove." ++ intrinTy1
    MO_Memset _   -> fsLit $ "llvm.memset."  ++ intrinTy2

    -- Bit-manipulation intrinsics are parameterised by operand width.
    (MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
    (MO_BSwap w)  -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
    (MO_Clz w)    -> fsLit $ "llvm.ctlz."  ++ showSDoc dflags (ppr $ widthToLlvmInt w)
    (MO_Ctz w)    -> fsLit $ "llvm.cttz."  ++ showSDoc dflags (ppr $ widthToLlvmInt w)

    (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"

    MO_AddIntC w  -> fsLit $ "llvm.sadd.with.overflow."
                       ++ showSDoc dflags (ppr $ widthToLlvmInt w)
    MO_SubIntC w  -> fsLit $ "llvm.ssub.with.overflow."
                       ++ showSDoc dflags (ppr $ widthToLlvmInt w)
    MO_Add2 w     -> fsLit $ "llvm.uadd.with.overflow."
                       ++ showSDoc dflags (ppr $ widthToLlvmInt w)

    MO_S_QuotRem {}  -> unsupported
    MO_U_QuotRem {}  -> unsupported
    MO_U_QuotRem2 {} -> unsupported
    -- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the
    -- appropriate case of genCall.
    MO_U_Mul2 {}     -> unsupported
    MO_WriteBarrier  -> unsupported
    MO_Touch         -> unsupported
    MO_UF_Conv _     -> unsupported

    -- Atomics are lowered to LLVM instructions in genCall, not calls.
    MO_AtomicRead _  -> unsupported
    MO_AtomicRMW _ _ -> unsupported
    MO_AtomicWrite _ -> unsupported
    MO_Cmpxchg _     -> unsupported
762
-- | Tail function calls
genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData

-- Call to known function
genJump (CmmLit (CmmLabel lbl)) live = do
    (vf, stmts, top) <- getHsFunc live lbl
    (stgRegs, stgStmts) <- funEpilogue live
    -- Tail-call with the STG registers as arguments, then return.
    let s1  = Expr $ Call TailCall vf stgRegs llvmStdFunAttrs
    let s2  = Return Nothing
    return (stmts `appOL` stgStmts `snocOL` s1 `snocOL` s2, top)


-- Call to unknown function / address
genJump expr live = do
    fty <- llvmFunTy live
    (vf, stmts, top) <- exprToVar expr
    dflags <- getDynFlags

    -- Cast the computed address to the expected function-pointer type.
    let cast = case getVarType vf of
         ty | isPointer ty -> LM_Bitcast
         ty | isInt ty     -> LM_Inttoptr

         ty -> panic $ "genJump: Expr is of bad type for function call! ("
                     ++ showSDoc dflags (ppr ty) ++ ")"

    (v1, s1) <- doExpr (pLift fty) $ Cast cast vf (pLift fty)
    (stgRegs, stgStmts) <- funEpilogue live
    let s2 = Expr $ Call TailCall v1 stgRegs llvmStdFunAttrs
    let s3 = Return Nothing
    return (stmts `snocOL` s1 `appOL` stgStmts `snocOL` s2 `snocOL` s3,
            top)
794
795
-- | CmmAssign operation
--
-- We use stack allocated variables for CmmReg. The optimiser will replace
-- these with registers when possible.
genAssign :: CmmReg -> CmmExpr -> LlvmM StmtData
genAssign reg val = do
    vreg <- getCmmReg reg
    (vval, stmts2, top2) <- exprToVar val
    let stmts = stmts2

    let ty = (pLower . getVarType) vreg
    dflags <- getDynFlags
    case ty of
      -- Some registers are pointer types, so need to cast value to pointer
      LMPointer _ | getVarType vval == llvmWord dflags -> do
          (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
          let s2 = Store v vreg
          return (stmts `snocOL` s1 `snocOL` s2, top2)

      -- Vector registers: bitcast the value to the vector type first.
      LMVector _ _ -> do
          (v, s1) <- doExpr ty $ Cast LM_Bitcast vval ty
          let s2 = Store v vreg
          return (stmts `snocOL` s1 `snocOL` s2, top2)

      -- Types already agree: store the value directly.
      _ -> do
          let s1 = Store vval vreg
          return (stmts `snocOL` s1, top2)
823
824
-- | CmmStore operation
genStore :: CmmExpr -> CmmExpr -> LlvmM StmtData

-- First we try to detect a few common cases and produce better code for
-- these then the default case. We are mostly trying to detect Cmm code
-- like I32[Sp + n] and use 'getelementptr' operations instead of the
-- generic case that uses casts and pointer arithmetic
genStore addr@(CmmReg (CmmGlobal r)) val
    = genStore_fast addr r 0 val

genStore addr@(CmmRegOff (CmmGlobal r) n) val
    = genStore_fast addr r n val

-- Global register plus constant offset: I32[reg + n].
genStore addr@(CmmMachOp (MO_Add _) [
                (CmmReg (CmmGlobal r)),
                (CmmLit (CmmInt n _))])
                val
    = genStore_fast addr r (fromInteger n) val

-- Global register minus constant offset: I32[reg - n].
genStore addr@(CmmMachOp (MO_Sub _) [
                (CmmReg (CmmGlobal r)),
                (CmmLit (CmmInt n _))])
                val
    = genStore_fast addr r (negate $ fromInteger n) val

-- generic case
genStore addr val
    = do other <- getTBAAMeta otherN
         genStore_slow addr val other
854
-- | CmmStore operation
-- This is a special case for storing to a global register pointer
-- offset such as I32[Sp+8].
genStore_fast :: CmmExpr -> GlobalReg -> Int -> CmmExpr
              -> LlvmM StmtData
genStore_fast addr r n val = do
    dflags <- getDynFlags
    (gv, grt, regStmts) <- getCmmRegVal (CmmGlobal r)
    meta <- getTBAARegMeta r
    -- Express the byte offset as a whole number of elements of the
    -- register's pointee type; a nonzero remainder means getelementptr
    -- cannot address the location.
    let elemBytes = (llvmWidthInBits dflags . pLower) grt `div` 8
        (ix, off) = n `divMod` elemBytes
    if isPointer grt && off == 0
       then do
           (vval, stmts, top) <- exprToVar val
           (ptr, gep) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
           -- We might need a different pointer type, so check
           if pLower grt == getVarType vval
              -- Pointee type matches the stored value: store directly.
              then do
                  let st = MetaStmt meta $ Store vval ptr
                  return ( stmts `appOL` regStmts `snocOL` gep `snocOL` st
                         , top )
              -- Otherwise bitcast the pointer to the value's type first.
              else do
                  let ty = (pLift . getVarType) vval
                  (ptr', cast) <- doExpr ty $ Cast LM_Bitcast ptr ty
                  let st = MetaStmt meta $ Store vval ptr'
                  return ( stmts `appOL` regStmts `snocOL` gep
                                 `snocOL` cast `snocOL` st
                         , top )
       -- If its a bit type (or a misaligned offset) then we use the slow
       -- method since we can't avoid casting anyway.
       else genStore_slow addr val meta
888
889
-- | CmmStore operation
-- Generic case. Uses casts and pointer arithmetic if needed.
genStore_slow :: CmmExpr -> CmmExpr -> [MetaAnnot] -> LlvmM StmtData
genStore_slow addr val meta = do
    (vaddr, addrStmts, top1) <- exprToVar addr
    (vval, valStmts, top2) <- exprToVar val
    dflags <- getDynFlags

    let stmts = addrStmts `appOL` valStmts
        tops  = top1 ++ top2
    case getVarType vaddr of
        -- Storing a machine word into a pointer-to-pointer location:
        -- cast the word to the pointee pointer type before the store.
        LMPointer ty@(LMPointer _) | getVarType vval == llvmWord dflags -> do
            (v, cast) <- doExpr ty $ Cast LM_Inttoptr vval ty
            let store = MetaStmt meta $ Store v vaddr
            return (stmts `snocOL` cast `snocOL` store, tops)

        -- Address already has pointer type: plain store.
        LMPointer _ -> do
            let store = MetaStmt meta $ Store vval vaddr
            return (stmts `snocOL` store, tops)

        -- Address is a machine word: convert it to a pointer to the
        -- value's type, then store through it.
        i@(LMInt _) | i == llvmWord dflags -> do
            let ptrTy = pLift $ getVarType vval
            (ptr, cast) <- doExpr ptrTy $ Cast LM_Inttoptr vaddr ptrTy
            let store = MetaStmt meta $ Store vval ptr
            return (stmts `snocOL` cast `snocOL` store, tops)

        other ->
            pprPanic "genStore: ptr not right type!"
                (PprCmm.pprExpr addr <+> text (
                    "Size of Ptr: " ++ show (llvmPtrBits dflags) ++
                    ", Size of var: " ++ show (llvmWidthInBits dflags other) ++
                    ", Var: " ++ showSDoc dflags (ppr vaddr)))
922
923
-- | Unconditional branch to the given basic block.
genBranch :: BlockId -> LlvmM StmtData
genBranch target = return (unitOL (Branch (blockIdToLlvm target)), [])
929
930
-- | Conditional branch
genCondBranch :: CmmExpr -> BlockId -> BlockId -> LlvmM StmtData
genCondBranch cond idT idF = do
    -- See Note [Literals and branch conditions].
    (vc, stmts, top) <- exprToVarOpt i1Option cond
    if getVarType vc /= i1
       then do
           dflags <- getDynFlags
           panic $ "genCondBranch: Cond expr not bool! (" ++ showSDoc dflags (ppr vc) ++ ")"
       else
           -- The scrutinee was correctly narrowed to i1; branch on it.
           return ( stmts `snocOL` BranchIf vc (blockIdToLlvm idT)
                                              (blockIdToLlvm idF)
                  , top )
945
946 {- Note [Literals and branch conditions]
947 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
948
949 It is important that whenever we generate branch conditions for
950 literals like '1', they are properly narrowed to an LLVM expression of
951 type 'i1' (for bools.) Otherwise, nobody is happy. So when we convert
952 a CmmExpr to an LLVM expression for a branch conditional, exprToVarOpt
953 must be certain to return a properly narrowed type. genLit is
954 responsible for this, in the case of literal integers.
955
956 Often, we won't see direct statements like:
957
958 if(1) {
959 ...
960 } else {
961 ...
962 }
963
964 at this point in the pipeline, because the Glorious Code Generator
965 will do trivial branch elimination in the sinking pass (among others,)
966 which will eliminate the expression entirely.
967
968 However, it's certainly possible and reasonable for this to occur in
969 hand-written C-- code. Consider something like:
970
971 #ifndef SOME_CONDITIONAL
972 #define CHECK_THING(x) 1
973 #else
974 #define CHECK_THING(x) some_operation((x))
975 #endif
976
977 f() {
978
979 if (CHECK_THING(xyz)) {
980 ...
981 } else {
982 ...
983 }
984
985 }
986
987 In such an instance, CHECK_THING might result in an *expression* in
988 one case, and a *literal* in the other, depending on what in
989 particular was #define'd. So we must be sure to properly narrow the
990 literal in this case to i1 as it won't be eliminated beforehand.
991
992 For a real example of this, see ./rts/StgStdThunks.cmm
993
994 -}
995
996
997
-- | Switch branch
--
-- Lowers a Cmm switch to an LLVM @switch@ instruction. LLVM requires a
-- default destination; since out-of-range scrutinees are undefined in Cmm,
-- any case label serves as the default when none is given.
genSwitch :: CmmExpr -> SwitchTargets -> LlvmM StmtData
genSwitch cond ids = do
    (vc, stmts, top) <- exprToVar cond
    let ty = getVarType vc

    let labels = [ (mkIntLit ty ix, blockIdToLlvm b)
                 | (ix, b) <- switchTargetsCases ids ]
    -- out of range is undefined, so let's just branch to first label
    -- (previously used the partial 'head', which would crash with an
    -- unhelpful message on an empty switch; panic explicitly instead)
    let defLbl | Just l <- switchTargetsDefault ids = blockIdToLlvm l
               | (_, lbl) : _ <- labels             = lbl
               | otherwise = panic "genSwitch: switch with no cases or default!"

    let s1 = Switch vc defLbl labels
    return (stmts `snocOL` s1, top)
1012
1013
1014 -- -----------------------------------------------------------------------------
1015 -- * CmmExpr code generation
1016 --
1017
-- | An expression conversion return data:
--   * LlvmVar: The var holding the result of the expression
--   * LlvmStatements: Any statements needed to evaluate the expression
--   * LlvmCmmDecl: Any global data needed for this expression
type ExprData = (LlvmVar, LlvmStatements, [LlvmCmmDecl])

-- | Values which can be passed to 'exprToVar' to configure its
-- behaviour in certain circumstances.
--
-- Currently just used for determining if a comparison should return
-- a boolean (i1) or a word. See Note [Literals and branch conditions].
newtype EOption = EOption { i1Expected :: Bool }
-- XXX: EOption is an ugly and inefficient solution to this problem.

-- | i1 type expected (condition scrutinee).
i1Option :: EOption
i1Option = EOption True

-- | Word type expected (usual).
wordOption :: EOption
wordOption = EOption False

-- | Convert a CmmExpr to a list of LlvmStatements with the result of the
-- expression being stored in the returned LlvmVar.
--
-- Uses 'wordOption': comparisons are zero-extended to word size, which is
-- what Cmm expects everywhere except branch conditions.
exprToVar :: CmmExpr -> LlvmM ExprData
exprToVar = exprToVarOpt wordOption
1044
-- | Convert a CmmExpr to LLVM code; 'EOption' controls whether an i1 or a
-- word-sized result is produced for comparisons.
exprToVarOpt :: EOption -> CmmExpr -> LlvmM ExprData
exprToVarOpt opt expr = case expr of

    CmmLit lit -> genLit opt lit

    -- Plain (non-atomic) memory load.
    CmmLoad addr ty -> genLoad False addr ty

    -- Cmmreg in expression is the value, so must load. If you want actual
    -- reg pointer, call getCmmReg directly.
    CmmReg r -> do
        (val, ty, stmts) <- getCmmRegVal r
        if isPointer ty
           then do
               -- Cmm wants the value, so pointer types must be cast to ints
               dflags <- getDynFlags
               let wordTy = llvmWord dflags
               (asWord, cast) <- doExpr wordTy $ Cast LM_Ptrtoint val wordTy
               return (asWord, stmts `snocOL` cast, [])
           else return (val, stmts, [])

    CmmMachOp op args -> genMachOp opt op args

    -- Register plus offset: expand to the equivalent MachOp and recurse.
    CmmRegOff r i -> do
        dflags <- getDynFlags
        exprToVar $ expandCmmReg dflags (r, i)

    CmmStackSlot _ _ -> panic "exprToVar: CmmStackSlot not supported!"
1076
1077
-- | Handle CmmMachOp expressions
genMachOp :: EOption -> MachOp -> [CmmExpr] -> LlvmM ExprData

-- Unary Machop
genMachOp _ op [x] = case op of

    -- bitwise complement: x `xor` -1
    MO_Not w ->
        let all1 = mkIntLit (widthToLlvmInt w) (-1)
        in negate (widthToLlvmInt w) all1 LM_MO_Xor

    -- integer negation: 0 - x
    MO_S_Neg w ->
        let all0 = mkIntLit (widthToLlvmInt w) 0
        in negate (widthToLlvmInt w) all0 LM_MO_Sub

    -- float negation: -0.0 - x
    MO_F_Neg w ->
        let all0 = LMLitVar $ LMFloatLit (-0) (widthToLlvmFloat w)
        in negate (widthToLlvmFloat w) all0 LM_MO_FSub

    MO_SF_Conv _ w -> fiConv (widthToLlvmFloat w) LM_Sitofp
    MO_FS_Conv _ w -> fiConv (widthToLlvmInt w) LM_Fptosi

    MO_SS_Conv from to
        -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Sext

    MO_UU_Conv from to
        -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Zext

    MO_FF_Conv from to
        -> sameConv from (widthToLlvmFloat to) LM_Fptrunc LM_Fpext

    -- vector negations: as the scalar cases above, element-wise
    MO_VS_Neg len w ->
        let ty    = widthToLlvmInt w
            vecty = LMVector len ty
            all0  = LMIntLit (-0) ty
            all0s = LMLitVar $ LMVectorLit (replicate len all0)
        in negateVec vecty all0s LM_MO_Sub

    MO_VF_Neg len w ->
        let ty    = widthToLlvmFloat w
            vecty = LMVector len ty
            all0  = LMFloatLit (-0) ty
            all0s = LMLitVar $ LMVectorLit (replicate len all0)
        in negateVec vecty all0s LM_MO_FSub

    -- Handle unsupported cases explicitly so we get a warning
    -- of missing case when new MachOps added
    MO_Add _          -> panicOp
    MO_Mul _          -> panicOp
    MO_Sub _          -> panicOp
    MO_S_MulMayOflo _ -> panicOp
    MO_S_Quot _       -> panicOp
    MO_S_Rem _        -> panicOp
    MO_U_MulMayOflo _ -> panicOp
    MO_U_Quot _       -> panicOp
    MO_U_Rem _        -> panicOp

    MO_Eq _           -> panicOp
    MO_Ne _           -> panicOp
    MO_S_Ge _         -> panicOp
    MO_S_Gt _         -> panicOp
    MO_S_Le _         -> panicOp
    MO_S_Lt _         -> panicOp
    MO_U_Ge _         -> panicOp
    MO_U_Gt _         -> panicOp
    MO_U_Le _         -> panicOp
    MO_U_Lt _         -> panicOp

    MO_F_Add _        -> panicOp
    MO_F_Sub _        -> panicOp
    MO_F_Mul _        -> panicOp
    MO_F_Quot _       -> panicOp
    MO_F_Eq _         -> panicOp
    MO_F_Ne _         -> panicOp
    MO_F_Ge _         -> panicOp
    MO_F_Gt _         -> panicOp
    MO_F_Le _         -> panicOp
    MO_F_Lt _         -> panicOp

    MO_And _          -> panicOp
    MO_Or _           -> panicOp
    MO_Xor _          -> panicOp
    MO_Shl _          -> panicOp
    MO_U_Shr _        -> panicOp
    MO_S_Shr _        -> panicOp

    MO_V_Insert _ _   -> panicOp
    MO_V_Extract _ _  -> panicOp

    MO_V_Add _ _      -> panicOp
    MO_V_Sub _ _      -> panicOp
    MO_V_Mul _ _      -> panicOp

    MO_VS_Quot _ _    -> panicOp
    MO_VS_Rem _ _     -> panicOp

    MO_VU_Quot _ _    -> panicOp
    MO_VU_Rem _ _     -> panicOp

    MO_VF_Insert _ _  -> panicOp
    MO_VF_Extract _ _ -> panicOp

    MO_VF_Add _ _     -> panicOp
    MO_VF_Sub _ _     -> panicOp
    MO_VF_Mul _ _     -> panicOp
    MO_VF_Quot _ _    -> panicOp

    where
        -- Apply @negOp@ with the constant @v2@ as the *first* operand,
        -- e.g. @negate ty (-1) LM_MO_Xor@ computes @-1 `xor` x@.
        negate ty v2 negOp = do
            (vx, stmts, top) <- exprToVar x
            (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx
            return (v1, stmts `snocOL` s1, top)

        -- As 'negate', but first casts the operand to the vector type.
        negateVec ty v2 negOp = do
            (vx, stmts1, top) <- exprToVar x
            ([vx'], stmts2) <- castVars [(vx, ty)]
            (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx'
            return (v1, stmts1 `appOL` stmts2 `snocOL` s1, top)

        -- Float<->int conversion via a single cast instruction.
        fiConv ty convOp = do
            (vx, stmts, top) <- exprToVar x
            (v1, s1) <- doExpr ty $ Cast convOp vx ty
            return (v1, stmts `snocOL` s1, top)

        -- Same-class conversion: pick the narrowing or widening cast
        -- based on source and target widths.
        sameConv from ty reduce expand = do
            x'@(vx, stmts, top) <- exprToVar x
            let sameConv' op = do
                    (v1, s1) <- doExpr ty $ Cast op vx ty
                    return (v1, stmts `snocOL` s1, top)
            dflags <- getDynFlags
            let toWidth = llvmWidthInBits dflags ty
            -- LLVM doesn't like trying to convert to same width, so
            -- need to check for that as we do get Cmm code doing it.
            case widthInBits from of
                 w | w < toWidth -> sameConv' expand
                 w | w > toWidth -> sameConv' reduce
                 _w              -> return x'

        -- Fixed: the two chunks of this message used to concatenate
        -- without a separating space ("encounteredwith").
        panicOp = panic $ "LLVM.CodeGen.genMachOp: non unary op encountered "
                       ++ "with one argument! (" ++ show op ++ ")"
1217
-- Handle GlobalRegs pointers: global-register +/- constant is common
-- enough (Sp/Hp arithmetic) to get the getelementptr fast path.
genMachOp opt o@(MO_Add _) e@[CmmReg (CmmGlobal r), CmmLit (CmmInt n _)]
    = genMachOp_fast opt o r (fromInteger n) e

genMachOp opt o@(MO_Sub _) e@[CmmReg (CmmGlobal r), CmmLit (CmmInt n _)]
    = genMachOp_fast opt o r (negate (fromInteger n)) e

-- Generic case
genMachOp opt op e = genMachOp_slow opt op e
1227
1228
-- | Handle CmmMachOp expressions
-- This is a specialised method that handles Global register manipulations like
-- 'Sp - 16', using the getelementptr instruction.
genMachOp_fast :: EOption -> MachOp -> GlobalReg -> Int -> [CmmExpr]
               -> LlvmM ExprData
genMachOp_fast opt op r n e = do
    (gv, grt, regStmts) <- getCmmRegVal (CmmGlobal r)
    dflags <- getDynFlags
    -- Offset in whole elements of the register's pointee type; a nonzero
    -- remainder means getelementptr can't express the address.
    let (ix, off) = n `divMod` ((llvmWidthInBits dflags . pLower) grt `div` 8)
    if isPointer grt && off == 0
       then do
           (ptr, gep)  <- doExpr grt $ GetElemPtr True gv [toI32 ix]
           (var, cast) <- doExpr (llvmWord dflags) $
                              Cast LM_Ptrtoint ptr (llvmWord dflags)
           return (var, regStmts `snocOL` gep `snocOL` cast, [])
       else genMachOp_slow opt op e
1245
1246
-- | Handle CmmMachOp expressions
-- This handles all the cases not handled by the specialised genMachOp_fast.
genMachOp_slow :: EOption -> MachOp -> [CmmExpr] -> LlvmM ExprData

-- Element extraction: cast the operand to the expected vector type, then
-- extract the element at the (runtime) index.
genMachOp_slow _ (MO_V_Extract l w) [val, idx] = do
    (vval, stmts1, top1) <- exprToVar val
    (vidx, stmts2, top2) <- exprToVar idx
    ([vval'], stmts3) <- castVars [(vval, LMVector l ty)]
    (v1, s1) <- doExpr ty $ Extract vval' vidx
    return (v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1, top1 ++ top2)
  where
    ty = widthToLlvmInt w

-- As above, for float vectors.
genMachOp_slow _ (MO_VF_Extract l w) [val, idx] = do
    (vval, stmts1, top1) <- exprToVar val
    (vidx, stmts2, top2) <- exprToVar idx
    ([vval'], stmts3) <- castVars [(vval, LMVector l ty)]
    (v1, s1) <- doExpr ty $ Extract vval' vidx
    return (v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1, top1 ++ top2)
  where
    ty = widthToLlvmFloat w

-- Element insertion: produces a new vector with the element at the given
-- index replaced by @elt@.
genMachOp_slow _ (MO_V_Insert l w) [val, elt, idx] = do
    (vval, stmts1, top1) <- exprToVar val
    (velt, stmts2, top2) <- exprToVar elt
    (vidx, stmts3, top3) <- exprToVar idx
    ([vval'], stmts4) <- castVars [(vval, ty)]
    (v1, s1) <- doExpr ty $ Insert vval' velt vidx
    return (v1, stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` s1,
            top1 ++ top2 ++ top3)
  where
    ty = LMVector l (widthToLlvmInt w)

-- As above, for float vectors.
genMachOp_slow _ (MO_VF_Insert l w) [val, elt, idx] = do
    (vval, stmts1, top1) <- exprToVar val
    (velt, stmts2, top2) <- exprToVar elt
    (vidx, stmts3, top3) <- exprToVar idx
    ([vval'], stmts4) <- castVars [(vval, ty)]
    (v1, s1) <- doExpr ty $ Insert vval' velt vidx
    return (v1, stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` s1,
            top1 ++ top2 ++ top3)
  where
    ty = LMVector l (widthToLlvmFloat w)
1292
-- Binary MachOp: dispatch on the operator, mapping to the corresponding
-- LLVM comparison, arithmetic or vector operation.
genMachOp_slow opt op [x, y] = case op of

    MO_Eq _   -> genBinComp opt LM_CMP_Eq
    MO_Ne _   -> genBinComp opt LM_CMP_Ne

    MO_S_Gt _ -> genBinComp opt LM_CMP_Sgt
    MO_S_Ge _ -> genBinComp opt LM_CMP_Sge
    MO_S_Lt _ -> genBinComp opt LM_CMP_Slt
    MO_S_Le _ -> genBinComp opt LM_CMP_Sle

    MO_U_Gt _ -> genBinComp opt LM_CMP_Ugt
    MO_U_Ge _ -> genBinComp opt LM_CMP_Uge
    MO_U_Lt _ -> genBinComp opt LM_CMP_Ult
    MO_U_Le _ -> genBinComp opt LM_CMP_Ule

    MO_Add _ -> genBinMach LM_MO_Add
    MO_Sub _ -> genBinMach LM_MO_Sub
    MO_Mul _ -> genBinMach LM_MO_Mul

    MO_U_MulMayOflo _ -> panic "genMachOp: MO_U_MulMayOflo unsupported!"

    MO_S_MulMayOflo w -> isSMulOK w x y

    MO_S_Quot _ -> genBinMach LM_MO_SDiv
    MO_S_Rem _  -> genBinMach LM_MO_SRem

    MO_U_Quot _ -> genBinMach LM_MO_UDiv
    MO_U_Rem _  -> genBinMach LM_MO_URem

    MO_F_Eq _ -> genBinComp opt LM_CMP_Feq
    MO_F_Ne _ -> genBinComp opt LM_CMP_Fne
    MO_F_Gt _ -> genBinComp opt LM_CMP_Fgt
    MO_F_Ge _ -> genBinComp opt LM_CMP_Fge
    MO_F_Lt _ -> genBinComp opt LM_CMP_Flt
    MO_F_Le _ -> genBinComp opt LM_CMP_Fle

    MO_F_Add _  -> genBinMach LM_MO_FAdd
    MO_F_Sub _  -> genBinMach LM_MO_FSub
    MO_F_Mul _  -> genBinMach LM_MO_FMul
    MO_F_Quot _ -> genBinMach LM_MO_FDiv

    MO_And _   -> genBinMach LM_MO_And
    MO_Or _    -> genBinMach LM_MO_Or
    MO_Xor _   -> genBinMach LM_MO_Xor
    MO_Shl _   -> genBinMach LM_MO_Shl
    MO_U_Shr _ -> genBinMach LM_MO_LShr
    MO_S_Shr _ -> genBinMach LM_MO_AShr

    MO_V_Add l w   -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Add
    MO_V_Sub l w   -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Sub
    MO_V_Mul l w   -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Mul

    MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv
    MO_VS_Rem l w  -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem

    MO_VU_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_UDiv
    MO_VU_Rem l w  -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_URem

    MO_VF_Add l w  -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd
    MO_VF_Sub l w  -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub
    MO_VF_Mul l w  -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FMul
    MO_VF_Quot l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FDiv

    -- Handle unsupported cases explicitly so we get a warning of missing
    -- case when new MachOps are added.
    MO_Not _   -> panicOp
    MO_S_Neg _ -> panicOp
    MO_F_Neg _ -> panicOp

    MO_SF_Conv _ _ -> panicOp
    MO_FS_Conv _ _ -> panicOp
    MO_SS_Conv _ _ -> panicOp
    MO_UU_Conv _ _ -> panicOp
    MO_FF_Conv _ _ -> panicOp

    MO_V_Insert {}  -> panicOp
    MO_V_Extract {} -> panicOp

    MO_VS_Neg {} -> panicOp

    MO_VF_Insert {}  -> panicOp
    MO_VF_Extract {} -> panicOp

    MO_VF_Neg {} -> panicOp

    where
        -- Evaluate both operands and apply @binOp@; the operand types are
        -- expected to agree.
        binLlvmOp ty binOp = do
            (vx, stmts1, top1) <- exprToVar x
            (vy, stmts2, top2) <- exprToVar y
            if getVarType vx == getVarType vy
                then do
                    (v1, s1) <- doExpr (ty vx) $ binOp vx vy
                    return (v1, stmts1 `appOL` stmts2 `snocOL` s1,
                            top1 ++ top2)

                else do
                    -- Error. Continue anyway so we can debug the generated
                    -- ll file. The offending Cmm is emitted as comments.
                    dflags <- getDynFlags
                    let style = mkCodeStyle CStyle
                        toString doc = renderWithStyle dflags doc style
                        cmmToStr = (lines . toString . PprCmm.pprExpr)
                    let dx = Comment $ map fsLit $ cmmToStr x
                    let dy = Comment $ map fsLit $ cmmToStr y
                    (v1, s1) <- doExpr (ty vx) $ binOp vx vy
                    let allStmts = stmts1 `appOL` stmts2 `snocOL` dx
                                    `snocOL` dy `snocOL` s1
                    return (v1, allStmts, top1 ++ top2)

        -- As 'binLlvmOp', but casts both operands to @ty@ first (used for
        -- the vector operations).
        binCastLlvmOp ty binOp = do
            (vx, stmts1, top1) <- exprToVar x
            (vy, stmts2, top2) <- exprToVar y
            ([vx', vy'], stmts3) <- castVars [(vx, ty), (vy, ty)]
            (v1, s1) <- doExpr ty $ binOp vx' vy'
            return (v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1,
                    top1 ++ top2)

        -- | Need to use EOption here as Cmm expects word size results from
        -- comparisons while LLVM return i1. Need to extend to llvmWord type
        -- if expected. See Note [Literals and branch conditions].
        genBinComp opt cmp = do
            ed@(v1, stmts, top) <- binLlvmOp (\_ -> i1) (Compare cmp)
            dflags <- getDynFlags
            if getVarType v1 == i1
                then case i1Expected opt of
                    True -> return ed
                    False -> do
                        let w_ = llvmWord dflags
                        (v2, s1) <- doExpr w_ $ Cast LM_Zext v1 w_
                        return (v2, stmts `snocOL` s1, top)
                else
                    panic $ "genBinComp: Compare returned type other then i1! "
                        ++ (showSDoc dflags $ ppr $ getVarType v1)

        genBinMach op = binLlvmOp getVarType (LlvmOp op)

        genCastBinMach ty op = binCastLlvmOp ty (LlvmOp op)

        -- | Detect if overflow will occur in signed multiply of the two
        -- CmmExpr's. This is the LLVM assembly equivalent of the NCG
        -- implementation. Its much longer due to type information/safety.
        -- This should actually compile to only about 3 asm instructions.
        isSMulOK :: Width -> CmmExpr -> CmmExpr -> LlvmM ExprData
        isSMulOK _ x y = do
            (vx, stmts1, top1) <- exprToVar x
            (vy, stmts2, top2) <- exprToVar y

            dflags <- getDynFlags
            let word = getVarType vx
            let word2 = LMInt $ 2 * (llvmWidthInBits dflags $ getVarType vx)
            let shift = llvmWidthInBits dflags word
            let shift1 = toIWord dflags (shift - 1)
            let shift2 = toIWord dflags shift

            if isInt word
                then do
                    -- Multiply in double width, then compare the high half
                    -- against the sign extension of the low half: a nonzero
                    -- difference means the product overflowed.
                    (x1, s1)     <- doExpr word2 $ Cast LM_Sext vx word2
                    (y1, s2)     <- doExpr word2 $ Cast LM_Sext vy word2
                    (r1, s3)     <- doExpr word2 $ LlvmOp LM_MO_Mul x1 y1
                    (rlow1, s4)  <- doExpr word $ Cast LM_Trunc r1 word
                    (rlow2, s5)  <- doExpr word $ LlvmOp LM_MO_AShr rlow1 shift1
                    (rhigh1, s6) <- doExpr word2 $ LlvmOp LM_MO_AShr r1 shift2
                    (rhigh2, s7) <- doExpr word $ Cast LM_Trunc rhigh1 word
                    (dst, s8)    <- doExpr word $ LlvmOp LM_MO_Sub rlow2 rhigh2
                    let stmts = (unitOL s1) `snocOL` s2 `snocOL` s3 `snocOL` s4
                            `snocOL` s5 `snocOL` s6 `snocOL` s7 `snocOL` s8
                    return (dst, stmts1 `appOL` stmts2 `appOL` stmts,
                        top1 ++ top2)

                else
                    panic $ "isSMulOK: Not bit type! (" ++ showSDoc dflags (ppr word) ++ ")"

        -- Fixed: the two chunks of this message used to concatenate
        -- without a separating space ("encounteredwith").
        panicOp = panic $ "LLVM.CodeGen.genMachOp_slow: unary op encountered "
                       ++ "with two arguments! (" ++ show op ++ ")"

-- More then two expression, invalid!
genMachOp_slow _ _ _ = panic "genMachOp: More then 2 expressions in MachOp!"
1468
1469
-- | Handle CmmLoad expression.
--
-- Loads of the shapes @R@, @R + n@ and @R - n@ (for a global register R)
-- are routed to 'genLoad_fast', which can use a typed 'getelementptr';
-- everything else takes the generic 'genLoad_slow' path with its casts
-- and pointer arithmetic.
genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData

-- Global register used directly as the address.
genLoad atomic e@(CmmReg (CmmGlobal r)) ty = genLoad_fast atomic e r 0 ty

-- Global register plus a static byte offset.
genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty = genLoad_fast atomic e r n ty

-- Explicit addition of a global register and an integer literal.
genLoad atomic e@(CmmMachOp (MO_Add _) [CmmReg (CmmGlobal r), CmmLit (CmmInt n _)]) ty
    = genLoad_fast atomic e r (fromInteger n) ty

-- Explicit subtraction: as above with the offset negated.
genLoad atomic e@(CmmMachOp (MO_Sub _) [CmmReg (CmmGlobal r), CmmLit (CmmInt n _)]) ty
    = genLoad_fast atomic e r (negate (fromInteger n)) ty

-- Generic case: attach the "other" TBAA metadata and take the slow path.
genLoad atomic e ty = getTBAAMeta otherN >>= genLoad_slow atomic e ty
1499
-- | Handle CmmLoad expression.
-- This is a special case for loading from a global register pointer
-- offset such as I32[Sp+8].
genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType
             -> LlvmM ExprData
genLoad_fast atomic e r n ty = do
    dflags <- getDynFlags
    (gv, grt, regStmts) <- getCmmRegVal (CmmGlobal r)
    meta <- getTBAARegMeta r
    let ty' = cmmToLlvmType ty
        (ix, off) = n `divMod` ((llvmWidthInBits dflags . pLower) grt `div` 8)
    if isPointer grt && off == 0
       then do
           (ptr, gep) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
           -- We might need a different pointer type, so check
           -- NOTE(review): this compares the register's (pointer) type
           -- against the loaded value's type, unlike genStore_fast's
           -- @pLower grt == ...@ check — confirm whether it should be
           -- @pLower grt == ty'@ (the cast branch is harmless either way).
           if grt == ty'
              then do
                  -- were fine: load straight through the GEP'd pointer
                  (var, load) <- doExpr ty' (MExpr meta $ loadInstr ptr)
                  return (var, regStmts `snocOL` gep `snocOL` load, [])
              else do
                  -- cast to pointer type needed before loading
                  let pty = pLift ty'
                  (ptr', cast) <- doExpr pty $ Cast LM_Bitcast ptr pty
                  (var, load)  <- doExpr ty' (MExpr meta $ loadInstr ptr')
                  return (var, regStmts `snocOL` gep `snocOL` cast
                                        `snocOL` load, [])
       -- If its a bit type then we use the slow method since we can't
       -- avoid casting anyway.
       else genLoad_slow atomic e ty meta
  where
    -- Atomic loads are emitted as sequentially-consistent 'load atomic'.
    loadInstr ptr | atomic    = ALoad SyncSeqCst False ptr
                  | otherwise = Load ptr
1536
-- | Handle Cmm load expression.
-- Generic case. Uses casts and pointer arithmetic if needed.
genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
genLoad_slow atomic e ty meta = do
    (iptr, stmts, tops) <- exprToVar e
    dflags <- getDynFlags
    let resTy = cmmToLlvmType ty
    case getVarType iptr of
        -- Address already is a pointer: load straight through it.
        LMPointer _ -> do
            (dvar, load) <- doExpr resTy (MExpr meta $ loadInstr iptr)
            return (dvar, stmts `snocOL` load, tops)

        -- Address is a machine word: inttoptr it before loading.
        i@(LMInt _) | i == llvmWord dflags -> do
            let pty = LMPointer resTy
            (ptr, cast)  <- doExpr pty $ Cast LM_Inttoptr iptr pty
            (dvar, load) <- doExpr resTy (MExpr meta $ loadInstr ptr)
            return (dvar, stmts `snocOL` cast `snocOL` load, tops)

        other ->
            pprPanic "exprToVar: CmmLoad expression is not right type!"
                (PprCmm.pprExpr e <+> text (
                    "Size of Ptr: " ++ show (llvmPtrBits dflags) ++
                    ", Size of var: " ++ show (llvmWidthInBits dflags other) ++
                    ", Var: " ++ showSDoc dflags (ppr iptr)))
  where
    -- Atomic loads are emitted as sequentially-consistent 'load atomic'.
    loadInstr ptr | atomic    = ALoad SyncSeqCst False ptr
                  | otherwise = Load ptr
1565
1566
-- | Handle CmmReg expression. This will return a pointer to the stack
-- location of the register. Throws an error if it isn't allocated on
-- the stack.
getCmmReg :: CmmReg -> LlvmM LlvmVar
getCmmReg (CmmLocal (LocalReg un _)) = do
    mty <- varLookup un
    dflags <- getDynFlags
    case mty of
        -- Registered by funPrologue; the slot var is a pointer to the value.
        Just ety -> return (LMLocalVar un $ pLift ety)
        -- Should never happen: every local is written at some point, which
        -- triggers "funPrologue" to allocate it on the stack.
        Nothing -> fail $ "getCmmReg: Cmm register " ++ showSDoc dflags (ppr un) ++ " was not allocated!"

getCmmReg (CmmGlobal g) = do
    onStack <- checkStackReg g
    dflags <- getDynFlags
    if onStack
       then return (lmGlobalRegVar dflags g)
       else fail $ "getCmmReg: Cmm register " ++ showSDoc dflags (ppr g) ++ " not stack-allocated!"
1587
-- | Return the value of a given register, as well as its type. Might
-- need to be load from stack.
getCmmRegVal :: CmmReg -> LlvmM (LlvmVar, LlvmType, LlvmStatements)
getCmmRegVal reg = case reg of
    CmmGlobal g -> do
        onStack <- checkStackReg g
        if onStack
           then loadFromStack
           else do
               -- Passed in as a function argument: use it directly.
               dflags <- getDynFlags
               let arg = lmGlobalRegArg dflags g
               return (arg, getVarType arg, nilOL)
    _ -> loadFromStack
  where
    -- Read the register's current value out of its stack slot.
    loadFromStack = do
        slot <- getCmmReg reg
        let valTy = pLower $ getVarType slot
        (val, load) <- doExpr valTy (Load slot)
        return (val, valTy, unitOL load)
1605
-- | Allocate a local CmmReg on the stack
allocReg :: CmmReg -> (LlvmVar, LlvmStatements)
allocReg (CmmLocal (LocalReg un ty)) =
    -- One stack slot of the register's LLVM type; the returned var is a
    -- pointer to that slot.
    let slotTy = cmmToLlvmType ty
        slot   = LMLocalVar un (LMPointer slotTy)
    in (slot, unitOL $ Assignment slot (Alloca slotTy 1))

allocReg _ = panic $ "allocReg: Global reg encountered! Global registers should"
                   ++ " have been handled elsewhere!"
1616
1617
-- | Generate code for a literal
genLit :: EOption -> CmmLit -> LlvmM ExprData
genLit opt (CmmInt i w)
  -- See Note [Literals and branch conditions].
  = let width | i1Expected opt = i1
              | otherwise      = LMInt (widthInBits w)
        -- comm  = Comment [ fsLit $ "EOption: " ++ show opt
        --                 , fsLit $ "Width  : " ++ show w
        --                 , fsLit $ "Width' : " ++ show (widthInBits w)
        --                 ]
    in return (mkIntLit width i, nilOL, [])

genLit _ (CmmFloat r w)
  = return (LMLitVar $ LMFloatLit (fromRational r) (widthToLlvmFloat w),
              nilOL, [])

genLit opt (CmmVec ls)
  = do llvmLits <- mapM toLlvmLit ls
       return (LMLitVar $ LMVectorLit llvmLits, nilOL, [])
  where
    -- Elements of a vector literal must themselves be literals.
    toLlvmLit :: CmmLit -> LlvmM LlvmLit
    toLlvmLit lit = do
        (llvmLitVar, _, _) <- genLit opt lit
        case llvmLitVar of
          LMLitVar llvmLit -> return llvmLit
          _ -> panic "genLit"

-- Code label: take the address of the global and cast it to a word.
genLit _ cmm@(CmmLabel l)
  = do var <- getGlobalPtr =<< strCLabel_llvm l
       dflags <- getDynFlags
       let lmty = cmmToLlvmType $ cmmLitType dflags cmm
       (v1, s1) <- doExpr lmty $ Cast LM_Ptrtoint var (llvmWord dflags)
       return (v1, unitOL s1, [])

-- Label plus offset: label address plus a word-sized constant.
genLit opt (CmmLabelOff label off) = do
    dflags <- getDynFlags
    (vlbl, stmts, stat) <- genLit opt (CmmLabel label)
    let voff = toIWord dflags off
    (v1, s1) <- doExpr (getVarType vlbl) $ LlvmOp LM_MO_Add vlbl voff
    return (v1, stmts `snocOL` s1, stat)

-- Difference of two labels, plus an offset; both labels must have the
-- same integer width for the subtraction to make sense.
genLit opt (CmmLabelDiffOff l1 l2 off) = do
    dflags <- getDynFlags
    (vl1, stmts1, stat1) <- genLit opt (CmmLabel l1)
    (vl2, stmts2, stat2) <- genLit opt (CmmLabel l2)
    let voff = toIWord dflags off
    let ty1 = getVarType vl1
    let ty2 = getVarType vl2
    if (isInt ty1) && (isInt ty2)
       && (llvmWidthInBits dflags ty1 == llvmWidthInBits dflags ty2)
       then do
            (v1, s1) <- doExpr (getVarType vl1) $ LlvmOp LM_MO_Sub vl1 vl2
            (v2, s2) <- doExpr (getVarType v1 ) $ LlvmOp LM_MO_Add v1 voff
            return (v2, stmts1 `appOL` stmts2 `snocOL` s1 `snocOL` s2,
                        stat1 ++ stat2)
       else
            panic "genLit: CmmLabelDiffOff encountered with different label ty!"

genLit opt (CmmBlock b)
  = genLit opt (CmmLabel $ infoTblLbl b)

genLit _ CmmHighStackMark
  -- Fixed: the panic previously named "genStaticLit", pointing the error
  -- at the wrong function.
  = panic "genLit - CmmHighStackMark unsupported!"
1683
1684
1685 -- -----------------------------------------------------------------------------
1686 -- * Misc
1687 --
1688
-- | Find CmmRegs that get assigned and allocate them on the stack
--
-- Any register that gets written needs to be allocated on the
-- stack. This avoids having to map a CmmReg to an equivalent SSA form
-- and avoids having to deal with Phi node insertion. This is also
-- the approach recommended by LLVM developers.
--
-- On the other hand, this is unnecessarily verbose if the register in
-- question is never written. Therefore we skip it where we can to
-- save a few lines in the output and hopefully speed compilation up a
-- bit.
funPrologue :: LiveGlobalRegs -> [CmmBlock] -> LlvmM StmtData
funPrologue live cmmBlocks = do

    trash <- getTrashRegs
    -- Collect every register written anywhere in the procedure body.
    let getAssignedRegs :: CmmNode O O -> [CmmReg]
        getAssignedRegs (CmmAssign reg _)  = [reg]
        -- Calls will trash all registers. Unfortunately, this needs them to
        -- be stack-allocated in the first place.
        getAssignedRegs (CmmUnsafeForeignCall _ rs _) = map CmmGlobal trash ++ map CmmLocal rs
        getAssignedRegs _                  = []
        getRegsBlock (_, body, _)          = concatMap getAssignedRegs $ blockToList body
        assignedRegs = nub $ concatMap (getRegsBlock . blockSplit) cmmBlocks
        isLive r     = r `elem` alwaysLive || r `elem` live

    dflags <- getDynFlags
    -- One 'alloca' (plus initialising store for globals) per assigned reg.
    stmtss <- flip mapM assignedRegs $ \reg ->
      case reg of
        CmmLocal (LocalReg un _) -> do
          let (newv, stmts) = allocReg reg
          -- Record the slot's element type so getCmmReg can find it later.
          varInsert un (pLower $ getVarType newv)
          return stmts
        CmmGlobal r -> do
          -- NB: 'reg' and 'trash' here shadow the outer bindings.
          let reg   = lmGlobalRegVar dflags r
              arg   = lmGlobalRegArg dflags r
              ty    = (pLower . getVarType) reg
              trash = LMLitVar $ LMUndefLit ty
              -- Dead incoming registers are initialised with undef rather
              -- than the (meaningless) incoming argument value.
              rval  = if isLive r then arg else trash
              alloc = Assignment reg $ Alloca (pLower $ getVarType reg) 1
          markStackReg r
          return $ toOL [alloc, Store rval reg]

    return (concatOL stmtss, [])
1732
-- | Function epilogue. Load STG variables to use as argument for call.
-- STG Liveness optimisation done here.
funEpilogue :: LiveGlobalRegs -> LlvmM ([LlvmVar], LlvmStatements)
funEpilogue live = do

    dflags   <- getDynFlags
    platform <- getDynFlag targetPlatform

    let liveRegs = alwaysLive ++ live

        -- Floating-point/vector registers get skipped entirely when
        -- dead; other dead registers are passed as 'undef'.
        isSSE (FloatReg  _) = True
        isSSE (DoubleReg _) = True
        isSSE (XmmReg    _) = True
        isSSE (YmmReg    _) = True
        isSSE (ZmmReg    _) = True
        isSSE _             = False

        -- A live register is loaded from its stack slot.
        loadLive r = do
            (v, _, s) <- getCmmRegVal (CmmGlobal r)
            return (Just v, s)

        -- A dead register is passed as an 'undef' of the right type.
        loadUndef r = do
            let ty = pLower (getVarType (lmGlobalRegVar dflags r))
            return (Just (LMLitVar (LMUndefLit ty)), nilOL)

        loadReg r
            | r `elem` liveRegs = loadLive r
            | isSSE r           = return (Nothing, nilOL)
            | otherwise         = loadUndef r

    loads <- mapM loadReg (activeStgRegs platform)
    let (vars, stmts) = unzip loads
    return (catMaybes vars, concatOL stmts)
1764
1765
-- | A series of statements to trash all the STG registers.
--
-- In LLVM we pass the STG registers around everywhere in function calls.
-- So this means LLVM considers them live across the entire function, when
-- in reality they usually aren't. For caller-save registers across C calls
-- the saving and restoring of them is done by the Cmm code generator,
-- using Cmm local vars. So to stop LLVM saving them as well (and saving
-- all of them, since it thinks they're always live), we trash them just
-- before the call by assigning the 'undef' value to them. The ones we
-- need are restored from the Cmm local var and the ones we don't need
-- are fine to be trashed.
getTrashStmts :: LlvmM LlvmStatements
getTrashStmts = do
    regs <- getTrashRegs
    let -- Store an 'undef' of the register's pointee type into it.
        trashOne r = do
            reg <- getCmmReg (CmmGlobal r)
            let ty = pLower (getVarType reg)
            return (Store (LMLitVar (LMUndefLit ty)) reg)
    fmap toOL (mapM trashOne regs)
1785
-- | The set of registers that must be trashed around calls: the
-- caller-saved subset of the active STG registers on this platform.
getTrashRegs :: LlvmM [GlobalReg]
getTrashRegs = do
    platform <- getLlvmPlatform
    return (filter (callerSaves platform) (activeStgRegs platform))
1789
-- | Get a function pointer to the CLabel specified.
--
-- This is for Haskell functions; the standard Haskell function type is
-- assumed, so it does not work with foreign functions.
getHsFunc :: LiveGlobalRegs -> CLabel -> LlvmM ExprData
getHsFunc live lbl = do
    funTy   <- llvmFunTy live
    funName <- strCLabel_llvm lbl
    getHsFunc' funName funTy
1799
-- | Look up a global function pointer by name, bitcasting it to the
-- requested function type when the declared type differs.
getHsFunc' :: LMString -> LlvmType -> LlvmM ExprData
getHsFunc' name fty = do
    fun <- getGlobalPtr name
    if getVarType fun == fty
       then return (fun, nilOL, [])
       else do
           -- Declared at a different type: insert a bitcast.
           (castVar, castStmt) <- doExpr (pLift fty)
                                         (Cast LM_Bitcast fun (pLift fty))
           return (castVar, unitOL castStmt, [])
1808
-- | Create a new local var with a fresh unique.
mkLocalVar :: LlvmType -> LlvmM LlvmVar
mkLocalVar ty = fmap (\uniq -> LMLocalVar uniq ty) (runUs getUniqueM)
1814
1815
-- | Execute an expression, assigning result to a fresh local var.
doExpr :: LlvmType -> LlvmExpression -> LlvmM (LlvmVar, LlvmStatement)
doExpr ty expr = do
    resVar <- mkLocalVar ty
    return (resVar, Assignment resVar expr)
1821
1822
-- | Expand a CmmRegOff into the equivalent addition expression.
expandCmmReg :: DynFlags -> (CmmReg, Int) -> CmmExpr
expandCmmReg dflags (reg, off) =
    CmmMachOp (MO_Add width) [CmmReg reg, offExpr]
  where
    width   = typeWidth (cmmRegType dflags reg)
    offExpr = CmmLit (CmmInt (fromIntegral off) width)
1829
1830
-- | Convert a block id into an appropriate LLVM label variable.
blockIdToLlvm :: BlockId -> LlvmVar
blockIdToLlvm = flip LMLocalVar LMLabel . getUnique
1834
-- | Create an LLVM integer literal of the given type.
mkIntLit :: Integral a => LlvmType -> a -> LlvmVar
mkIntLit ty = LMLitVar . flip LMIntLit ty . toInteger
1838
-- | Convert an integral value to an i32 literal.
toI32 :: Integral a => a -> LlvmVar
toI32 i = mkIntLit i32 i
1842
-- | Convert an integral value to a literal of the target's word size.
toIWord :: Integral a => DynFlags -> a -> LlvmVar
toIWord dflags i = mkIntLit (llvmWord dflags) i
1845
1846
1847 -- | Error functions
panic :: String -> a
panic msg = Outputable.panic ("LlvmCodeGen.CodeGen." ++ msg)
1850
pprPanic :: String -> SDoc -> a
pprPanic msg = Outputable.pprPanic ("LlvmCodeGen.CodeGen." ++ msg)
1853
1854
-- | Returns TBAA meta data by unique
--
-- Panics immediately (with a descriptive message) when no metadata
-- node has been registered for the unique. The previous version bound
-- the lookup result with a lazy irrefutable pattern
-- (@let Just i = mi@) inside a list comprehension, which deferred the
-- failure to whenever the annotation was eventually forced — far from
-- the cause and with an opaque pattern-match error.
getTBAAMeta :: Unique -> LlvmM [MetaAnnot]
getTBAAMeta u = do
    mi <- getUniqMeta u
    case mi of
        Just i  -> return [MetaAnnot tbaa (MetaNode i)]
        Nothing -> panic "getTBAAMeta: no metadata registered for unique"
1860
-- | Returns TBAA meta data for given register
getTBAARegMeta :: GlobalReg -> LlvmM [MetaAnnot]
getTBAARegMeta reg = getTBAAMeta (getTBAA reg)
1864
1865
-- | A more convenient way of accumulating LLVM statements and declarations.
data LlvmAccum = LlvmAccum LlvmStatements [LlvmCmmDecl]

instance Monoid LlvmAccum where
    mempty = LlvmAccum nilOL []
    mappend (LlvmAccum stmts1 decls1) (LlvmAccum stmts2 decls2) =
        LlvmAccum (stmts1 `mappend` stmts2) (decls1 `mappend` decls2)
1873
-- | Lift an ExprData action into the Writer, recording its statements
-- and declarations and returning the result variable.
liftExprData :: LlvmM ExprData -> WriterT LlvmAccum LlvmM LlvmVar
liftExprData action = do
    (resVar, stmts, decls) <- lift action
    tell (LlvmAccum stmts decls)
    return resVar
1879
-- | Record a single LLVM statement in the accumulator.
statement :: LlvmStatement -> WriterT LlvmAccum LlvmM ()
statement s = tell (LlvmAccum (unitOL s) [])