Add 'addWordC#' PrimOp
compiler/llvmGen/LlvmCodeGen/CodeGen.hs (ghc.git)
1 {-# LANGUAGE CPP, GADTs #-}
2 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
3 -- ----------------------------------------------------------------------------
4 -- | Handle conversion of CmmProc to LLVM code.
5 --
6 module LlvmCodeGen.CodeGen ( genLlvmProc ) where
7
8 #include "HsVersions.h"
9
10 import GhcPrelude
11
12 import Llvm
13 import LlvmCodeGen.Base
14 import LlvmCodeGen.Regs
15
16 import BlockId
17 import CodeGen.Platform ( activeStgRegs, callerSaves )
18 import CLabel
19 import Cmm
20 import PprCmm
21 import CmmUtils
22 import CmmSwitch
23 import Hoopl.Block
24 import Hoopl.Graph
25 import Hoopl.Collections
26
27 import DynFlags
28 import FastString
29 import ForeignCall
30 import Outputable hiding (panic, pprPanic)
31 import qualified Outputable
32 import Platform
33 import OrdList
34 import UniqSupply
35 import Unique
36 import Util
37
38 import Control.Monad.Trans.Class
39 import Control.Monad.Trans.Writer
40
41 import Data.Semigroup ( Semigroup )
42 import qualified Data.Semigroup as Semigroup
43 import Data.List ( nub )
44 import Data.Maybe ( catMaybes )
45
46 type Atomic = Bool
47 type LlvmStatements = OrdList LlvmStatement
48
49 data Signage = Signed | Unsigned deriving (Eq, Show)
50
51 -- -----------------------------------------------------------------------------
52 -- | Top-level of the LLVM proc Code generator
53 --
54 genLlvmProc :: RawCmmDecl -> LlvmM [LlvmCmmDecl]
55 genLlvmProc (CmmProc infos lbl live graph) = do
56 let blocks = toBlockListEntryFirstFalseFallthrough graph
57 (lmblocks, lmdata) <- basicBlocksCodeGen live blocks
58 let info = mapLookup (g_entry graph) infos
59 proc = CmmProc info lbl live (ListGraph lmblocks)
60 return (proc:lmdata)
61
62 genLlvmProc _ = panic "genLlvmProc: case that shouldn't reach here!"
63
64 -- -----------------------------------------------------------------------------
65 -- * Block code generation
66 --
67
68 -- | Generate code for a list of blocks that make up a complete
69 -- procedure. The first block in the list is expected to be the entry
70 -- point and will get the prologue.
71 basicBlocksCodeGen :: LiveGlobalRegs -> [CmmBlock]
72 -> LlvmM ([LlvmBasicBlock], [LlvmCmmDecl])
73 basicBlocksCodeGen _ [] = panic "no entry block!"
74 basicBlocksCodeGen live (entryBlock:cmmBlocks)
75 = do (prologue, prologueTops) <- funPrologue live (entryBlock:cmmBlocks)
76
77 -- Generate code
78 (BasicBlock bid entry, entryTops) <- basicBlockCodeGen entryBlock
79 (blocks, topss) <- fmap unzip $ mapM basicBlockCodeGen cmmBlocks
80
81 -- Compose
82 let entryBlock = BasicBlock bid (fromOL prologue ++ entry)
83 return (entryBlock : blocks, prologueTops ++ entryTops ++ concat topss)
84
85
86 -- | Generate code for one block
87 basicBlockCodeGen :: CmmBlock -> LlvmM ( LlvmBasicBlock, [LlvmCmmDecl] )
88 basicBlockCodeGen block
89 = do let (_, nodes, tail) = blockSplit block
90 id = entryLabel block
91 (mid_instrs, top) <- stmtsToInstrs $ blockToList nodes
92 (tail_instrs, top') <- stmtToInstrs tail
93 let instrs = fromOL (mid_instrs `appOL` tail_instrs)
94 return (BasicBlock id instrs, top' ++ top)
95
96 -- -----------------------------------------------------------------------------
97 -- * CmmNode code generation
98 --
99
100 -- Data returned by a statement conversion:
101 -- * LlvmStatements: The compiled LLVM statements.
102 -- * LlvmCmmDecl: Any global data needed.
103 type StmtData = (LlvmStatements, [LlvmCmmDecl])
104
105
106 -- | Convert a list of CmmNodes to LlvmStatements
107 stmtsToInstrs :: [CmmNode e x] -> LlvmM StmtData
108 stmtsToInstrs stmts
109 = do (instrss, topss) <- fmap unzip $ mapM stmtToInstrs stmts
110 return (concatOL instrss, concat topss)
111
112
113 -- | Convert a CmmStmt to a list of LlvmStatements
114 stmtToInstrs :: CmmNode e x -> LlvmM StmtData
115 stmtToInstrs stmt = case stmt of
116
117 CmmComment _ -> return (nilOL, []) -- nuke comments
118 CmmTick _ -> return (nilOL, [])
119 CmmUnwind {} -> return (nilOL, [])
120
121 CmmAssign reg src -> genAssign reg src
122 CmmStore addr src -> genStore addr src
123
124 CmmBranch id -> genBranch id
125 CmmCondBranch arg true false likely
126 -> genCondBranch arg true false likely
127 CmmSwitch arg ids -> genSwitch arg ids
128
129 -- Foreign Call
130 CmmUnsafeForeignCall target res args
131 -> genCall target res args
132
133 -- Tail call
134 CmmCall { cml_target = arg,
135 cml_args_regs = live } -> genJump arg live
136
137 _ -> panic "Llvm.CodeGen.stmtToInstrs"
138
139 -- | Wrapper function to declare an intrinsic function by its function type
140 getInstrinct2 :: LMString -> LlvmType -> LlvmM ExprData
141 getInstrinct2 fname fty@(LMFunction funSig) = do
142
143 let fv = LMGlobalVar fname fty (funcLinkage funSig) Nothing Nothing Constant
144
145 fn <- funLookup fname
146 tops <- case fn of
147 Just _ ->
148 return []
149 Nothing -> do
150 funInsert fname fty
151 un <- getUniqueM
152 let lbl = mkAsmTempLabel un
153 return [CmmData (Section Data lbl) [([],[fty])]]
154
155 return (fv, nilOL, tops)
156
157 getInstrinct2 _ _ = error "getInstrinct2: Non-function type!"
158
159 -- | Declares an intrinsic function by return and parameter types
160 getInstrinct :: LMString -> LlvmType -> [LlvmType] -> LlvmM ExprData
161 getInstrinct fname retTy parTys =
162 let funSig = LlvmFunctionDecl fname ExternallyVisible CC_Ccc retTy
163 FixedArgs (tysToParams parTys) Nothing
164 fty = LMFunction funSig
165 in getInstrinct2 fname fty
166
167 -- | Memory barrier instruction for LLVM >= 3.0
168 barrier :: LlvmM StmtData
169 barrier = do
170 let s = Fence False SyncSeqCst
171 return (unitOL s, [])
172
173 -- | Foreign Calls
174 genCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
175 -> LlvmM StmtData
176
177 -- Write barrier needs to be handled specially as it is implemented as an LLVM
178 -- intrinsic function.
179 genCall (PrimTarget MO_WriteBarrier) _ _ = do
180 platform <- getLlvmPlatform
181 if platformArch platform `elem` [ArchX86, ArchX86_64, ArchSPARC]
182 then return (nilOL, [])
183 else barrier
184
185 genCall (PrimTarget MO_Touch) _ _
186 = return (nilOL, [])
187
188 genCall (PrimTarget (MO_UF_Conv w)) [dst] [e] = runStmtsDecls $ do
189 dstV <- getCmmRegW (CmmLocal dst)
190 let ty = cmmToLlvmType $ localRegType dst
191 width = widthToLlvmFloat w
192 castV <- lift $ mkLocalVar ty
193 ve <- exprToVarW e
194 statement $ Assignment castV $ Cast LM_Uitofp ve width
195 statement $ Store castV dstV
196
197 genCall (PrimTarget (MO_UF_Conv _)) [_] args =
198 panic $ "genCall: Too many arguments to MO_UF_Conv. " ++
199 "Can only handle 1, given" ++ show (length args) ++ "."
200
201 -- Handle prefetching data
202 genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
203 | 0 <= localityInt && localityInt <= 3 = runStmtsDecls $ do
204 let argTy = [i8Ptr, i32, i32, i32]
205 funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
206 CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing
207
208 let (_, arg_hints) = foreignTargetHints t
209 let args_hints' = zip args arg_hints
210 argVars <- arg_varsW args_hints' ([], nilOL, [])
211 fptr <- liftExprData $ getFunPtr funTy t
212 argVars' <- castVarsW Signed $ zip argVars argTy
213
214 doTrashStmts
215 let argSuffix = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1]
216 statement $ Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
217 | otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
218
219 -- Handle PopCnt, Clz, Ctz, BSwap, Pdep and Pext, which only need their
220 -- argument and return types converted
221 genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
222 genCallSimpleCast w t dsts args
223
224 genCall t@(PrimTarget (MO_Pdep w)) dsts args =
225 genCallSimpleCast2 w t dsts args
226 genCall t@(PrimTarget (MO_Pext w)) dsts args =
227 genCallSimpleCast2 w t dsts args
228 genCall t@(PrimTarget (MO_Clz w)) dsts args =
229 genCallSimpleCast w t dsts args
230 genCall t@(PrimTarget (MO_Ctz w)) dsts args =
231 genCallSimpleCast w t dsts args
232 genCall t@(PrimTarget (MO_BSwap w)) dsts args =
233 genCallSimpleCast w t dsts args
234
235 genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $ do
236 addrVar <- exprToVarW addr
237 nVar <- exprToVarW n
238 let targetTy = widthToLlvmInt width
239 ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
240 ptrVar <- doExprW (pLift targetTy) ptrExpr
241 dstVar <- getCmmRegW (CmmLocal dst)
242 let op = case amop of
243 AMO_Add -> LAO_Add
244 AMO_Sub -> LAO_Sub
245 AMO_And -> LAO_And
246 AMO_Nand -> LAO_Nand
247 AMO_Or -> LAO_Or
248 AMO_Xor -> LAO_Xor
249 retVar <- doExprW targetTy $ AtomicRMW op ptrVar nVar SyncSeqCst
250 statement $ Store retVar dstVar
251
252 genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = runStmtsDecls $ do
253 dstV <- getCmmRegW (CmmLocal dst)
254 v1 <- genLoadW True addr (localRegType dst)
255 statement $ Store v1 dstV
256
257 genCall (PrimTarget (MO_Cmpxchg _width))
258 [dst] [addr, old, new] = runStmtsDecls $ do
259 addrVar <- exprToVarW addr
260 oldVar <- exprToVarW old
261 newVar <- exprToVarW new
262 let targetTy = getVarType oldVar
263 ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
264 ptrVar <- doExprW (pLift targetTy) ptrExpr
265 dstVar <- getCmmRegW (CmmLocal dst)
266 retVar <- doExprW (LMStructU [targetTy,i1])
267 $ CmpXChg ptrVar oldVar newVar SyncSeqCst SyncSeqCst
268 retVar' <- doExprW targetTy $ ExtractV retVar 0
269 statement $ Store retVar' dstVar
270
271 genCall (PrimTarget (MO_AtomicWrite _width)) [] [addr, val] = runStmtsDecls $ do
272 addrVar <- exprToVarW addr
273 valVar <- exprToVarW val
274 let ptrTy = pLift $ getVarType valVar
275 ptrExpr = Cast LM_Inttoptr addrVar ptrTy
276 ptrVar <- doExprW ptrTy ptrExpr
277 statement $ Expr $ AtomicRMW LAO_Xchg ptrVar valVar SyncSeqCst
278
279 -- Handle the memcpy/memmove/memset functions specifically since llvm's
280 -- intrinsic versions take some extra parameters (alignment and a volatile flag).
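-- As a rough illustration (the register names here are made up, not taken
-- from real output), a word-sized memcpy is expected to come out along these
-- lines:
--
--   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n,
--                                        i32 <align>, i1 false)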
281 genCall t@(PrimTarget op) [] args
282 | Just align <- machOpMemcpyishAlign op = runStmtsDecls $ do
283 dflags <- getDynFlags
284 let isVolTy = [i1]
285 isVolVal = [mkIntLit i1 0]
286 argTy | MO_Memset _ <- op = [i8Ptr, i8, llvmWord dflags, i32] ++ isVolTy
287 | otherwise = [i8Ptr, i8Ptr, llvmWord dflags, i32] ++ isVolTy
288 funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
289 CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing
290
291 let (_, arg_hints) = foreignTargetHints t
292 let args_hints = zip args arg_hints
293 argVars <- arg_varsW args_hints ([], nilOL, [])
294 fptr <- getFunPtrW funTy t
295 argVars' <- castVarsW Signed $ zip argVars argTy
296
297 doTrashStmts
298 let alignVal = mkIntLit i32 align
299 arguments = argVars' ++ (alignVal:isVolVal)
300 statement $ Expr $ Call StdCall fptr arguments []
301
302 -- We handle MO_U_Mul2 by simply using a 'mul' instruction, but with operands
303 -- twice the width (we first zero-extend them), e.g., on 64-bit arch we will
304 -- generate 'mul' on 128-bit operands. Then we only need some plumbing to
305 -- extract the two 64-bit values out of the 128-bit result.
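-- As a rough sketch (register names are illustrative only), for W64 the code
-- below is expected to produce IR along these lines:
--
--   %a128 = zext i64 %a to i128
--   %b128 = zext i64 %b to i128
--   %m    = mul i128 %a128, %b128
--   %lo   = trunc i128 %m to i64
--   %s    = lshr i128 %m, 64
--   %hi   = trunc i128 %s to i64
--
-- with %hi and %lo then stored into the two destination registers.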
306 genCall (PrimTarget (MO_U_Mul2 w)) [dstH, dstL] [lhs, rhs] = runStmtsDecls $ do
307 let width = widthToLlvmInt w
308 bitWidth = widthInBits w
309 width2x = LMInt (bitWidth * 2)
310 -- First zero-extend the operands ('mul' instruction requires the operands
311 -- and the result to be of the same type). Note that we don't use 'castVars'
312 -- because it tries to do LM_Sext.
313 lhsVar <- exprToVarW lhs
314 rhsVar <- exprToVarW rhs
315 lhsExt <- doExprW width2x $ Cast LM_Zext lhsVar width2x
316 rhsExt <- doExprW width2x $ Cast LM_Zext rhsVar width2x
317 -- Do the actual multiplication (note that the result is also 2x width).
318 retV <- doExprW width2x $ LlvmOp LM_MO_Mul lhsExt rhsExt
319 -- Extract the lower bits of the result into retL.
320 retL <- doExprW width $ Cast LM_Trunc retV width
321 -- Now we right-shift the higher bits by width.
322 let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
323 retShifted <- doExprW width2x $ LlvmOp LM_MO_LShr retV widthLlvmLit
324 -- And extract them into retH.
325 retH <- doExprW width $ Cast LM_Trunc retShifted width
326 dstRegL <- getCmmRegW (CmmLocal dstL)
327 dstRegH <- getCmmRegW (CmmLocal dstH)
328 statement $ Store retL dstRegL
329 statement $ Store retH dstRegH
330
331 -- MO_U_QuotRem2 is another case we handle by widening the registers to double
332 -- the width and use normal LLVM instructions (similarly to the MO_U_Mul2). The
333 -- main difference here is that we need to combine two words into one register
334 -- and then use both 'udiv' and 'urem' instructions to compute the result.
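-- Roughly (names illustrative only), for W64 this is expected to look like:
--
--   %hi128  = zext i64 %lhsH to i128
--   %lo128  = zext i64 %lhsL to i128
--   %rhs128 = zext i64 %rhs  to i128
--   %shl    = shl i128 %hi128, 64
--   %lhs    = or i128 %shl, %lo128
--   %q      = udiv i128 %lhs, %rhs128
--   %r      = urem i128 %lhs, %rhs128
--
-- followed by truncating %q and %r back to i64 and storing them.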
335 genCall (PrimTarget (MO_U_QuotRem2 w))
336 [dstQ, dstR] [lhsH, lhsL, rhs] = runStmtsDecls $ do
337 let width = widthToLlvmInt w
338 bitWidth = widthInBits w
339 width2x = LMInt (bitWidth * 2)
340 -- First zero-extend all parameters to double width.
341 let zeroExtend expr = do
342 var <- exprToVarW expr
343 doExprW width2x $ Cast LM_Zext var width2x
344 lhsExtH <- zeroExtend lhsH
345 lhsExtL <- zeroExtend lhsL
346 rhsExt <- zeroExtend rhs
347 -- Now we combine the first two parameters (that represent the high and low
348 -- bits of the value). So first left-shift the high bits to their position
349 -- and then bit-or them with the low bits.
350 let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
351 lhsExtHShifted <- doExprW width2x $ LlvmOp LM_MO_Shl lhsExtH widthLlvmLit
352 lhsExt <- doExprW width2x $ LlvmOp LM_MO_Or lhsExtHShifted lhsExtL
353 -- Finally, we can call 'udiv' and 'urem' to compute the results.
354 retExtDiv <- doExprW width2x $ LlvmOp LM_MO_UDiv lhsExt rhsExt
355 retExtRem <- doExprW width2x $ LlvmOp LM_MO_URem lhsExt rhsExt
356 -- And since everything is in 2x width, we need to truncate the results and
357 -- then return them.
358 let narrow var = doExprW width $ Cast LM_Trunc var width
359 retDiv <- narrow retExtDiv
360 retRem <- narrow retExtRem
361 dstRegQ <- lift $ getCmmReg (CmmLocal dstQ)
362 dstRegR <- lift $ getCmmReg (CmmLocal dstR)
363 statement $ Store retDiv dstRegQ
364 statement $ Store retRem dstRegR
365
366 -- Handle the MO_{Add,Sub}IntC separately. The LLVM intrinsics return a struct
367 -- from which we need to extract the actual values.
368 genCall t@(PrimTarget (MO_AddIntC w)) [dstV, dstO] [lhs, rhs] =
369 genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
370 genCall t@(PrimTarget (MO_SubIntC w)) [dstV, dstO] [lhs, rhs] =
371 genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
372
373 -- Similar to MO_{Add,Sub}IntC, but MO_Add2 expects the first element of the
374 -- return tuple to be the overflow bit and the second element to contain the
375 -- actual result of the addition. So we still use genCallWithOverflow but swap
376 -- the return registers.
377 genCall t@(PrimTarget (MO_Add2 w)) [dstO, dstV] [lhs, rhs] =
378 genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
379
380 genCall t@(PrimTarget (MO_AddWordC w)) [dstV, dstO] [lhs, rhs] =
381 genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
382
383 genCall t@(PrimTarget (MO_SubWordC w)) [dstV, dstO] [lhs, rhs] =
384 genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
385
386 -- Handle all other foreign calls and prim ops.
387 genCall target res args = runStmtsDecls $ do
388 dflags <- getDynFlags
389
390 -- parameter types
391 let arg_type (_, AddrHint) = i8Ptr
392 -- cast pointers to i8*. Llvm equivalent of void*
393 arg_type (expr, _) = cmmToLlvmType $ cmmExprType dflags expr
394
395 -- ret type
396 let ret_type [] = LMVoid
397 ret_type [(_, AddrHint)] = i8Ptr
398 ret_type [(reg, _)] = cmmToLlvmType $ localRegType reg
399 ret_type t = panic $ "genCall: Too many return values! Can only handle"
400 ++ " 0 or 1, given " ++ show (length t) ++ "."
401
402 -- extract Cmm call convention, and translate to LLVM call convention
403 platform <- lift $ getLlvmPlatform
404 let lmconv = case target of
405 ForeignTarget _ (ForeignConvention conv _ _ _) ->
406 case conv of
407 StdCallConv -> case platformArch platform of
408 ArchX86 -> CC_X86_Stdcc
409 ArchX86_64 -> CC_X86_Stdcc
410 _ -> CC_Ccc
411 CCallConv -> CC_Ccc
412 CApiConv -> CC_Ccc
413 PrimCallConv -> panic "LlvmCodeGen.CodeGen.genCall: PrimCallConv"
414 JavaScriptCallConv -> panic "LlvmCodeGen.CodeGen.genCall: JavaScriptCallConv"
415
416 PrimTarget _ -> CC_Ccc
417
418 {-
419 Some of the possibilities here are a worry with the use of a custom
420 calling convention for passing STG args. In practice the more
421 dangerous combinations (e.g. StdCall + llvmGhcCC) don't occur.
422
423 The native code generator only handles StdCall and CCallConv.
424 -}
425
426 -- call attributes
427 let fnAttrs | never_returns = NoReturn : llvmStdFunAttrs
428 | otherwise = llvmStdFunAttrs
429
430 never_returns = case target of
431 ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns) -> True
432 _ -> False
433
434 -- fun type
435 let (res_hints, arg_hints) = foreignTargetHints target
436 let args_hints = zip args arg_hints
437 let ress_hints = zip res res_hints
438 let ccTy = StdCall -- tail calls should be done through CmmJump
439 let retTy = ret_type ress_hints
440 let argTy = tysToParams $ map arg_type args_hints
441 let funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
442 lmconv retTy FixedArgs argTy (llvmFunAlign dflags)
443
444
445 argVars <- arg_varsW args_hints ([], nilOL, [])
446 fptr <- getFunPtrW funTy target
447
448 let doReturn | ccTy == TailCall = statement $ Return Nothing
449 | never_returns = statement $ Unreachable
450 | otherwise = return ()
451
452 doTrashStmts
453
454 -- make the actual call
455 case retTy of
456 LMVoid -> do
457 statement $ Expr $ Call ccTy fptr argVars fnAttrs
458
459 _ -> do
460 v1 <- doExprW retTy $ Call ccTy fptr argVars fnAttrs
461 -- get the return register
462 let ret_reg [reg] = reg
463 ret_reg t = panic $ "genCall: Bad number of registers! Can only handle"
464 ++ " 1, given " ++ show (length t) ++ "."
465 let creg = ret_reg res
466 vreg <- getCmmRegW (CmmLocal creg)
467 if retTy == pLower (getVarType vreg)
468 then do
469 statement $ Store v1 vreg
470 doReturn
471 else do
472 let ty = pLower $ getVarType vreg
473 let op = case ty of
474 vt | isPointer vt -> LM_Bitcast
475 | isInt vt -> LM_Ptrtoint
476 | otherwise ->
477 panic $ "genCall: CmmReg bad match for"
478 ++ " returned type!"
479
480 v2 <- doExprW ty $ Cast op v1 ty
481 statement $ Store v2 vreg
482 doReturn
483
484 -- | Generate a call to an LLVM intrinsic that performs an arithmetic operation
485 -- with an overflow bit (i.e., returns a struct containing the actual result of
486 -- the operation and an overflow bit). This function will also extract the
487 -- overflow bit and zero-extend it (all the corresponding Cmm PrimOps represent
488 -- the overflow "bit" as a usual Int# or Word#).
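-- As an illustrative sketch (names made up), a W64 MO_AddWordC is expected to
-- generate something like:
--
--   %r    = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
--   %val  = extractvalue {i64, i1} %r, 0
--   %ovf1 = extractvalue {i64, i1} %r, 1
--   %ovf  = zext i1 %ovf1 to i64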
489 genCallWithOverflow
490 :: ForeignTarget -> Width -> [CmmFormal] -> [CmmActual] -> LlvmM StmtData
491 genCallWithOverflow t@(PrimTarget op) w [dstV, dstO] [lhs, rhs] = do
492 -- So far this was only tested for the following five CallishMachOps.
493 let valid = op `elem` [ MO_Add2 w
494 , MO_AddIntC w
495 , MO_SubIntC w
496 , MO_AddWordC w
497 , MO_SubWordC w
498 ]
499 MASSERT(valid)
500 let width = widthToLlvmInt w
501 -- This will do most of the work of generating the call to the intrinsic and
502 -- extracting the values from the struct.
503 (value, overflowBit, (stmts, top)) <-
504 genCallExtract t w (lhs, rhs) (width, i1)
505 -- value is i<width>, but overflowBit is i1, so we need to cast (Cmm expects
506 -- both to be i<width>)
507 (overflow, zext) <- doExpr width $ Cast LM_Zext overflowBit width
508 dstRegV <- getCmmReg (CmmLocal dstV)
509 dstRegO <- getCmmReg (CmmLocal dstO)
510 let storeV = Store value dstRegV
511 storeO = Store overflow dstRegO
512 return (stmts `snocOL` zext `snocOL` storeV `snocOL` storeO, top)
513 genCallWithOverflow _ _ _ _ =
514 panic "genCallExtract: wrong ForeignTarget or number of arguments"
515
516 -- | A helper function for genCallWithOverflow that handles generating the call
517 -- to the LLVM intrinsic and extracting the result from the struct to LlvmVars.
518 genCallExtract
519 :: ForeignTarget -- ^ PrimOp
520 -> Width -- ^ Width of the operands.
521 -> (CmmActual, CmmActual) -- ^ Actual arguments.
522 -> (LlvmType, LlvmType) -- ^ LLVM types of the returned struct.
523 -> LlvmM (LlvmVar, LlvmVar, StmtData)
524 genCallExtract target@(PrimTarget op) w (argA, argB) (llvmTypeA, llvmTypeB) = do
525 let width = widthToLlvmInt w
526 argTy = [width, width]
527 retTy = LMStructU [llvmTypeA, llvmTypeB]
528
529 -- Process the arguments.
530 let args_hints = zip [argA, argB] (snd $ foreignTargetHints target)
531 (argsV1, args1, top1) <- arg_vars args_hints ([], nilOL, [])
532 (argsV2, args2) <- castVars Signed $ zip argsV1 argTy
533
534 -- Get the function and make the call.
535 fname <- cmmPrimOpFunctions op
536 (fptr, _, top2) <- getInstrinct fname retTy argTy
537 -- We use StdCall for primops. See also the last case of genCall.
538 (retV, call) <- doExpr retTy $ Call StdCall fptr argsV2 []
539
540 -- This will result in a two-element struct; we need to use "extractvalue"
541 -- to get the values out of it.
542 (res1, ext1) <- doExpr llvmTypeA (ExtractV retV 0)
543 (res2, ext2) <- doExpr llvmTypeB (ExtractV retV 1)
544
545 let stmts = args1 `appOL` args2 `snocOL` call `snocOL` ext1 `snocOL` ext2
546 tops = top1 ++ top2
547 return (res1, res2, (stmts, tops))
548
549 genCallExtract _ _ _ _ =
550 panic "genCallExtract: unsupported ForeignTarget"
551
552 -- Handle simple function calls that only need simple type casting, of the form:
553 --   truncate arg >>= \a -> call(a) >>= zext
554 --
555 -- This is needed since GHC only really has i32 and i64 types; things like Word8
556 -- are backed by an i32 and just present a logical i8 range. So we must handle
557 -- conversions from i32 to i8 explicitly, as LLVM is strict about types.
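-- For example (a sketch with made-up names), a popcnt on an 8-bit value kept
-- in a word-sized variable should come out roughly as:
--
--   %x8 = trunc i64 %x to i8
--   %c8 = call i8 @llvm.ctpop.i8(i8 %x8)
--   %c  = zext i8 %c8 to i64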
558 genCallSimpleCast :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
559 -> LlvmM StmtData
560 genCallSimpleCast w t@(PrimTarget op) [dst] args = do
561 let width = widthToLlvmInt w
562 dstTy = cmmToLlvmType $ localRegType dst
563
564 fname <- cmmPrimOpFunctions op
565 (fptr, _, top3) <- getInstrinct fname width [width]
566
567 dstV <- getCmmReg (CmmLocal dst)
568
569 let (_, arg_hints) = foreignTargetHints t
570 let args_hints = zip args arg_hints
571 (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
572 (argsV', stmts4) <- castVars Signed $ zip argsV [width]
573 (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
574 ([retV'], stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
575 let s2 = Store retV' dstV
576
577 let stmts = stmts2 `appOL` stmts4 `snocOL`
578 s1 `appOL` stmts5 `snocOL` s2
579 return (stmts, top2 ++ top3)
580 genCallSimpleCast _ _ dsts _ =
581 panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts")
582
583 -- Handle simple function calls that only need simple type casting, of the form:
584 --   truncate arg >>= \a -> call(a) >>= zext
585 --
586 -- This is needed since GHC only really has i32 and i64 types; things like Word8
587 -- are backed by an i32 and just present a logical i8 range. So we must handle
588 -- conversions from i32 to i8 explicitly, as LLVM is strict about types.
589 genCallSimpleCast2 :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
590 -> LlvmM StmtData
591 genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do
592 let width = widthToLlvmInt w
593 dstTy = cmmToLlvmType $ localRegType dst
594
595 fname <- cmmPrimOpFunctions op
596 (fptr, _, top3) <- getInstrinct fname width (const width <$> args)
597
598 dstV <- getCmmReg (CmmLocal dst)
599
600 let (_, arg_hints) = foreignTargetHints t
601 let args_hints = zip args arg_hints
602 (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
603 (argsV', stmts4) <- castVars Signed $ zip argsV (const width <$> argsV)
604 (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
605 ([retV'], stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
606 let s2 = Store retV' dstV
607
608 let stmts = stmts2 `appOL` stmts4 `snocOL`
609 s1 `appOL` stmts5 `snocOL` s2
610 return (stmts, top2 ++ top3)
611 genCallSimpleCast2 _ _ dsts _ =
612 panic ("genCallSimpleCast2: " ++ show (length dsts) ++ " dsts")
613
614 -- | Create a function pointer from a target.
615 getFunPtrW :: (LMString -> LlvmType) -> ForeignTarget
616 -> WriterT LlvmAccum LlvmM LlvmVar
617 getFunPtrW funTy targ = liftExprData $ getFunPtr funTy targ
618
619 -- | Create a function pointer from a target.
620 getFunPtr :: (LMString -> LlvmType) -> ForeignTarget
621 -> LlvmM ExprData
622 getFunPtr funTy targ = case targ of
623 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
624 name <- strCLabel_llvm lbl
625 getHsFunc' name (funTy name)
626
627 ForeignTarget expr _ -> do
628 (v1, stmts, top) <- exprToVar expr
629 dflags <- getDynFlags
630 let fty = funTy $ fsLit "dynamic"
631 cast = case getVarType v1 of
632 ty | isPointer ty -> LM_Bitcast
633 ty | isInt ty -> LM_Inttoptr
634
635 ty -> panic $ "genCall: Expr is of bad type for function"
636 ++ " call! (" ++ showSDoc dflags (ppr ty) ++ ")"
637
638 (v2,s1) <- doExpr (pLift fty) $ Cast cast v1 (pLift fty)
639 return (v2, stmts `snocOL` s1, top)
640
641 PrimTarget mop -> do
642 name <- cmmPrimOpFunctions mop
643 let fty = funTy name
644 getInstrinct2 name fty
645
646 -- | Conversion of call arguments.
647 arg_varsW :: [(CmmActual, ForeignHint)]
648 -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
649 -> WriterT LlvmAccum LlvmM [LlvmVar]
650 arg_varsW xs ys = do
651 (vars, stmts, decls) <- lift $ arg_vars xs ys
652 tell $ LlvmAccum stmts decls
653 return vars
654
655 -- | Conversion of call arguments.
656 arg_vars :: [(CmmActual, ForeignHint)]
657 -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
658 -> LlvmM ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
659
660 arg_vars [] (vars, stmts, tops)
661 = return (vars, stmts, tops)
662
663 arg_vars ((e, AddrHint):rest) (vars, stmts, tops)
664 = do (v1, stmts', top') <- exprToVar e
665 dflags <- getDynFlags
666 let op = case getVarType v1 of
667 ty | isPointer ty -> LM_Bitcast
668 ty | isInt ty -> LM_Inttoptr
669
670 a -> panic $ "genCall: Can't cast llvmType to i8*! ("
671 ++ showSDoc dflags (ppr a) ++ ")"
672
673 (v2, s1) <- doExpr i8Ptr $ Cast op v1 i8Ptr
674 arg_vars rest (vars ++ [v2], stmts `appOL` stmts' `snocOL` s1,
675 tops ++ top')
676
677 arg_vars ((e, _):rest) (vars, stmts, tops)
678 = do (v1, stmts', top') <- exprToVar e
679 arg_vars rest (vars ++ [v1], stmts `appOL` stmts', tops ++ top')
680
681
682 -- | Cast a collection of LLVM variables to specific types.
683 castVarsW :: Signage
684 -> [(LlvmVar, LlvmType)]
685 -> WriterT LlvmAccum LlvmM [LlvmVar]
686 castVarsW signage vars = do
687 (vars, stmts) <- lift $ castVars signage vars
688 tell $ LlvmAccum stmts mempty
689 return vars
690
691 -- | Cast a collection of LLVM variables to specific types.
692 castVars :: Signage -> [(LlvmVar, LlvmType)]
693 -> LlvmM ([LlvmVar], LlvmStatements)
694 castVars signage vars = do
695 done <- mapM (uncurry (castVar signage)) vars
696 let (vars', stmts) = unzip done
697 return (vars', toOL stmts)
698
699 -- | Cast an LLVM variable to a specific type, panicking if it can't be done.
700 castVar :: Signage -> LlvmVar -> LlvmType -> LlvmM (LlvmVar, LlvmStatement)
701 castVar signage v t | getVarType v == t
702 = return (v, Nop)
703
704 | otherwise
705 = do dflags <- getDynFlags
706 let op = case (getVarType v, t) of
707 (LMInt n, LMInt m)
708 -> if n < m then extend else LM_Trunc
709 (vt, _) | isFloat vt && isFloat t
710 -> if llvmWidthInBits dflags vt < llvmWidthInBits dflags t
711 then LM_Fpext else LM_Fptrunc
712 (vt, _) | isInt vt && isFloat t -> LM_Sitofp
713 (vt, _) | isFloat vt && isInt t -> LM_Fptosi
714 (vt, _) | isInt vt && isPointer t -> LM_Inttoptr
715 (vt, _) | isPointer vt && isInt t -> LM_Ptrtoint
716 (vt, _) | isPointer vt && isPointer t -> LM_Bitcast
717 (vt, _) | isVector vt && isVector t -> LM_Bitcast
718
719 (vt, _) -> panic $ "castVars: Can't cast this type ("
720 ++ showSDoc dflags (ppr vt) ++ ") to (" ++ showSDoc dflags (ppr t) ++ ")"
721 doExpr t $ Cast op v t
722 where extend = case signage of
723 Signed -> LM_Sext
724 Unsigned -> LM_Zext
725
726
727 cmmPrimOpRetValSignage :: CallishMachOp -> Signage
728 cmmPrimOpRetValSignage mop = case mop of
729 MO_Pdep _ -> Unsigned
730 MO_Pext _ -> Unsigned
731 _ -> Signed
732
733 -- | Decide what C function to use to implement a CallishMachOp
734 cmmPrimOpFunctions :: CallishMachOp -> LlvmM LMString
735 cmmPrimOpFunctions mop = do
736
737 dflags <- getDynFlags
738 let intrinTy1 = "p0i8.p0i8." ++ showSDoc dflags (ppr $ llvmWord dflags)
739 intrinTy2 = "p0i8." ++ showSDoc dflags (ppr $ llvmWord dflags)
740 unsupported = panic ("cmmPrimOpFunctions: " ++ show mop
741 ++ " not supported here")
742
743 return $ case mop of
744 MO_F32_Exp -> fsLit "expf"
745 MO_F32_Log -> fsLit "logf"
746 MO_F32_Sqrt -> fsLit "llvm.sqrt.f32"
747 MO_F32_Fabs -> fsLit "llvm.fabs.f32"
748 MO_F32_Pwr -> fsLit "llvm.pow.f32"
749
750 MO_F32_Sin -> fsLit "llvm.sin.f32"
751 MO_F32_Cos -> fsLit "llvm.cos.f32"
752 MO_F32_Tan -> fsLit "tanf"
753
754 MO_F32_Asin -> fsLit "asinf"
755 MO_F32_Acos -> fsLit "acosf"
756 MO_F32_Atan -> fsLit "atanf"
757
758 MO_F32_Sinh -> fsLit "sinhf"
759 MO_F32_Cosh -> fsLit "coshf"
760 MO_F32_Tanh -> fsLit "tanhf"
761
762 MO_F64_Exp -> fsLit "exp"
763 MO_F64_Log -> fsLit "log"
764 MO_F64_Sqrt -> fsLit "llvm.sqrt.f64"
765 MO_F64_Fabs -> fsLit "llvm.fabs.f64"
766 MO_F64_Pwr -> fsLit "llvm.pow.f64"
767
768 MO_F64_Sin -> fsLit "llvm.sin.f64"
769 MO_F64_Cos -> fsLit "llvm.cos.f64"
770 MO_F64_Tan -> fsLit "tan"
771
772 MO_F64_Asin -> fsLit "asin"
773 MO_F64_Acos -> fsLit "acos"
774 MO_F64_Atan -> fsLit "atan"
775
776 MO_F64_Sinh -> fsLit "sinh"
777 MO_F64_Cosh -> fsLit "cosh"
778 MO_F64_Tanh -> fsLit "tanh"
779
780 MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1
781 MO_Memmove _ -> fsLit $ "llvm.memmove." ++ intrinTy1
782 MO_Memset _ -> fsLit $ "llvm.memset." ++ intrinTy2
783 MO_Memcmp _ -> fsLit $ "memcmp"
784
785 (MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
786 (MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
787 (MO_Clz w) -> fsLit $ "llvm.ctlz." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
788 (MO_Ctz w) -> fsLit $ "llvm.cttz." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
789
790 (MO_Pdep w) -> let w' = showSDoc dflags (ppr $ widthInBits w)
791 in if isBmi2Enabled dflags
792 then fsLit $ "llvm.x86.bmi.pdep." ++ w'
793 else fsLit $ "hs_pdep" ++ w'
794 (MO_Pext w) -> let w' = showSDoc dflags (ppr $ widthInBits w)
795 in if isBmi2Enabled dflags
796 then fsLit $ "llvm.x86.bmi.pext." ++ w'
797 else fsLit $ "hs_pext" ++ w'
798
799 (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
800
801 MO_AddIntC w -> fsLit $ "llvm.sadd.with.overflow."
802 ++ showSDoc dflags (ppr $ widthToLlvmInt w)
803 MO_SubIntC w -> fsLit $ "llvm.ssub.with.overflow."
804 ++ showSDoc dflags (ppr $ widthToLlvmInt w)
805 MO_Add2 w -> fsLit $ "llvm.uadd.with.overflow."
806 ++ showSDoc dflags (ppr $ widthToLlvmInt w)
807 MO_AddWordC w -> fsLit $ "llvm.uadd.with.overflow."
808 ++ showSDoc dflags (ppr $ widthToLlvmInt w)
809 MO_SubWordC w -> fsLit $ "llvm.usub.with.overflow."
810 ++ showSDoc dflags (ppr $ widthToLlvmInt w)
811
812 MO_S_QuotRem {} -> unsupported
813 MO_U_QuotRem {} -> unsupported
814 MO_U_QuotRem2 {} -> unsupported
815 -- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the
816 -- appropriate case of genCall.
817 MO_U_Mul2 {} -> unsupported
818 MO_WriteBarrier -> unsupported
819 MO_Touch -> unsupported
820 MO_UF_Conv _ -> unsupported
821
822 MO_AtomicRead _ -> unsupported
823 MO_AtomicRMW _ _ -> unsupported
824 MO_AtomicWrite _ -> unsupported
825 MO_Cmpxchg _ -> unsupported
826
827 -- | Tail function calls
828 genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData
829
830 -- Call to known function
831 genJump (CmmLit (CmmLabel lbl)) live = do
832 (vf, stmts, top) <- getHsFunc live lbl
833 (stgRegs, stgStmts) <- funEpilogue live
834 let s1 = Expr $ Call TailCall vf stgRegs llvmStdFunAttrs
835 let s2 = Return Nothing
836 return (stmts `appOL` stgStmts `snocOL` s1 `snocOL` s2, top)
837
838
839 -- Call to unknown function / address
840 genJump expr live = do
841 fty <- llvmFunTy live
842 (vf, stmts, top) <- exprToVar expr
843 dflags <- getDynFlags
844
845 let cast = case getVarType vf of
846 ty | isPointer ty -> LM_Bitcast
847 ty | isInt ty -> LM_Inttoptr
848
849 ty -> panic $ "genJump: Expr is of bad type for function call! ("
850 ++ showSDoc dflags (ppr ty) ++ ")"
851
852 (v1, s1) <- doExpr (pLift fty) $ Cast cast vf (pLift fty)
853 (stgRegs, stgStmts) <- funEpilogue live
854 let s2 = Expr $ Call TailCall v1 stgRegs llvmStdFunAttrs
855 let s3 = Return Nothing
856 return (stmts `snocOL` s1 `appOL` stgStmts `snocOL` s2 `snocOL` s3,
857 top)
858
859
860 -- | CmmAssign operation
861 --
862 -- We use stack allocated variables for CmmReg. The optimiser will replace
863 -- these with registers when possible.
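-- Sketch (names, and the exact LLVM passes involved, are assumptions rather
-- than something this module specifies): a local register assignment becomes a
-- store to an alloca'd slot, which LLVM's mem2reg/SROA passes can then promote
-- to an SSA register:
--
--   %lcl = alloca i64            ; created by the function prologue
--   ...
--   store i64 %val, i64* %lcl    ; CmmAssign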
864 genAssign :: CmmReg -> CmmExpr -> LlvmM StmtData
865 genAssign reg val = do
866 vreg <- getCmmReg reg
867 (vval, stmts2, top2) <- exprToVar val
868 let stmts = stmts2
869
870 let ty = (pLower . getVarType) vreg
871 dflags <- getDynFlags
872 case ty of
873 -- Some registers are pointer types, so need to cast value to pointer
874 LMPointer _ | getVarType vval == llvmWord dflags -> do
875 (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
876 let s2 = Store v vreg
877 return (stmts `snocOL` s1 `snocOL` s2, top2)
878
879 LMVector _ _ -> do
880 (v, s1) <- doExpr ty $ Cast LM_Bitcast vval ty
881 let s2 = Store v vreg
882 return (stmts `snocOL` s1 `snocOL` s2, top2)
883
884 _ -> do
885 let s1 = Store vval vreg
886 return (stmts `snocOL` s1, top2)
887
888
889 -- | CmmStore operation
890 genStore :: CmmExpr -> CmmExpr -> LlvmM StmtData
891
892 -- First we try to detect a few common cases and produce better code for
893 -- these than the default case. We are mostly trying to detect Cmm code
894 -- like I32[Sp + n] and use 'getelementptr' operations instead of the
895 -- generic case that uses casts and pointer arithmetic.
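-- For instance, a store such as I64[Sp + 8] = x should come out roughly as
-- (illustrative names, GEP syntax abbreviated):
--
--   %ptr = getelementptr inbounds i64, i64* %Sp, i32 1
--   store i64 %x, i64* %ptr
--
-- rather than recomputing the address with integer arithmetic and inttoptr.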
896 genStore addr@(CmmReg (CmmGlobal r)) val
897 = genStore_fast addr r 0 val
898
899 genStore addr@(CmmRegOff (CmmGlobal r) n) val
900 = genStore_fast addr r n val
901
902 genStore addr@(CmmMachOp (MO_Add _) [
903 (CmmReg (CmmGlobal r)),
904 (CmmLit (CmmInt n _))])
905 val
906 = genStore_fast addr r (fromInteger n) val
907
908 genStore addr@(CmmMachOp (MO_Sub _) [
909 (CmmReg (CmmGlobal r)),
910 (CmmLit (CmmInt n _))])
911 val
912 = genStore_fast addr r (negate $ fromInteger n) val
913
914 -- generic case
915 genStore addr val
916 = getTBAAMeta topN >>= genStore_slow addr val
917
918 -- | CmmStore operation
919 -- This is a special case for storing to a global register pointer
920 -- offset such as I32[Sp+8].
921 genStore_fast :: CmmExpr -> GlobalReg -> Int -> CmmExpr
922 -> LlvmM StmtData
923 genStore_fast addr r n val
924 = do dflags <- getDynFlags
925 (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
926 meta <- getTBAARegMeta r
927 let (ix,rem) = n `divMod` ((llvmWidthInBits dflags . pLower) grt `div` 8)
928 case isPointer grt && rem == 0 of
929 True -> do
930 (vval, stmts, top) <- exprToVar val
931 (ptr, s2) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
932 -- We might need a different pointer type, so check
933 case pLower grt == getVarType vval of
934 -- we're fine
935 True -> do
936 let s3 = MetaStmt meta $ Store vval ptr
937 return (stmts `appOL` s1 `snocOL` s2
938 `snocOL` s3, top)
939
940 -- cast to pointer type needed
941 False -> do
942 let ty = (pLift . getVarType) vval
943 (ptr', s3) <- doExpr ty $ Cast LM_Bitcast ptr ty
944 let s4 = MetaStmt meta $ Store vval ptr'
945 return (stmts `appOL` s1 `snocOL` s2
946 `snocOL` s3 `snocOL` s4, top)
947
948 -- If it's a bit type then we use the slow method since
949 -- we can't avoid casting anyway.
950 False -> genStore_slow addr val meta
951
952
953 -- | CmmStore operation
954 -- Generic case. Uses casts and pointer arithmetic if needed.
955 genStore_slow :: CmmExpr -> CmmExpr -> [MetaAnnot] -> LlvmM StmtData
956 genStore_slow addr val meta = do
957 (vaddr, stmts1, top1) <- exprToVar addr
958 (vval, stmts2, top2) <- exprToVar val
959
960 let stmts = stmts1 `appOL` stmts2
961 dflags <- getDynFlags
962 case getVarType vaddr of
963 -- sometimes we need to cast an int to a pointer before storing
964 LMPointer ty@(LMPointer _) | getVarType vval == llvmWord dflags -> do
965 (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
966 let s2 = MetaStmt meta $ Store v vaddr
967 return (stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
968
969 LMPointer _ -> do
970 let s1 = MetaStmt meta $ Store vval vaddr
971 return (stmts `snocOL` s1, top1 ++ top2)
972
973 i@(LMInt _) | i == llvmWord dflags -> do
974 let vty = pLift $ getVarType vval
975 (vptr, s1) <- doExpr vty $ Cast LM_Inttoptr vaddr vty
976 let s2 = MetaStmt meta $ Store vval vptr
977 return (stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
978
979 other ->
980 pprPanic "genStore: ptr not the right type!"
981 (PprCmm.pprExpr addr <+> text (
982 "Size of Ptr: " ++ show (llvmPtrBits dflags) ++
983 ", Size of var: " ++ show (llvmWidthInBits dflags other) ++
984 ", Var: " ++ showSDoc dflags (ppr vaddr)))
985
986
987 -- | Unconditional branch
988 genBranch :: BlockId -> LlvmM StmtData
989 genBranch id =
990 let label = blockIdToLlvm id
991 in return (unitOL $ Branch label, [])
992
993
994 -- | Conditional branch
995 genCondBranch :: CmmExpr -> BlockId -> BlockId -> Maybe Bool -> LlvmM StmtData
996 genCondBranch cond idT idF likely = do
997 let labelT = blockIdToLlvm idT
998 let labelF = blockIdToLlvm idF
999 -- See Note [Literals and branch conditions].
1000 (vc, stmts1, top1) <- exprToVarOpt i1Option cond
1001 if getVarType vc == i1
1002 then do
1003 (vc', (stmts2, top2)) <- case likely of
1004 Just b -> genExpectLit (if b then 1 else 0) i1 vc
1005 _ -> pure (vc, (nilOL, []))
1006 let s1 = BranchIf vc' labelT labelF
1007 return (stmts1 `appOL` stmts2 `snocOL` s1, top1 ++ top2)
1008 else do
1009 dflags <- getDynFlags
1010 panic $ "genCondBranch: Cond expr not bool! (" ++ showSDoc dflags (ppr vc) ++ ")"
1011
1012
1013 -- | Generate a call to the llvm.expect intrinsic, assigning the result to a new var.
1014 genExpectLit :: Integer -> LlvmType -> LlvmVar -> LlvmM (LlvmVar, StmtData)
1015 genExpectLit expLit expTy var = do
1016 dflags <- getDynFlags
1017
1018 let
1019 lit = LMLitVar $ LMIntLit expLit expTy
1020
1021 llvmExpectName
1022 | isInt expTy = fsLit $ "llvm.expect." ++ showSDoc dflags (ppr expTy)
1023 | otherwise = panic $ "genExpectLit: Type not an int!"
1024
1025 (llvmExpect, stmts, top) <-
1026 getInstrinct llvmExpectName expTy [expTy, expTy]
1027 (var', call) <- doExpr expTy $ Call StdCall llvmExpect [var, lit] []
1028 return (var', (stmts `snocOL` call, top))
1029
1030 {- Note [Literals and branch conditions]
1031 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1032
1033 It is important that whenever we generate branch conditions for
1034 literals like '1', they are properly narrowed to an LLVM expression of
1035 type 'i1' (for bools). Otherwise, nobody is happy. So when we convert
1036 a CmmExpr to an LLVM expression for a branch conditional, exprToVarOpt
1037 must be certain to return a properly narrowed type. genLit is
1038 responsible for this, in the case of literal integers.
1039
1040 Often, we won't see direct statements like:
1041
1042 if(1) {
1043 ...
1044 } else {
1045 ...
1046 }
1047
1048 at this point in the pipeline, because the Glorious Code Generator
1049 will do trivial branch elimination in the sinking pass (among others),
1050 which will eliminate the expression entirely.
1051
1052 However, it's certainly possible and reasonable for this to occur in
1053 hand-written C-- code. Consider something like:
1054
1055 #if !defined(SOME_CONDITIONAL)
1056 #define CHECK_THING(x) 1
1057 #else
1058 #define CHECK_THING(x) some_operation((x))
1059 #endif
1060
1061 f() {
1062
1063 if (CHECK_THING(xyz)) {
1064 ...
1065 } else {
1066 ...
1067 }
1068
1069 }
1070
1071 In such an instance, CHECK_THING might result in an *expression* in
1072 one case, and a *literal* in the other, depending on what in
1073 particular was #define'd. So we must be sure to properly narrow the
1074 literal in this case to i1 as it won't be eliminated beforehand.
1075
1076 For a real example of this, see ./rts/StgStdThunks.cmm
1077
1078 -}
1079
1080
1081
1082 -- | Switch branch
1083 genSwitch :: CmmExpr -> SwitchTargets -> LlvmM StmtData
1084 genSwitch cond ids = do
1085 (vc, stmts, top) <- exprToVar cond
1086 let ty = getVarType vc
1087
1088 let labels = [ (mkIntLit ty ix, blockIdToLlvm b)
1089 | (ix, b) <- switchTargetsCases ids ]
1090 -- out of range is undefined, so let's just branch to first label
1091 let defLbl | Just l <- switchTargetsDefault ids = blockIdToLlvm l
1092 | otherwise = snd (head labels)
1093
1094 let s1 = Switch vc defLbl labels
1095 return $ (stmts `snocOL` s1, top)
1096
1097
1098 -- -----------------------------------------------------------------------------
1099 -- * CmmExpr code generation
1100 --
1101
1102 -- | Data returned by an expression conversion:
1103 -- * LlvmVar: The var holding the result of the expression
1104 -- * LlvmStatements: Any statements needed to evaluate the expression
1105 -- * LlvmCmmDecl: Any global data needed for this expression
1106 type ExprData = (LlvmVar, LlvmStatements, [LlvmCmmDecl])
1107
1108 -- | Values which can be passed to 'exprToVar' to configure its
1109 -- behaviour in certain circumstances.
1110 --
1111 -- Currently just used for determining if a comparison should return
1112 -- a boolean (i1) or a word. See Note [Literals and branch conditions].
1113 newtype EOption = EOption { i1Expected :: Bool }
1114 -- XXX: EOption is an ugly and inefficient solution to this problem.
1115
1116 -- | i1 type expected (condition scrutinee).
1117 i1Option :: EOption
1118 i1Option = EOption True
1119
1120 -- | Word type expected (usual).
1121 wordOption :: EOption
1122 wordOption = EOption False
1123
1124 -- | Convert a CmmExpr to a list of LlvmStatements with the result of the
1125 -- expression being stored in the returned LlvmVar.
1126 exprToVar :: CmmExpr -> LlvmM ExprData
1127 exprToVar = exprToVarOpt wordOption
1128
1129 exprToVarOpt :: EOption -> CmmExpr -> LlvmM ExprData
1130 exprToVarOpt opt e = case e of
1131
1132 CmmLit lit
1133 -> genLit opt lit
1134
1135 CmmLoad e' ty
1136 -> genLoad False e' ty
1137
1138 -- A CmmReg in an expression is the value, so we must load it. If you want the
1139 -- actual register pointer, call getCmmReg directly.
1140 CmmReg r -> do
1141 (v1, ty, s1) <- getCmmRegVal r
1142 case isPointer ty of
1143 True -> do
1144 -- Cmm wants the value, so pointer types must be cast to ints
1145 dflags <- getDynFlags
1146 (v2, s2) <- doExpr (llvmWord dflags) $ Cast LM_Ptrtoint v1 (llvmWord dflags)
1147 return (v2, s1 `snocOL` s2, [])
1148
1149 False -> return (v1, s1, [])
1150
1151 CmmMachOp op exprs
1152 -> genMachOp opt op exprs
1153
1154 CmmRegOff r i
1155 -> do dflags <- getDynFlags
1156 exprToVar $ expandCmmReg dflags (r, i)
1157
1158 CmmStackSlot _ _
1159 -> panic "exprToVar: CmmStackSlot not supported!"
1160
1161
1162 -- | Handle CmmMachOp expressions
1163 genMachOp :: EOption -> MachOp -> [CmmExpr] -> LlvmM ExprData
1164
1165 -- Unary Machop
1166 genMachOp _ op [x] = case op of
1167
1168 MO_Not w ->
1169 let all1 = mkIntLit (widthToLlvmInt w) (-1)
1170 in negate (widthToLlvmInt w) all1 LM_MO_Xor
1171
1172 MO_S_Neg w ->
1173 let all0 = mkIntLit (widthToLlvmInt w) 0
1174 in negate (widthToLlvmInt w) all0 LM_MO_Sub
1175
1176 MO_F_Neg w ->
1177 let all0 = LMLitVar $ LMFloatLit (-0) (widthToLlvmFloat w)
1178 in negate (widthToLlvmFloat w) all0 LM_MO_FSub
1179
1180 MO_SF_Conv _ w -> fiConv (widthToLlvmFloat w) LM_Sitofp
1181 MO_FS_Conv _ w -> fiConv (widthToLlvmInt w) LM_Fptosi
1182
1183 MO_SS_Conv from to
1184 -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Sext
1185
1186 MO_UU_Conv from to
1187 -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Zext
1188
1189 MO_FF_Conv from to
1190 -> sameConv from (widthToLlvmFloat to) LM_Fptrunc LM_Fpext
1191
1192 MO_VS_Neg len w ->
1193 let ty = widthToLlvmInt w
1194 vecty = LMVector len ty
1195 all0 = LMIntLit (-0) ty
1196 all0s = LMLitVar $ LMVectorLit (replicate len all0)
1197 in negateVec vecty all0s LM_MO_Sub
1198
1199 MO_VF_Neg len w ->
1200 let ty = widthToLlvmFloat w
1201 vecty = LMVector len ty
1202 all0 = LMFloatLit (-0) ty
1203 all0s = LMLitVar $ LMVectorLit (replicate len all0)
1204 in negateVec vecty all0s LM_MO_FSub
1205
1206 MO_AlignmentCheck _ _ -> panic "-falignment-sanitisation is not supported by -fllvm"
1207
1208 -- Handle unsupported cases explicitly so we get a warning
1209 -- about a missing case when new MachOps are added
1210 MO_Add _ -> panicOp
1211 MO_Mul _ -> panicOp
1212 MO_Sub _ -> panicOp
1213 MO_S_MulMayOflo _ -> panicOp
1214 MO_S_Quot _ -> panicOp
1215 MO_S_Rem _ -> panicOp
1216 MO_U_MulMayOflo _ -> panicOp
1217 MO_U_Quot _ -> panicOp
1218 MO_U_Rem _ -> panicOp
1219
1220 MO_Eq _ -> panicOp
1221 MO_Ne _ -> panicOp
1222 MO_S_Ge _ -> panicOp
1223 MO_S_Gt _ -> panicOp
1224 MO_S_Le _ -> panicOp
1225 MO_S_Lt _ -> panicOp
1226 MO_U_Ge _ -> panicOp
1227 MO_U_Gt _ -> panicOp
1228 MO_U_Le _ -> panicOp
1229 MO_U_Lt _ -> panicOp
1230
1231 MO_F_Add _ -> panicOp
1232 MO_F_Sub _ -> panicOp
1233 MO_F_Mul _ -> panicOp
1234 MO_F_Quot _ -> panicOp
1235 MO_F_Eq _ -> panicOp
1236 MO_F_Ne _ -> panicOp
1237 MO_F_Ge _ -> panicOp
1238 MO_F_Gt _ -> panicOp
1239 MO_F_Le _ -> panicOp
1240 MO_F_Lt _ -> panicOp
1241
1242 MO_And _ -> panicOp
1243 MO_Or _ -> panicOp
1244 MO_Xor _ -> panicOp
1245 MO_Shl _ -> panicOp
1246 MO_U_Shr _ -> panicOp
1247 MO_S_Shr _ -> panicOp
1248
1249 MO_V_Insert _ _ -> panicOp
1250 MO_V_Extract _ _ -> panicOp
1251
1252 MO_V_Add _ _ -> panicOp
1253 MO_V_Sub _ _ -> panicOp
1254 MO_V_Mul _ _ -> panicOp
1255
1256 MO_VS_Quot _ _ -> panicOp
1257 MO_VS_Rem _ _ -> panicOp
1258
1259 MO_VU_Quot _ _ -> panicOp
1260 MO_VU_Rem _ _ -> panicOp
1261
1262 MO_VF_Insert _ _ -> panicOp
1263 MO_VF_Extract _ _ -> panicOp
1264
1265 MO_VF_Add _ _ -> panicOp
1266 MO_VF_Sub _ _ -> panicOp
1267 MO_VF_Mul _ _ -> panicOp
1268 MO_VF_Quot _ _ -> panicOp
1269
1270 where
1271 negate ty v2 negOp = do
1272 (vx, stmts, top) <- exprToVar x
1273 (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx
1274 return (v1, stmts `snocOL` s1, top)
1275
1276 negateVec ty v2 negOp = do
1277 (vx, stmts1, top) <- exprToVar x
1278 ([vx'], stmts2) <- castVars Signed [(vx, ty)]
1279 (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx'
1280 return (v1, stmts1 `appOL` stmts2 `snocOL` s1, top)
1281
1282 fiConv ty convOp = do
1283 (vx, stmts, top) <- exprToVar x
1284 (v1, s1) <- doExpr ty $ Cast convOp vx ty
1285 return (v1, stmts `snocOL` s1, top)
1286
1287 sameConv from ty reduce expand = do
1288 x'@(vx, stmts, top) <- exprToVar x
1289 let sameConv' op = do
1290 (v1, s1) <- doExpr ty $ Cast op vx ty
1291 return (v1, stmts `snocOL` s1, top)
1292 dflags <- getDynFlags
1293 let toWidth = llvmWidthInBits dflags ty
1294 -- LLVM doesn't like trying to convert to the same width, so we
1295 -- need to check for that, as we do get Cmm code doing it.
1296 case widthInBits from of
1297 w | w < toWidth -> sameConv' expand
1298 w | w > toWidth -> sameConv' reduce
1299 _w -> return x'
1300
1301 panicOp = panic $ "LLVM.CodeGen.genMachOp: non-unary op encountered "
1302 ++ "with one argument! (" ++ show op ++ ")"
1303
1304 -- Handle GlobalRegs pointers
1305 genMachOp opt o@(MO_Add _) e@[(CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]
1306 = genMachOp_fast opt o r (fromInteger n) e
1307
1308 genMachOp opt o@(MO_Sub _) e@[(CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]
1309 = genMachOp_fast opt o r (negate . fromInteger $ n) e
1310
1311 -- Generic case
1312 genMachOp opt op e = genMachOp_slow opt op e
1313
1314
1315 -- | Handle CmmMachOp expressions
1316 -- This is a specialised method that handles Global register manipulations like
1317 -- 'Sp - 16', using the getelementptr instruction.
1318 genMachOp_fast :: EOption -> MachOp -> GlobalReg -> Int -> [CmmExpr]
1319 -> LlvmM ExprData
1320 genMachOp_fast opt op r n e
1321 = do (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
1322 dflags <- getDynFlags
1323 let (ix,rem) = n `divMod` ((llvmWidthInBits dflags . pLower) grt `div` 8)
1324 case isPointer grt && rem == 0 of
1325 True -> do
1326 (ptr, s2) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
1327 (var, s3) <- doExpr (llvmWord dflags) $ Cast LM_Ptrtoint ptr (llvmWord dflags)
1328 return (var, s1 `snocOL` s2 `snocOL` s3, [])
1329
1330 False -> genMachOp_slow opt op e
1331
1332
1333 -- | Handle CmmMachOp expressions
1334 -- This handles all the cases not handled by the specialised genMachOp_fast.
1335 genMachOp_slow :: EOption -> MachOp -> [CmmExpr] -> LlvmM ExprData
1336
1337 -- Element extraction
1338 genMachOp_slow _ (MO_V_Extract l w) [val, idx] = runExprData $ do
1339 vval <- exprToVarW val
1340 vidx <- exprToVarW idx
1341 [vval'] <- castVarsW Signed [(vval, LMVector l ty)]
1342 doExprW ty $ Extract vval' vidx
1343 where
1344 ty = widthToLlvmInt w
1345
1346 genMachOp_slow _ (MO_VF_Extract l w) [val, idx] = runExprData $ do
1347 vval <- exprToVarW val
1348 vidx <- exprToVarW idx
1349 [vval'] <- castVarsW Signed [(vval, LMVector l ty)]
1350 doExprW ty $ Extract vval' vidx
1351 where
1352 ty = widthToLlvmFloat w
1353
1354 -- Element insertion
1355 genMachOp_slow _ (MO_V_Insert l w) [val, elt, idx] = runExprData $ do
1356 vval <- exprToVarW val
1357 velt <- exprToVarW elt
1358 vidx <- exprToVarW idx
1359 [vval'] <- castVarsW Signed [(vval, ty)]
1360 doExprW ty $ Insert vval' velt vidx
1361 where
1362 ty = LMVector l (widthToLlvmInt w)
1363
1364 genMachOp_slow _ (MO_VF_Insert l w) [val, elt, idx] = runExprData $ do
1365 vval <- exprToVarW val
1366 velt <- exprToVarW elt
1367 vidx <- exprToVarW idx
1368 [vval'] <- castVarsW Signed [(vval, ty)]
1369 doExprW ty $ Insert vval' velt vidx
1370 where
1371 ty = LMVector l (widthToLlvmFloat w)
1372
1373 -- Binary MachOp
1374 genMachOp_slow opt op [x, y] = case op of
1375
1376 MO_Eq _ -> genBinComp opt LM_CMP_Eq
1377 MO_Ne _ -> genBinComp opt LM_CMP_Ne
1378
1379 MO_S_Gt _ -> genBinComp opt LM_CMP_Sgt
1380 MO_S_Ge _ -> genBinComp opt LM_CMP_Sge
1381 MO_S_Lt _ -> genBinComp opt LM_CMP_Slt
1382 MO_S_Le _ -> genBinComp opt LM_CMP_Sle
1383
1384 MO_U_Gt _ -> genBinComp opt LM_CMP_Ugt
1385 MO_U_Ge _ -> genBinComp opt LM_CMP_Uge
1386 MO_U_Lt _ -> genBinComp opt LM_CMP_Ult
1387 MO_U_Le _ -> genBinComp opt LM_CMP_Ule
1388
1389 MO_Add _ -> genBinMach LM_MO_Add
1390 MO_Sub _ -> genBinMach LM_MO_Sub
1391 MO_Mul _ -> genBinMach LM_MO_Mul
1392
1393 MO_U_MulMayOflo _ -> panic "genMachOp: MO_U_MulMayOflo unsupported!"
1394
1395 MO_S_MulMayOflo w -> isSMulOK w x y
1396
1397 MO_S_Quot _ -> genBinMach LM_MO_SDiv
1398 MO_S_Rem _ -> genBinMach LM_MO_SRem
1399
1400 MO_U_Quot _ -> genBinMach LM_MO_UDiv
1401 MO_U_Rem _ -> genBinMach LM_MO_URem
1402
1403 MO_F_Eq _ -> genBinComp opt LM_CMP_Feq
1404 MO_F_Ne _ -> genBinComp opt LM_CMP_Fne
1405 MO_F_Gt _ -> genBinComp opt LM_CMP_Fgt
1406 MO_F_Ge _ -> genBinComp opt LM_CMP_Fge
1407 MO_F_Lt _ -> genBinComp opt LM_CMP_Flt
1408 MO_F_Le _ -> genBinComp opt LM_CMP_Fle
1409
1410 MO_F_Add _ -> genBinMach LM_MO_FAdd
1411 MO_F_Sub _ -> genBinMach LM_MO_FSub
1412 MO_F_Mul _ -> genBinMach LM_MO_FMul
1413 MO_F_Quot _ -> genBinMach LM_MO_FDiv
1414
1415 MO_And _ -> genBinMach LM_MO_And
1416 MO_Or _ -> genBinMach LM_MO_Or
1417 MO_Xor _ -> genBinMach LM_MO_Xor
1418 MO_Shl _ -> genBinMach LM_MO_Shl
1419 MO_U_Shr _ -> genBinMach LM_MO_LShr
1420 MO_S_Shr _ -> genBinMach LM_MO_AShr
1421
1422 MO_V_Add l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Add
1423 MO_V_Sub l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Sub
1424 MO_V_Mul l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Mul
1425
1426 MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv
1427 MO_VS_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem
1428
1429 MO_VU_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_UDiv
1430 MO_VU_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_URem
1431
1432 MO_VF_Add l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd
1433 MO_VF_Sub l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub
1434 MO_VF_Mul l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FMul
1435 MO_VF_Quot l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FDiv
1436
1437 MO_Not _ -> panicOp
1438 MO_S_Neg _ -> panicOp
1439 MO_F_Neg _ -> panicOp
1440
1441 MO_SF_Conv _ _ -> panicOp
1442 MO_FS_Conv _ _ -> panicOp
1443 MO_SS_Conv _ _ -> panicOp
1444 MO_UU_Conv _ _ -> panicOp
1445 MO_FF_Conv _ _ -> panicOp
1446
1447 MO_V_Insert {} -> panicOp
1448 MO_V_Extract {} -> panicOp
1449
1450 MO_VS_Neg {} -> panicOp
1451
1452 MO_VF_Insert {} -> panicOp
1453 MO_VF_Extract {} -> panicOp
1454
1455 MO_VF_Neg {} -> panicOp
1456
1457 MO_AlignmentCheck {} -> panicOp
1458
1459 where
1460 binLlvmOp ty binOp = runExprData $ do
1461 vx <- exprToVarW x
1462 vy <- exprToVarW y
1463 if getVarType vx == getVarType vy
1464 then do
1465 doExprW (ty vx) $ binOp vx vy
1466
1467 else do
1468 -- Error. Continue anyway so we can debug the generated ll file.
1469 dflags <- getDynFlags
1470 let style = mkCodeStyle CStyle
1471 toString doc = renderWithStyle dflags doc style
1472 cmmToStr = (lines . toString . PprCmm.pprExpr)
1473 statement $ Comment $ map fsLit $ cmmToStr x
1474 statement $ Comment $ map fsLit $ cmmToStr y
1475 doExprW (ty vx) $ binOp vx vy
1476
1477 binCastLlvmOp ty binOp = runExprData $ do
1478 vx <- exprToVarW x
1479 vy <- exprToVarW y
1480 [vx', vy'] <- castVarsW Signed [(vx, ty), (vy, ty)]
1481 doExprW ty $ binOp vx' vy'
1482
1483 -- | Need to use EOption here as Cmm expects word-size results from
1484 -- comparisons while LLVM returns i1. We need to extend to the llvmWord type
1485 -- if that is expected. See Note [Literals and branch conditions].
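-- E.g. (sketch, names illustrative): a word-sized unsigned comparison used
-- outside a branch condition becomes
--
--   %c1 = icmp ult i64 %x, %y
--   %c  = zext i1 %c1 to i64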
1486 genBinComp opt cmp = do
1487 ed@(v1, stmts, top) <- binLlvmOp (\_ -> i1) (Compare cmp)
1488 dflags <- getDynFlags
1489 if getVarType v1 == i1
1490 then case i1Expected opt of
1491 True -> return ed
1492 False -> do
1493 let w_ = llvmWord dflags
1494 (v2, s1) <- doExpr w_ $ Cast LM_Zext v1 w_
1495 return (v2, stmts `snocOL` s1, top)
1496 else
1497 panic $ "genBinComp: Compare returned type other than i1! "
1498 ++ (showSDoc dflags $ ppr $ getVarType v1)
1499
1500 genBinMach op = binLlvmOp getVarType (LlvmOp op)
1501
1502 genCastBinMach ty op = binCastLlvmOp ty (LlvmOp op)
1503
1504 -- | Detect if overflow will occur in a signed multiply of the two
1505 -- CmmExprs. This is the LLVM assembly equivalent of the NCG
1506 -- implementation. It's much longer due to type information/safety.
1507 -- This should actually compile to only about 3 asm instructions.
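-- A rough sketch of the intended IR for W64 (names illustrative):
--
--   %x2   = sext i64 %x to i128
--   %y2   = sext i64 %y to i128
--   %r    = mul i128 %x2, %y2
--   %lo   = trunc i128 %r to i64
--   %sig  = ashr i64 %lo, 63          ; sign of the low word
--   %hi2  = ashr i128 %r, 64
--   %hi   = trunc i128 %hi2 to i64
--   %res  = sub i64 %sig, %hi         ; zero iff the multiply didn't overflow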
1508 isSMulOK :: Width -> CmmExpr -> CmmExpr -> LlvmM ExprData
1509 isSMulOK _ x y = runExprData $ do
1510 vx <- exprToVarW x
1511 vy <- exprToVarW y
1512
1513 dflags <- getDynFlags
1514 let word = getVarType vx
1515 let word2 = LMInt $ 2 * (llvmWidthInBits dflags $ getVarType vx)
1516 let shift = llvmWidthInBits dflags word
1517 let shift1 = toIWord dflags (shift - 1)
1518 let shift2 = toIWord dflags shift
1519
1520 if isInt word
1521 then do
1522 x1 <- doExprW word2 $ Cast LM_Sext vx word2
1523 y1 <- doExprW word2 $ Cast LM_Sext vy word2
1524 r1 <- doExprW word2 $ LlvmOp LM_MO_Mul x1 y1
1525 rlow1 <- doExprW word $ Cast LM_Trunc r1 word
1526 rlow2 <- doExprW word $ LlvmOp LM_MO_AShr rlow1 shift1
1527 rhigh1 <- doExprW word2 $ LlvmOp LM_MO_AShr r1 shift2
1528 rhigh2 <- doExprW word $ Cast LM_Trunc rhigh1 word
1529 doExprW word $ LlvmOp LM_MO_Sub rlow2 rhigh2
1530
1531 else
1532 panic $ "isSMulOK: Not bit type! (" ++ showSDoc dflags (ppr word) ++ ")"
1533
1534 panicOp = panic $ "LLVM.CodeGen.genMachOp_slow: unary op encountered "
1535 ++ "with two arguments! (" ++ show op ++ ")"
1536
1537 -- More than two expressions, invalid!
1538 genMachOp_slow _ _ _ = panic "genMachOp: More than 2 expressions in MachOp!"
1539
1540
1541 -- | Handle CmmLoad expression.
1542 genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData
1543
1544 -- First we try to detect a few common cases and produce better code for
1545 -- these than the default case. We are mostly trying to detect Cmm code
1546 -- like I32[Sp + n] and use 'getelementptr' operations instead of the
1547 -- generic case that uses casts and pointer arithmetic.
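-- Analogously to genStore above, a load such as I64[Sp + 8] should come out
-- roughly as (illustrative names):
--
--   %ptr = getelementptr inbounds i64, i64* %Sp, i32 1
--   %val = load i64, i64* %ptr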
1548 genLoad atomic e@(CmmReg (CmmGlobal r)) ty
1549 = genLoad_fast atomic e r 0 ty
1550
1551 genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty
1552 = genLoad_fast atomic e r n ty
1553
1554 genLoad atomic e@(CmmMachOp (MO_Add _) [
1555 (CmmReg (CmmGlobal r)),
1556 (CmmLit (CmmInt n _))])
1557 ty
1558 = genLoad_fast atomic e r (fromInteger n) ty
1559
1560 genLoad atomic e@(CmmMachOp (MO_Sub _) [
1561 (CmmReg (CmmGlobal r)),
1562 (CmmLit (CmmInt n _))])
1563 ty
1564 = genLoad_fast atomic e r (negate $ fromInteger n) ty
1565
1566 -- generic case
1567 genLoad atomic e ty
1568 = getTBAAMeta topN >>= genLoad_slow atomic e ty
1569
1570 -- | Handle CmmLoad expression.
1571 -- This is a special case for loading from a global register pointer
1572 -- offset such as I32[Sp+8].
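-- The byte offset is converted into an element index for getelementptr:
-- with a base register pointing at 8-byte words, for instance, n = 16
-- gives (ix, rem) = (2, 0) and we can index directly, while n = 12 gives
-- rem /= 0 and we fall back to genLoad_slow. (Illustrative numbers only.)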
1573 genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType
1574 -> LlvmM ExprData
1575 genLoad_fast atomic e r n ty = do
1576 dflags <- getDynFlags
1577 (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
1578 meta <- getTBAARegMeta r
1579 let ty' = cmmToLlvmType ty
1580 (ix,rem) = n `divMod` ((llvmWidthInBits dflags . pLower) grt `div` 8)
1581 case isPointer grt && rem == 0 of
1582 True -> do
1583 (ptr, s2) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
1584 -- We might need a different pointer type, so check
1585 case grt == ty' of
1586 -- we're fine
1587 True -> do
1588 (var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr)
1589 return (var, s1 `snocOL` s2 `snocOL` s3,
1590 [])
1591
1592 -- cast to pointer type needed
1593 False -> do
1594 let pty = pLift ty'
1595 (ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty
1596 (var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr')
1597 return (var, s1 `snocOL` s2 `snocOL` s3
1598 `snocOL` s4, [])
1599
1600 -- If it's a bit type (or the offset is not a whole number of
1601 -- elements) then we use the slow method since we can't avoid casting anyway.
1602 False -> genLoad_slow atomic e ty meta
1603 where
1604 loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
1605 | otherwise = Load ptr
1606
1607 -- | Handle Cmm load expression.
1608 -- Generic case. Uses casts and pointer arithmetic if needed.
1609 genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
1610 genLoad_slow atomic e ty meta = runExprData $ do
1611 iptr <- exprToVarW e
1612 dflags <- getDynFlags
1613 case getVarType iptr of
1614 LMPointer _ -> do
1615 doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr iptr)
1616
1617 i@(LMInt _) | i == llvmWord dflags -> do
1618 let pty = LMPointer $ cmmToLlvmType ty
1619 ptr <- doExprW pty $ Cast LM_Inttoptr iptr pty
1620 doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr ptr)
1621
1622 other -> do pprPanic "exprToVar: CmmLoad expression is not the right type!"
1623 (PprCmm.pprExpr e <+> text (
1624 "Size of Ptr: " ++ show (llvmPtrBits dflags) ++
1625 ", Size of var: " ++ show (llvmWidthInBits dflags other) ++
1626 ", Var: " ++ showSDoc dflags (ppr iptr)))
1627 where
1628 loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
1629 | otherwise = Load ptr
1630
1631
1632 -- | Handle CmmReg expression. This will return a pointer to the stack
1633 -- location of the register. Throws an error if it isn't allocated on
1634 -- the stack.
1635 getCmmReg :: CmmReg -> LlvmM LlvmVar
1636 getCmmReg (CmmLocal (LocalReg un _))
1637 = do exists <- varLookup un
1638 dflags <- getDynFlags
1639 case exists of
1640 Just ety -> return (LMLocalVar un $ pLift ety)
1641 Nothing -> fail $ "getCmmReg: Cmm register " ++ showSDoc dflags (ppr un) ++ " was not allocated!"
1642 -- This should never happen, as every local variable should
1643 -- have been assigned a value at some point, triggering
1644 -- "funPrologue" to allocate it on the stack.
1645
1646 getCmmReg (CmmGlobal g)
1647 = do onStack <- checkStackReg g
1648 dflags <- getDynFlags
1649 if onStack
1650 then return (lmGlobalRegVar dflags g)
1651 else fail $ "getCmmReg: Cmm register " ++ showSDoc dflags (ppr g) ++ " not stack-allocated!"
1652
1653 -- | Return the value of a given register, as well as its type. It might
1654 -- need to be loaded from the stack.
1655 getCmmRegVal :: CmmReg -> LlvmM (LlvmVar, LlvmType, LlvmStatements)
1656 getCmmRegVal reg =
1657 case reg of
1658 CmmGlobal g -> do
1659 onStack <- checkStackReg g
1660 dflags <- getDynFlags
1661 if onStack then loadFromStack else do
1662 let r = lmGlobalRegArg dflags g
1663 return (r, getVarType r, nilOL)
1664 _ -> loadFromStack
1665 where loadFromStack = do
1666 ptr <- getCmmReg reg
1667 let ty = pLower $ getVarType ptr
1668 (v, s) <- doExpr ty (Load ptr)
1669 return (v, ty, unitOL s)
1670
1671 -- | Allocate a local CmmReg on the stack
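-- For a local register of Cmm type b64, for instance, this produces
-- (illustratively) a pointer-typed variable for the register's unique %u
-- together with the statement
--
--   %u = alloca i64, i32 1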
1672 allocReg :: CmmReg -> (LlvmVar, LlvmStatements)
1673 allocReg (CmmLocal (LocalReg un ty))
1674 = let ty' = cmmToLlvmType ty
1675 var = LMLocalVar un (LMPointer ty')
1676 alc = Alloca ty' 1
1677 in (var, unitOL $ Assignment var alc)
1678
1679 allocReg _ = panic $ "allocReg: Global reg encountered! Global registers should"
1680 ++ " have been handled elsewhere!"
1681
1682
1683 -- | Generate code for a literal
1684 genLit :: EOption -> CmmLit -> LlvmM ExprData
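-- An illustrative sketch of the width handling: in a branch condition
-- (i1Expected), a literal such as CmmInt 1 W64 becomes an i1 constant,
-- whereas as an ordinary value it becomes an integer constant of the
-- literal's own width (here i64).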
1685 genLit opt (CmmInt i w)
1686 -- See Note [Literals and branch conditions].
1687 = let width | i1Expected opt = i1
1688 | otherwise = LMInt (widthInBits w)
1689 -- comm = Comment [ fsLit $ "EOption: " ++ show opt
1690 -- , fsLit $ "Width : " ++ show w
1691 -- , fsLit $ "Width' : " ++ show (widthInBits w)
1692 -- ]
1693 in return (mkIntLit width i, nilOL, [])
1694
1695 genLit _ (CmmFloat r w)
1696 = return (LMLitVar $ LMFloatLit (fromRational r) (widthToLlvmFloat w),
1697 nilOL, [])
1698
1699 genLit opt (CmmVec ls)
1700 = do llvmLits <- mapM toLlvmLit ls
1701 return (LMLitVar $ LMVectorLit llvmLits, nilOL, [])
1702 where
1703 toLlvmLit :: CmmLit -> LlvmM LlvmLit
1704 toLlvmLit lit = do
1705 (llvmLitVar, _, _) <- genLit opt lit
1706 case llvmLitVar of
1707 LMLitVar llvmLit -> return llvmLit
1708 _ -> panic "genLit"
1709
1710 genLit _ cmm@(CmmLabel l)
1711 = do var <- getGlobalPtr =<< strCLabel_llvm l
1712 dflags <- getDynFlags
1713 let lmty = cmmToLlvmType $ cmmLitType dflags cmm
1714 (v1, s1) <- doExpr lmty $ Cast LM_Ptrtoint var (llvmWord dflags)
1715 return (v1, unitOL s1, [])
1716
1717 genLit opt (CmmLabelOff label off) = do
1718 dflags <- getDynFlags
1719 (vlbl, stmts, stat) <- genLit opt (CmmLabel label)
1720 let voff = toIWord dflags off
1721 (v1, s1) <- doExpr (getVarType vlbl) $ LlvmOp LM_MO_Add vlbl voff
1722 return (v1, stmts `snocOL` s1, stat)
1723
1724 genLit opt (CmmLabelDiffOff l1 l2 off) = do
1725 dflags <- getDynFlags
1726 (vl1, stmts1, stat1) <- genLit opt (CmmLabel l1)
1727 (vl2, stmts2, stat2) <- genLit opt (CmmLabel l2)
1728 let voff = toIWord dflags off
1729 let ty1 = getVarType vl1
1730 let ty2 = getVarType vl2
1731 if (isInt ty1) && (isInt ty2)
1732 && (llvmWidthInBits dflags ty1 == llvmWidthInBits dflags ty2)
1733
1734 then do
1735 (v1, s1) <- doExpr (getVarType vl1) $ LlvmOp LM_MO_Sub vl1 vl2
1736 (v2, s2) <- doExpr (getVarType v1 ) $ LlvmOp LM_MO_Add v1 voff
1737 return (v2, stmts1 `appOL` stmts2 `snocOL` s1 `snocOL` s2,
1738 stat1 ++ stat2)
1739
1740 else
1741 panic "genLit: CmmLabelDiffOff encountered with different label ty!"
1742
1743 genLit opt (CmmBlock b)
1744 = genLit opt (CmmLabel $ infoTblLbl b)
1745
1746 genLit _ CmmHighStackMark
1747 = panic "genLit - CmmHighStackMark unsupported!"
1748
1749
1750 -- -----------------------------------------------------------------------------
1751 -- * Misc
1752 --
1753
1754 -- | Find CmmRegs that get assigned and allocate them on the stack
1755 --
1756 -- Any register that gets written needs to be allocated on the
1757 -- stack. This avoids having to map a CmmReg to an equivalent SSA form
1758 -- and avoids having to deal with Phi node insertion. This is also
1759 -- the approach recommended by LLVM developers.
1760 --
1761 -- On the other hand, this is unnecessarily verbose if the register in
1762 -- question is never written. Therefore we skip it where we can to
1763 -- save a few lines in the output and hopefully speed compilation up a
1764 -- bit.
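-- As a sketch (register names invented for illustration), for a live global
-- register such as R1 the prologue contains roughly:
--
--   %R1_Var = alloca i64, i32 1
--   store i64 %R1_Arg, i64* %R1_Var
--
-- while registers that are not live are initialised with 'undef' instead of
-- the incoming argument.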
1765 funPrologue :: LiveGlobalRegs -> [CmmBlock] -> LlvmM StmtData
1766 funPrologue live cmmBlocks = do
1767
1768 trash <- getTrashRegs
1769 let getAssignedRegs :: CmmNode O O -> [CmmReg]
1770 getAssignedRegs (CmmAssign reg _) = [reg]
1771 -- Calls will trash all registers. Unfortunately, this needs them to
1772 -- be stack-allocated in the first place.
1773 getAssignedRegs (CmmUnsafeForeignCall _ rs _) = map CmmGlobal trash ++ map CmmLocal rs
1774 getAssignedRegs _ = []
1775 getRegsBlock (_, body, _) = concatMap getAssignedRegs $ blockToList body
1776 assignedRegs = nub $ concatMap (getRegsBlock . blockSplit) cmmBlocks
1777 isLive r = r `elem` alwaysLive || r `elem` live
1778
1779 dflags <- getDynFlags
1780 stmtss <- flip mapM assignedRegs $ \reg ->
1781 case reg of
1782 CmmLocal (LocalReg un _) -> do
1783 let (newv, stmts) = allocReg reg
1784 varInsert un (pLower $ getVarType newv)
1785 return stmts
1786 CmmGlobal r -> do
1787 let reg = lmGlobalRegVar dflags r
1788 arg = lmGlobalRegArg dflags r
1789 ty = (pLower . getVarType) reg
1790 trash = LMLitVar $ LMUndefLit ty
1791 rval = if isLive r then arg else trash
1792 alloc = Assignment reg $ Alloca (pLower $ getVarType reg) 1
1793 markStackReg r
1794 return $ toOL [alloc, Store rval reg]
1795
1796 return (concatOL stmtss, [])
1797
1798 -- | Function epilogue. Load STG variables to use as arguments for the call.
1799 -- STG liveness optimisation is done here.
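-- As an illustrative sketch: a live register is re-loaded from its stack
-- slot, e.g.
--
--   %R1_val = load i64* %R1_Var
--
-- a dead non-SSE register is passed as 'undef', and a dead SSE register
-- (Float/Double/Xmm/Ymm/Zmm) is omitted from the argument list altogether.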
1800 funEpilogue :: LiveGlobalRegs -> LlvmM ([LlvmVar], LlvmStatements)
1801 funEpilogue live = do
1802
1803 -- A register is only re-loaded if it is live; dead ones get 'undef'.
1804 let liveRegs = alwaysLive ++ live
1805 isSSE (FloatReg _) = True
1806 isSSE (DoubleReg _) = True
1807 isSSE (XmmReg _) = True
1808 isSSE (YmmReg _) = True
1809 isSSE (ZmmReg _) = True
1810 isSSE _ = False
1811
1812 -- Set to value or "undef" depending on whether the register is
1813 -- actually live
1814 dflags <- getDynFlags
1815 let loadExpr r = do
1816 (v, _, s) <- getCmmRegVal (CmmGlobal r)
1817 return (Just $ v, s)
1818 loadUndef r = do
1819 let ty = (pLower . getVarType $ lmGlobalRegVar dflags r)
1820 return (Just $ LMLitVar $ LMUndefLit ty, nilOL)
1821 platform <- getDynFlag targetPlatform
1822 loads <- flip mapM (activeStgRegs platform) $ \r -> case () of
1823 _ | r `elem` liveRegs -> loadExpr r
1824 | not (isSSE r) -> loadUndef r
1825 | otherwise -> return (Nothing, nilOL)
1826
1827 let (vars, stmts) = unzip loads
1828 return (catMaybes vars, concatOL stmts)
1829
1830
1831 -- | A series of statements to trash all the STG registers.
1832 --
1833 -- In LLVM we pass the STG registers around everywhere in function calls.
1834 -- So this means LLVM considers them live across the entire function, when
1835 -- in reality they usually aren't. For caller-save registers across C calls,
1836 -- the saving and restoring of them is done by the Cmm code generator,
1837 -- using Cmm local vars. So to stop LLVM saving them as well (and saving
1838 -- all of them, since it thinks they're always live), we trash them just
1839 -- before the call by assigning the 'undef' value to them. The ones we
1840 -- need are restored from the Cmm local vars and the ones we don't need
1841 -- are fine to be trashed.
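-- A minimal sketch of one such trashing statement (assuming R1 is
-- caller-saved and lives in an i64 stack slot):
--
--   store i64 undef, i64* %R1_Var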
1842 getTrashStmts :: LlvmM LlvmStatements
1843 getTrashStmts = do
1844 regs <- getTrashRegs
1845 stmts <- flip mapM regs $ \ r -> do
1846 reg <- getCmmReg (CmmGlobal r)
1847 let ty = (pLower . getVarType) reg
1848 return $ Store (LMLitVar $ LMUndefLit ty) reg
1849 return $ toOL stmts
1850
1851 getTrashRegs :: LlvmM [GlobalReg]
1852 getTrashRegs = do plat <- getLlvmPlatform
1853 return $ filter (callerSaves plat) (activeStgRegs plat)
1854
1855 -- | Get a function pointer to the CLabel specified.
1856 --
1857 -- This is for Haskell functions: a Haskell function type is assumed, so this
1858 -- doesn't work with foreign functions.
1859 getHsFunc :: LiveGlobalRegs -> CLabel -> LlvmM ExprData
1860 getHsFunc live lbl
1861 = do fty <- llvmFunTy live
1862 name <- strCLabel_llvm lbl
1863 getHsFunc' name fty
1864
1865 getHsFunc' :: LMString -> LlvmType -> LlvmM ExprData
1866 getHsFunc' name fty
1867 = do fun <- getGlobalPtr name
1868 if getVarType fun == fty
1869 then return (fun, nilOL, [])
1870 else do (v1, s1) <- doExpr (pLift fty)
1871 $ Cast LM_Bitcast fun (pLift fty)
1872 return (v1, unitOL s1, [])
1873
1874 -- | Create a new local var
1875 mkLocalVar :: LlvmType -> LlvmM LlvmVar
1876 mkLocalVar ty = do
1877 un <- getUniqueM
1878 return $ LMLocalVar un ty
1879
1880
1881 -- | Execute an expression, assigning result to a var
1882 doExpr :: LlvmType -> LlvmExpression -> LlvmM (LlvmVar, LlvmStatement)
1883 doExpr ty expr = do
1884 v <- mkLocalVar ty
1885 return (v, Assignment v expr)
1886
1887
1888 -- | Expand CmmRegOff
1889 expandCmmReg :: DynFlags -> (CmmReg, Int) -> CmmExpr
1890 expandCmmReg dflags (reg, off)
1891 = let width = typeWidth (cmmRegType dflags reg)
1892 voff = CmmLit $ CmmInt (fromIntegral off) width
1893 in CmmMachOp (MO_Add width) [CmmReg reg, voff]
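-- For example (illustrative, assuming a 64-bit Sp):
--
--   expandCmmReg dflags (CmmGlobal Sp, 8)
--     ~> CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal Sp), CmmLit (CmmInt 8 W64)]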
1894
1895
1896 -- | Convert a block id into an appropriate LLVM label
1897 blockIdToLlvm :: BlockId -> LlvmVar
1898 blockIdToLlvm bid = LMLocalVar (getUnique bid) LMLabel
1899
1900 -- | Create an LLVM int literal
1901 mkIntLit :: Integral a => LlvmType -> a -> LlvmVar
1902 mkIntLit ty i = LMLitVar $ LMIntLit (toInteger i) ty
1903
1904 -- | Convert an integral value to an LlvmVar of word or i32 size
1905 toI32 :: Integral a => a -> LlvmVar
1906 toI32 = mkIntLit i32
1907
1908 toIWord :: Integral a => DynFlags -> a -> LlvmVar
1909 toIWord dflags = mkIntLit (llvmWord dflags)
1910
1911
1912 -- | Error functions
1913 panic :: String -> a
1914 panic s = Outputable.panic $ "LlvmCodeGen.CodeGen." ++ s
1915
1916 pprPanic :: String -> SDoc -> a
1917 pprPanic s d = Outputable.pprPanic ("LlvmCodeGen.CodeGen." ++ s) d
1918
1919
1920 -- | Returns TBAA meta data by unique
1921 getTBAAMeta :: Unique -> LlvmM [MetaAnnot]
1922 getTBAAMeta u = do
1923 mi <- getUniqMeta u
1924 return [MetaAnnot tbaa (MetaNode i) | let Just i = mi]
1925
1926 -- | Returns TBAA meta data for given register
1927 getTBAARegMeta :: GlobalReg -> LlvmM [MetaAnnot]
1928 getTBAARegMeta = getTBAAMeta . getTBAA
1929
1930
1931 -- | A more convenient way of accumulating LLVM statements and declarations.
1932 data LlvmAccum = LlvmAccum LlvmStatements [LlvmCmmDecl]
1933
1934 instance Semigroup LlvmAccum where
1935 LlvmAccum stmtsA declsA <> LlvmAccum stmtsB declsB =
1936 LlvmAccum (stmtsA Semigroup.<> stmtsB) (declsA Semigroup.<> declsB)
1937
1938 instance Monoid LlvmAccum where
1939 mempty = LlvmAccum nilOL []
1940 mappend = (Semigroup.<>)
1941
1942 liftExprData :: LlvmM ExprData -> WriterT LlvmAccum LlvmM LlvmVar
1943 liftExprData action = do
1944 (var, stmts, decls) <- lift action
1945 tell $ LlvmAccum stmts decls
1946 return var
1947
1948 statement :: LlvmStatement -> WriterT LlvmAccum LlvmM ()
1949 statement stmt = tell $ LlvmAccum (unitOL stmt) []
1950
1951 doExprW :: LlvmType -> LlvmExpression -> WriterT LlvmAccum LlvmM LlvmVar
1952 doExprW a b = do
1953 (var, stmt) <- lift $ doExpr a b
1954 statement stmt
1955 return var
1956
1957 exprToVarW :: CmmExpr -> WriterT LlvmAccum LlvmM LlvmVar
1958 exprToVarW = liftExprData . exprToVar
1959
1960 runExprData :: WriterT LlvmAccum LlvmM LlvmVar -> LlvmM ExprData
1961 runExprData action = do
1962 (var, LlvmAccum stmts decls) <- runWriterT action
1963 return (var, stmts, decls)
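-- As an illustrative usage sketch (mirroring binLlvmOp above), a two-operand
-- machine op can be assembled as:
--
--   runExprData $ do
--     vx <- exprToVarW x
--     vy <- exprToVarW y
--     doExprW (getVarType vx) (LlvmOp LM_MO_Add vx vy)
--
-- yielding the result variable together with the accumulated statements and
-- declarations.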
1964
1965 runStmtsDecls :: WriterT LlvmAccum LlvmM () -> LlvmM (LlvmStatements, [LlvmCmmDecl])
1966 runStmtsDecls action = do
1967 LlvmAccum stmts decls <- execWriterT action
1968 return (stmts, decls)
1969
1970 getCmmRegW :: CmmReg -> WriterT LlvmAccum LlvmM LlvmVar
1971 getCmmRegW = lift . getCmmReg
1972
1973 genLoadW :: Atomic -> CmmExpr -> CmmType -> WriterT LlvmAccum LlvmM LlvmVar
1974 genLoadW atomic e ty = liftExprData $ genLoad atomic e ty
1975
1976 doTrashStmts :: WriterT LlvmAccum LlvmM ()
1977 doTrashStmts = do
1978 stmts <- lift getTrashStmts
1979 tell $ LlvmAccum stmts mempty