1 {-# LANGUAGE CPP, GADTs #-}
2 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
3 -- ----------------------------------------------------------------------------
4 -- | Handle conversion of CmmProc to LLVM code.
5 --
6 module LlvmCodeGen.CodeGen ( genLlvmProc ) where
7
8 #include "HsVersions.h"
9
10 import GhcPrelude
11
12 import Llvm
13 import LlvmCodeGen.Base
14 import LlvmCodeGen.Regs
15
16 import BlockId
17 import GHC.Platform.Regs ( activeStgRegs, callerSaves )
18 import CLabel
19 import Cmm
20 import PprCmm
21 import CmmUtils
22 import CmmSwitch
23 import Hoopl.Block
24 import Hoopl.Graph
25 import Hoopl.Collections
26
27 import DynFlags
28 import FastString
29 import ForeignCall
30 import Outputable hiding (panic, pprPanic)
31 import qualified Outputable
32 import GHC.Platform
33 import OrdList
34 import UniqSupply
35 import Unique
36 import Util
37
38 import Control.Monad.Trans.Class
39 import Control.Monad.Trans.Writer
40
41 import qualified Data.Semigroup as Semigroup
42 import Data.List ( nub )
43 import Data.Maybe ( catMaybes )
44
45 type Atomic = Bool
46 type LlvmStatements = OrdList LlvmStatement
47
48 data Signage = Signed | Unsigned deriving (Eq, Show)
49
50 -- -----------------------------------------------------------------------------
51 -- | Top-level of the LLVM proc code generator
52 --
53 genLlvmProc :: RawCmmDecl -> LlvmM [LlvmCmmDecl]
54 genLlvmProc (CmmProc infos lbl live graph) = do
55 let blocks = toBlockListEntryFirstFalseFallthrough graph
56 (lmblocks, lmdata) <- basicBlocksCodeGen live blocks
57 let info = mapLookup (g_entry graph) infos
58 proc = CmmProc info lbl live (ListGraph lmblocks)
59 return (proc:lmdata)
60
61 genLlvmProc _ = panic "genLlvmProc: case that shouldn't reach here!"
62
63 -- -----------------------------------------------------------------------------
64 -- * Block code generation
65 --
66
67 -- | Generate code for a list of blocks that make up a complete
68 -- procedure. The first block in the list is expected to be the entry
69 -- point and will get the prologue.
70 basicBlocksCodeGen :: LiveGlobalRegs -> [CmmBlock]
71 -> LlvmM ([LlvmBasicBlock], [LlvmCmmDecl])
72 basicBlocksCodeGen _ [] = panic "no entry block!"
73 basicBlocksCodeGen live (entryBlock:cmmBlocks)
74 = do (prologue, prologueTops) <- funPrologue live (entryBlock:cmmBlocks)
75
76 -- Generate code
77 (BasicBlock bid entry, entryTops) <- basicBlockCodeGen entryBlock
78 (blocks, topss) <- fmap unzip $ mapM basicBlockCodeGen cmmBlocks
79
80 -- Compose
81 let entryBlock = BasicBlock bid (fromOL prologue ++ entry)
82 return (entryBlock : blocks, prologueTops ++ entryTops ++ concat topss)
83
84
85 -- | Generate code for one block
86 basicBlockCodeGen :: CmmBlock -> LlvmM ( LlvmBasicBlock, [LlvmCmmDecl] )
87 basicBlockCodeGen block
88 = do let (_, nodes, tail) = blockSplit block
89 id = entryLabel block
90 (mid_instrs, top) <- stmtsToInstrs $ blockToList nodes
91 (tail_instrs, top') <- stmtToInstrs tail
92 let instrs = fromOL (mid_instrs `appOL` tail_instrs)
93 return (BasicBlock id instrs, top' ++ top)
94
95 -- -----------------------------------------------------------------------------
96 -- * CmmNode code generation
97 --
98
99 -- Data returned from converting a statement:
100 -- * LlvmStatements: The compiled LLVM statements.
101 -- * LlvmCmmDecl: Any global data needed.
102 type StmtData = (LlvmStatements, [LlvmCmmDecl])
103
104
105 -- | Convert a list of CmmNode's to LlvmStatement's
106 stmtsToInstrs :: [CmmNode e x] -> LlvmM StmtData
107 stmtsToInstrs stmts
108 = do (instrss, topss) <- fmap unzip $ mapM stmtToInstrs stmts
109 return (concatOL instrss, concat topss)
110
111
112 -- | Convert a CmmNode to a list of LlvmStatement's
113 stmtToInstrs :: CmmNode e x -> LlvmM StmtData
114 stmtToInstrs stmt = case stmt of
115
116 CmmComment _ -> return (nilOL, []) -- nuke comments
117 CmmTick _ -> return (nilOL, [])
118 CmmUnwind {} -> return (nilOL, [])
119
120 CmmAssign reg src -> genAssign reg src
121 CmmStore addr src -> genStore addr src
122
123 CmmBranch id -> genBranch id
124 CmmCondBranch arg true false likely
125 -> genCondBranch arg true false likely
126 CmmSwitch arg ids -> genSwitch arg ids
127
128 -- Foreign Call
129 CmmUnsafeForeignCall target res args
130 -> genCall target res args
131
132 -- Tail call
133 CmmCall { cml_target = arg,
134 cml_args_regs = live } -> genJump arg live
135
136 _ -> panic "Llvm.CodeGen.stmtToInstrs"
137
138 -- | Wrapper function to declare an intrinsic function by its function type
139 getInstrinct2 :: LMString -> LlvmType -> LlvmM ExprData
140 getInstrinct2 fname fty@(LMFunction funSig) = do
141
142 let fv = LMGlobalVar fname fty (funcLinkage funSig) Nothing Nothing Constant
143
144 fn <- funLookup fname
145 tops <- case fn of
146 Just _ ->
147 return []
148 Nothing -> do
149 funInsert fname fty
150 un <- getUniqueM
151 let lbl = mkAsmTempLabel un
152 return [CmmData (Section Data lbl) [([],[fty])]]
153
154 return (fv, nilOL, tops)
155
156 getInstrinct2 _ _ = error "getInstrinct2: Non-function type!"
157
158 -- | Declares an intrinsic function by return and parameter types
159 getInstrinct :: LMString -> LlvmType -> [LlvmType] -> LlvmM ExprData
160 getInstrinct fname retTy parTys =
161 let funSig = LlvmFunctionDecl fname ExternallyVisible CC_Ccc retTy
162 FixedArgs (tysToParams parTys) Nothing
163 fty = LMFunction funSig
164 in getInstrinct2 fname fty
165
166 -- | Memory barrier instruction for LLVM >= 3.0
167 barrier :: LlvmM StmtData
168 barrier = do
169 let s = Fence False SyncSeqCst
170 return (unitOL s, [])
171
172 -- | Insert a 'barrier', unless the target platform is in the provided list of
173 -- exceptions, in which case no code is emitted.
174 barrierUnless :: [Arch] -> LlvmM StmtData
175 barrierUnless exs = do
176 platform <- getLlvmPlatform
177 if platformArch platform `elem` exs
178 then return (nilOL, [])
179 else barrier
180
181 -- | Foreign Calls
182 genCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
183 -> LlvmM StmtData
184
185 -- Barriers need to be handled specially as they are implemented as LLVM
186 -- intrinsic functions.
187 genCall (PrimTarget MO_ReadBarrier) _ _ =
188 barrierUnless [ArchX86, ArchX86_64, ArchSPARC]
189 genCall (PrimTarget MO_WriteBarrier) _ _ =
190 barrierUnless [ArchX86, ArchX86_64, ArchSPARC]
191
192 genCall (PrimTarget MO_Touch) _ _
193 = return (nilOL, [])
194
195 genCall (PrimTarget (MO_UF_Conv w)) [dst] [e] = runStmtsDecls $ do
196 dstV <- getCmmRegW (CmmLocal dst)
197 let ty = cmmToLlvmType $ localRegType dst
198 width = widthToLlvmFloat w
199 castV <- lift $ mkLocalVar ty
200 ve <- exprToVarW e
201 statement $ Assignment castV $ Cast LM_Uitofp ve width
202 statement $ Store castV dstV
203
204 genCall (PrimTarget (MO_UF_Conv _)) [_] args =
205 panic $ "genCall: Too many arguments to MO_UF_Conv. " ++
206 "Can only handle 1, given" ++ show (length args) ++ "."
207
208 -- Handle prefetching data
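-- A rough sketch (illustrative only; register names are hypothetical) of the
-- call shape this produces: the prefetch address plus three constant
-- arguments (read/write flag 0, the locality level, and cache selector 1):
--
--   call void @llvm.prefetch(i8* %addr, i32 0, i32 <locality>, i32 1)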
209 genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
210 | 0 <= localityInt && localityInt <= 3 = runStmtsDecls $ do
211 let argTy = [i8Ptr, i32, i32, i32]
212 funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
213 CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing
214
215 let (_, arg_hints) = foreignTargetHints t
216 let args_hints' = zip args arg_hints
217 argVars <- arg_varsW args_hints' ([], nilOL, [])
218 fptr <- liftExprData $ getFunPtr funTy t
219 argVars' <- castVarsW Signed $ zip argVars argTy
220
221 doTrashStmts
222 let argSuffix = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1]
223 statement $ Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
224 | otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
225
226 -- Handle PopCnt, Clz, Ctz, BSwap and similar ops, which only need their
227 -- argument and return types converted
228 genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
229 genCallSimpleCast w t dsts args
230
231 genCall t@(PrimTarget (MO_Pdep w)) dsts args =
232 genCallSimpleCast2 w t dsts args
233 genCall t@(PrimTarget (MO_Pext w)) dsts args =
234 genCallSimpleCast2 w t dsts args
235 genCall t@(PrimTarget (MO_Clz w)) dsts args =
236 genCallSimpleCast w t dsts args
237 genCall t@(PrimTarget (MO_Ctz w)) dsts args =
238 genCallSimpleCast w t dsts args
239 genCall t@(PrimTarget (MO_BSwap w)) dsts args =
240 genCallSimpleCast w t dsts args
241 genCall t@(PrimTarget (MO_BRev w)) dsts args =
242 genCallSimpleCast w t dsts args
243
244 genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $ do
245 addrVar <- exprToVarW addr
246 nVar <- exprToVarW n
247 let targetTy = widthToLlvmInt width
248 ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
249 ptrVar <- doExprW (pLift targetTy) ptrExpr
250 dstVar <- getCmmRegW (CmmLocal dst)
251 let op = case amop of
252 AMO_Add -> LAO_Add
253 AMO_Sub -> LAO_Sub
254 AMO_And -> LAO_And
255 AMO_Nand -> LAO_Nand
256 AMO_Or -> LAO_Or
257 AMO_Xor -> LAO_Xor
258 retVar <- doExprW targetTy $ AtomicRMW op ptrVar nVar SyncSeqCst
259 statement $ Store retVar dstVar
260
261 genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = runStmtsDecls $ do
262 dstV <- getCmmRegW (CmmLocal dst)
263 v1 <- genLoadW True addr (localRegType dst)
264 statement $ Store v1 dstV
265
266 genCall (PrimTarget (MO_Cmpxchg _width))
267 [dst] [addr, old, new] = runStmtsDecls $ do
268 addrVar <- exprToVarW addr
269 oldVar <- exprToVarW old
270 newVar <- exprToVarW new
271 let targetTy = getVarType oldVar
272 ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
273 ptrVar <- doExprW (pLift targetTy) ptrExpr
274 dstVar <- getCmmRegW (CmmLocal dst)
275 retVar <- doExprW (LMStructU [targetTy,i1])
276 $ CmpXChg ptrVar oldVar newVar SyncSeqCst SyncSeqCst
277 retVar' <- doExprW targetTy $ ExtractV retVar 0
278 statement $ Store retVar' dstVar
279
280 genCall (PrimTarget (MO_AtomicWrite _width)) [] [addr, val] = runStmtsDecls $ do
281 addrVar <- exprToVarW addr
282 valVar <- exprToVarW val
283 let ptrTy = pLift $ getVarType valVar
284 ptrExpr = Cast LM_Inttoptr addrVar ptrTy
285 ptrVar <- doExprW ptrTy ptrExpr
286 statement $ Expr $ AtomicRMW LAO_Xchg ptrVar valVar SyncSeqCst
287
288 -- Handle memcpy function specifically since llvm's intrinsic version takes
289 -- some extra parameters.
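-- A rough sketch (illustrative only; the exact intrinsic suffix and names
-- depend on the word size) of an MO_Memcpy call with the extra alignment and
-- volatility arguments appended:
--
--   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n,
--                                        i32 <align>, i1 false)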
290 genCall t@(PrimTarget op) [] args
291 | Just align <- machOpMemcpyishAlign op = runStmtsDecls $ do
292 dflags <- getDynFlags
293 let isVolTy = [i1]
294 isVolVal = [mkIntLit i1 0]
295 argTy | MO_Memset _ <- op = [i8Ptr, i8, llvmWord dflags, i32] ++ isVolTy
296 | otherwise = [i8Ptr, i8Ptr, llvmWord dflags, i32] ++ isVolTy
297 funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
298 CC_Ccc LMVoid FixedArgs (tysToParams argTy) Nothing
299
300 let (_, arg_hints) = foreignTargetHints t
301 let args_hints = zip args arg_hints
302 argVars <- arg_varsW args_hints ([], nilOL, [])
303 fptr <- getFunPtrW funTy t
304 argVars' <- castVarsW Signed $ zip argVars argTy
305
306 doTrashStmts
307 let alignVal = mkIntLit i32 align
308 arguments = argVars' ++ (alignVal:isVolVal)
309 statement $ Expr $ Call StdCall fptr arguments []
310
311 -- We handle MO_U_Mul2 by simply using a 'mul' instruction, but with operands
312 -- twice the width (we first zero-extend them); e.g. on a 64-bit arch we
313 -- generate a 'mul' on 128-bit operands. Then we only need some plumbing to
314 -- extract the two 64-bit values out of the 128-bit result.
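-- A rough sketch (illustrative only) of the code this produces on a 64-bit
-- target:
--
--   %lx = zext i64 %lhs to i128
--   %rx = zext i64 %rhs to i128
--   %r  = mul i128 %lx, %rx
--   %lo = trunc i128 %r to i64
--   %rs = lshr i128 %r, 64
--   %hi = trunc i128 %rs to i64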
315 genCall (PrimTarget (MO_U_Mul2 w)) [dstH, dstL] [lhs, rhs] = runStmtsDecls $ do
316 let width = widthToLlvmInt w
317 bitWidth = widthInBits w
318 width2x = LMInt (bitWidth * 2)
319 -- First zero-extend the operands ('mul' instruction requires the operands
320 -- and the result to be of the same type). Note that we don't use 'castVars'
321 -- because it tries to do LM_Sext.
322 lhsVar <- exprToVarW lhs
323 rhsVar <- exprToVarW rhs
324 lhsExt <- doExprW width2x $ Cast LM_Zext lhsVar width2x
325 rhsExt <- doExprW width2x $ Cast LM_Zext rhsVar width2x
326 -- Do the actual multiplication (note that the result is also 2x width).
327 retV <- doExprW width2x $ LlvmOp LM_MO_Mul lhsExt rhsExt
328 -- Extract the lower bits of the result into retL.
329 retL <- doExprW width $ Cast LM_Trunc retV width
330 -- Now we right-shift the higher bits by width.
331 let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
332 retShifted <- doExprW width2x $ LlvmOp LM_MO_LShr retV widthLlvmLit
333 -- And extract them into retH.
334 retH <- doExprW width $ Cast LM_Trunc retShifted width
335 dstRegL <- getCmmRegW (CmmLocal dstL)
336 dstRegH <- getCmmRegW (CmmLocal dstH)
337 statement $ Store retL dstRegL
338 statement $ Store retH dstRegH
339
340 -- MO_U_QuotRem2 is another case we handle by widening the registers to double
341 -- the width and using normal LLVM instructions (similarly to MO_U_Mul2). The
342 -- main difference here is that we need to combine two words into one register
343 -- and then use both 'udiv' and 'urem' instructions to compute the result.
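-- A rough sketch (illustrative only) for a 64-bit target:
--
--   %hx = zext i64 %lhsH to i128
--   %lx = zext i64 %lhsL to i128
--   %dx = zext i64 %rhs  to i128
--   %hs = shl i128 %hx, 64
--   %n  = or i128 %hs, %lx
--   %qx = udiv i128 %n, %dx
--   %rx = urem i128 %n, %dx
--   %q  = trunc i128 %qx to i64
--   %r  = trunc i128 %rx to i64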
344 genCall (PrimTarget (MO_U_QuotRem2 w))
345 [dstQ, dstR] [lhsH, lhsL, rhs] = runStmtsDecls $ do
346 let width = widthToLlvmInt w
347 bitWidth = widthInBits w
348 width2x = LMInt (bitWidth * 2)
349 -- First zero-extend all parameters to double width.
350 let zeroExtend expr = do
351 var <- exprToVarW expr
352 doExprW width2x $ Cast LM_Zext var width2x
353 lhsExtH <- zeroExtend lhsH
354 lhsExtL <- zeroExtend lhsL
355 rhsExt <- zeroExtend rhs
356 -- Now we combine the first two parameters (that represent the high and low
357 -- bits of the value). So first left-shift the high bits to their position
358 -- and then bit-or them with the low bits.
359 let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
360 lhsExtHShifted <- doExprW width2x $ LlvmOp LM_MO_Shl lhsExtH widthLlvmLit
361 lhsExt <- doExprW width2x $ LlvmOp LM_MO_Or lhsExtHShifted lhsExtL
362 -- Finally, we can call 'udiv' and 'urem' to compute the results.
363 retExtDiv <- doExprW width2x $ LlvmOp LM_MO_UDiv lhsExt rhsExt
364 retExtRem <- doExprW width2x $ LlvmOp LM_MO_URem lhsExt rhsExt
365 -- And since everything is in 2x width, we need to truncate the results and
366 -- then return them.
367 let narrow var = doExprW width $ Cast LM_Trunc var width
368 retDiv <- narrow retExtDiv
369 retRem <- narrow retExtRem
370 dstRegQ <- lift $ getCmmReg (CmmLocal dstQ)
371 dstRegR <- lift $ getCmmReg (CmmLocal dstR)
372 statement $ Store retDiv dstRegQ
373 statement $ Store retRem dstRegR
374
375 -- Handle the MO_{Add,Sub}IntC separately. The LLVM intrinsics return a struct
376 -- from which we need to extract the actual values.
377 genCall t@(PrimTarget (MO_AddIntC w)) [dstV, dstO] [lhs, rhs] =
378 genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
379 genCall t@(PrimTarget (MO_SubIntC w)) [dstV, dstO] [lhs, rhs] =
380 genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
381
382 -- Similar to MO_{Add,Sub}IntC, but MO_Add2 expects the first element of the
383 -- return tuple to be the overflow bit and the second element to contain the
384 -- actual result of the addition. So we still use genCallWithOverflow but swap
385 -- the return registers.
386 genCall t@(PrimTarget (MO_Add2 w)) [dstO, dstV] [lhs, rhs] =
387 genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
388
389 genCall t@(PrimTarget (MO_AddWordC w)) [dstV, dstO] [lhs, rhs] =
390 genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
391
392 genCall t@(PrimTarget (MO_SubWordC w)) [dstV, dstO] [lhs, rhs] =
393 genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
394
395 -- Handle all other foreign calls and prim ops.
396 genCall target res args = runStmtsDecls $ do
397 dflags <- getDynFlags
398
399 -- parameter types
400 let arg_type (_, AddrHint) = i8Ptr
401 -- cast pointers to i8*. Llvm equivalent of void*
402 arg_type (expr, _) = cmmToLlvmType $ cmmExprType dflags expr
403
404 -- ret type
405 let ret_type [] = LMVoid
406 ret_type [(_, AddrHint)] = i8Ptr
407 ret_type [(reg, _)] = cmmToLlvmType $ localRegType reg
408 ret_type t = panic $ "genCall: Too many return values! Can only handle"
409 ++ " 0 or 1, given " ++ show (length t) ++ "."
410
411 -- extract Cmm call convention, and translate to LLVM call convention
412 platform <- lift $ getLlvmPlatform
413 let lmconv = case target of
414 ForeignTarget _ (ForeignConvention conv _ _ _) ->
415 case conv of
416 StdCallConv -> case platformArch platform of
417 ArchX86 -> CC_X86_Stdcc
418 ArchX86_64 -> CC_X86_Stdcc
419 _ -> CC_Ccc
420 CCallConv -> CC_Ccc
421 CApiConv -> CC_Ccc
422 PrimCallConv -> panic "LlvmCodeGen.CodeGen.genCall: PrimCallConv"
423 JavaScriptCallConv -> panic "LlvmCodeGen.CodeGen.genCall: JavaScriptCallConv"
424
425 PrimTarget _ -> CC_Ccc
426
427 {-
428 CC_Ccc of the possibilities here are a worry with the use of a custom
429 calling convention for passing STG args. In practice the more
430 dangerous combinations (e.g StdCall + llvmGhcCC) don't occur.
431
432 The native code generator only handles StdCall and CCallConv.
433 -}
434
435 -- call attributes
436 let fnAttrs | never_returns = NoReturn : llvmStdFunAttrs
437 | otherwise = llvmStdFunAttrs
438
439 never_returns = case target of
440 ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns) -> True
441 _ -> False
442
443 -- fun type
444 let (res_hints, arg_hints) = foreignTargetHints target
445 let args_hints = zip args arg_hints
446 let ress_hints = zip res res_hints
447 let ccTy = StdCall -- tail calls should be done through CmmJump
448 let retTy = ret_type ress_hints
449 let argTy = tysToParams $ map arg_type args_hints
450 let funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
451 lmconv retTy FixedArgs argTy (llvmFunAlign dflags)
452
453
454 argVars <- arg_varsW args_hints ([], nilOL, [])
455 fptr <- getFunPtrW funTy target
456
457 let doReturn | ccTy == TailCall = statement $ Return Nothing
458 | never_returns = statement $ Unreachable
459 | otherwise = return ()
460
461 doTrashStmts
462
463 -- make the actual call
464 case retTy of
465 LMVoid -> do
466 statement $ Expr $ Call ccTy fptr argVars fnAttrs
467
468 _ -> do
469 v1 <- doExprW retTy $ Call ccTy fptr argVars fnAttrs
470 -- get the return register
471 let ret_reg [reg] = reg
472 ret_reg t = panic $ "genCall: Bad number of registers! Can only handle"
473 ++ " 1, given " ++ show (length t) ++ "."
474 let creg = ret_reg res
475 vreg <- getCmmRegW (CmmLocal creg)
476 if retTy == pLower (getVarType vreg)
477 then do
478 statement $ Store v1 vreg
479 doReturn
480 else do
481 let ty = pLower $ getVarType vreg
482 let op = case ty of
483 vt | isPointer vt -> LM_Bitcast
484 | isInt vt -> LM_Ptrtoint
485 | otherwise ->
486 panic $ "genCall: CmmReg bad match for"
487 ++ " returned type!"
488
489 v2 <- doExprW ty $ Cast op v1 ty
490 statement $ Store v2 vreg
491 doReturn
492
493 -- | Generate a call to an LLVM intrinsic that performs an arithmetic operation
494 -- and reports overflow (i.e. it returns a struct containing the actual result
495 -- of the operation and an overflow bit). This function also extracts the
496 -- overflow bit and zero-extends it (all the corresponding Cmm PrimOps
497 -- represent the overflow "bit" as an ordinary Int# or Word#).
498 genCallWithOverflow
499 :: ForeignTarget -> Width -> [CmmFormal] -> [CmmActual] -> LlvmM StmtData
500 genCallWithOverflow t@(PrimTarget op) w [dstV, dstO] [lhs, rhs] = do
501 -- So far this has only been tested for the following CallishMachOps.
502 let valid = op `elem` [ MO_Add2 w
503 , MO_AddIntC w
504 , MO_SubIntC w
505 , MO_AddWordC w
506 , MO_SubWordC w
507 ]
508 MASSERT(valid)
509 let width = widthToLlvmInt w
510 -- This will do most of the work of generating the call to the intrinsic and
511 -- extracting the values from the struct.
512 (value, overflowBit, (stmts, top)) <-
513 genCallExtract t w (lhs, rhs) (width, i1)
514 -- value is i<width>, but overflowBit is i1, so we need to cast (Cmm expects
515 -- both to be i<width>)
516 (overflow, zext) <- doExpr width $ Cast LM_Zext overflowBit width
517 dstRegV <- getCmmReg (CmmLocal dstV)
518 dstRegO <- getCmmReg (CmmLocal dstO)
519 let storeV = Store value dstRegV
520 storeO = Store overflow dstRegO
521 return (stmts `snocOL` zext `snocOL` storeV `snocOL` storeO, top)
522 genCallWithOverflow _ _ _ _ =
523 panic "genCallExtract: wrong ForeignTarget or number of arguments"
524
525 -- | A helper function for genCallWithOverflow that handles generating the call
526 -- to the LLVM intrinsic and extracting the result from the struct to LlvmVars.
527 genCallExtract
528 :: ForeignTarget -- ^ PrimOp
529 -> Width -- ^ Width of the operands.
530 -> (CmmActual, CmmActual) -- ^ Actual arguments.
531 -> (LlvmType, LlvmType) -- ^ LLVM types of the returned struct.
532 -> LlvmM (LlvmVar, LlvmVar, StmtData)
533 genCallExtract target@(PrimTarget op) w (argA, argB) (llvmTypeA, llvmTypeB) = do
534 let width = widthToLlvmInt w
535 argTy = [width, width]
536 retTy = LMStructU [llvmTypeA, llvmTypeB]
537
538 -- Process the arguments.
539 let args_hints = zip [argA, argB] (snd $ foreignTargetHints target)
540 (argsV1, args1, top1) <- arg_vars args_hints ([], nilOL, [])
541 (argsV2, args2) <- castVars Signed $ zip argsV1 argTy
542
543 -- Get the function and make the call.
544 fname <- cmmPrimOpFunctions op
545 (fptr, _, top2) <- getInstrinct fname retTy argTy
546 -- We use StdCall for primops. See also the last case of genCall.
547 (retV, call) <- doExpr retTy $ Call StdCall fptr argsV2 []
548
549 -- This results in a two-element struct; we need to use "extractvalue"
550 -- to get the fields out of it.
551 (res1, ext1) <- doExpr llvmTypeA (ExtractV retV 0)
552 (res2, ext2) <- doExpr llvmTypeB (ExtractV retV 1)
553
554 let stmts = args1 `appOL` args2 `snocOL` call `snocOL` ext1 `snocOL` ext2
555 tops = top1 ++ top2
556 return (res1, res2, (stmts, tops))
557
558 genCallExtract _ _ _ _ =
559 panic "genCallExtract: unsupported ForeignTarget"
560
561 -- Handle a simple function call that only needs simple type casting, of the form:
562 -- truncate arg >>= \a -> call(a) >>= zext
563 --
564 -- GHC only really has i32 and i64 types; things like Word8 are backed by an
565 -- i32 and just present a logical i8 range. So we must handle conversions from
566 -- i32 to i8 explicitly, as LLVM is strict about types.
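-- A rough sketch (illustrative only) for an 8-bit popcount on a 64-bit
-- target:
--
--   %a8 = trunc i64 %arg to i8
--   %c8 = call i8 @llvm.ctpop.i8(i8 %a8)
--   %c  = sext i8 %c8 to i64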
567 genCallSimpleCast :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
568 -> LlvmM StmtData
569 genCallSimpleCast w t@(PrimTarget op) [dst] args = do
570 let width = widthToLlvmInt w
571 dstTy = cmmToLlvmType $ localRegType dst
572
573 fname <- cmmPrimOpFunctions op
574 (fptr, _, top3) <- getInstrinct fname width [width]
575
576 dstV <- getCmmReg (CmmLocal dst)
577
578 let (_, arg_hints) = foreignTargetHints t
579 let args_hints = zip args arg_hints
580 (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
581 (argsV', stmts4) <- castVars Signed $ zip argsV [width]
582 (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
583 (retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
584 let retV' = singletonPanic "genCallSimpleCast" retVs'
585 let s2 = Store retV' dstV
586
587 let stmts = stmts2 `appOL` stmts4 `snocOL`
588 s1 `appOL` stmts5 `snocOL` s2
589 return (stmts, top2 ++ top3)
590 genCallSimpleCast _ _ dsts _ =
591 panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts")
592
593 -- Handle a simple function call that only needs simple type casting, of the form:
594 -- truncate arg >>= \a -> call(a) >>= zext
595 --
596 -- GHC only really has i32 and i64 types; things like Word8 are backed by an
597 -- i32 and just present a logical i8 range. So we must handle conversions from
598 -- i32 to i8 explicitly, as LLVM is strict about types.
599 genCallSimpleCast2 :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
600 -> LlvmM StmtData
601 genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do
602 let width = widthToLlvmInt w
603 dstTy = cmmToLlvmType $ localRegType dst
604
605 fname <- cmmPrimOpFunctions op
606 (fptr, _, top3) <- getInstrinct fname width (const width <$> args)
607
608 dstV <- getCmmReg (CmmLocal dst)
609
610 let (_, arg_hints) = foreignTargetHints t
611 let args_hints = zip args arg_hints
612 (argsV, stmts2, top2) <- arg_vars args_hints ([], nilOL, [])
613 (argsV', stmts4) <- castVars Signed $ zip argsV (const width <$> argsV)
614 (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
615 (retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
616 let retV' = singletonPanic "genCallSimpleCast2" retVs'
617 let s2 = Store retV' dstV
618
619 let stmts = stmts2 `appOL` stmts4 `snocOL`
620 s1 `appOL` stmts5 `snocOL` s2
621 return (stmts, top2 ++ top3)
622 genCallSimpleCast2 _ _ dsts _ =
623 panic ("genCallSimpleCast2: " ++ show (length dsts) ++ " dsts")
624
625 -- | Create a function pointer from a target.
626 getFunPtrW :: (LMString -> LlvmType) -> ForeignTarget
627 -> WriterT LlvmAccum LlvmM LlvmVar
628 getFunPtrW funTy targ = liftExprData $ getFunPtr funTy targ
629
630 -- | Create a function pointer from a target.
631 getFunPtr :: (LMString -> LlvmType) -> ForeignTarget
632 -> LlvmM ExprData
633 getFunPtr funTy targ = case targ of
634 ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
635 name <- strCLabel_llvm lbl
636 getHsFunc' name (funTy name)
637
638 ForeignTarget expr _ -> do
639 (v1, stmts, top) <- exprToVar expr
640 dflags <- getDynFlags
641 let fty = funTy $ fsLit "dynamic"
642 cast = case getVarType v1 of
643 ty | isPointer ty -> LM_Bitcast
644 ty | isInt ty -> LM_Inttoptr
645
646 ty -> panic $ "genCall: Expr is of bad type for function"
647 ++ " call! (" ++ showSDoc dflags (ppr ty) ++ ")"
648
649 (v2,s1) <- doExpr (pLift fty) $ Cast cast v1 (pLift fty)
650 return (v2, stmts `snocOL` s1, top)
651
652 PrimTarget mop -> do
653 name <- cmmPrimOpFunctions mop
654 let fty = funTy name
655 getInstrinct2 name fty
656
657 -- | Conversion of call arguments.
658 arg_varsW :: [(CmmActual, ForeignHint)]
659 -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
660 -> WriterT LlvmAccum LlvmM [LlvmVar]
661 arg_varsW xs ys = do
662 (vars, stmts, decls) <- lift $ arg_vars xs ys
663 tell $ LlvmAccum stmts decls
664 return vars
665
666 -- | Conversion of call arguments.
667 arg_vars :: [(CmmActual, ForeignHint)]
668 -> ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
669 -> LlvmM ([LlvmVar], LlvmStatements, [LlvmCmmDecl])
670
671 arg_vars [] (vars, stmts, tops)
672 = return (vars, stmts, tops)
673
674 arg_vars ((e, AddrHint):rest) (vars, stmts, tops)
675 = do (v1, stmts', top') <- exprToVar e
676 dflags <- getDynFlags
677 let op = case getVarType v1 of
678 ty | isPointer ty -> LM_Bitcast
679 ty | isInt ty -> LM_Inttoptr
680
681 a -> panic $ "genCall: Can't cast llvmType to i8*! ("
682 ++ showSDoc dflags (ppr a) ++ ")"
683
684 (v2, s1) <- doExpr i8Ptr $ Cast op v1 i8Ptr
685 arg_vars rest (vars ++ [v2], stmts `appOL` stmts' `snocOL` s1,
686 tops ++ top')
687
688 arg_vars ((e, _):rest) (vars, stmts, tops)
689 = do (v1, stmts', top') <- exprToVar e
690 arg_vars rest (vars ++ [v1], stmts `appOL` stmts', tops ++ top')
691
692
693 -- | Cast a collection of LLVM variables to specific types.
694 castVarsW :: Signage
695 -> [(LlvmVar, LlvmType)]
696 -> WriterT LlvmAccum LlvmM [LlvmVar]
697 castVarsW signage vars = do
698 (vars, stmts) <- lift $ castVars signage vars
699 tell $ LlvmAccum stmts mempty
700 return vars
701
702 -- | Cast a collection of LLVM variables to specific types.
703 castVars :: Signage -> [(LlvmVar, LlvmType)]
704 -> LlvmM ([LlvmVar], LlvmStatements)
705 castVars signage vars = do
706 done <- mapM (uncurry (castVar signage)) vars
707 let (vars', stmts) = unzip done
708 return (vars', toOL stmts)
709
710 -- | Cast an LLVM variable to a specific type, panicking if it can't be done.
711 castVar :: Signage -> LlvmVar -> LlvmType -> LlvmM (LlvmVar, LlvmStatement)
712 castVar signage v t | getVarType v == t
713 = return (v, Nop)
714
715 | otherwise
716 = do dflags <- getDynFlags
717 let op = case (getVarType v, t) of
718 (LMInt n, LMInt m)
719 -> if n < m then extend else LM_Trunc
720 (vt, _) | isFloat vt && isFloat t
721 -> if llvmWidthInBits dflags vt < llvmWidthInBits dflags t
722 then LM_Fpext else LM_Fptrunc
723 (vt, _) | isInt vt && isFloat t -> LM_Sitofp
724 (vt, _) | isFloat vt && isInt t -> LM_Fptosi
725 (vt, _) | isInt vt && isPointer t -> LM_Inttoptr
726 (vt, _) | isPointer vt && isInt t -> LM_Ptrtoint
727 (vt, _) | isPointer vt && isPointer t -> LM_Bitcast
728 (vt, _) | isVector vt && isVector t -> LM_Bitcast
729
730 (vt, _) -> panic $ "castVars: Can't cast this type ("
731 ++ showSDoc dflags (ppr vt) ++ ") to (" ++ showSDoc dflags (ppr t) ++ ")"
732 doExpr t $ Cast op v t
733 where extend = case signage of
734 Signed -> LM_Sext
735 Unsigned -> LM_Zext
736
737
738 cmmPrimOpRetValSignage :: CallishMachOp -> Signage
739 cmmPrimOpRetValSignage mop = case mop of
740 MO_Pdep _ -> Unsigned
741 MO_Pext _ -> Unsigned
742 _ -> Signed
743
744 -- | Decide what C function to use to implement a CallishMachOp
745 cmmPrimOpFunctions :: CallishMachOp -> LlvmM LMString
746 cmmPrimOpFunctions mop = do
747
748 dflags <- getDynFlags
749 let intrinTy1 = "p0i8.p0i8." ++ showSDoc dflags (ppr $ llvmWord dflags)
750 intrinTy2 = "p0i8." ++ showSDoc dflags (ppr $ llvmWord dflags)
751 unsupported = panic ("cmmPrimOpFunctions: " ++ show mop
752 ++ " not supported here")
753
754 return $ case mop of
755 MO_F32_Exp -> fsLit "expf"
756 MO_F32_ExpM1 -> fsLit "expm1f"
757 MO_F32_Log -> fsLit "logf"
758 MO_F32_Log1P -> fsLit "log1pf"
759 MO_F32_Sqrt -> fsLit "llvm.sqrt.f32"
760 MO_F32_Fabs -> fsLit "llvm.fabs.f32"
761 MO_F32_Pwr -> fsLit "llvm.pow.f32"
762
763 MO_F32_Sin -> fsLit "llvm.sin.f32"
764 MO_F32_Cos -> fsLit "llvm.cos.f32"
765 MO_F32_Tan -> fsLit "tanf"
766
767 MO_F32_Asin -> fsLit "asinf"
768 MO_F32_Acos -> fsLit "acosf"
769 MO_F32_Atan -> fsLit "atanf"
770
771 MO_F32_Sinh -> fsLit "sinhf"
772 MO_F32_Cosh -> fsLit "coshf"
773 MO_F32_Tanh -> fsLit "tanhf"
774
775 MO_F32_Asinh -> fsLit "asinhf"
776 MO_F32_Acosh -> fsLit "acoshf"
777 MO_F32_Atanh -> fsLit "atanhf"
778
779 MO_F64_Exp -> fsLit "exp"
780 MO_F64_ExpM1 -> fsLit "expm1"
781 MO_F64_Log -> fsLit "log"
782 MO_F64_Log1P -> fsLit "log1p"
783 MO_F64_Sqrt -> fsLit "llvm.sqrt.f64"
784 MO_F64_Fabs -> fsLit "llvm.fabs.f64"
785 MO_F64_Pwr -> fsLit "llvm.pow.f64"
786
787 MO_F64_Sin -> fsLit "llvm.sin.f64"
788 MO_F64_Cos -> fsLit "llvm.cos.f64"
789 MO_F64_Tan -> fsLit "tan"
790
791 MO_F64_Asin -> fsLit "asin"
792 MO_F64_Acos -> fsLit "acos"
793 MO_F64_Atan -> fsLit "atan"
794
795 MO_F64_Sinh -> fsLit "sinh"
796 MO_F64_Cosh -> fsLit "cosh"
797 MO_F64_Tanh -> fsLit "tanh"
798
799 MO_F64_Asinh -> fsLit "asinh"
800 MO_F64_Acosh -> fsLit "acosh"
801 MO_F64_Atanh -> fsLit "atanh"
802
803 MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1
804 MO_Memmove _ -> fsLit $ "llvm.memmove." ++ intrinTy1
805 MO_Memset _ -> fsLit $ "llvm.memset." ++ intrinTy2
806 MO_Memcmp _ -> fsLit $ "memcmp"
807
808 (MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
809 (MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
810 (MO_BRev w) -> fsLit $ "llvm.bitreverse." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
811 (MO_Clz w) -> fsLit $ "llvm.ctlz." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
812 (MO_Ctz w) -> fsLit $ "llvm.cttz." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
813
814 (MO_Pdep w) -> let w' = showSDoc dflags (ppr $ widthInBits w)
815 in if isBmi2Enabled dflags
816 then fsLit $ "llvm.x86.bmi.pdep." ++ w'
817 else fsLit $ "hs_pdep" ++ w'
818 (MO_Pext w) -> let w' = showSDoc dflags (ppr $ widthInBits w)
819 in if isBmi2Enabled dflags
820 then fsLit $ "llvm.x86.bmi.pext." ++ w'
821 else fsLit $ "hs_pext" ++ w'
822
823 (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
824
825 MO_AddIntC w -> fsLit $ "llvm.sadd.with.overflow."
826 ++ showSDoc dflags (ppr $ widthToLlvmInt w)
827 MO_SubIntC w -> fsLit $ "llvm.ssub.with.overflow."
828 ++ showSDoc dflags (ppr $ widthToLlvmInt w)
829 MO_Add2 w -> fsLit $ "llvm.uadd.with.overflow."
830 ++ showSDoc dflags (ppr $ widthToLlvmInt w)
831 MO_AddWordC w -> fsLit $ "llvm.uadd.with.overflow."
832 ++ showSDoc dflags (ppr $ widthToLlvmInt w)
833 MO_SubWordC w -> fsLit $ "llvm.usub.with.overflow."
834 ++ showSDoc dflags (ppr $ widthToLlvmInt w)
835
836 MO_S_QuotRem {} -> unsupported
837 MO_U_QuotRem {} -> unsupported
838 MO_U_QuotRem2 {} -> unsupported
839 -- We support MO_U_Mul2 through an ordinary LLVM mul instruction; see the
840 -- appropriate case of genCall.
841 MO_U_Mul2 {} -> unsupported
842 MO_ReadBarrier -> unsupported
843 MO_WriteBarrier -> unsupported
844 MO_Touch -> unsupported
845 MO_UF_Conv _ -> unsupported
846
847 MO_AtomicRead _ -> unsupported
848 MO_AtomicRMW _ _ -> unsupported
849 MO_AtomicWrite _ -> unsupported
850 MO_Cmpxchg _ -> unsupported
851
852 -- | Tail function calls
853 genJump :: CmmExpr -> [GlobalReg] -> LlvmM StmtData
854
855 -- Call to known function
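-- A rough sketch (illustrative only; the convention name and register list
-- are schematic) of the code for a tail call to a known label:
--
--   tail call ghccc void @f( <live STG registers> )
--   ret void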
856 genJump (CmmLit (CmmLabel lbl)) live = do
857 (vf, stmts, top) <- getHsFunc live lbl
858 (stgRegs, stgStmts) <- funEpilogue live
859 let s1 = Expr $ Call TailCall vf stgRegs llvmStdFunAttrs
860 let s2 = Return Nothing
861 return (stmts `appOL` stgStmts `snocOL` s1 `snocOL` s2, top)
862
863
864 -- Call to unknown function / address
865 genJump expr live = do
866 fty <- llvmFunTy live
867 (vf, stmts, top) <- exprToVar expr
868 dflags <- getDynFlags
869
870 let cast = case getVarType vf of
871 ty | isPointer ty -> LM_Bitcast
872 ty | isInt ty -> LM_Inttoptr
873
874 ty -> panic $ "genJump: Expr is of bad type for function call! ("
875 ++ showSDoc dflags (ppr ty) ++ ")"
876
877 (v1, s1) <- doExpr (pLift fty) $ Cast cast vf (pLift fty)
878 (stgRegs, stgStmts) <- funEpilogue live
879 let s2 = Expr $ Call TailCall v1 stgRegs llvmStdFunAttrs
880 let s3 = Return Nothing
881 return (stmts `snocOL` s1 `appOL` stgStmts `snocOL` s2 `snocOL` s3,
882 top)
883
884
885 -- | CmmAssign operation
886 --
887 -- We use stack-allocated variables for CmmReg. The optimiser will replace
888 -- these with registers when possible.
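-- A rough sketch (illustrative only): an assignment to a local register
-- becomes a store to a stack slot; the alloca itself is emitted once in the
-- function prologue and later promoted to an SSA register by mem2reg:
--
--   %lx = alloca i64           ; in the prologue
--   ...
--   store i64 %val, i64* %lx   ; emitted by genAssign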
889 genAssign :: CmmReg -> CmmExpr -> LlvmM StmtData
890 genAssign reg val = do
891 vreg <- getCmmReg reg
892 (vval, stmts2, top2) <- exprToVar val
893 let stmts = stmts2
894
895 let ty = (pLower . getVarType) vreg
896 dflags <- getDynFlags
897 case ty of
898 -- Some registers are pointer types, so need to cast value to pointer
899 LMPointer _ | getVarType vval == llvmWord dflags -> do
900 (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
901 let s2 = Store v vreg
902 return (stmts `snocOL` s1 `snocOL` s2, top2)
903
904 LMVector _ _ -> do
905 (v, s1) <- doExpr ty $ Cast LM_Bitcast vval ty
906 let s2 = Store v vreg
907 return (stmts `snocOL` s1 `snocOL` s2, top2)
908
909 _ -> do
910 let s1 = Store vval vreg
911 return (stmts `snocOL` s1, top2)
912
913
914 -- | CmmStore operation
915 genStore :: CmmExpr -> CmmExpr -> LlvmM StmtData
916
917 -- First we try to detect a few common cases and produce better code for
918 -- these than the default case. We are mostly trying to detect Cmm code
919 -- like I32[Sp + n] and use 'getelementptr' operations instead of the
920 -- generic case that uses casts and pointer arithmetic.
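-- A rough sketch (illustrative only; exact getelementptr syntax depends on
-- the LLVM version) of the two paths for I64[Sp + 8] = val on 64 bits:
--
--   fast (getelementptr):
--     %p = getelementptr inbounds i64, i64* %Sp, i32 1
--     store i64 %val, i64* %p
--
--   generic (casts and pointer arithmetic):
--     %a = add i64 %sp, 8
--     %p = inttoptr i64 %a to i64*
--     store i64 %val, i64* %p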
921 genStore addr@(CmmReg (CmmGlobal r)) val
922 = genStore_fast addr r 0 val
923
924 genStore addr@(CmmRegOff (CmmGlobal r) n) val
925 = genStore_fast addr r n val
926
927 genStore addr@(CmmMachOp (MO_Add _) [
928 (CmmReg (CmmGlobal r)),
929 (CmmLit (CmmInt n _))])
930 val
931 = genStore_fast addr r (fromInteger n) val
932
933 genStore addr@(CmmMachOp (MO_Sub _) [
934 (CmmReg (CmmGlobal r)),
935 (CmmLit (CmmInt n _))])
936 val
937 = genStore_fast addr r (negate $ fromInteger n) val
938
939 -- generic case
940 genStore addr val
941 = getTBAAMeta topN >>= genStore_slow addr val
942
943 -- | CmmStore operation
944 -- This is a special case for storing to a global register pointer
945 -- offset such as I32[Sp+8].
946 genStore_fast :: CmmExpr -> GlobalReg -> Int -> CmmExpr
947 -> LlvmM StmtData
948 genStore_fast addr r n val
949 = do dflags <- getDynFlags
950 (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
951 meta <- getTBAARegMeta r
952 let (ix,rem) = n `divMod` ((llvmWidthInBits dflags . pLower) grt `div` 8)
953 case isPointer grt && rem == 0 of
954 True -> do
955 (vval, stmts, top) <- exprToVar val
956 (ptr, s2) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
957 -- We might need a different pointer type, so check
958 case pLower grt == getVarType vval of
959 -- we're fine
960 True -> do
961 let s3 = MetaStmt meta $ Store vval ptr
962 return (stmts `appOL` s1 `snocOL` s2
963 `snocOL` s3, top)
964
965 -- cast to pointer type needed
966 False -> do
967 let ty = (pLift . getVarType) vval
968 (ptr', s3) <- doExpr ty $ Cast LM_Bitcast ptr ty
969 let s4 = MetaStmt meta $ Store vval ptr'
970 return (stmts `appOL` s1 `snocOL` s2
971 `snocOL` s3 `snocOL` s4, top)
972
973 -- If it's a bit type then we use the slow method since
974 -- we can't avoid casting anyway.
975 False -> genStore_slow addr val meta
976
977
978 -- | CmmStore operation
979 -- Generic case. Uses casts and pointer arithmetic if needed.
980 genStore_slow :: CmmExpr -> CmmExpr -> [MetaAnnot] -> LlvmM StmtData
981 genStore_slow addr val meta = do
982 (vaddr, stmts1, top1) <- exprToVar addr
983 (vval, stmts2, top2) <- exprToVar val
984
985 let stmts = stmts1 `appOL` stmts2
986 dflags <- getDynFlags
987 case getVarType vaddr of
988 -- sometimes we need to cast an int to a pointer before storing
989 LMPointer ty@(LMPointer _) | getVarType vval == llvmWord dflags -> do
990 (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
991 let s2 = MetaStmt meta $ Store v vaddr
992 return (stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
993
994 LMPointer _ -> do
995 let s1 = MetaStmt meta $ Store vval vaddr
996 return (stmts `snocOL` s1, top1 ++ top2)
997
998 i@(LMInt _) | i == llvmWord dflags -> do
999 let vty = pLift $ getVarType vval
1000 (vptr, s1) <- doExpr vty $ Cast LM_Inttoptr vaddr vty
1001 let s2 = MetaStmt meta $ Store vval vptr
1002 return (stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
1003
1004 other ->
1005 pprPanic "genStore: ptr not right type!"
1006 (PprCmm.pprExpr addr <+> text (
1007 "Size of Ptr: " ++ show (llvmPtrBits dflags) ++
1008 ", Size of var: " ++ show (llvmWidthInBits dflags other) ++
1009 ", Var: " ++ showSDoc dflags (ppr vaddr)))
1010
1011
1012 -- | Unconditional branch
1013 genBranch :: BlockId -> LlvmM StmtData
1014 genBranch id =
1015 let label = blockIdToLlvm id
1016 in return (unitOL $ Branch label, [])
1017
1018
1019 -- | Conditional branch
1020 genCondBranch :: CmmExpr -> BlockId -> BlockId -> Maybe Bool -> LlvmM StmtData
1021 genCondBranch cond idT idF likely = do
1022 let labelT = blockIdToLlvm idT
1023 let labelF = blockIdToLlvm idF
1024 -- See Note [Literals and branch conditions].
1025 (vc, stmts1, top1) <- exprToVarOpt i1Option cond
1026 if getVarType vc == i1
1027 then do
1028 (vc', (stmts2, top2)) <- case likely of
1029 Just b -> genExpectLit (if b then 1 else 0) i1 vc
1030 _ -> pure (vc, (nilOL, []))
1031 let s1 = BranchIf vc' labelT labelF
1032 return (stmts1 `appOL` stmts2 `snocOL` s1, top1 ++ top2)
1033 else do
1034 dflags <- getDynFlags
1035 panic $ "genCondBranch: Cond expr not bool! (" ++ showSDoc dflags (ppr vc) ++ ")"
1036
1037
1038 -- | Generate a call to the llvm.expect.x intrinsic, assigning the result to a new var.
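-- A rough sketch (illustrative only) for a branch annotated as likely-taken:
--
--   %c' = call i1 @llvm.expect.i1(i1 %c, i1 1)
--   br i1 %c', label %ifTrue, label %ifFalse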
1039 genExpectLit :: Integer -> LlvmType -> LlvmVar -> LlvmM (LlvmVar, StmtData)
1040 genExpectLit expLit expTy var = do
1041 dflags <- getDynFlags
1042
1043 let
1044 lit = LMLitVar $ LMIntLit expLit expTy
1045
1046 llvmExpectName
1047 | isInt expTy = fsLit $ "llvm.expect." ++ showSDoc dflags (ppr expTy)
1048 | otherwise = panic $ "genExpectedLit: Type not an int!"
1049
1050 (llvmExpect, stmts, top) <-
1051 getInstrinct llvmExpectName expTy [expTy, expTy]
1052 (var', call) <- doExpr expTy $ Call StdCall llvmExpect [var, lit] []
1053 return (var', (stmts `snocOL` call, top))
1054
1055 {- Note [Literals and branch conditions]
1056 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1057
1058 It is important that whenever we generate branch conditions for
1059 literals like '1', they are properly narrowed to an LLVM expression of
1060 type 'i1' (for bools). Otherwise, nobody is happy. So when we convert
1061 a CmmExpr to an LLVM expression for a branch conditional, exprToVarOpt
1062 must be certain to return a properly narrowed type. genLit is
1063 responsible for this, in the case of literal integers.
1064
1065 Often, we won't see direct statements like:
1066
1067 if(1) {
1068 ...
1069 } else {
1070 ...
1071 }
1072
1073 at this point in the pipeline, because the Glorious Code Generator
1074 will do trivial branch elimination in the sinking pass (among others),
1075 which will eliminate the expression entirely.
1076
1077 However, it's certainly possible and reasonable for this to occur in
1078 hand-written C-- code. Consider something like:
1079
1080 #if !defined(SOME_CONDITIONAL)
1081 #define CHECK_THING(x) 1
1082 #else
1083 #define CHECK_THING(x) some_operation((x))
1084 #endif
1085
1086 f() {
1087
1088 if (CHECK_THING(xyz)) {
1089 ...
1090 } else {
1091 ...
1092 }
1093
1094 }
1095
1096 In such an instance, CHECK_THING might result in an *expression* in
1097 one case, and a *literal* in the other, depending on what in
1098 particular was #define'd. So we must be sure to properly narrow the
1099 literal in this case to i1 as it won't be eliminated beforehand.
1100
1101 For a real example of this, see ./rts/StgStdThunks.cmm
1102
1103 -}
1104
1105
1106
1107 -- | Switch branch
1108 genSwitch :: CmmExpr -> SwitchTargets -> LlvmM StmtData
1109 genSwitch cond ids = do
1110 (vc, stmts, top) <- exprToVar cond
1111 let ty = getVarType vc
1112
1113 let labels = [ (mkIntLit ty ix, blockIdToLlvm b)
1114 | (ix, b) <- switchTargetsCases ids ]
1115 -- out of range is undefined, so let's just branch to first label
1116 let defLbl | Just l <- switchTargetsDefault ids = blockIdToLlvm l
1117 | otherwise = snd (head labels)
1118
1119 let s1 = Switch vc defLbl labels
1120 return $ (stmts `snocOL` s1, top)
1121
1122
1123 -- -----------------------------------------------------------------------------
1124 -- * CmmExpr code generation
1125 --
1126
1127 -- | Data returned from converting an expression:
1128 -- * LlvmVar: The var holding the result of the expression
1129 -- * LlvmStatements: Any statements needed to evaluate the expression
1130 -- * LlvmCmmDecl: Any global data needed for this expression
1131 type ExprData = (LlvmVar, LlvmStatements, [LlvmCmmDecl])
1132
1133 -- | Values which can be passed to 'exprToVar' to configure its
1134 -- behaviour in certain circumstances.
1135 --
1136 -- Currently just used for determining if a comparison should return
1137 -- a boolean (i1) or a word. See Note [Literals and branch conditions].
1138 newtype EOption = EOption { i1Expected :: Bool }
1139 -- XXX: EOption is an ugly and inefficient solution to this problem.
1140
1141 -- | i1 type expected (condition scrutinee).
1142 i1Option :: EOption
1143 i1Option = EOption True
1144
1145 -- | Word type expected (usual).
1146 wordOption :: EOption
1147 wordOption = EOption False
1148
1149 -- | Convert a CmmExpr to a list of LlvmStatements with the result of the
1150 -- expression being stored in the returned LlvmVar.
1151 exprToVar :: CmmExpr -> LlvmM ExprData
1152 exprToVar = exprToVarOpt wordOption
1153
1154 exprToVarOpt :: EOption -> CmmExpr -> LlvmM ExprData
1155 exprToVarOpt opt e = case e of
1156
1157 CmmLit lit
1158 -> genLit opt lit
1159
1160 CmmLoad e' ty
1161 -> genLoad False e' ty
1162
1163 -- A CmmReg in an expression denotes its value, so we must load it. If you
1164 -- want the actual register pointer, call getCmmReg directly.
1165 CmmReg r -> do
1166 (v1, ty, s1) <- getCmmRegVal r
1167 case isPointer ty of
1168 True -> do
1169 -- Cmm wants the value, so pointer types must be cast to ints
1170 dflags <- getDynFlags
1171 (v2, s2) <- doExpr (llvmWord dflags) $ Cast LM_Ptrtoint v1 (llvmWord dflags)
1172 return (v2, s1 `snocOL` s2, [])
1173
1174 False -> return (v1, s1, [])
1175
1176 CmmMachOp op exprs
1177 -> genMachOp opt op exprs
1178
1179 CmmRegOff r i
1180 -> do dflags <- getDynFlags
1181 exprToVar $ expandCmmReg dflags (r, i)
1182
1183 CmmStackSlot _ _
1184 -> panic "exprToVar: CmmStackSlot not supported!"
1185
1186
1187 -- | Handle CmmMachOp expressions
1188 genMachOp :: EOption -> MachOp -> [CmmExpr] -> LlvmM ExprData
1189
1190 -- Unary Machop
1191 genMachOp _ op [x] = case op of
1192
1193 MO_Not w ->
1194 let all1 = mkIntLit (widthToLlvmInt w) (-1)
1195 in negate (widthToLlvmInt w) all1 LM_MO_Xor
1196
1197 MO_S_Neg w ->
1198 let all0 = mkIntLit (widthToLlvmInt w) 0
1199 in negate (widthToLlvmInt w) all0 LM_MO_Sub
1200
1201 MO_F_Neg w ->
1202 let all0 = LMLitVar $ LMFloatLit (-0) (widthToLlvmFloat w)
1203 in negate (widthToLlvmFloat w) all0 LM_MO_FSub
1204
1205 MO_SF_Conv _ w -> fiConv (widthToLlvmFloat w) LM_Sitofp
1206 MO_FS_Conv _ w -> fiConv (widthToLlvmInt w) LM_Fptosi
1207
1208 MO_SS_Conv from to
1209 -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Sext
1210
1211 MO_UU_Conv from to
1212 -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Zext
1213
1214 MO_XX_Conv from to
1215 -> sameConv from (widthToLlvmInt to) LM_Trunc LM_Zext
1216
1217 MO_FF_Conv from to
1218 -> sameConv from (widthToLlvmFloat to) LM_Fptrunc LM_Fpext
1219
1220 MO_VS_Neg len w ->
1221 let ty = widthToLlvmInt w
1222 vecty = LMVector len ty
1223 all0 = LMIntLit (-0) ty
1224 all0s = LMLitVar $ LMVectorLit (replicate len all0)
1225 in negateVec vecty all0s LM_MO_Sub
1226
1227 MO_VF_Neg len w ->
1228 let ty = widthToLlvmFloat w
1229 vecty = LMVector len ty
1230 all0 = LMFloatLit (-0) ty
1231 all0s = LMLitVar $ LMVectorLit (replicate len all0)
1232 in negateVec vecty all0s LM_MO_FSub
1233
1234 MO_AlignmentCheck _ _ -> panic "-falignment-sanitisation is not supported by -fllvm"
1235
1236 -- Handle unsupported cases explicitly so we get a warning
1237 -- about a missing case when new MachOps are added
1238 MO_Add _ -> panicOp
1239 MO_Mul _ -> panicOp
1240 MO_Sub _ -> panicOp
1241 MO_S_MulMayOflo _ -> panicOp
1242 MO_S_Quot _ -> panicOp
1243 MO_S_Rem _ -> panicOp
1244 MO_U_MulMayOflo _ -> panicOp
1245 MO_U_Quot _ -> panicOp
1246 MO_U_Rem _ -> panicOp
1247
1248 MO_Eq _ -> panicOp
1249 MO_Ne _ -> panicOp
1250 MO_S_Ge _ -> panicOp
1251 MO_S_Gt _ -> panicOp
1252 MO_S_Le _ -> panicOp
1253 MO_S_Lt _ -> panicOp
1254 MO_U_Ge _ -> panicOp
1255 MO_U_Gt _ -> panicOp
1256 MO_U_Le _ -> panicOp
1257 MO_U_Lt _ -> panicOp
1258
1259 MO_F_Add _ -> panicOp
1260 MO_F_Sub _ -> panicOp
1261 MO_F_Mul _ -> panicOp
1262 MO_F_Quot _ -> panicOp
1263 MO_F_Eq _ -> panicOp
1264 MO_F_Ne _ -> panicOp
1265 MO_F_Ge _ -> panicOp
1266 MO_F_Gt _ -> panicOp
1267 MO_F_Le _ -> panicOp
1268 MO_F_Lt _ -> panicOp
1269
1270 MO_And _ -> panicOp
1271 MO_Or _ -> panicOp
1272 MO_Xor _ -> panicOp
1273 MO_Shl _ -> panicOp
1274 MO_U_Shr _ -> panicOp
1275 MO_S_Shr _ -> panicOp
1276
1277 MO_V_Insert _ _ -> panicOp
1278 MO_V_Extract _ _ -> panicOp
1279
1280 MO_V_Add _ _ -> panicOp
1281 MO_V_Sub _ _ -> panicOp
1282 MO_V_Mul _ _ -> panicOp
1283
1284 MO_VS_Quot _ _ -> panicOp
1285 MO_VS_Rem _ _ -> panicOp
1286
1287 MO_VU_Quot _ _ -> panicOp
1288 MO_VU_Rem _ _ -> panicOp
1289
1290 MO_VF_Insert _ _ -> panicOp
1291 MO_VF_Extract _ _ -> panicOp
1292
1293 MO_VF_Add _ _ -> panicOp
1294 MO_VF_Sub _ _ -> panicOp
1295 MO_VF_Mul _ _ -> panicOp
1296 MO_VF_Quot _ _ -> panicOp
1297
1298 where
1299 negate ty v2 negOp = do
1300 (vx, stmts, top) <- exprToVar x
1301 (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx
1302 return (v1, stmts `snocOL` s1, top)
1303
1304 negateVec ty v2 negOp = do
1305 (vx, stmts1, top) <- exprToVar x
1306 (vxs', stmts2) <- castVars Signed [(vx, ty)]
1307 let vx' = singletonPanic "genMachOp: negateVec" vxs'
1308 (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx'
1309 return (v1, stmts1 `appOL` stmts2 `snocOL` s1, top)
1310
1311 fiConv ty convOp = do
1312 (vx, stmts, top) <- exprToVar x
1313 (v1, s1) <- doExpr ty $ Cast convOp vx ty
1314 return (v1, stmts `snocOL` s1, top)
1315
1316 sameConv from ty reduce expand = do
1317 x'@(vx, stmts, top) <- exprToVar x
1318 let sameConv' op = do
1319 (v1, s1) <- doExpr ty $ Cast op vx ty
1320 return (v1, stmts `snocOL` s1, top)
1321 dflags <- getDynFlags
1322 let toWidth = llvmWidthInBits dflags ty
1323 -- LLVM doesn't like trying to convert to the same width, so we
1324 -- need to check for that, as we do get Cmm code doing it.
1325 case widthInBits from of
1326 w | w < toWidth -> sameConv' expand
1327 w | w > toWidth -> sameConv' reduce
1328 _w -> return x'
1329
1330 panicOp = panic $ "LLVM.CodeGen.genMachOp: non unary op encountered"
1331 ++ "with one argument! (" ++ show op ++ ")"
1332
1333 -- Handle GlobalRegs pointers
1334 genMachOp opt o@(MO_Add _) e@[(CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]
1335 = genMachOp_fast opt o r (fromInteger n) e
1336
1337 genMachOp opt o@(MO_Sub _) e@[(CmmReg (CmmGlobal r)), (CmmLit (CmmInt n _))]
1338 = genMachOp_fast opt o r (negate . fromInteger $ n) e
1339
1340 -- Generic case
1341 genMachOp opt op e = genMachOp_slow opt op e
1342
1343
1344 -- | Handle CmmMachOp expressions
1345 -- This is a specialised method that handles Global register manipulations like
1346 -- 'Sp - 16', using the getelementptr instruction.
1347 genMachOp_fast :: EOption -> MachOp -> GlobalReg -> Int -> [CmmExpr]
1348 -> LlvmM ExprData
1349 genMachOp_fast opt op r n e
1350 = do (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
1351 dflags <- getDynFlags
1352 let (ix,rem) = n `divMod` ((llvmWidthInBits dflags . pLower) grt `div` 8)
1353 case isPointer grt && rem == 0 of
1354 True -> do
1355 (ptr, s2) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
1356 (var, s3) <- doExpr (llvmWord dflags) $ Cast LM_Ptrtoint ptr (llvmWord dflags)
1357 return (var, s1 `snocOL` s2 `snocOL` s3, [])
1358
1359 False -> genMachOp_slow opt op e
1360
1361
1362 -- | Handle CmmMachOp expressions
1363 -- This handles all the cases not handled by the specialised genMachOp_fast.
1364 genMachOp_slow :: EOption -> MachOp -> [CmmExpr] -> LlvmM ExprData
1365
1366 -- Element extraction
1367 genMachOp_slow _ (MO_V_Extract l w) [val, idx] = runExprData $ do
1368 vval <- exprToVarW val
1369 vidx <- exprToVarW idx
1370 vval' <- singletonPanic "genMachOp_slow" <$>
1371 castVarsW Signed [(vval, LMVector l ty)]
1372 doExprW ty $ Extract vval' vidx
1373 where
1374 ty = widthToLlvmInt w
1375
1376 genMachOp_slow _ (MO_VF_Extract l w) [val, idx] = runExprData $ do
1377 vval <- exprToVarW val
1378 vidx <- exprToVarW idx
1379 vval' <- singletonPanic "genMachOp_slow" <$>
1380 castVarsW Signed [(vval, LMVector l ty)]
1381 doExprW ty $ Extract vval' vidx
1382 where
1383 ty = widthToLlvmFloat w
1384
1385 -- Element insertion
1386 genMachOp_slow _ (MO_V_Insert l w) [val, elt, idx] = runExprData $ do
1387 vval <- exprToVarW val
1388 velt <- exprToVarW elt
1389 vidx <- exprToVarW idx
1390 vval' <- singletonPanic "genMachOp_slow" <$>
1391 castVarsW Signed [(vval, ty)]
1392 doExprW ty $ Insert vval' velt vidx
1393 where
1394 ty = LMVector l (widthToLlvmInt w)
1395
1396 genMachOp_slow _ (MO_VF_Insert l w) [val, elt, idx] = runExprData $ do
1397 vval <- exprToVarW val
1398 velt <- exprToVarW elt
1399 vidx <- exprToVarW idx
1400 vval' <- singletonPanic "genMachOp_slow" <$>
1401 castVarsW Signed [(vval, ty)]
1402 doExprW ty $ Insert vval' velt vidx
1403 where
1404 ty = LMVector l (widthToLlvmFloat w)
1405
1406 -- Binary MachOp
1407 genMachOp_slow opt op [x, y] = case op of
1408
1409 MO_Eq _ -> genBinComp opt LM_CMP_Eq
1410 MO_Ne _ -> genBinComp opt LM_CMP_Ne
1411
1412 MO_S_Gt _ -> genBinComp opt LM_CMP_Sgt
1413 MO_S_Ge _ -> genBinComp opt LM_CMP_Sge
1414 MO_S_Lt _ -> genBinComp opt LM_CMP_Slt
1415 MO_S_Le _ -> genBinComp opt LM_CMP_Sle
1416
1417 MO_U_Gt _ -> genBinComp opt LM_CMP_Ugt
1418 MO_U_Ge _ -> genBinComp opt LM_CMP_Uge
1419 MO_U_Lt _ -> genBinComp opt LM_CMP_Ult
1420 MO_U_Le _ -> genBinComp opt LM_CMP_Ule
1421
1422 MO_Add _ -> genBinMach LM_MO_Add
1423 MO_Sub _ -> genBinMach LM_MO_Sub
1424 MO_Mul _ -> genBinMach LM_MO_Mul
1425
1426 MO_U_MulMayOflo _ -> panic "genMachOp: MO_U_MulMayOflo unsupported!"
1427
1428 MO_S_MulMayOflo w -> isSMulOK w x y
1429
1430 MO_S_Quot _ -> genBinMach LM_MO_SDiv
1431 MO_S_Rem _ -> genBinMach LM_MO_SRem
1432
1433 MO_U_Quot _ -> genBinMach LM_MO_UDiv
1434 MO_U_Rem _ -> genBinMach LM_MO_URem
1435
1436 MO_F_Eq _ -> genBinComp opt LM_CMP_Feq
1437 MO_F_Ne _ -> genBinComp opt LM_CMP_Fne
1438 MO_F_Gt _ -> genBinComp opt LM_CMP_Fgt
1439 MO_F_Ge _ -> genBinComp opt LM_CMP_Fge
1440 MO_F_Lt _ -> genBinComp opt LM_CMP_Flt
1441 MO_F_Le _ -> genBinComp opt LM_CMP_Fle
1442
1443 MO_F_Add _ -> genBinMach LM_MO_FAdd
1444 MO_F_Sub _ -> genBinMach LM_MO_FSub
1445 MO_F_Mul _ -> genBinMach LM_MO_FMul
1446 MO_F_Quot _ -> genBinMach LM_MO_FDiv
1447
1448 MO_And _ -> genBinMach LM_MO_And
1449 MO_Or _ -> genBinMach LM_MO_Or
1450 MO_Xor _ -> genBinMach LM_MO_Xor
1451 MO_Shl _ -> genBinMach LM_MO_Shl
1452 MO_U_Shr _ -> genBinMach LM_MO_LShr
1453 MO_S_Shr _ -> genBinMach LM_MO_AShr
1454
1455 MO_V_Add l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Add
1456 MO_V_Sub l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Sub
1457 MO_V_Mul l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Mul
1458
1459 MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv
1460 MO_VS_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem
1461
1462 MO_VU_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_UDiv
1463 MO_VU_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_URem
1464
1465 MO_VF_Add l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd
1466 MO_VF_Sub l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub
1467 MO_VF_Mul l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FMul
1468 MO_VF_Quot l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FDiv
1469
1470 MO_Not _ -> panicOp
1471 MO_S_Neg _ -> panicOp
1472 MO_F_Neg _ -> panicOp
1473
1474 MO_SF_Conv _ _ -> panicOp
1475 MO_FS_Conv _ _ -> panicOp
1476 MO_SS_Conv _ _ -> panicOp
1477 MO_UU_Conv _ _ -> panicOp
1478 MO_XX_Conv _ _ -> panicOp
1479 MO_FF_Conv _ _ -> panicOp
1480
1481 MO_V_Insert {} -> panicOp
1482 MO_V_Extract {} -> panicOp
1483
1484 MO_VS_Neg {} -> panicOp
1485
1486 MO_VF_Insert {} -> panicOp
1487 MO_VF_Extract {} -> panicOp
1488
1489 MO_VF_Neg {} -> panicOp
1490
1491 MO_AlignmentCheck {} -> panicOp
1492
1493 where
1494 binLlvmOp ty binOp = runExprData $ do
1495 vx <- exprToVarW x
1496 vy <- exprToVarW y
1497 if getVarType vx == getVarType vy
1498 then do
1499 doExprW (ty vx) $ binOp vx vy
1500
1501 else do
1502 -- Error. Continue anyway so we can debug the generated ll file.
1503 dflags <- getDynFlags
1504 let style = mkCodeStyle CStyle
1505 toString doc = renderWithStyle dflags doc style
1506 cmmToStr = (lines . toString . PprCmm.pprExpr)
1507 statement $ Comment $ map fsLit $ cmmToStr x
1508 statement $ Comment $ map fsLit $ cmmToStr y
1509 doExprW (ty vx) $ binOp vx vy
1510
1511 binCastLlvmOp ty binOp = runExprData $ do
1512 vx <- exprToVarW x
1513 vy <- exprToVarW y
1514 vxy' <- castVarsW Signed [(vx, ty), (vy, ty)]
1515 case vxy' of
1516 [vx',vy'] -> doExprW ty $ binOp vx' vy'
1517 _ -> panic "genMachOp_slow: binCastLlvmOp"
1518
1519 -- | Need to use EOption here as Cmm expects word-size results from
1520 -- comparisons while LLVM returns i1. We need to extend to the llvmWord
1521 -- type if that is expected. See Note [Literals and branch conditions].
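-- A rough sketch (illustrative only): a word-level Cmm comparison becomes an
-- icmp plus a zero-extension when a word-sized result is expected:
--
--   %b = icmp ult i64 %x, %y
--   %r = zext i1 %b to i64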
1522 genBinComp opt cmp = do
1523 ed@(v1, stmts, top) <- binLlvmOp (\_ -> i1) (Compare cmp)
1524 dflags <- getDynFlags
1525 if getVarType v1 == i1
1526 then case i1Expected opt of
1527 True -> return ed
1528 False -> do
1529 let w_ = llvmWord dflags
1530 (v2, s1) <- doExpr w_ $ Cast LM_Zext v1 w_
1531 return (v2, stmts `snocOL` s1, top)
1532 else
1533 panic $ "genBinComp: Compare returned type other then i1! "
1534 ++ (showSDoc dflags $ ppr $ getVarType v1)
1535
1536 genBinMach op = binLlvmOp getVarType (LlvmOp op)
1537
1538 genCastBinMach ty op = binCastLlvmOp ty (LlvmOp op)
1539
1540 -- | Detect if overflow will occur in a signed multiply of the two
1541 -- CmmExprs. This is the LLVM assembly equivalent of the NCG
1542 -- implementation. It's much longer due to type information/safety.
1543 -- This should actually compile to only about 3 asm instructions.
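-- A rough sketch (illustrative only) for 64 bits: widen, multiply, then
-- compare the high word of the product with the sign of the low word; the
-- final value is zero exactly when the multiply did not overflow:
--
--   %x2 = sext i64 %x to i128
--   %y2 = sext i64 %y to i128
--   %r  = mul i128 %x2, %y2
--   %lo = trunc i128 %r to i64
--   %sg = ashr i64 %lo, 63
--   %hs = ashr i128 %r, 64
--   %hi = trunc i128 %hs to i64
--   %ok = sub i64 %sg, %hi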
1544 isSMulOK :: Width -> CmmExpr -> CmmExpr -> LlvmM ExprData
1545 isSMulOK _ x y = runExprData $ do
1546 vx <- exprToVarW x
1547 vy <- exprToVarW y
1548
1549 dflags <- getDynFlags
1550 let word = getVarType vx
1551 let word2 = LMInt $ 2 * (llvmWidthInBits dflags $ getVarType vx)
1552 let shift = llvmWidthInBits dflags word
1553 let shift1 = toIWord dflags (shift - 1)
1554 let shift2 = toIWord dflags shift
1555
1556 if isInt word
1557 then do
1558 x1 <- doExprW word2 $ Cast LM_Sext vx word2
1559 y1 <- doExprW word2 $ Cast LM_Sext vy word2
1560 r1 <- doExprW word2 $ LlvmOp LM_MO_Mul x1 y1
1561 rlow1 <- doExprW word $ Cast LM_Trunc r1 word
1562 rlow2 <- doExprW word $ LlvmOp LM_MO_AShr rlow1 shift1
1563 rhigh1 <- doExprW word2 $ LlvmOp LM_MO_AShr r1 shift2
1564 rhigh2 <- doExprW word $ Cast LM_Trunc rhigh1 word
1565 doExprW word $ LlvmOp LM_MO_Sub rlow2 rhigh2
1566
1567 else
1568 panic $ "isSMulOK: Not bit type! (" ++ showSDoc dflags (ppr word) ++ ")"
1569
1570 panicOp = panic $ "LLVM.CodeGen.genMachOp_slow: unary op encountered"
1571 ++ "with two arguments! (" ++ show op ++ ")"
1572
1573 -- More than two expressions, invalid!
1574 genMachOp_slow _ _ _ = panic "genMachOp: More than 2 expressions in MachOp!"
1575
1576
1577 -- | Handle CmmLoad expression.
1578 genLoad :: Atomic -> CmmExpr -> CmmType -> LlvmM ExprData
1579
1580 -- First we try to detect a few common cases and produce better code for
1581 -- these than the default case. We are mostly trying to detect Cmm code
1582 -- like I32[Sp + n] and use 'getelementptr' operations instead of the
1583 -- generic case that uses casts and pointer arithmetic.
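-- For instance, on a 64-bit target a load such as I64[Sp + 16] can be
-- compiled roughly as (an informal sketch; exact textual syntax depends on
-- the LLVM version and the names are invented):
--   %p = getelementptr i64, i64* %Sp_slot, i32 2
--   %v = load i64, i64* %p
-- instead of materialising the address as an integer and casting it back
-- to a pointer.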
1584 genLoad atomic e@(CmmReg (CmmGlobal r)) ty
1585 = genLoad_fast atomic e r 0 ty
1586
1587 genLoad atomic e@(CmmRegOff (CmmGlobal r) n) ty
1588 = genLoad_fast atomic e r n ty
1589
1590 genLoad atomic e@(CmmMachOp (MO_Add _) [
1591 (CmmReg (CmmGlobal r)),
1592 (CmmLit (CmmInt n _))])
1593 ty
1594 = genLoad_fast atomic e r (fromInteger n) ty
1595
1596 genLoad atomic e@(CmmMachOp (MO_Sub _) [
1597 (CmmReg (CmmGlobal r)),
1598 (CmmLit (CmmInt n _))])
1599 ty
1600 = genLoad_fast atomic e r (negate $ fromInteger n) ty
1601
1602 -- generic case
1603 genLoad atomic e ty
1604 = getTBAAMeta topN >>= genLoad_slow atomic e ty
1605
1606 -- | Handle CmmLoad expression.
1607 -- This is a special case for loading from a global register pointer
1608 -- offset such as I32[Sp+8].
1609 genLoad_fast :: Atomic -> CmmExpr -> GlobalReg -> Int -> CmmType
1610 -> LlvmM ExprData
1611 genLoad_fast atomic e r n ty = do
1612 dflags <- getDynFlags
1613 (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
1614 meta <- getTBAARegMeta r
1615 let ty' = cmmToLlvmType ty
1616 (ix,rem) = n `divMod` ((llvmWidthInBits dflags . pLower) grt `div` 8)
1617 case isPointer grt && rem == 0 of
1618 True -> do
1619 (ptr, s2) <- doExpr grt $ GetElemPtr True gv [toI32 ix]
1620 -- We might need a different pointer type, so check
1621 case grt == ty' of
1622 -- we're fine
1623 True -> do
1624 (var, s3) <- doExpr ty' (MExpr meta $ loadInstr ptr)
1625 return (var, s1 `snocOL` s2 `snocOL` s3,
1626 [])
1627
1628 -- cast to pointer type needed
1629 False -> do
1630 let pty = pLift ty'
1631 (ptr', s3) <- doExpr pty $ Cast LM_Bitcast ptr pty
1632 (var, s4) <- doExpr ty' (MExpr meta $ loadInstr ptr')
1633 return (var, s1 `snocOL` s2 `snocOL` s3
1634 `snocOL` s4, [])
1635
1636 -- If it's a bit type (or the offset is misaligned) we use the
1637 -- slow method since we can't avoid casting anyway.
1638 False -> genLoad_slow atomic e ty meta
1639 where
1640 loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
1641 | otherwise = Load ptr
1642
1643 -- | Handle Cmm load expression.
1644 -- Generic case. Uses casts and pointer arithmetic if needed.
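-- For example, when the address is a word-sized integer expression the
-- generated code is roughly (sketch only; names invented):
--   %p = inttoptr i64 %addr to i64*
--   %v = load i64, i64* %p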
1645 genLoad_slow :: Atomic -> CmmExpr -> CmmType -> [MetaAnnot] -> LlvmM ExprData
1646 genLoad_slow atomic e ty meta = runExprData $ do
1647 iptr <- exprToVarW e
1648 dflags <- getDynFlags
1649 case getVarType iptr of
1650 LMPointer _ -> do
1651 doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr iptr)
1652
1653 i@(LMInt _) | i == llvmWord dflags -> do
1654 let pty = LMPointer $ cmmToLlvmType ty
1655 ptr <- doExprW pty $ Cast LM_Inttoptr iptr pty
1656 doExprW (cmmToLlvmType ty) (MExpr meta $ loadInstr ptr)
1657
1658 other -> do pprPanic "genLoad_slow: CmmLoad expression is not of the right type!"
1659 (PprCmm.pprExpr e <+> text (
1660 "Size of Ptr: " ++ show (llvmPtrBits dflags) ++
1661 ", Size of var: " ++ show (llvmWidthInBits dflags other) ++
1662 ", Var: " ++ showSDoc dflags (ppr iptr)))
1663 where
1664 loadInstr ptr | atomic = ALoad SyncSeqCst False ptr
1665 | otherwise = Load ptr
1666
1667
1668 -- | Handle CmmReg expression. This will return a pointer to the stack
1669 -- location of the register. Throws an error if it isn't allocated on
1670 -- the stack.
1671 getCmmReg :: CmmReg -> LlvmM LlvmVar
1672 getCmmReg (CmmLocal (LocalReg un _))
1673 = do exists <- varLookup un
1674 dflags <- getDynFlags
1675 case exists of
1676 Just ety -> return (LMLocalVar un $ pLift ety)
1677 Nothing -> panic $ "getCmmReg: Cmm register " ++ showSDoc dflags (ppr un) ++ " was not allocated!"
1678 -- This should never happen, as every local variable should
1679 -- have been assigned a value at some point, triggering
1680 -- "funPrologue" to allocate it on the stack.
1681
1682 getCmmReg (CmmGlobal g)
1683 = do onStack <- checkStackReg g
1684 dflags <- getDynFlags
1685 if onStack
1686 then return (lmGlobalRegVar dflags g)
1687 else panic $ "getCmmReg: Cmm register " ++ showSDoc dflags (ppr g) ++ " not stack-allocated!"
1688
1689 -- | Return the value of a given register, as well as its type. It might
1690 -- need to be loaded from the stack.
1691 getCmmRegVal :: CmmReg -> LlvmM (LlvmVar, LlvmType, LlvmStatements)
1692 getCmmRegVal reg =
1693 case reg of
1694 CmmGlobal g -> do
1695 onStack <- checkStackReg g
1696 dflags <- getDynFlags
1697 if onStack then loadFromStack else do
1698 let r = lmGlobalRegArg dflags g
1699 return (r, getVarType r, nilOL)
1700 _ -> loadFromStack
1701 where loadFromStack = do
1702 ptr <- getCmmReg reg
1703 let ty = pLower $ getVarType ptr
1704 (v, s) <- doExpr ty (Load ptr)
1705 return (v, ty, unitOL s)
1706
1707 -- | Allocate a local CmmReg on the stack
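-- For a local W64 register this produces roughly (sketch; the real name is
-- derived from the register's unique):
--   %lr = alloca i64, i32 1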
1708 allocReg :: CmmReg -> (LlvmVar, LlvmStatements)
1709 allocReg (CmmLocal (LocalReg un ty))
1710 = let ty' = cmmToLlvmType ty
1711 var = LMLocalVar un (LMPointer ty')
1712 alc = Alloca ty' 1
1713 in (var, unitOL $ Assignment var alc)
1714
1715 allocReg _ = panic $ "allocReg: Global reg encountered! Global registers should"
1716 ++ " have been handled elsewhere!"
1717
1718
1719 -- | Generate code for a literal
1720 genLit :: EOption -> CmmLit -> LlvmM ExprData
1721 genLit opt (CmmInt i w)
1722 -- See Note [Literals and branch conditions].
1723 = let width | i1Expected opt = i1
1724 | otherwise = LMInt (widthInBits w)
1725 -- comm = Comment [ fsLit $ "EOption: " ++ show opt
1726 -- , fsLit $ "Width : " ++ show w
1727 -- , fsLit $ "Width' : " ++ show (widthInBits w)
1728 -- ]
1729 in return (mkIntLit width i, nilOL, [])
1730
1731 genLit _ (CmmFloat r w)
1732 = return (LMLitVar $ LMFloatLit (fromRational r) (widthToLlvmFloat w),
1733 nilOL, [])
1734
1735 genLit opt (CmmVec ls)
1736 = do llvmLits <- mapM toLlvmLit ls
1737 return (LMLitVar $ LMVectorLit llvmLits, nilOL, [])
1738 where
1739 toLlvmLit :: CmmLit -> LlvmM LlvmLit
1740 toLlvmLit lit = do
1741 (llvmLitVar, _, _) <- genLit opt lit
1742 case llvmLitVar of
1743 LMLitVar llvmLit -> return llvmLit
1744 _ -> panic "genLit"
1745
1746 genLit _ cmm@(CmmLabel l)
1747 = do var <- getGlobalPtr =<< strCLabel_llvm l
1748 dflags <- getDynFlags
1749 let lmty = cmmToLlvmType $ cmmLitType dflags cmm
1750 (v1, s1) <- doExpr lmty $ Cast LM_Ptrtoint var (llvmWord dflags)
1751 return (v1, unitOL s1, [])
1752
1753 genLit opt (CmmLabelOff label off) = do
1754 dflags <- getDynFlags
1755 (vlbl, stmts, stat) <- genLit opt (CmmLabel label)
1756 let voff = toIWord dflags off
1757 (v1, s1) <- doExpr (getVarType vlbl) $ LlvmOp LM_MO_Add vlbl voff
1758 return (v1, stmts `snocOL` s1, stat)
1759
1760 genLit opt (CmmLabelDiffOff l1 l2 off w) = do
1761 dflags <- getDynFlags
1762 (vl1, stmts1, stat1) <- genLit opt (CmmLabel l1)
1763 (vl2, stmts2, stat2) <- genLit opt (CmmLabel l2)
1764 let voff = toIWord dflags off
1765 let ty1 = getVarType vl1
1766 let ty2 = getVarType vl2
1767 if (isInt ty1) && (isInt ty2)
1768 && (llvmWidthInBits dflags ty1 == llvmWidthInBits dflags ty2)
1769 then do
1770 (v1, s1) <- doExpr (getVarType vl1) $ LlvmOp LM_MO_Sub vl1 vl2
1771 (v2, s2) <- doExpr (getVarType v1 ) $ LlvmOp LM_MO_Add v1 voff
1772 let ty = widthToLlvmInt w
1773 let stmts = stmts1 `appOL` stmts2 `snocOL` s1 `snocOL` s2
1774 if w /= wordWidth dflags
1775 then do
1776 (v3, s3) <- doExpr ty $ Cast LM_Trunc v2 ty
1777 return (v3, stmts `snocOL` s3, stat1 ++ stat2)
1778 else
1779 return (v2, stmts, stat1 ++ stat2)
1780 else
1781 panic "genLit: CmmLabelDiffOff encountered with different label ty!"
1782
1783 genLit opt (CmmBlock b)
1784 = genLit opt (CmmLabel $ infoTblLbl b)
1785
1786 genLit _ CmmHighStackMark
1787 = panic "genStaticLit - CmmHighStackMark unsupported!"
1788
1789
1790 -- -----------------------------------------------------------------------------
1791 -- * Misc
1792 --
1793
1794 -- | Find CmmRegs that get assigned and allocate them on the stack
1795 --
1796 -- Any register that gets written needs to be allocated on the
1797 -- stack. This avoids having to map a CmmReg to an equivalent SSA form
1798 -- and avoids having to deal with Phi node insertion. This is also
1799 -- the approach recommended by LLVM developers.
1800 --
1801 -- On the other hand, this is unnecessarily verbose if the register in
1802 -- question is never written. Therefore we skip it where we can to
1803 -- save a few lines in the output and hopefully speed compilation up a
1804 -- bit.
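-- Per assigned global register the prologue therefore looks roughly like
-- (sketch; the slot and argument names are illustrative):
--   %R1_slot = alloca i64, i32 1
--   store i64 %R1_arg, i64* %R1_slot
-- with 'undef' stored instead of the incoming argument when the register
-- is not live.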
1805 funPrologue :: LiveGlobalRegs -> [CmmBlock] -> LlvmM StmtData
1806 funPrologue live cmmBlocks = do
1807
1808 trash <- getTrashRegs
1809 let getAssignedRegs :: CmmNode O O -> [CmmReg]
1810 getAssignedRegs (CmmAssign reg _) = [reg]
1811 -- Calls will trash all registers. Unfortunately, this needs them to
1812 -- be stack-allocated in the first place.
1813 getAssignedRegs (CmmUnsafeForeignCall _ rs _) = map CmmGlobal trash ++ map CmmLocal rs
1814 getAssignedRegs _ = []
1815 getRegsBlock (_, body, _) = concatMap getAssignedRegs $ blockToList body
1816 assignedRegs = nub $ concatMap (getRegsBlock . blockSplit) cmmBlocks
1817 isLive r = r `elem` alwaysLive || r `elem` live
1818
1819 dflags <- getDynFlags
1820 stmtss <- flip mapM assignedRegs $ \reg ->
1821 case reg of
1822 CmmLocal (LocalReg un _) -> do
1823 let (newv, stmts) = allocReg reg
1824 varInsert un (pLower $ getVarType newv)
1825 return stmts
1826 CmmGlobal r -> do
1827 let reg = lmGlobalRegVar dflags r
1828 arg = lmGlobalRegArg dflags r
1829 ty = (pLower . getVarType) reg
1830 trash = LMLitVar $ LMUndefLit ty
1831 rval = if isLive r then arg else trash
1832 alloc = Assignment reg $ Alloca (pLower $ getVarType reg) 1
1833 markStackReg r
1834 return $ toOL [alloc, Store rval reg]
1835
1836 return (concatOL stmtss, [])
1837
1838 -- | Function epilogue. Load STG variables to use as arguments for the call.
1839 -- STG liveness optimisation is done here.
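-- Roughly, for every active STG register we either reload its current value
-- from its stack slot, e.g. (sketch; names illustrative)
--   %R1 = load i64, i64* %R1_slot
-- pass 'undef' (dead non-SSE register), or omit it entirely (dead SSE
-- register).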
1840 funEpilogue :: LiveGlobalRegs -> LlvmM ([LlvmVar], LlvmStatements)
1841 funEpilogue live = do
1842
1843 -- The registers considered live: the always-live set plus the given liveness set.
1844 let liveRegs = alwaysLive ++ live
1845 isSSE (FloatReg _) = True
1846 isSSE (DoubleReg _) = True
1847 isSSE (XmmReg _) = True
1848 isSSE (YmmReg _) = True
1849 isSSE (ZmmReg _) = True
1850 isSSE _ = False
1851
1852 -- Set to value or "undef" depending on whether the register is
1853 -- actually live
1854 dflags <- getDynFlags
1855 let loadExpr r = do
1856 (v, _, s) <- getCmmRegVal (CmmGlobal r)
1857 return (Just $ v, s)
1858 loadUndef r = do
1859 let ty = (pLower . getVarType $ lmGlobalRegVar dflags r)
1860 return (Just $ LMLitVar $ LMUndefLit ty, nilOL)
1861 platform <- getDynFlag targetPlatform
1862 loads <- flip mapM (activeStgRegs platform) $ \r -> case () of
1863 _ | r `elem` liveRegs -> loadExpr r
1864 | not (isSSE r) -> loadUndef r
1865 | otherwise -> return (Nothing, nilOL)
1866
1867 let (vars, stmts) = unzip loads
1868 return (catMaybes vars, concatOL stmts)
1869
1870
1871 -- | A series of statements to trash all the STG registers.
1872 --
1873 -- In LLVM we pass the STG registers around everywhere in function calls.
1874 -- This means LLVM considers them live across the entire function, when
1875 -- in reality they usually aren't. For caller-saved registers across C calls,
1876 -- the saving and restoring is done by the Cmm code generator using Cmm
1877 -- local vars. So to stop LLVM saving them as well (and saving all of them,
1878 -- since it thinks they're always live), we trash them just before the call
1879 -- by assigning the 'undef' value to them. The ones we need are restored
1880 -- from the Cmm local vars and the ones we don't need are fine to be
1881 -- trashed.
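-- Concretely, for each caller-saved STG register this emits roughly
-- (sketch; slot name illustrative):
--   store i64 undef, i64* %R1_slot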
1882 getTrashStmts :: LlvmM LlvmStatements
1883 getTrashStmts = do
1884 regs <- getTrashRegs
1885 stmts <- flip mapM regs $ \ r -> do
1886 reg <- getCmmReg (CmmGlobal r)
1887 let ty = (pLower . getVarType) reg
1888 return $ Store (LMLitVar $ LMUndefLit ty) reg
1889 return $ toOL stmts
1890
1891 getTrashRegs :: LlvmM [GlobalReg]
1892 getTrashRegs = do plat <- getLlvmPlatform
1893 return $ filter (callerSaves plat) (activeStgRegs plat)
1894
1895 -- | Get a function pointer to the CLabel specified.
1896 --
1897 -- This is for Haskell functions only; the Haskell function type is assumed,
1898 -- so it doesn't work with foreign functions.
1899 getHsFunc :: LiveGlobalRegs -> CLabel -> LlvmM ExprData
1900 getHsFunc live lbl
1901 = do fty <- llvmFunTy live
1902 name <- strCLabel_llvm lbl
1903 getHsFunc' name fty
1904
1905 getHsFunc' :: LMString -> LlvmType -> LlvmM ExprData
1906 getHsFunc' name fty
1907 = do fun <- getGlobalPtr name
1908 if getVarType fun == fty
1909 then return (fun, nilOL, [])
1910 else do (v1, s1) <- doExpr (pLift fty)
1911 $ Cast LM_Bitcast fun (pLift fty)
1912 return (v1, unitOL s1, [])
1913
1914 -- | Create a new local var
1915 mkLocalVar :: LlvmType -> LlvmM LlvmVar
1916 mkLocalVar ty = do
1917 un <- getUniqueM
1918 return $ LMLocalVar un ty
1919
1920
1921 -- | Execute an expression, assigning result to a var
1922 doExpr :: LlvmType -> LlvmExpression -> LlvmM (LlvmVar, LlvmStatement)
1923 doExpr ty expr = do
1924 v <- mkLocalVar ty
1925 return (v, Assignment v expr)
1926
1927
1928 -- | Expand CmmRegOff
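-- For example, on a 64-bit target, expanding (Sp, 8) gives the Cmm expression
--   CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal Sp), CmmLit (CmmInt 8 W64)]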
1929 expandCmmReg :: DynFlags -> (CmmReg, Int) -> CmmExpr
1930 expandCmmReg dflags (reg, off)
1931 = let width = typeWidth (cmmRegType dflags reg)
1932 voff = CmmLit $ CmmInt (fromIntegral off) width
1933 in CmmMachOp (MO_Add width) [CmmReg reg, voff]
1934
1935
1936 -- | Convert a block id into an appropriate LLVM label
1937 blockIdToLlvm :: BlockId -> LlvmVar
1938 blockIdToLlvm bid = LMLocalVar (getUnique bid) LMLabel
1939
1940 -- | Create Llvm int Literal
1941 mkIntLit :: Integral a => LlvmType -> a -> LlvmVar
1942 mkIntLit ty i = LMLitVar $ LMIntLit (toInteger i) ty
1943
1944 -- | Convert an integral value to an LlvmVar of word or i32 size
1945 toI32 :: Integral a => a -> LlvmVar
1946 toI32 = mkIntLit i32
1947
1948 toIWord :: Integral a => DynFlags -> a -> LlvmVar
1949 toIWord dflags = mkIntLit (llvmWord dflags)
1950
1951
1952 -- | Error functions
1953 panic :: String -> a
1954 panic s = Outputable.panic $ "LlvmCodeGen.CodeGen." ++ s
1955
1956 pprPanic :: String -> SDoc -> a
1957 pprPanic s d = Outputable.pprPanic ("LlvmCodeGen.CodeGen." ++ s) d
1958
1959
1960 -- | Returns TBAA meta data by unique
1961 getTBAAMeta :: Unique -> LlvmM [MetaAnnot]
1962 getTBAAMeta u = do
1963 mi <- getUniqMeta u
1964 return [MetaAnnot tbaa (MetaNode i) | let Just i = mi]
1965
1966 -- | Returns TBAA meta data for given register
1967 getTBAARegMeta :: GlobalReg -> LlvmM [MetaAnnot]
1968 getTBAARegMeta = getTBAAMeta . getTBAA
1969
1970
1971 -- | A more convenient way of accumulating LLVM statements and declarations.
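-- A typical use (mirroring 'binLlvmOp' above) looks like:
--   runExprData $ do
--     vx <- exprToVarW x
--     vy <- exprToVarW y
--     doExprW (getVarType vx) (LlvmOp LM_MO_Add vx vy)
-- which collects the intermediate statements and declarations as it goes.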
1972 data LlvmAccum = LlvmAccum LlvmStatements [LlvmCmmDecl]
1973
1974 instance Semigroup LlvmAccum where
1975 LlvmAccum stmtsA declsA <> LlvmAccum stmtsB declsB =
1976 LlvmAccum (stmtsA Semigroup.<> stmtsB) (declsA Semigroup.<> declsB)
1977
1978 instance Monoid LlvmAccum where
1979 mempty = LlvmAccum nilOL []
1980 mappend = (Semigroup.<>)
1981
1982 liftExprData :: LlvmM ExprData -> WriterT LlvmAccum LlvmM LlvmVar
1983 liftExprData action = do
1984 (var, stmts, decls) <- lift action
1985 tell $ LlvmAccum stmts decls
1986 return var
1987
1988 statement :: LlvmStatement -> WriterT LlvmAccum LlvmM ()
1989 statement stmt = tell $ LlvmAccum (unitOL stmt) []
1990
1991 doExprW :: LlvmType -> LlvmExpression -> WriterT LlvmAccum LlvmM LlvmVar
1992 doExprW a b = do
1993 (var, stmt) <- lift $ doExpr a b
1994 statement stmt
1995 return var
1996
1997 exprToVarW :: CmmExpr -> WriterT LlvmAccum LlvmM LlvmVar
1998 exprToVarW = liftExprData . exprToVar
1999
2000 runExprData :: WriterT LlvmAccum LlvmM LlvmVar -> LlvmM ExprData
2001 runExprData action = do
2002 (var, LlvmAccum stmts decls) <- runWriterT action
2003 return (var, stmts, decls)
2004
2005 runStmtsDecls :: WriterT LlvmAccum LlvmM () -> LlvmM (LlvmStatements, [LlvmCmmDecl])
2006 runStmtsDecls action = do
2007 LlvmAccum stmts decls <- execWriterT action
2008 return (stmts, decls)
2009
2010 getCmmRegW :: CmmReg -> WriterT LlvmAccum LlvmM LlvmVar
2011 getCmmRegW = lift . getCmmReg
2012
2013 genLoadW :: Atomic -> CmmExpr -> CmmType -> WriterT LlvmAccum LlvmM LlvmVar
2014 genLoadW atomic e ty = liftExprData $ genLoad atomic e ty
2015
2016 doTrashStmts :: WriterT LlvmAccum LlvmM ()
2017 doTrashStmts = do
2018 stmts <- lift getTrashStmts
2019 tell $ LlvmAccum stmts mempty
2020
2021 -- | Return element of single-element list; 'panic' if list is not a single-element list
2022 singletonPanic :: String -> [a] -> a
2023 singletonPanic _ [x] = x
2024 singletonPanic s _ = panic s