YAML kills a kitten again...
[hsc2hs.git] / ATTParser.hs
1 -- A rather crude asm parser.
2 --
3 --
4 -- we only handle a subset of AT&T assembly
5 -- right now. This is what gcc and clang can
6 -- emit. For clang using llvm-ir might be
7 -- even better. For gcc gimple if that can
8 -- be consumed reliably somehow.
9 --
10 -- For now we'll rely on the at&t assembly
11 -- to be sufficient for constants.
12 --
13
14
15 module ATTParser where
16
17 import Control.Applicative ((<|>))
18 import Data.Word (Word32, Word64)
19 import Data.Int (Int64)
20 import Data.Char (isDigit, isSpace)
21 import Data.Bits (shiftL, shiftR, (.|.))
22 import Data.Maybe (fromMaybe)
23
24 data Inst = Ident String
25 | Long Word32
26 | Quad Word64
27 | Ref String
28 | Ascii String
29 deriving Show
30
31 type ASM = [(String, Inst)]
32
33 isIdent :: Inst -> Bool
34 isIdent (Ident _) = True
35 isIdent _ = False
36
37 trim :: String -> String
38 trim = reverse . dropWhile (`elem` " \t") . reverse . dropWhile (`elem` " \t")
39 -- | generalized @words@.
40 words' :: (a -> Bool) -> [a] -> [[a]]
41 words' p s = case dropWhile p s of
42 [] -> []
43 s' -> w : words' p s''
44 where (w, s'') = break p s'
45
46 isNumber :: String -> Bool
47 isNumber ('-':x) = all isDigit x
48 isNumber ('+':x) = all isDigit x
49 isNumber x = all isDigit x
50
51 -- | process the assembly instructions, filtering out
52 -- identifiers and constant values.
53 preprocess :: String -> [Inst]
54 preprocess [] = []
55 preprocess ('\t':attr) = let (h, t) = break isSpace attr
56 in case h:words' (=='\t') t of
57 (".quad":x:_) | isNumber (w x) -> [Quad $ read (w x)]
58 | otherwise -> [Ref $ (w x)]
59 (".xword":x:_)| isNumber (w x) -> [Quad $ read (w x)]
60 | otherwise -> [Ref $ (w x)]
61 (".long":x:_) | isNumber (w x) -> [Long $ read (w x)]
62 | otherwise -> [Ref $ (w x)]
63 (".space":x:_)| (w x) == "4" -> [Long 0]
64 | (w x) == "8" -> [Quad 0]
65 (".ascii":x:_) -> [Ascii $ read x]
66 (".asciz":x:_) -> [Ascii $ read x ++ "\0"]
67 _ -> []
68 where w = head . words
69 preprocess ('.':'z':'e':'r':'o':'f':'i':'l':'l':' ':x) = case words' (==',') x of
70 (_seg:_sect:sym:size:_) | size == "4" -> [Ident sym, Long 0]
71 | size == "8" -> [Ident sym, Quad 0]
72 _ -> []
73 preprocess (c:cs) | not (isSpace c) = [Ident $ takeWhile (/= ':') (c:cs)]
74 | otherwise = []
75
76 -- | turn the list of instructions into an associated list
77 parseInsts :: [Inst] -> [(String, Inst)]
78 parseInsts [] = []
79 parseInsts (Ident name:xs) = case break isIdent xs of
80 ([], xs') -> parseInsts xs'
81 (is, xs') -> (name, combineInst is):parseInsts xs'
82 parseInsts _ = error "Invalid instructions"
83
84 -- | combine instructions (e.g. two long into a quad)
85 combineInst :: [Inst] -> Inst
86 combineInst [Quad i] = Quad i
87 combineInst [Long i] = Quad (fromIntegral i)
88 combineInst [Long h, Long l] = Quad $ (shiftL (fromIntegral h) 32) .|. fromIntegral l
89 combineInst [Ref s] = Ref s
90 combineInst [Ascii s] = Ascii s
91 combineInst is = error $ "Cannot combine instructions: " ++ show is
92
93 -- | inline references
94 inlineRef :: [(String, Inst)] -> [(String, Inst)]
95 inlineRef xs = map go xs
96 where go (k, Ref name) = (k, fromMaybe (error $ "failed to find reference " ++ show name) $ lookup name xs)
97 go x = x
98
99 fixWordOrder :: [(String, Inst)] -> [(String, Inst)]
100 fixWordOrder xs = case lookupInteger "___hsc2hs_BOM___" xs of
101 Just 1 -> map go xs
102 _ -> xs
103 where go (k, Quad w) = (k, Quad $ shiftL w 32 .|. shiftR w 32)
104 go x = x
105
106 parse :: FilePath -> IO [(String, Inst)]
107 parse f = (fixWordOrder . inlineRef . parseInsts . concatMap preprocess . lines) `fmap` readFile f
108
109 -- | lookup a symbol without or with underscore prefix
110 lookup_ :: String -> [(String,b)] -> Maybe b
111 lookup_ k l = lookup k l <|> lookup ("_" ++ k) l
112
113 lookupString :: String -> [(String, Inst)] -> Maybe String
114 lookupString k l = case (lookup_ k l) of
115 Just (Ascii s) -> Just s
116 _ -> Nothing
117
118 lookupInteger :: String -> [(String, Inst)] -> Maybe Integer
119 lookupInteger k l = case (lookup_ k l, lookup_ (k ++ "___hsc2hs_sign___") l) of
120 (Just (Quad i), Just (Quad 1)) -> Just (fromIntegral (fromIntegral i :: Int64))
121 (Just (Quad i), _) -> Just (fromIntegral i)
122 _ -> Nothing