hsc2hs: Make removeFile more reliable on Windows. (#25)
[hsc2hs.git] / ATTParser.hs
1 -- A rather crude asm parser.
2 --
3 --
4 -- we only handle a subset of AT&T assembly
5 -- right now. This is what gcc and clang can
6 -- emit. For clang using llvm-ir might be
7 -- even better. For gcc gimple if that can
8 -- be consumed reliably somehow.
9 --
10 -- For now we'll rely on the at&t assembly
11 -- to be sufficient for constants.
12 --
13
14
15 module ATTParser where
16
17 import Control.Applicative ((<|>))
18 import Data.Word (Word32, Word64)
19 import Data.Int (Int64)
20 import Data.Char (isDigit, isSpace)
21 import Data.Bits (shiftL, shiftR, (.|.))
22 import Data.Maybe (fromMaybe)
23
24 data Inst = Ident String
25 | Long Word32
26 | Quad Word64
27 | Ref String
28 | Ascii String
29 deriving Show
30
31 mkLong :: Word32 -> Inst
32 mkLong = Long
33 mkQuad :: Word64 -> Inst
34 mkQuad = Quad
35 -- | turn @x@ and @(x)@ into @Ref x@.
36 -- The (x) syntax can be found in mips assembly.
37 mkRef :: String -> Inst
38 mkRef ('(':r) | (')':r') <- reverse r = Ref $ reverse r'
39 mkRef r = Ref r
40
41 mkAscii :: String -> Inst
42 mkAscii = Ascii
43
44 type ASM = [(String, Inst)]
45
46 isIdent :: Inst -> Bool
47 isIdent (Ident _) = True
48 isIdent _ = False
49
50 trim :: String -> String
51 trim = reverse . dropWhile (`elem` " \t") . reverse . dropWhile (`elem` " \t")
52 -- | generalized @words@.
53 words' :: (a -> Bool) -> [a] -> [[a]]
54 words' p s = case dropWhile p s of
55 [] -> []
56 s' -> w : words' p s''
57 where (w, s'') = break p s'
58
59 isNumber :: String -> Bool
60 isNumber ('-':x) = all isDigit x
61 isNumber ('+':x) = all isDigit x
62 isNumber x = all isDigit x
63
64 -- | process the assembly instructions, filtering out
65 -- identifiers and constant values.
66 preprocess :: String -> [Inst]
67 preprocess [] = []
68 preprocess ('\t':attr) = let (h, t) = break isSpace attr
69 in case h:words' (=='\t') t of
70 -- 8 byte values
71 (".quad":x:_) | isNumber (w x) -> [mkQuad $ read (w x)]
72 | otherwise -> [mkRef $ (w x)]
73 (".xword":x:_)| isNumber (w x) -> [mkQuad $ read (w x)]
74 | otherwise -> [mkRef $ (w x)]
75 (".8byte":x:_)| isNumber (w x) -> [mkQuad $ read (w x)]
76 | otherwise -> [mkRef $ (w x)]
77 ("data8":x:_) | isNumber (w x) -> [mkQuad $ read (w x)]
78 | otherwise -> [mkRef $ (w x)]
79
80 -- 4 byte values
81 (".long":x:_) | isNumber (w x) -> [mkLong $ read (w x)]
82 | otherwise -> [mkRef $ (w x)]
83 (".word":x:_) | isNumber (w x) -> [mkLong $ read (w x)]
84 | otherwise -> [mkRef $ (w x)]
85 (".4byte":x:_)| isNumber (w x) -> [mkLong $ read (w x)]
86 | otherwise -> [mkRef $ (w x)]
87
88 (".space":x:_)| (w x) == "4" -> [mkLong 0]
89 | (w x) == "8" -> [mkQuad 0]
90 (".skip":x:_) | (w x) == "4" -> [mkLong 0]
91 | (w x) == "8" -> [mkQuad 0]
92
93 (".ascii":x:_) -> [mkAscii $ read x]
94 (".asciz":x:_) -> [mkAscii $ read x ++ "\0"]
95 -- found on nios, sh4, alpha, mk68k; all without \0.
96 (".string":x:_) -> [mkAscii $ read x ++ "\0"]
97 -- found on hppa
98 (".stringz":x:_) -> [mkAscii $ read x ++ "\0"]
99 -- ia64
100 ("stringz":x:_) -> [mkAscii $ read x ++ "\0"]
101 _ -> []
102 where w = head . words
103 preprocess ('.':'z':'e':'r':'o':'f':'i':'l':'l':' ':x) = case words' (==',') x of
104 (_seg:_sect:sym:size:_) | size == "4" -> [Ident sym, mkLong 0]
105 | size == "8" -> [Ident sym, mkQuad 0]
106 _ -> []
107 preprocess (c:cs) | not (isSpace c) = [Ident $ takeWhile (/= ':') (c:cs)]
108 | otherwise = []
109
110 -- | turn the list of instructions into an associated list
111 parseInsts :: [Inst] -> [(String, Inst)]
112 parseInsts [] = []
113 parseInsts (Ident name:xs) = case break isIdent xs of
114 ([], xs') -> parseInsts xs'
115 (is, xs') -> (name, combineInst is):parseInsts xs'
116 parseInsts _ = error "Invalid instructions"
117
118 -- | combine instructions (e.g. two long into a quad)
119 combineInst :: [Inst] -> Inst
120 combineInst [Quad i] = Quad i
121 combineInst [Long i] = Quad (fromIntegral i)
122 combineInst [Long h, Long l] = Quad $ (shiftL (fromIntegral h) 32) .|. fromIntegral l
123 combineInst [Ref s] = Ref s
124 combineInst [Ascii s] = Ascii s
125 combineInst is = error $ "Cannot combine instructions: " ++ show is
126
127 -- | inline references
128 inlineRef :: [(String, Inst)] -> [(String, Inst)]
129 inlineRef xs = map go xs
130 where go (k, Ref name) = (k, fromMaybe (error $ "failed to find reference " ++ show name) $ lookup name xs)
131 go x = x
132
133 fixWordOrder :: [(String, Inst)] -> [(String, Inst)]
134 fixWordOrder xs = case lookupInteger "___hsc2hs_BOM___" xs of
135 Just 1 -> map go xs
136 _ -> xs
137 where go (k, Quad w) = (k, Quad $ shiftL w 32 .|. shiftR w 32)
138 go x = x
139
140 parse :: FilePath -> IO [(String, Inst)]
141 parse f = (fixWordOrder . inlineRef . parseInsts . concatMap preprocess . lines) `fmap` readFile f
142
143 -- | lookup a symbol without or with underscore prefix
144 lookup_ :: String -> [(String,b)] -> Maybe b
145 lookup_ k l = lookup k l <|> lookup ("_" ++ k) l
146
147 lookupString :: String -> [(String, Inst)] -> Maybe String
148 lookupString k l = case (lookup_ k l) of
149 Just (Ascii s) -> Just s
150 _ -> Nothing
151
152 lookupInteger :: String -> [(String, Inst)] -> Maybe Integer
153 lookupInteger k l = case (lookup_ k l, lookup_ (k ++ "___hsc2hs_sign___") l) of
154 (Just (Quad i), Just (Quad 1)) -> Just (fromIntegral (fromIntegral i :: Int64))
155 (Just (Quad i), _) -> Just (fromIntegral i)
156 _ -> Nothing