Adds `data8`, `stringz` directives for ia64
[hsc2hs.git] / ATTParser.hs
1 -- A rather crude asm parser.
2 --
3 --
4 -- we only handle a subset of AT&T assembly
5 -- right now. This is what gcc and clang can
6 -- emit. For clang using llvm-ir might be
7 -- even better. For gcc gimple if that can
8 -- be consumed reliably somehow.
9 --
10 -- For now we'll rely on the at&t assembly
11 -- to be sufficient for constants.
12 --
13
14
15 module ATTParser where
16
17 import Control.Applicative ((<|>))
18 import Data.Word (Word32, Word64)
19 import Data.Int (Int64)
20 import Data.Char (isDigit, isSpace)
21 import Data.Bits (shiftL, shiftR, (.|.))
22 import Data.Maybe (fromMaybe)
23
24 data Inst = Ident String
25 | Long Word32
26 | Quad Word64
27 | Ref String
28 | Ascii String
29 deriving Show
30
31 type ASM = [(String, Inst)]
32
33 isIdent :: Inst -> Bool
34 isIdent (Ident _) = True
35 isIdent _ = False
36
37 trim :: String -> String
38 trim = reverse . dropWhile (`elem` " \t") . reverse . dropWhile (`elem` " \t")
39 -- | generalized @words@.
40 words' :: (a -> Bool) -> [a] -> [[a]]
41 words' p s = case dropWhile p s of
42 [] -> []
43 s' -> w : words' p s''
44 where (w, s'') = break p s'
45
46 isNumber :: String -> Bool
47 isNumber ('-':x) = all isDigit x
48 isNumber ('+':x) = all isDigit x
49 isNumber x = all isDigit x
50
51 -- | process the assembly instructions, filtering out
52 -- identifiers and constant values.
53 preprocess :: String -> [Inst]
54 preprocess [] = []
55 preprocess ('\t':attr) = let (h, t) = break isSpace attr
56 in case h:words' (=='\t') t of
57 -- 8 byte values
58 (".quad":x:_) | isNumber (w x) -> [Quad $ read (w x)]
59 | otherwise -> [Ref $ (w x)]
60 (".xword":x:_)| isNumber (w x) -> [Quad $ read (w x)]
61 | otherwise -> [Ref $ (w x)]
62 (".8byte":x:_)| isNumber (w x) -> [Quad $ read (w x)]
63 | otherwise -> [Ref $ (w x)]
64 ("data8":x:_) | isNumber (w x) -> [Quad $ read (w x)]
65 | otherwise -> [Ref $ (w x)]
66
67 -- 4 byte values
68 (".long":x:_) | isNumber (w x) -> [Long $ read (w x)]
69 | otherwise -> [Ref $ (w x)]
70 (".word":x:_) | isNumber (w x) -> [Long $ read (w x)]
71 | otherwise -> [Ref $ (w x)]
72 (".4byte":x:_)| isNumber (w x) -> [Long $ read (w x)]
73 | otherwise -> [Ref $ (w x)]
74
75 (".space":x:_)| (w x) == "4" -> [Long 0]
76 | (w x) == "8" -> [Quad 0]
77 (".skip":x:_) | (w x) == "4" -> [Long 0]
78 | (w x) == "8" -> [Quad 0]
79
80 (".ascii":x:_) -> [Ascii $ read x]
81 (".asciz":x:_) -> [Ascii $ read x ++ "\0"]
82 ("stringz":x:_) -> [Ascii $ read x ++ "\0"]
83 _ -> []
84 where w = head . words
85 preprocess ('.':'z':'e':'r':'o':'f':'i':'l':'l':' ':x) = case words' (==',') x of
86 (_seg:_sect:sym:size:_) | size == "4" -> [Ident sym, Long 0]
87 | size == "8" -> [Ident sym, Quad 0]
88 _ -> []
89 preprocess (c:cs) | not (isSpace c) = [Ident $ takeWhile (/= ':') (c:cs)]
90 | otherwise = []
91
92 -- | turn the list of instructions into an associated list
93 parseInsts :: [Inst] -> [(String, Inst)]
94 parseInsts [] = []
95 parseInsts (Ident name:xs) = case break isIdent xs of
96 ([], xs') -> parseInsts xs'
97 (is, xs') -> (name, combineInst is):parseInsts xs'
98 parseInsts _ = error "Invalid instructions"
99
100 -- | combine instructions (e.g. two long into a quad)
101 combineInst :: [Inst] -> Inst
102 combineInst [Quad i] = Quad i
103 combineInst [Long i] = Quad (fromIntegral i)
104 combineInst [Long h, Long l] = Quad $ (shiftL (fromIntegral h) 32) .|. fromIntegral l
105 combineInst [Ref s] = Ref s
106 combineInst [Ascii s] = Ascii s
107 combineInst is = error $ "Cannot combine instructions: " ++ show is
108
109 -- | inline references
110 inlineRef :: [(String, Inst)] -> [(String, Inst)]
111 inlineRef xs = map go xs
112 where go (k, Ref name) = (k, fromMaybe (error $ "failed to find reference " ++ show name) $ lookup name xs)
113 go x = x
114
115 fixWordOrder :: [(String, Inst)] -> [(String, Inst)]
116 fixWordOrder xs = case lookupInteger "___hsc2hs_BOM___" xs of
117 Just 1 -> map go xs
118 _ -> xs
119 where go (k, Quad w) = (k, Quad $ shiftL w 32 .|. shiftR w 32)
120 go x = x
121
122 parse :: FilePath -> IO [(String, Inst)]
123 parse f = (fixWordOrder . inlineRef . parseInsts . concatMap preprocess . lines) `fmap` readFile f
124
125 -- | lookup a symbol without or with underscore prefix
126 lookup_ :: String -> [(String,b)] -> Maybe b
127 lookup_ k l = lookup k l <|> lookup ("_" ++ k) l
128
129 lookupString :: String -> [(String, Inst)] -> Maybe String
130 lookupString k l = case (lookup_ k l) of
131 Just (Ascii s) -> Just s
132 _ -> Nothing
133
134 lookupInteger :: String -> [(String, Inst)] -> Maybe Integer
135 lookupInteger k l = case (lookup_ k l, lookup_ (k ++ "___hsc2hs_sign___") l) of
136 (Just (Quad i), Just (Quad 1)) -> Just (fromIntegral (fromIntegral i :: Int64))
137 (Just (Quad i), _) -> Just (fromIntegral i)
138 _ -> Nothing