c20887f045285d47b35b65396e2c3f17c05b5043
[ghc.git] / compiler / hieFile / HieTypes.hs
1 {-# LANGUAGE DeriveTraversable #-}
2 {-# LANGUAGE DeriveDataTypeable #-}
3 {-# LANGUAGE TypeSynonymInstances #-}
4 {-# LANGUAGE FlexibleInstances #-}
5 {-# LANGUAGE ScopedTypeVariables #-}
6 module HieTypes where
7
8 import GhcPrelude
9
10 import Binary
11 import FastString ( FastString )
12 import IfaceType
13 import Module ( ModuleName )
14 import Name ( Name )
15 import Outputable hiding ( (<>) )
16 import SrcLoc ( RealSrcSpan )
17
18 import qualified Data.Array as A
19 import qualified Data.Map as M
20 import qualified Data.Set as S
21 import Data.ByteString ( ByteString )
22 import Data.Data ( Typeable, Data )
23 import Data.Semigroup ( Semigroup(..) )
24 import Data.Word ( Word8 )
25 import Control.Applicative ( (<|>) )
26
-- | Span type used by the @.hie@ data structures; an alias for 'RealSrcSpan'.
type Span = RealSrcSpan
28
-- | The version of the @.hie@ file format currently in use.
curHieVersion :: Word8
curHieVersion = 0
32
{- |
GHC builds up a wealth of information about Haskell source as it compiles it.
@.hie@ files are a way of persisting some of this information to disk so that
external tools that need to work with Haskell source don't need to parse,
typecheck, and rename all over again. These files contain:

  * a simplified AST

      * nodes are annotated with source positions and types
      * identifiers are annotated with scope information

  * the raw bytes of the initial Haskell source

Besides saving compilation cycles, @.hie@ files also offer a more stable
interface than the GHC API.
-}
data HieFile = HieFile
  { hie_version :: Word8
    -- ^ Version of the HIE format

  , hie_ghc_version :: ByteString
    -- ^ Version of GHC that produced this file

  , hie_hs_file :: FilePath
    -- ^ Path of the initial Haskell source file

  , hie_types :: A.Array TypeIndex HieTypeFlat
    -- ^ Types referenced in the 'hie_asts'.
    --
    -- See Note [Efficient serialization of redundant type info]

  , hie_asts :: HieASTs TypeIndex
    -- ^ Type-annotated abstract syntax trees

  , hie_hs_src :: ByteString
    -- ^ Raw bytes of the initial Haskell source
  }
70
-- | Serialises the fields of a 'HieFile' in declaration order; 'get' reads
-- them back in exactly the same order.
instance Binary HieFile where
  put_ bh hf =
    put_ bh (hie_version hf)
      >> put_ bh (hie_ghc_version hf)
      >> put_ bh (hie_hs_file hf)
      >> put_ bh (hie_types hf)
      >> put_ bh (hie_asts hf)
      >> put_ bh (hie_hs_src hf)

  get bh = do
    -- The order of these reads must mirror the writes in 'put_'.
    version    <- get bh
    ghcVersion <- get bh
    hsFile     <- get bh
    types      <- get bh
    asts       <- get bh
    hsSrc      <- get bh
    return (HieFile version ghcVersion hsFile types asts hsSrc)
87
88
89 {-
90 Note [Efficient serialization of redundant type info]
91 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
92
93 The type information in .hie files is highly repetitive and redundant. For
94 example, consider the expression
95
96 const True 'a'
97
98 There is a lot of shared structure between the types of subterms:
99
100 * const True 'a' :: Bool
101 * const True :: Char -> Bool
102 * const :: Bool -> Char -> Bool
103
104 Since all 3 of these types need to be stored in the .hie file, it is worth
105 making an effort to deduplicate this shared structure. The trick is to define
106 a new data type that is a flattened version of 'Type':
107
108 data HieType a = HAppTy a a -- data Type = AppTy Type Type
109 | HFunTy a a -- | FunTy Type Type
110 | ...
111
112 type TypeIndex = Int
113
114 Types in the final AST are stored in an 'A.Array TypeIndex (HieType TypeIndex)',
115 where the 'TypeIndex's in the 'HieType' are references to other elements of the
116 array. Types recovered from GHC are deduplicated and stored in this compressed
117 form with sharing of subtrees.
118 -}
119
-- | Index of a type in the 'hie_types' array.
--
-- See Note [Efficient serialization of redundant type info]
type TypeIndex = Int
121
-- | A flattened version of 'Type'. The parameter @a@ stands for the
-- representation of nested types (for example a 'TypeIndex').
--
-- See Note [Efficient serialization of redundant type info]
data HieType a
  = HTyVarTy Name
  | HAppTy a (HieArgs a)
  | HTyConApp IfaceTyCon (HieArgs a)
  | HForAllTy ((Name, a), ArgFlag) a
  | HFunTy a a
  | HQualTy a a
    -- ^ type with constraint: @t1 => t2@ (see 'IfaceDFunTy')
  | HLitTy IfaceTyLit
  | HCastTy a
  | HCoercionTy
  deriving (Functor, Foldable, Traversable, Eq)
136
-- | A 'HieType' whose nested types are references ('TypeIndex') to other
-- types.
--
-- See Note [Efficient serialization of redundant type info]
type HieTypeFlat = HieType TypeIndex
138
-- | Fixed point of 'HieType': nested types are stored directly instead of
-- as indices. Roughly isomorphic to the original core 'Type'.
newtype HieTypeFix = Roll (HieType HieTypeFix)
141
-- | Tagged serialisation of 'HieType': one tag byte selecting the
-- constructor (0-8, in declaration order), followed by that constructor's
-- fields in order.
instance Binary (HieType TypeIndex) where
  put_ bh (HTyVarTy n) = do
    putByte bh 0
    put_ bh n
  put_ bh (HAppTy a b) = do
    putByte bh 1
    put_ bh a
    put_ bh b
  put_ bh (HTyConApp n xs) = do
    putByte bh 2
    put_ bh n
    put_ bh xs
  put_ bh (HForAllTy bndr a) = do
    putByte bh 3
    put_ bh bndr
    put_ bh a
  put_ bh (HFunTy a b) = do
    putByte bh 4
    put_ bh a
    put_ bh b
  put_ bh (HQualTy a b) = do
    putByte bh 5
    put_ bh a
    put_ bh b
  put_ bh (HLitTy l) = do
    putByte bh 6
    put_ bh l
  put_ bh (HCastTy a) = do
    putByte bh 7
    put_ bh a
  put_ bh HCoercionTy = putByte bh 8

  -- Reads the tag byte and then the fields of the matching constructor.
  -- Panics on a tag outside 0-8.
  get bh = do
    (t :: Word8) <- get bh
    case t of
      0 -> HTyVarTy <$> get bh
      1 -> HAppTy <$> get bh <*> get bh
      2 -> HTyConApp <$> get bh <*> get bh
      3 -> HForAllTy <$> get bh <*> get bh
      4 -> HFunTy <$> get bh <*> get bh
      5 -> HQualTy <$> get bh <*> get bh
      6 -> HLitTy <$> get bh
      7 -> HCastTy <$> get bh
      8 -> return HCoercionTy
      -- The message names this instance's type (it previously said HieArgs).
      _ -> panic "Binary (HieType Int): invalid tag"
187
188
-- | A list of type arguments, each paired with its visibility (i.e. whether
-- the argument is one that would return 'True' for 'isVisibleArgFlag').
newtype HieArgs a = HieArgs [(Bool, a)]
  deriving (Functor, Foldable, Traversable, Eq)
193
-- | 'HieArgs' is serialised as its underlying list of (visibility, index)
-- pairs.
instance Binary (HieArgs TypeIndex) where
  put_ bh args = case args of
    HieArgs pairs -> put_ bh pairs
  get bh = fmap HieArgs (get bh)
197
-- | Mapping from file paths (represented using 'FastString') to the AST of
-- the corresponding file.
newtype HieASTs a = HieASTs
  { getAsts :: M.Map FastString (HieAST a)
  }
  deriving (Functor, Foldable, Traversable)
202
-- | The map is written as an ascending association list and rebuilt with
-- 'M.fromDistinctAscList', which relies on the list read back still being
-- in ascending key order.
instance Binary (HieASTs TypeIndex) where
  put_ bh = put_ bh . M.toAscList . getAsts
  get bh = HieASTs . M.fromDistinctAscList <$> get bh
206
207
-- | A node of the simplified AST stored in @.hie@ files.
data HieAST a = Node
  { nodeInfo :: NodeInfo a
    -- ^ the information stored in this node
  , nodeSpan :: Span
    -- ^ source position of this node
  , nodeChildren :: [HieAST a]
    -- ^ child nodes
  }
  deriving (Functor, Foldable, Traversable)
214
-- | A node is serialised as its info, then its span, then its children.
instance Binary (HieAST TypeIndex) where
  put_ bh ast =
    put_ bh (nodeInfo ast)
      >> put_ bh (nodeSpan ast)
      >> put_ bh (nodeChildren ast)

  get bh = do
    -- Read order mirrors the write order in 'put_'.
    info     <- get bh
    sp       <- get bh
    children <- get bh
    return (Node info sp children)
225
226
-- | The information stored in one AST node.
--
-- The type parameter exists to provide flexibility in the representation of
-- types (see Note [Efficient serialization of redundant type info]).
data NodeInfo a = NodeInfo
  { nodeAnnotations :: S.Set (FastString, FastString)
    -- ^ (name of the AST node constructor, name of the AST node Type)

  , nodeType :: [a]
    -- ^ The Haskell types of this node, if any.

  , nodeIdentifiers :: NodeIdentifiers a
    -- ^ All the identifiers and their details
  }
  deriving (Functor, Foldable, Traversable)
241
-- | Serialises the annotations, then the types, then the identifiers.
instance Binary (NodeInfo TypeIndex) where
  put_ bh ni =
    put_ bh (S.toAscList (nodeAnnotations ni))
      >> put_ bh (nodeType ni)
      >> put_ bh (M.toList (nodeIdentifiers ni))

  get bh = do
    annotations <- S.fromDistinctAscList <$> get bh
    types       <- get bh
    -- NOTE(review): the identifier map is rebuilt with 'M.fromList' rather
    -- than 'M.fromDistinctAscList', unlike the annotation set. Presumably
    -- the key order is not guaranteed to survive a round trip; confirm
    -- before changing.
    identifiers <- M.fromList <$> get bh
    return (NodeInfo annotations types identifiers)
251
-- | An identifier is either a module name ('Left') or a 'Name' ('Right').
type Identifier = Either ModuleName Name
253
-- | Map from each 'Identifier' to its 'IdentifierDetails'.
type NodeIdentifiers a = M.Map Identifier (IdentifierDetails a)
255
-- | Information associated with every identifier
--
-- We need to include types with identifiers because sometimes multiple
-- identifiers occur in the same span (Overloaded Record Fields and so on)
data IdentifierDetails a = IdentifierDetails
  { identType :: Maybe a
    -- ^ type of the identifier, if any
  , identInfo :: S.Set ContextInfo
    -- ^ the contexts recorded for the identifier
  }
  deriving (Eq, Functor, Foldable, Traversable)
264
-- | Prints the constructor name followed by the type and the context set.
instance Outputable a => Outputable (IdentifierDetails a) where
  ppr details = text "IdentifierDetails" <+> tyDoc <+> infoDoc
    where
      tyDoc   = ppr (identType details)
      infoDoc = ppr (identInfo details)
267
-- | Combines two sets of details: the first available type wins (left
-- biased, via '<|>') and the context sets are unioned.
instance Semigroup (IdentifierDetails a) where
  lhs <> rhs = IdentifierDetails
    { identType = identType lhs <|> identType rhs
    , identInfo = identInfo lhs `S.union` identInfo rhs
    }
271
-- | The empty details: no type and no context information.
instance Monoid (IdentifierDetails a) where
  mempty = IdentifierDetails
    { identType = Nothing
    , identInfo = S.empty
    }
274
-- | Serialises the optional type followed by the context set, the latter as
-- an ascending list.
instance Binary (IdentifierDetails TypeIndex) where
  put_ bh dets =
    put_ bh (identType dets)
      >> put_ bh (S.toAscList (identInfo dets))

  get bh = do
    ty   <- get bh
    info <- S.fromDistinctAscList <$> get bh
    return (IdentifierDetails ty info)
282
283
-- | Different contexts under which identifiers exist
data ContextInfo
  = Use
    -- ^ regular variable
  | MatchBind
  | IEThing IEType
    -- ^ import/export
  | TyDecl

  -- | Value binding
  | ValBind
      BindType     -- ^ whether or not the binding is in an instance
      Scope        -- ^ scope over which the value is bound
      (Maybe Span) -- ^ span of entire binding

  -- | Pattern binding
  --
  -- This case is tricky because the bound identifier can be used in two
  -- distinct scopes. Consider the following example (with @-XViewPatterns@)
  --
  -- @
  -- do (b, a, (a -> True)) <- bar
  --    foo a
  -- @
  --
  -- The identifier @a@ has two scopes: in the view pattern @(a -> True)@ and
  -- in the rest of the @do@-block in @foo a@.
  | PatternBind
      Scope        -- ^ scope /in the pattern/ (the variable bound can be used
                   -- further in the pattern)
      Scope        -- ^ rest of the scope outside the pattern
      (Maybe Span) -- ^ span of entire binding

  | ClassTyDecl (Maybe Span)

  -- | Declaration
  | Decl
      DeclType     -- ^ type of declaration
      (Maybe Span) -- ^ span of entire binding

  -- | Type variable
  | TyVarBind Scope TyVarScope

  -- | Record field
  | RecField RecFieldContext (Maybe Span)
  deriving (Eq, Ord, Show)
328
-- | Pretty-prints a 'ContextInfo' using the text of its 'Show' instance.
instance Outputable ContextInfo where
  ppr ctx = text (show ctx)
331
-- | Tagged serialisation of 'ContextInfo': one tag byte (0-9) followed by
-- the constructor's fields in order. Note that 'MatchBind' uses tag 9, so
-- the tags do not follow the declaration order of the constructors.
instance Binary ContextInfo where
  put_ bh ctx = case ctx of
    Use -> putByte bh 0
    IEThing ieTy -> do
      putByte bh 1
      put_ bh ieTy
    TyDecl -> putByte bh 2
    ValBind bindTy scope mspan -> do
      putByte bh 3
      put_ bh bindTy
      put_ bh scope
      put_ bh mspan
    PatternBind patScope restScope mspan -> do
      putByte bh 4
      put_ bh patScope
      put_ bh restScope
      put_ bh mspan
    ClassTyDecl mspan -> do
      putByte bh 5
      put_ bh mspan
    Decl declTy mspan -> do
      putByte bh 6
      put_ bh declTy
      put_ bh mspan
    TyVarBind scope tvScope -> do
      putByte bh 7
      put_ bh scope
      put_ bh tvScope
    RecField fieldCtx mspan -> do
      putByte bh 8
      put_ bh fieldCtx
      put_ bh mspan
    MatchBind -> putByte bh 9

  -- Reads the tag byte and then the fields of the matching constructor.
  -- Panics on a tag outside 0-9.
  get bh = do
    (tag :: Word8) <- get bh
    case tag of
      0 -> return Use
      1 -> IEThing <$> get bh
      2 -> return TyDecl
      3 -> ValBind <$> get bh <*> get bh <*> get bh
      4 -> PatternBind <$> get bh <*> get bh <*> get bh
      5 -> ClassTyDecl <$> get bh
      6 -> Decl <$> get bh <*> get bh
      7 -> TyVarBind <$> get bh <*> get bh
      8 -> RecField <$> get bh <*> get bh
      9 -> return MatchBind
      _ -> panic "Binary ContextInfo: invalid tag"
379
380
-- | Types of imports and exports
--
-- The constructor order determines the derived 'Enum' and 'Ord' instances.
data IEType
  = Import
  | ImportAs
  | ImportHiding
  | Export
  deriving (Eq, Enum, Ord, Show)
388
-- | Serialised as a single byte holding the constructor's 'Enum' index.
instance Binary IEType where
  put_ bh = putByte bh . fromIntegral . fromEnum
  get bh = do
    tag <- getByte bh
    -- Forced here so that a byte with no matching constructor fails at
    -- read time rather than when the value is first used.
    return $! toEnum (fromIntegral tag)
392
393
-- | The context of a record field, stored in the 'RecField' constructor of
-- 'ContextInfo'.
--
-- The constructor order determines the derived 'Enum' and 'Ord' instances.
data RecFieldContext
  = RecFieldDecl
  | RecFieldAssign
  | RecFieldMatch
  | RecFieldOcc
  deriving (Eq, Enum, Ord, Show)
400
-- | Serialised as a single byte holding the constructor's 'Enum' index.
instance Binary RecFieldContext where
  put_ bh = putByte bh . fromIntegral . fromEnum
  get bh = do
    tag <- getByte bh
    -- Forced here so that a byte with no matching constructor fails at
    -- read time rather than when the value is first used.
    return $! toEnum (fromIntegral tag)
404
405
-- | Kind of value binding, as stored in the 'ValBind' constructor of
-- 'ContextInfo' (whether or not the binding is in an instance).
--
-- The constructor order determines the derived 'Enum' and 'Ord' instances.
data BindType
  = RegularBind
  | InstanceBind
  deriving (Eq, Ord, Show, Enum)
410
-- | Serialised as a single byte holding the constructor's 'Enum' index.
instance Binary BindType where
  put_ bh = putByte bh . fromIntegral . fromEnum
  get bh = do
    tag <- getByte bh
    -- Forced here so that a byte with no matching constructor fails at
    -- read time rather than when the value is first used.
    return $! toEnum (fromIntegral tag)
414
415
-- | Type of a declaration, as stored in the 'Decl' constructor of
-- 'ContextInfo'.
--
-- The constructor order determines the derived 'Enum' and 'Ord' instances.
data DeclType
  = FamDec
    -- ^ type or data family
  | SynDec
    -- ^ type synonym
  | DataDec
    -- ^ data declaration
  | ConDec
    -- ^ constructor declaration
  | PatSynDec
    -- ^ pattern synonym
  | ClassDec
    -- ^ class declaration
  | InstDec
    -- ^ instance declaration
  deriving (Eq, Ord, Show, Enum)
425
-- | Serialised as a single byte holding the constructor's 'Enum' index.
instance Binary DeclType where
  put_ bh = putByte bh . fromIntegral . fromEnum
  get bh = do
    tag <- getByte bh
    -- Forced here so that a byte with no matching constructor fails at
    -- read time rather than when the value is first used.
    return $! toEnum (fromIntegral tag)
429
430
-- | A scope: no scope at all, a local scope delimited by a 'Span', or the
-- module scope.
data Scope
  = NoScope
  | LocalScope Span
  | ModuleScope
  deriving (Eq, Ord, Show, Typeable, Data)
436
-- | Prints the constructor name, followed by the span for 'LocalScope'.
instance Outputable Scope where
  ppr scope = case scope of
    NoScope       -> text "NoScope"
    LocalScope sp -> text "LocalScope" <+> ppr sp
    ModuleScope   -> text "ModuleScope"
441
-- | Tagged serialisation of 'Scope': tag byte 0, 1 or 2; tag 1
-- ('LocalScope') is followed by the span.
instance Binary Scope where
  put_ bh scope = case scope of
    NoScope -> putByte bh 0
    LocalScope sp -> do
      putByte bh 1
      put_ bh sp
    ModuleScope -> putByte bh 2

  -- Panics on a tag outside 0-2.
  get bh = do
    (tag :: Word8) <- get bh
    case tag of
      0 -> return NoScope
      1 -> LocalScope <$> get bh
      2 -> return ModuleScope
      _ -> panic "Binary Scope: invalid tag"
456
457
-- | Scope of a type variable.
--
-- This warrants a data type apart from 'Scope' because of complexities
-- introduced by features like @-XScopedTypeVariables@ and @-XInstanceSigs@. For
-- example, consider:
--
-- @
-- foo, bar, baz :: forall a. a -> a
-- @
--
-- Here @a@ is in scope in all the definitions of @foo@, @bar@, and @baz@, so we
-- need a list of scopes to keep track of this. Furthermore, this list cannot be
-- computed until we resolve the binding sites of @foo@, @bar@, and @baz@.
--
-- Consequently, @a@ starts with an @'UnresolvedScope' [foo, bar, baz] Nothing@
-- which later gets resolved into a 'ResolvedScopes'.
data TyVarScope
  = ResolvedScopes [Scope]

  -- | Unresolved scopes should never show up in the final @.hie@ file
  | UnresolvedScope
      [Name]       -- ^ names of the definitions over which the scope spans
      (Maybe Span) -- ^ the location of the instance/class declaration for
                   -- the case where the type variable is declared in a
                   -- method type signature
  deriving (Eq, Ord)
484
-- | Shows a 'ResolvedScopes' as its list of scopes.
--
-- An 'UnresolvedScope' is rendered as the fixed string @"UnresolvedScope"@
-- instead of calling 'error'. 'ContextInfo' derives 'Show' and its
-- 'Outputable' instance goes through 'show', so a partial 'show' here would
-- crash any attempt to print a 'TyVarBind' that has not been resolved yet.
instance Show TyVarScope where
  show (ResolvedScopes sc) = show sc
  show (UnresolvedScope _ _) = "UnresolvedScope"
488
-- | Tagged serialisation of 'TyVarScope': tag 0 for 'ResolvedScopes', tag 1
-- for 'UnresolvedScope', each followed by the constructor's fields.
instance Binary TyVarScope where
  put_ bh tvScope = case tvScope of
    ResolvedScopes scopes -> do
      putByte bh 0
      put_ bh scopes
    UnresolvedScope names mspan -> do
      putByte bh 1
      put_ bh names
      put_ bh mspan

  -- Panics on a tag other than 0 or 1.
  get bh = do
    (tag :: Word8) <- get bh
    case tag of
      0 -> ResolvedScopes <$> get bh
      1 -> UnresolvedScope <$> get bh <*> get bh
      _ -> panic "Binary TyVarScope: invalid tag"
502 1 -> UnresolvedScope <$> get bh <*> get bh
503 _ -> panic "Binary TyVarScope: invalid tag"