Expression/command ambiguity resolution
[ghc.git] / compiler / hieFile / HieTypes.hs
1 {-# LANGUAGE DeriveTraversable #-}
2 {-# LANGUAGE DeriveDataTypeable #-}
3 {-# LANGUAGE TypeSynonymInstances #-}
4 {-# LANGUAGE FlexibleInstances #-}
5 {-# LANGUAGE ScopedTypeVariables #-}
6 module HieTypes where
7
8 import GhcPrelude
9
10 import Binary
11 import FastString ( FastString )
12 import IfaceType
13 import Module ( ModuleName, Module )
14 import Name ( Name )
15 import Outputable hiding ( (<>) )
16 import SrcLoc ( RealSrcSpan )
17 import Avail
18
19 import qualified Data.Array as A
20 import qualified Data.Map as M
21 import qualified Data.Set as S
22 import Data.ByteString ( ByteString )
23 import Data.Data ( Typeable, Data )
24 import Data.Semigroup ( Semigroup(..) )
25 import Data.Word ( Word8 )
26 import Control.Applicative ( (<|>) )
27
-- | Spans used throughout this module; simply a shorter name for
-- 'RealSrcSpan'.
type Span = RealSrcSpan
29
-- | Version number of the @.hie@ file format described by this module.
curHieVersion :: Word8
curHieVersion = 0
33
{- |
While compiling a module GHC learns a great deal about its Haskell source.
A @.hie@ file persists part of that knowledge on disk, so that external tools
working with Haskell source do not have to parse, rename and typecheck it all
over again. A @.hie@ file contains:

  * a simplified AST, in which

      * nodes are annotated with source positions and types
      * identifiers are annotated with scope information

  * the raw bytes of the initial Haskell source

As well as saving compilation cycles, @.hie@ files offer a more stable
interface than the GHC API.
-}
data HieFile = HieFile
    { hie_version :: Word8
    -- ^ Version of the HIE format

    , hie_ghc_version :: ByteString
    -- ^ Version of the GHC that produced this file

    , hie_hs_file :: FilePath
    -- ^ Path of the initial Haskell source file

    , hie_module :: Module
    -- ^ The module that this HIE file describes

    , hie_types :: A.Array TypeIndex HieTypeFlat
    -- ^ The types referenced from 'hie_asts'.
    --
    -- See Note [Efficient serialization of redundant type info]

    , hie_asts :: HieASTs TypeIndex
    -- ^ Abstract syntax trees, annotated with types

    , hie_exports :: [AvailInfo]
    -- ^ The names exported by this module

    , hie_hs_src :: ByteString
    -- ^ Raw bytes of the initial Haskell source
    }
77
-- | The fields are written, and read back, in the order in which 'HieFile'
-- declares them.
instance Binary HieFile where
  put_ bh hf = do
    put_ bh (hie_version hf)
    put_ bh (hie_ghc_version hf)
    put_ bh (hie_hs_file hf)
    put_ bh (hie_module hf)
    put_ bh (hie_types hf)
    put_ bh (hie_asts hf)
    put_ bh (hie_exports hf)
    put_ bh (hie_hs_src hf)

  get bh = do
    version    <- get bh
    ghcVersion <- get bh
    hsFile     <- get bh
    modl       <- get bh
    types      <- get bh
    asts       <- get bh
    exports    <- get bh
    hsSrc      <- get bh
    return HieFile
      { hie_version     = version
      , hie_ghc_version = ghcVersion
      , hie_hs_file     = hsFile
      , hie_module      = modl
      , hie_types       = types
      , hie_asts        = asts
      , hie_exports     = exports
      , hie_hs_src      = hsSrc
      }
98
99
100 {-
101 Note [Efficient serialization of redundant type info]
102 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
103
104 The type information in .hie files is highly repetitive and redundant. For
105 example, consider the expression
106
107 const True 'a'
108
109 There is a lot of shared structure between the types of subterms:
110
111 * const True 'a' :: Bool
112 * const True :: Char -> Bool
113 * const :: Bool -> Char -> Bool
114
115 Since all 3 of these types need to be stored in the .hie file, it is worth
116 making an effort to deduplicate this shared structure. The trick is to define
117 a new data type that is a flattened version of 'Type':
118
119 data HieType a = HAppTy a a -- data Type = AppTy Type Type
120 | HFunTy a a -- | FunTy Type Type
121 | ...
122
123 type TypeIndex = Int
124
125 Types in the final AST are stored in an 'A.Array TypeIndex (HieType TypeIndex)',
126 where the 'TypeIndex's in the 'HieType' are references to other elements of the
127 array. Types recovered from GHC are deduplicated and stored in this compressed
128 form with sharing of subtrees.
129 -}
130
-- | Index into an array of flattened types such as 'hie_types'.
--
-- See Note [Efficient serialization of redundant type info]
type TypeIndex = Int
132
-- | 'Type', flattened: the type parameter @a@ stands in every position where
-- a child type would otherwise appear.
--
-- See Note [Efficient serialization of redundant type info]
data HieType a
  = HTyVarTy Name
  | HAppTy a (HieArgs a)
  | HTyConApp IfaceTyCon (HieArgs a)
  | HForAllTy ((Name, a), ArgFlag) a
  | HFunTy a a
  | HQualTy a a
    -- ^ type with constraint: @t1 => t2@ (see 'IfaceDFunTy')
  | HLitTy IfaceTyLit
  | HCastTy a
  | HCoercionTy
  deriving (Functor, Foldable, Traversable, Eq)
147
-- | A 'HieType' whose child positions hold 'TypeIndex' references.
type HieTypeFlat = HieType TypeIndex

-- | A 'HieType' whose child positions hold complete types again; roughly
-- isomorphic to the original core 'Type'.
newtype HieTypeFix = Roll (HieType HieTypeFix)
152
-- | A flattened type is serialised as a one-byte constructor tag followed by
-- the constructor's fields, in declaration order. 'get' panics on a tag that
-- 'put_' never writes.
instance Binary (HieType TypeIndex) where
  put_ bh (HTyVarTy n) = do
    putByte bh 0
    put_ bh n
  put_ bh (HAppTy a b) = do
    putByte bh 1
    put_ bh a
    put_ bh b
  put_ bh (HTyConApp n xs) = do
    putByte bh 2
    put_ bh n
    put_ bh xs
  put_ bh (HForAllTy bndr a) = do
    putByte bh 3
    put_ bh bndr
    put_ bh a
  put_ bh (HFunTy a b) = do
    putByte bh 4
    put_ bh a
    put_ bh b
  put_ bh (HQualTy a b) = do
    putByte bh 5
    put_ bh a
    put_ bh b
  put_ bh (HLitTy l) = do
    putByte bh 6
    put_ bh l
  put_ bh (HCastTy a) = do
    putByte bh 7
    put_ bh a
  put_ bh (HCoercionTy) = putByte bh 8

  get bh = do
    (t :: Word8) <- get bh
    case t of
      0 -> HTyVarTy <$> get bh
      1 -> HAppTy <$> get bh <*> get bh
      2 -> HTyConApp <$> get bh <*> get bh
      3 -> HForAllTy <$> get bh <*> get bh
      4 -> HFunTy <$> get bh <*> get bh
      5 -> HQualTy <$> get bh <*> get bh
      6 -> HLitTy <$> get bh
      7 -> HCastTy <$> get bh
      8 -> return HCoercionTy
      -- Name this instance in the message, so that a corrupt type tag is not
      -- mistaken for a problem in the 'HieArgs' instance.
      _ -> panic "Binary (HieType Int): invalid tag"
198
199
-- | Type arguments, each paired with its visibility (i.e. whether the
-- argument is one for which 'isVisibleArgFlag' would return 'True').
newtype HieArgs a = HieArgs [(Bool, a)]
  deriving (Functor, Foldable, Traversable, Eq)
204
-- | Serialised exactly as the underlying list of (visibility, type) pairs.
instance Binary (HieArgs TypeIndex) where
  put_ bh args = case args of
    HieArgs pairs -> put_ bh pairs
  get bh = fmap HieArgs (get bh)
208
-- | The AST of each file, keyed by the file's path (held as a 'FastString').
newtype HieASTs a = HieASTs { getAsts :: M.Map FastString (HieAST a) }
  deriving (Functor, Foldable, Traversable)
213
-- | The map is written as its ascending association list. Reading uses
-- 'M.fromDistinctAscList', which relies on that list coming back in ascending
-- key order without duplicates.
instance Binary (HieASTs TypeIndex) where
  put_ bh = put_ bh . M.toAscList . getAsts
  get bh = HieASTs . M.fromDistinctAscList <$> get bh
217
218
-- | One node of the AST stored in a @.hie@ file: its information, its span
-- and its child nodes.
data HieAST a = Node
  { nodeInfo     :: NodeInfo a
  , nodeSpan     :: Span
  , nodeChildren :: [HieAST a]
  }
  deriving (Functor, Foldable, Traversable)
225
-- | A node is written as its info, then its span, then its children.
instance Binary (HieAST TypeIndex) where
  put_ bh ast =
       put_ bh (nodeInfo ast)
    >> put_ bh (nodeSpan ast)
    >> put_ bh (nodeChildren ast)

  get bh = do
    info     <- get bh
    sp       <- get bh
    children <- get bh
    return (Node info sp children)
236
237
-- | Everything recorded about a single AST node.
--
-- Types are left as a type parameter so that their representation can vary
-- (see Note [Efficient serialization of redundant type info]).
data NodeInfo a = NodeInfo
    { nodeAnnotations :: S.Set (FastString, FastString)
    -- ^ (name of the AST node constructor, name of the AST node Type)

    , nodeType :: [a]
    -- ^ The Haskell types of this node, if it has any.

    , nodeIdentifiers :: NodeIdentifiers a
    -- ^ Every identifier at this node, together with its details
    } deriving (Functor, Foldable, Traversable)
252
-- | Annotations travel as an ascending list and are rebuilt with
-- 'S.fromDistinctAscList'. Identifiers travel as a plain association list and
-- are rebuilt with 'M.fromList'.
--
-- NOTE(review): presumably 'M.fromList' is used because the ordering of
-- 'Identifier' keys need not survive serialisation - confirm before changing.
instance Binary (NodeInfo TypeIndex) where
  put_ bh ni = do
    put_ bh (S.toAscList (nodeAnnotations ni))
    put_ bh (nodeType ni)
    put_ bh (M.toList (nodeIdentifiers ni))
  get bh = do
    annotations <- get bh
    types       <- get bh
    identifiers <- get bh
    return $ NodeInfo (S.fromDistinctAscList annotations)
                      types
                      (M.fromList identifiers)
262
-- | An identifier is either a 'ModuleName' or a 'Name'.
type Identifier = Either ModuleName Name

-- | The identifiers at a node, each mapped to its 'IdentifierDetails'.
type NodeIdentifiers a = M.Map Identifier (IdentifierDetails a)
266
-- | What is recorded for every identifier.
--
-- The type is stored alongside the identifier because several identifiers
-- can occur in the same span (Overloaded Record Fields and so on).
data IdentifierDetails a = IdentifierDetails
  { identType :: Maybe a
  , identInfo :: S.Set ContextInfo
  }
  deriving (Eq, Functor, Foldable, Traversable)
275
-- | Prints the constructor name followed by the type and the context set.
instance Outputable a => Outputable (IdentifierDetails a) where
  ppr details = text "IdentifierDetails" <+> tyDoc <+> infoDoc
    where
      tyDoc   = ppr (identType details)
      infoDoc = ppr (identInfo details)
278
-- | Combining keeps the left-hand type when there is one (falling back to
-- the right-hand type) and takes the union of the two context sets.
instance Semigroup (IdentifierDetails a) where
  lhs <> rhs = IdentifierDetails
    { identType = identType lhs <|> identType rhs
    , identInfo = identInfo lhs `S.union` identInfo rhs
    }
282
-- | The empty details: no type and no context information.
instance Monoid (IdentifierDetails a) where
  mempty = IdentifierDetails
    { identType = Nothing
    , identInfo = S.empty
    }
285
-- | Written as the optional type followed by the context set as an ascending
-- list; the set is rebuilt with 'S.fromDistinctAscList'.
instance Binary (IdentifierDetails TypeIndex) where
  put_ bh dets = do
    put_ bh (identType dets)
    put_ bh (S.toAscList (identInfo dets))
  get bh = do
    ty       <- get bh
    contexts <- get bh
    return (IdentifierDetails ty (S.fromDistinctAscList contexts))
293
294
-- | The different contexts in which an identifier can occur
data ContextInfo
  = Use                -- ^ regular variable
  | MatchBind
  | IEThing IEType     -- ^ import/export
  | TyDecl

  -- | Value binding
  | ValBind
      BindType     -- ^ whether or not the binding is in an instance
      Scope        -- ^ scope over which the value is bound
      (Maybe Span) -- ^ span of entire binding

  -- | Pattern binding
  --
  -- This is the tricky case: the identifier that is bound can be used in two
  -- distinct scopes. Take the following example (with @-XViewPatterns@)
  --
  -- @
  -- do (b, a, (a -> True)) <- bar
  --    foo a
  -- @
  --
  -- Here @a@ has two scopes: inside the view pattern @(a -> True)@, and in
  -- the rest of the @do@-block, in @foo a@.
  | PatternBind
      Scope        -- ^ scope /in the pattern/ (the variable bound can be used
                   -- further in the pattern)
      Scope        -- ^ rest of the scope outside the pattern
      (Maybe Span) -- ^ span of entire binding

  | ClassTyDecl (Maybe Span)

  -- | Declaration
  | Decl
      DeclType     -- ^ type of declaration
      (Maybe Span) -- ^ span of entire binding

  -- | Type variable
  | TyVarBind Scope TyVarScope

  -- | Record field
  | RecField RecFieldContext (Maybe Span)
    deriving (Eq, Ord, Show)
339
-- | Pretty-printing reuses the derived 'Show' output.
instance Outputable ContextInfo where
  ppr ctx = text (show ctx)
342
-- | Each constructor is written as a one-byte tag followed by its fields in
-- declaration order. The tags do not follow the constructor order
-- ('MatchBind' is tag 9). 'get' panics on any other tag.
instance Binary ContextInfo where
  put_ bh ctx = case ctx of
    Use -> putByte bh 0
    IEThing ieType -> do
      putByte bh 1
      put_ bh ieType
    TyDecl -> putByte bh 2
    ValBind bindType scope mspan -> do
      putByte bh 3
      put_ bh bindType
      put_ bh scope
      put_ bh mspan
    PatternBind patScope restScope mspan -> do
      putByte bh 4
      put_ bh patScope
      put_ bh restScope
      put_ bh mspan
    ClassTyDecl mspan -> do
      putByte bh 5
      put_ bh mspan
    Decl declType mspan -> do
      putByte bh 6
      put_ bh declType
      put_ bh mspan
    TyVarBind scope tvScope -> do
      putByte bh 7
      put_ bh scope
      put_ bh tvScope
    RecField fieldCtx mspan -> do
      putByte bh 8
      put_ bh fieldCtx
      put_ bh mspan
    MatchBind -> putByte bh 9

  get bh = do
    (tag :: Word8) <- get bh
    case tag of
      0 -> return Use
      1 -> IEThing <$> get bh
      2 -> return TyDecl
      3 -> ValBind <$> get bh <*> get bh <*> get bh
      4 -> PatternBind <$> get bh <*> get bh <*> get bh
      5 -> ClassTyDecl <$> get bh
      6 -> Decl <$> get bh <*> get bh
      7 -> TyVarBind <$> get bh <*> get bh
      8 -> RecField <$> get bh <*> get bh
      9 -> return MatchBind
      _ -> panic "Binary ContextInfo: invalid tag"
390
391
-- | The kinds of import and export
data IEType
  = Import
  | ImportAs
  | ImportHiding
  | Export
  deriving (Eq, Enum, Ord, Show)
399
-- | Stored as a single byte holding the constructor's 'fromEnum' index.
instance Binary IEType where
  put_ bh = putByte bh . fromIntegral . fromEnum
  get bh = do
    tag <- getByte bh
    return $! toEnum (fromIntegral tag)
403
404
-- | The context carried by a 'RecField' entry of 'ContextInfo'.
data RecFieldContext
  = RecFieldDecl
  | RecFieldAssign
  | RecFieldMatch
  | RecFieldOcc
  deriving (Eq, Enum, Ord, Show)
411
-- | Stored as a single byte holding the constructor's 'fromEnum' index.
instance Binary RecFieldContext where
  put_ bh = putByte bh . fromIntegral . fromEnum
  get bh = do
    tag <- getByte bh
    return $! toEnum (fromIntegral tag)
415
416
-- | Whether or not a value binding is in an instance (see 'ValBind').
data BindType
  = RegularBind
  | InstanceBind
  deriving (Eq, Ord, Show, Enum)
421
-- | Stored as a single byte holding the constructor's 'fromEnum' index.
instance Binary BindType where
  put_ bh = putByte bh . fromIntegral . fromEnum
  get bh = do
    tag <- getByte bh
    return $! toEnum (fromIntegral tag)
425
426
-- | The type of declaration recorded by a 'Decl' entry of 'ContextInfo'.
data DeclType
  = FamDec    -- ^ type or data family
  | SynDec    -- ^ type synonym
  | DataDec   -- ^ data declaration
  | ConDec    -- ^ constructor declaration
  | PatSynDec -- ^ pattern synonym
  | ClassDec  -- ^ class declaration
  | InstDec   -- ^ instance declaration
  deriving (Eq, Ord, Show, Enum)
436
-- | Stored as a single byte holding the constructor's 'fromEnum' index.
instance Binary DeclType where
  put_ bh = putByte bh . fromIntegral . fromEnum
  get bh = do
    tag <- getByte bh
    return $! toEnum (fromIntegral tag)
440
441
-- | A scope: none, a local one delimited by a 'Span', or the module.
data Scope
  = NoScope
  | LocalScope Span
  | ModuleScope
  deriving (Eq, Ord, Show, Typeable, Data)
447
-- | Prints the constructor name, followed by the span for 'LocalScope'.
instance Outputable Scope where
  ppr scope = case scope of
    NoScope       -> text "NoScope"
    LocalScope sp -> text "LocalScope" <+> ppr sp
    ModuleScope   -> text "ModuleScope"
452
-- | Written as a one-byte tag (0, 1 or 2); the span follows the tag for
-- 'LocalScope'. 'get' panics on any other tag.
instance Binary Scope where
  put_ bh scope = case scope of
    NoScope -> putByte bh 0
    LocalScope sp -> do
      putByte bh 1
      put_ bh sp
    ModuleScope -> putByte bh 2

  get bh = do
    (tag :: Word8) <- get bh
    case tag of
      0 -> return NoScope
      1 -> LocalScope <$> get bh
      2 -> return ModuleScope
      _ -> panic "Binary Scope: invalid tag"
467
468
-- | The scope of a type variable.
--
-- Type variables get a data type of their own, separate from 'Scope', because
-- of the complexities that features such as @-XScopedTypeVariables@ and
-- @-XInstanceSigs@ introduce. Consider:
--
-- @
-- foo, bar, baz :: forall a. a -> a
-- @
--
-- In this example @a@ is in scope in each of the definitions of @foo@, @bar@
-- and @baz@, so a list of scopes is needed to track it. That list cannot be
-- computed before the binding sites of @foo@, @bar@ and @baz@ are resolved.
--
-- So @a@ begins as an @'UnresolvedScope' [foo, bar, baz] Nothing@, which is
-- later resolved into a 'ResolvedScopes'.
data TyVarScope
  = ResolvedScopes [Scope]

  -- | Unresolved scopes should never show up in the final @.hie@ file
  | UnresolvedScope
        [Name]        -- ^ names of the definitions over which the scope spans
        (Maybe Span)  -- ^ the location of the instance/class declaration for
                      -- the case where the type variable is declared in a
                      -- method type signature
  deriving (Eq, Ord)
495
-- | Only resolved scopes can be shown; showing an 'UnresolvedScope' calls
-- 'error'.
instance Show TyVarScope where
  show tvScope = case tvScope of
    ResolvedScopes scopes -> show scopes
    UnresolvedScope{}     -> error "UnresolvedScope"
499
-- | Written as a one-byte tag (0 for 'ResolvedScopes', 1 for
-- 'UnresolvedScope') followed by the constructor's fields. 'get' panics on
-- any other tag.
instance Binary TyVarScope where
  put_ bh tvScope = case tvScope of
    ResolvedScopes scopes -> do
      putByte bh 0
      put_ bh scopes
    UnresolvedScope names mspan -> do
      putByte bh 1
      put_ bh names
      put_ bh mspan

  get bh = do
    (tag :: Word8) <- get bh
    case tag of
      0 -> ResolvedScopes <$> get bh
      1 -> UnresolvedScope <$> get bh <*> get bh
      _ -> panic "Binary TyVarScope: invalid tag"