Simplify, drop code duplication, add comments
[hadrian.git] / src / Oracles / ModuleFiles.hs
1 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
2 module Oracles.ModuleFiles (
3 decodeModule, encodeModule, findGenerator, hsSources, hsObjects, moduleFilesOracle
4 ) where
5
6 import qualified Data.HashMap.Strict as Map
7
8 import Base
9 import Context
10 import Expression
11 import Oracles.PackageData
12 import Settings.Paths
13
14 newtype ModuleFilesKey = ModuleFilesKey (Stage, Package)
15 deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
16
17 newtype Generator = Generator (Stage, Package, FilePath)
18 deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
19
20 -- | We scan for the following Haskell source extensions when looking for module
21 -- files. Note, we do not list "*.(l)hs-boot" files here, as they can never
22 -- appear by themselves and always have accompanying "*.(l)hs" master files.
23 haskellExtensions :: [String]
24 haskellExtensions = [".hs", ".lhs"]
25
26 -- | Non-Haskell source extensions and corresponding builders.
27 otherExtensions :: [(String, Builder)]
28 otherExtensions = [ (".x" , Alex )
29 , (".y" , Happy )
30 , (".ly" , Happy )
31 , (".hsc", Hsc2Hs) ]
32
33 -- | We match the following file patterns when looking for module files.
34 moduleFilePatterns :: [FilePattern]
35 moduleFilePatterns = map ("*" ++) $ haskellExtensions ++ map fst otherExtensions
36
37 -- | Given a FilePath determine the corresponding builder.
38 determineBuilder :: FilePath -> Maybe Builder
39 determineBuilder file = lookup (takeExtension file) otherExtensions
40
41 -- | Given a module name extract the directory and file name, e.g.:
42 --
43 -- > decodeModule "Data.Functor.Identity" == ("Data/Functor", "Identity")
44 -- > decodeModule "Prelude" == ("", "Prelude")
45 decodeModule :: String -> (FilePath, String)
46 decodeModule modName = (intercalate "/" (init xs), last xs)
47 where
48 xs = words $ replaceEq '.' ' ' modName
49
50 -- | Given the directory and file name find the corresponding module name, e.g.:
51 --
52 -- > encodeModule "Data/Functor" "Identity.hs" == "Data.Functor.Identity"
53 -- > encodeModule "" "Prelude" == "Prelude"
54 -- > uncurry encodeModule (decodeModule name) == name
55 encodeModule :: FilePath -> String -> String
56 encodeModule dir file
57 | dir == "" = takeBaseName file
58 | otherwise = replaceEq '/' '.' dir ++ '.' : takeBaseName file
59
60 -- | Find the generator for a given 'Context' and a source file. For example:
61 -- findGenerator (Context Stage1 compiler vanilla)
62 -- "_build/stage1/compiler/build/Lexer.hs"
63 -- == Just ("compiler/parser/Lexer.x", Alex)
64 -- findGenerator (Context Stage1 base vanilla)
65 -- "_build/stage1/base/build/Prelude.hs"
66 -- == Nothing
67 findGenerator :: Context -> FilePath -> Action (Maybe (FilePath, Builder))
68 findGenerator Context {..} file = do
69 maybeSource <- askOracle $ Generator (stage, package, file)
70 return $ do
71 source <- maybeSource
72 builder <- determineBuilder source
73 return (source, builder)
74
75 -- | Find all Haskell source files for a given 'Context'.
76 hsSources :: Context -> Action [FilePath]
77 hsSources context = do
78 let autogen = buildPath context -/- "autogen"
79 -- Generated source files live in buildPath and have extension "hs", except
80 -- for GHC/Prim.hs that lives in autogen. TODO: fix the inconsistency?
81 modFile ("GHC.Prim", _) = autogen -/- "GHC/Prim.hs"
82 modFile (m, Nothing ) = generatedFile context m
83 modFile (m, Just file )
84 | takeExtension file `elem` haskellExtensions = file
85 | otherwise = generatedFile context m
86 map modFile <$> contextFiles context
87
88 -- | Find all Haskell object files for a given 'Context'. Note: this is a much
89 -- simpler function compared to 'hsSources', because all object files live in
90 -- the build directory regardless of whether they are generated or not.
91 hsObjects :: Context -> Action [FilePath]
92 hsObjects context = do
93 modules <- pkgDataList $ Modules (buildPath context)
94 -- GHC.Prim module is only for documentation, we do not actually build it.
95 return . map (objectPath context . moduleSource) $ filter (/= "GHC.Prim") modules
96
97 -- | Generated module files live in the 'Context' specific build directory.
98 generatedFile :: Context -> String -> FilePath
99 generatedFile context moduleName = buildPath context -/- moduleSource moduleName
100
101 moduleSource :: String -> FilePath
102 moduleSource moduleName = replaceEq '.' '/' moduleName <.> "hs"
103
104 -- | Module files for a given 'Context'.
105 contextFiles :: Context -> Action [(String, Maybe FilePath)]
106 contextFiles context@Context {..} = do
107 modules <- fmap sort . pkgDataList . Modules $ buildPath context
108 zip modules <$> askOracle (ModuleFilesKey (stage, package))
109
110 -- | This is an important oracle whose role is to find and cache module source
111 -- files. It takes a 'Stage' and a 'Package', looks up corresponding source
112 -- directories @dirs@ and a sorted list of module names @modules@, and for each
113 -- module, e.g. @A.B.C@, returns a 'FilePath' of the form @dir/A/B/C.extension@,
114 -- such that @dir@ belongs to @dirs@, and file @dir/A/B/C.extension@ exists, or
115 -- 'Nothing' if there is no such file. If more than one matching file is found
116 -- an error is raised. For example, for 'Stage1' and 'compiler', @dirs@ will
117 -- contain ["compiler/codeGen", "compiler/parser"], and @modules@ will contain
118 -- ["CodeGen.Platform.ARM", "Config", "Lexer"]; the oracle will produce a list
119 -- containing [Just "compiler/codeGen/CodeGen/Platform/ARM.hs", Nothing,
120 -- Just "compiler/parser/Lexer.x"]. The oracle ignores @.(l)hs-boot@ files.
121 moduleFilesOracle :: Rules ()
122 moduleFilesOracle = void $ do
123 void . addOracle $ \(ModuleFilesKey (stage, package)) -> do
124 let path = buildPath $ vanillaContext stage package
125 srcDirs <- pkgDataList $ SrcDirs path
126 modules <- fmap sort . pkgDataList $ Modules path
127 let dirs = (path -/- "autogen") : map (pkgPath package -/-) srcDirs
128 modDirFiles = groupSort $ map decodeModule modules
129 result <- concatForM dirs $ \dir -> do
130 todo <- filterM (doesDirectoryExist . (dir -/-) . fst) modDirFiles
131 forM todo $ \(mDir, mFiles) -> do
132 let fullDir = unifyPath $ dir -/- mDir
133 files <- getDirectoryFiles fullDir moduleFilePatterns
134 let cmp fe f = compare (dropExtension fe) f
135 found = intersectOrd cmp files mFiles
136 return (map (fullDir -/-) found, mDir)
137 let pairs = sort [ (encodeModule d f, f) | (fs, d) <- result, f <- fs ]
138 multi = [ (m, f1, f2) | (m, f1):(n, f2):_ <- tails pairs, m == n ]
139 unless (null multi) $ do
140 let (m, f1, f2) = head multi
141 error $ "Module " ++ m ++ " has more than one source file: "
142 ++ f1 ++ " and " ++ f2 ++ "."
143 return $ lookupAll modules pairs
144
145 -- Optimisation: we discard Haskell files here, because they are never used
146 -- as generators, and hence would be discarded in 'findGenerator' anyway.
147 generators <- newCache $ \(stage, package) -> do
148 let context = vanillaContext stage package
149 files <- contextFiles context
150 return $ Map.fromList [ (generatedFile context modName, src)
151 | (modName, Just src) <- files
152 , takeExtension src `notElem` haskellExtensions ]
153
154 addOracle $ \(Generator (stage, package, file)) ->
155 Map.lookup file <$> generators (stage, package)