Implement function-sections for Haskell code, #8405
[ghc.git] / compiler / cmm / CmmBuildInfoTables.hs
1 {-# LANGUAGE CPP, GADTs #-}
2
3 -- See Note [Deprecations in Hoopl] in Hoopl module
4 {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
5 module CmmBuildInfoTables
6 ( CAFSet, CAFEnv, cafAnal
7 , doSRTs, TopSRT, emptySRT, isEmptySRT, srtToData )
8 where
9
10 #include "HsVersions.h"
11
12 import Hoopl
13 import Digraph
14 import BlockId
15 import Bitmap
16 import CLabel
17 import PprCmmDecl ()
18 import Cmm
19 import CmmUtils
20 import CmmInfo
21 import Data.List
22 import DynFlags
23 import Maybes
24 import Outputable
25 import SMRep
26 import UniqSupply
27 import Util
28
29 import PprCmm()
30 import Data.Map (Map)
31 import qualified Data.Map as Map
32 import Data.Set (Set)
33 import qualified Data.Set as Set
34 import Control.Monad
35
36 import qualified Prelude as P
37 import Prelude hiding (succ)
38
39 foldSet :: (a -> b -> b) -> b -> Set a -> b
40 foldSet = Set.foldr
41
42 -----------------------------------------------------------------------
43 -- SRTs
44
45 {- EXAMPLE
46
47 f = \x. ... g ...
48 where
49 g = \y. ... h ... c1 ...
50 h = \z. ... c2 ...
51
52 c1 & c2 are CAFs
53
54 g and h are local functions, but they have no static closures. When
55 we generate code for f, we start with a CmmGroup of four CmmDecls:
56
57 [ f_closure, f_entry, g_entry, h_entry ]
58
59 we process each CmmDecl separately in cpsTop, giving us a list of
60 CmmDecls. e.g. for f_entry, we might end up with
61
62 [ f_entry, f1_ret, f2_proc ]
63
64 where f1_ret is a return point, and f2_proc is a proc-point. We have
65 a CAFSet for each of these CmmDecls, let's suppose they are
66
67 [ f_entry{g_closure}, f1_ret{g_closure}, f2_proc{} ]
68 [ g_entry{h_closure, c1_closure} ]
69 [ h_entry{c2_closure} ]
70
71 Now, note that we cannot use g_closure and h_closure in an SRT,
72 because there are no static closures corresponding to these functions.
73 So we have to flatten out the structure, replacing g_closure and
74 h_closure with their contents:
75
76 [ f_entry{c2_closure, c1_closure}, f1_ret{c2_closure,c1_closure}, f2_proc{} ]
77 [ g_entry{c2_closure, c1_closure} ]
78 [ h_entry{c2_closure} ]
79
80 This is what flattenCAFSets is doing.
81
82 -}
83
84 -----------------------------------------------------------------------
85 -- Finding the CAFs used by a procedure
86
87 type CAFSet = Set CLabel
88 type CAFEnv = BlockEnv CAFSet
89
90 -- First, an analysis to find live CAFs.
91 cafLattice :: DataflowLattice CAFSet
92 cafLattice = DataflowLattice "live cafs" Set.empty add
93 where add _ (OldFact old) (NewFact new) = case old `Set.union` new of
94 new' -> (changeIf $ Set.size new' > Set.size old, new')
95
96 cafTransfers :: BwdTransfer CmmNode CAFSet
97 cafTransfers = mkBTransfer3 first middle last
98 where first _ live = live
99 middle m live = foldExpDeep addCaf m live
100 last l live = foldExpDeep addCaf l (joinOutFacts cafLattice l live)
101 addCaf e set = case e of
102 CmmLit (CmmLabel c) -> add c set
103 CmmLit (CmmLabelOff c _) -> add c set
104 CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $ add c2 set
105 _ -> set
106 add l s = if hasCAF l then Set.insert (toClosureLbl l) s
107 else s
108
109 cafAnal :: CmmGraph -> CAFEnv
110 cafAnal g = dataflowAnalBwd g [] $ analBwd cafLattice cafTransfers
111
112 -----------------------------------------------------------------------
113 -- Building the SRTs
114
115 -- Description of the SRT for a given module.
116 -- Note that this SRT may grow as we greedily add new CAFs to it.
117 data TopSRT = TopSRT { lbl :: CLabel
118 , next_elt :: Int -- the next entry in the table
119 , rev_elts :: [CLabel]
120 , elt_map :: Map CLabel Int }
121 -- map: CLabel -> its last entry in the table
122 instance Outputable TopSRT where
123 ppr (TopSRT lbl next elts eltmap) =
124 text "TopSRT:" <+> ppr lbl
125 <+> ppr next
126 <+> ppr elts
127 <+> ppr eltmap
128
129 emptySRT :: MonadUnique m => m TopSRT
130 emptySRT =
131 do top_lbl <- getUniqueM >>= \ u -> return $ mkTopSRTLabel u
132 return TopSRT { lbl = top_lbl, next_elt = 0, rev_elts = [], elt_map = Map.empty }
133
134 isEmptySRT :: TopSRT -> Bool
135 isEmptySRT srt = null (rev_elts srt)
136
137 cafMember :: TopSRT -> CLabel -> Bool
138 cafMember srt lbl = Map.member lbl (elt_map srt)
139
140 cafOffset :: TopSRT -> CLabel -> Maybe Int
141 cafOffset srt lbl = Map.lookup lbl (elt_map srt)
142
143 addCAF :: CLabel -> TopSRT -> TopSRT
144 addCAF caf srt =
145 srt { next_elt = last + 1
146 , rev_elts = caf : rev_elts srt
147 , elt_map = Map.insert caf last (elt_map srt) }
148 where last = next_elt srt
149
150 srtToData :: TopSRT -> CmmGroup
151 srtToData srt = [CmmData sec (Statics (lbl srt) tbl)]
152 where tbl = map (CmmStaticLit . CmmLabel) (reverse (rev_elts srt))
153 sec = Section RelocatableReadOnlyData (lbl srt)
154
155 -- Once we have found the CAFs, we need to do two things:
156 -- 1. Build a table of all the CAFs used in the procedure.
157 -- 2. Compute the C_SRT describing the subset of CAFs live at each procpoint.
158 --
159 -- When building the local view of the SRT, we first make sure that all the CAFs are
160 -- in the SRT. Then, if the number of CAFs is small enough to fit in a bitmap,
161 -- we make sure they're all close enough to the bottom of the table that the
162 -- bitmap will be able to cover all of them.
163 buildSRT :: DynFlags -> TopSRT -> CAFSet -> UniqSM (TopSRT, Maybe CmmDecl, C_SRT)
164 buildSRT dflags topSRT cafs =
165 do let
166 -- For each label referring to a function f without a static closure,
167 -- replace it with the CAFs that are reachable from f.
168 sub_srt topSRT localCafs =
169 let cafs = Set.elems localCafs
170 mkSRT topSRT =
171 do localSRTs <- procpointSRT dflags (lbl topSRT) (elt_map topSRT) cafs
172 return (topSRT, localSRTs)
173 in if length cafs > maxBmpSize dflags then
174 mkSRT (foldl add_if_missing topSRT cafs)
175 else -- make sure all the cafs are near the bottom of the srt
176 mkSRT (add_if_too_far topSRT cafs)
177 add_if_missing srt caf =
178 if cafMember srt caf then srt else addCAF caf srt
179 -- If a CAF is more than maxBmpSize entries from the young end of the
180 -- SRT, then we add it to the SRT again.
181 -- (Note: Not in the SRT => infinitely far.)
182 add_if_too_far srt@(TopSRT {elt_map = m}) cafs =
183 add srt (sortBy farthestFst cafs)
184 where
185 farthestFst x y = case (Map.lookup x m, Map.lookup y m) of
186 (Nothing, Nothing) -> EQ
187 (Nothing, Just _) -> LT
188 (Just _, Nothing) -> GT
189 (Just d, Just d') -> compare d' d
190 add srt [] = srt
191 add srt@(TopSRT {next_elt = next}) (caf : rst) =
192 case cafOffset srt caf of
193 Just ix -> if next - ix > maxBmpSize dflags then
194 add (addCAF caf srt) rst
195 else srt
196 Nothing -> add (addCAF caf srt) rst
197 (topSRT, subSRTs) <- sub_srt topSRT cafs
198 let (sub_tbls, blockSRTs) = subSRTs
199 return (topSRT, sub_tbls, blockSRTs)
200
201 -- Construct an SRT bitmap.
202 -- Adapted from simpleStg/SRT.hs, which expects Id's.
203 procpointSRT :: DynFlags -> CLabel -> Map CLabel Int -> [CLabel] ->
204 UniqSM (Maybe CmmDecl, C_SRT)
205 procpointSRT _ _ _ [] =
206 return (Nothing, NoC_SRT)
207 procpointSRT dflags top_srt top_table entries =
208 do (top, srt) <- bitmap `seq` to_SRT dflags top_srt offset len bitmap
209 return (top, srt)
210 where
211 ints = map (expectJust "constructSRT" . flip Map.lookup top_table) entries
212 sorted_ints = sort ints
213 offset = head sorted_ints
214 bitmap_entries = map (subtract offset) sorted_ints
215 len = P.last bitmap_entries + 1
216 bitmap = intsToBitmap dflags len bitmap_entries
217
218 maxBmpSize :: DynFlags -> Int
219 maxBmpSize dflags = widthInBits (wordWidth dflags) `div` 2
220
221 -- Adapted from codeGen/StgCmmUtils, which converts from SRT to C_SRT.
222 to_SRT :: DynFlags -> CLabel -> Int -> Int -> Bitmap -> UniqSM (Maybe CmmDecl, C_SRT)
223 to_SRT dflags top_srt off len bmp
224 | len > maxBmpSize dflags || bmp == [toStgWord dflags (fromStgHalfWord (srtEscape dflags))]
225 = do id <- getUniqueM
226 let srt_desc_lbl = mkLargeSRTLabel id
227 section = Section RelocatableReadOnlyData srt_desc_lbl
228 tbl = CmmData section $
229 Statics srt_desc_lbl $ map CmmStaticLit
230 ( cmmLabelOffW dflags top_srt off
231 : mkWordCLit dflags (fromIntegral len)
232 : map (mkStgWordCLit dflags) bmp)
233 return (Just tbl, C_SRT srt_desc_lbl 0 (srtEscape dflags))
234 | otherwise
235 = return (Nothing, C_SRT top_srt off (toStgHalfWord dflags (fromStgWord (head bmp))))
236 -- The fromIntegral converts to StgHalfWord
237
238 -- Gather CAF info for a procedure, but only if the procedure
239 -- doesn't have a static closure.
240 -- (If it has a static closure, it will already have an SRT to
241 -- keep its CAFs live.)
242 -- Any procedure referring to a non-static CAF c must keep live
243 -- any CAF that is reachable from c.
244 localCAFInfo :: CAFEnv -> CmmDecl -> (CAFSet, Maybe CLabel)
245 localCAFInfo _ (CmmData _ _) = (Set.empty, Nothing)
246 localCAFInfo cafEnv proc@(CmmProc _ top_l _ (CmmGraph {g_entry=entry})) =
247 case topInfoTable proc of
248 Just (CmmInfoTable { cit_rep = rep })
249 | not (isStaticRep rep) && not (isStackRep rep)
250 -> (cafs, Just (toClosureLbl top_l))
251 _other -> (cafs, Nothing)
252 where
253 cafs = expectJust "maybeBindCAFs" $ mapLookup entry cafEnv
254
255 -- Once we have the local CAF sets for some (possibly) mutually
256 -- recursive functions, we can create an environment mapping
257 -- each function to its set of CAFs. Note that a CAF may
258 -- be a reference to a function. If that function f does not have
259 -- a static closure, then we need to refer specifically
260 -- to the set of CAFs used by f. Of course, the set of CAFs
261 -- used by f must be included in the local CAF sets that are input to
262 -- this function. To minimize lookup time later, we return
263 -- the environment with every reference to f replaced by its set of CAFs.
264 -- To do this replacement efficiently, we gather strongly connected
265 -- components, then we sort the components in topological order.
266 mkTopCAFInfo :: [(CAFSet, Maybe CLabel)] -> Map CLabel CAFSet
267 mkTopCAFInfo localCAFs = foldl addToTop Map.empty g
268 where
269 addToTop env (AcyclicSCC (l, cafset)) =
270 Map.insert l (flatten env cafset) env
271 addToTop env (CyclicSCC nodes) =
272 let (lbls, cafsets) = unzip nodes
273 cafset = foldr Set.delete (foldl Set.union Set.empty cafsets) lbls
274 in foldl (\env l -> Map.insert l (flatten env cafset) env) env lbls
275
276 g = stronglyConnCompFromEdgedVertices
277 [ ((l,cafs), l, Set.elems cafs) | (cafs, Just l) <- localCAFs ]
278
279 flatten :: Map CLabel CAFSet -> CAFSet -> CAFSet
280 flatten env cafset = foldSet (lookup env) Set.empty cafset
281 where
282 lookup env caf cafset' =
283 case Map.lookup caf env of
284 Just cafs -> foldSet Set.insert cafset' cafs
285 Nothing -> Set.insert caf cafset'
286
287 bundle :: Map CLabel CAFSet
288 -> (CAFEnv, CmmDecl)
289 -> (CAFSet, Maybe CLabel)
290 -> (BlockEnv CAFSet, CmmDecl)
291 bundle flatmap (env, decl@(CmmProc infos _lbl _ g)) (closure_cafs, mb_lbl)
292 = ( mapMapWithKey get_cafs (info_tbls infos), decl )
293 where
294 entry = g_entry g
295
296 entry_cafs
297 | Just l <- mb_lbl = expectJust "bundle" $ Map.lookup l flatmap
298 | otherwise = flatten flatmap closure_cafs
299
300 get_cafs l _
301 | l == entry = entry_cafs
302 | Just info <- mapLookup l env = flatten flatmap info
303 | otherwise = Set.empty
304 -- the label might not be in the env if the code corresponding to
305 -- this info table was optimised away (perhaps because it was
306 -- unreachable). In this case it doesn't matter what SRT we
307 -- infer, since the info table will not appear in the generated
308 -- code. See #9329.
309
310 bundle _flatmap (_, decl) _
311 = ( mapEmpty, decl )
312
313
314 flattenCAFSets :: [(CAFEnv, [CmmDecl])] -> [(BlockEnv CAFSet, CmmDecl)]
315 flattenCAFSets cpsdecls = zipWith (bundle flatmap) zipped localCAFs
316 where
317 zipped = [ (env,decl) | (env,decls) <- cpsdecls, decl <- decls ]
318 localCAFs = unzipWith localCAFInfo zipped
319 flatmap = mkTopCAFInfo localCAFs -- transitive closure of localCAFs
320
321 doSRTs :: DynFlags
322 -> TopSRT
323 -> [(CAFEnv, [CmmDecl])]
324 -> IO (TopSRT, [CmmDecl])
325
326 doSRTs dflags topSRT tops
327 = do
328 let caf_decls = flattenCAFSets tops
329 us <- mkSplitUniqSupply 'u'
330 let (topSRT', gs') = initUs_ us $ foldM setSRT (topSRT, []) caf_decls
331 return (topSRT', reverse gs' {- Note [reverse gs] -})
332 where
333 setSRT (topSRT, rst) (caf_map, decl@(CmmProc{})) = do
334 (topSRT, srt_tables, srt_env) <- buildSRTs dflags topSRT caf_map
335 let decl' = updInfoSRTs srt_env decl
336 return (topSRT, decl': srt_tables ++ rst)
337 setSRT (topSRT, rst) (_, decl) =
338 return (topSRT, decl : rst)
339
340 buildSRTs :: DynFlags -> TopSRT -> BlockEnv CAFSet
341 -> UniqSM (TopSRT, [CmmDecl], BlockEnv C_SRT)
342 buildSRTs dflags top_srt caf_map
343 = foldM doOne (top_srt, [], mapEmpty) (mapToList caf_map)
344 where
345 doOne (top_srt, decls, srt_env) (l, cafs)
346 = do (top_srt, mb_decl, srt) <- buildSRT dflags top_srt cafs
347 return ( top_srt, maybeToList mb_decl ++ decls
348 , mapInsert l srt srt_env )
349
350 {-
351 - In each CmmDecl there is a mapping from BlockId -> CmmInfoTable
352 - The one corresponding to g_entry is the closure info table, the
353 rest are continuations.
354 - Each one needs an SRT.
355 - We get the CAFSet for each one from the CAFEnv
356 - flatten gives us
357 [(BlockEnv CAFSet, CmmDecl)]
358 -
359 -}
360
361
362 {- Note [reverse gs]
363
364 It is important to keep the code blocks in the same order,
365 otherwise binary sizes get slightly bigger. I'm not completely
366 sure why this is, perhaps the assembler generates bigger jump
367 instructions for forward refs. --SDM
368 -}
369
370 updInfoSRTs :: BlockEnv C_SRT -> CmmDecl -> CmmDecl
371 updInfoSRTs srt_env (CmmProc top_info top_l live g) =
372 CmmProc (top_info {info_tbls = mapMapWithKey updInfoTbl (info_tbls top_info)}) top_l live g
373 where updInfoTbl l info_tbl
374 = info_tbl { cit_srt = expectJust "updInfo" $ mapLookup l srt_env }
375 updInfoSRTs _ t = t