Document SCC determinism
[ghc.git] / compiler / utils / Digraph.hs
1 -- (c) The University of Glasgow 2006
2
3 {-# LANGUAGE CPP, ScopedTypeVariables #-}
4
5 module Digraph(
6 Graph, graphFromEdgedVertices,
7
8 SCC(..), Node, flattenSCC, flattenSCCs,
9 stronglyConnCompG,
10 topologicalSortG, dfsTopSortG,
11 verticesG, edgesG, hasVertexG,
12 reachableG, reachablesG, transposeG,
13 outdegreeG, indegreeG,
14 vertexGroupsG, emptyG,
15 componentsG,
16
17 findCycle,
18
19 -- For backwards compatability with the simpler version of Digraph
20 stronglyConnCompFromEdgedVertices, stronglyConnCompFromEdgedVerticesR,
21 ) where
22
23 #include "HsVersions.h"
24
25 ------------------------------------------------------------------------------
26 -- A version of the graph algorithms described in:
27 --
28 -- ``Lazy Depth-First Search and Linear IntGraph Algorithms in Haskell''
29 -- by David King and John Launchbury
30 --
31 -- Also included is some additional code for printing tree structures ...
32 --
33 -- If you ever find yourself in need of algorithms for classifying edges,
34 -- or finding connected/biconnected components, consult the history; Sigbjorn
35 -- Finne contributed some implementations in 1997, although we've since
36 -- removed them since they were not used anywhere in GHC.
37 ------------------------------------------------------------------------------
38
39
40 import Util ( minWith, count )
41 import Outputable
42 import Maybes ( expectJust )
43 import MonadUtils ( allM )
44
45 -- Extensions
46 import Control.Monad ( filterM, liftM, liftM2 )
47 import Control.Monad.ST
48
49 -- std interfaces
50 import Data.Maybe
51 import Data.Array
52 import Data.List hiding (transpose)
53 import Data.Array.ST
54 import qualified Data.Map as Map
55 import qualified Data.Set as Set
56
57 import qualified Data.Graph as G
58 import Data.Graph hiding (Graph, Edge, transposeG, reachable)
59 import Data.Tree
60
61 {-
62 ************************************************************************
63 * *
64 * Graphs and Graph Construction
65 * *
66 ************************************************************************
67
68 Note [Nodes, keys, vertices]
69 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
70 * A 'node' is a big blob of client-stuff
71
72 * Each 'node' has a unique (client) 'key', but the latter
73 is in Ord and has fast comparison
74
75 * Digraph then maps each 'key' to a Vertex (Int) which is
76 arranged densely in 0.n
77 -}
78
79 data Graph node = Graph {
80 gr_int_graph :: IntGraph,
81 gr_vertex_to_node :: Vertex -> node,
82 gr_node_to_vertex :: node -> Maybe Vertex
83 }
84
85 data Edge node = Edge node node
86
87 type Node key payload = (payload, key, [key])
88 -- The payload is user data, just carried around in this module
89 -- The keys are ordered
90 -- The [key] are the dependencies of the node;
91 -- it's ok to have extra keys in the dependencies that
92 -- are not the key of any Node in the graph
93
94 emptyGraph :: Graph a
95 emptyGraph = Graph (array (1, 0) []) (error "emptyGraph") (const Nothing)
96
97 -- See Note [Deterministic SCC]
98 graphFromEdgedVertices
99 :: Ord key -- We only use Ord for efficiency,
100 -- it doesn't effect the result, so
101 -- it can be safely used with Unique's.
102 => [Node key payload] -- The graph; its ok for the
103 -- out-list to contain keys which arent
104 -- a vertex key, they are ignored
105 -> Graph (Node key payload)
106 graphFromEdgedVertices [] = emptyGraph
107 graphFromEdgedVertices edged_vertices = Graph graph vertex_fn (key_vertex . key_extractor)
108 where key_extractor (_, k, _) = k
109 (bounds, vertex_fn, key_vertex, numbered_nodes) = reduceNodesIntoVertices edged_vertices key_extractor
110 graph = array bounds [ (v, sort $ mapMaybe key_vertex ks)
111 | (v, (_, _, ks)) <- numbered_nodes]
112 -- We normalize outgoing edges by sorting on node order, so
113 -- that the result doesn't depend on the order of the edges
114
115
116 reduceNodesIntoVertices
117 :: Ord key
118 => [node]
119 -> (node -> key)
120 -> (Bounds, Vertex -> node, key -> Maybe Vertex, [(Vertex, node)])
121 reduceNodesIntoVertices nodes key_extractor = (bounds, (!) vertex_map, key_vertex, numbered_nodes)
122 where
123 max_v = length nodes - 1
124 bounds = (0, max_v) :: (Vertex, Vertex)
125
126 -- Keep the order intact to make the result depend on input order
127 -- instead of key order
128 numbered_nodes = zip [0..] nodes
129 vertex_map = array bounds numbered_nodes
130
131 key_map = Map.fromList
132 [ (key_extractor node, v) | (v, node) <- numbered_nodes ]
133 key_vertex k = Map.lookup k key_map
134
135 {-
136 ************************************************************************
137 * *
138 * SCC
139 * *
140 ************************************************************************
141 -}
142
143 type WorkItem key payload
144 = (Node key payload, -- Tip of the path
145 [payload]) -- Rest of the path;
146 -- [a,b,c] means c depends on b, b depends on a
147
148 -- | Find a reasonably short cycle a->b->c->a, in a strongly
149 -- connected component. The input nodes are presumed to be
150 -- a SCC, so you can start anywhere.
151 findCycle :: forall payload key. Ord key
152 => [Node key payload] -- The nodes. The dependencies can
153 -- contain extra keys, which are ignored
154 -> Maybe [payload] -- A cycle, starting with node
155 -- so each depends on the next
156 findCycle graph
157 = go Set.empty (new_work root_deps []) []
158 where
159 env :: Map.Map key (Node key payload)
160 env = Map.fromList [ (key, node) | node@(_, key, _) <- graph ]
161
162 -- Find the node with fewest dependencies among the SCC modules
163 -- This is just a heuristic to find some plausible root module
164 root :: Node key payload
165 root = fst (minWith snd [ (node, count (`Map.member` env) deps)
166 | node@(_,_,deps) <- graph ])
167 (root_payload,root_key,root_deps) = root
168
169
170 -- 'go' implements Dijkstra's algorithm, more or less
171 go :: Set.Set key -- Visited
172 -> [WorkItem key payload] -- Work list, items length n
173 -> [WorkItem key payload] -- Work list, items length n+1
174 -> Maybe [payload] -- Returned cycle
175 -- Invariant: in a call (go visited ps qs),
176 -- visited = union (map tail (ps ++ qs))
177
178 go _ [] [] = Nothing -- No cycles
179 go visited [] qs = go visited qs []
180 go visited (((payload,key,deps), path) : ps) qs
181 | key == root_key = Just (root_payload : reverse path)
182 | key `Set.member` visited = go visited ps qs
183 | key `Map.notMember` env = go visited ps qs
184 | otherwise = go (Set.insert key visited)
185 ps (new_qs ++ qs)
186 where
187 new_qs = new_work deps (payload : path)
188
189 new_work :: [key] -> [payload] -> [WorkItem key payload]
190 new_work deps path = [ (n, path) | Just n <- map (`Map.lookup` env) deps ]
191
192 {-
193 ************************************************************************
194 * *
195 * Strongly Connected Component wrappers for Graph
196 * *
197 ************************************************************************
198
199 Note: the components are returned topologically sorted: later components
200 depend on earlier ones, but not vice versa i.e. later components only have
201 edges going from them to earlier ones.
202 -}
203
204 {-
205 Note [Deterministic SCC]
206 ~~~~~~~~~~~~~~~~~~~~~~~~
207 stronglyConnCompFromEdgedVertices and stronglyConnCompFromEdgedVerticesR
208 provide a following guarantee:
209 Given a deterministically ordered list of nodes it returns a deterministically
210 ordered list of strongly connected components, where the list of vertices
211 in an SCC is also deterministically ordered.
212 Note that the order of edges doesn't need to be deterministic for this to work.
213 We use the order of nodes to normalize the order of edges.
214 -}
215
216 stronglyConnCompG :: Graph node -> [SCC node]
217 stronglyConnCompG graph = decodeSccs graph forest
218 where forest = {-# SCC "Digraph.scc" #-} scc (gr_int_graph graph)
219
220 decodeSccs :: Graph node -> Forest Vertex -> [SCC node]
221 decodeSccs Graph { gr_int_graph = graph, gr_vertex_to_node = vertex_fn } forest
222 = map decode forest
223 where
224 decode (Node v []) | mentions_itself v = CyclicSCC [vertex_fn v]
225 | otherwise = AcyclicSCC (vertex_fn v)
226 decode other = CyclicSCC (dec other [])
227 where dec (Node v ts) vs = vertex_fn v : foldr dec vs ts
228 mentions_itself v = v `elem` (graph ! v)
229
230
231 -- The following two versions are provided for backwards compatability:
232 -- See Note [Deterministic SCC]
233 stronglyConnCompFromEdgedVertices
234 :: Ord key
235 => [Node key payload]
236 -> [SCC payload]
237 stronglyConnCompFromEdgedVertices
238 = map (fmap get_node) . stronglyConnCompFromEdgedVerticesR
239 where get_node (n, _, _) = n
240
241 -- The "R" interface is used when you expect to apply SCC to
242 -- (some of) the result of SCC, so you dont want to lose the dependency info
243 -- See Note [Deterministic SCC]
244 stronglyConnCompFromEdgedVerticesR
245 :: Ord key
246 => [Node key payload]
247 -> [SCC (Node key payload)]
248 stronglyConnCompFromEdgedVerticesR = stronglyConnCompG . graphFromEdgedVertices
249
250 {-
251 ************************************************************************
252 * *
253 * Misc wrappers for Graph
254 * *
255 ************************************************************************
256 -}
257
258 topologicalSortG :: Graph node -> [node]
259 topologicalSortG graph = map (gr_vertex_to_node graph) result
260 where result = {-# SCC "Digraph.topSort" #-} topSort (gr_int_graph graph)
261
262 dfsTopSortG :: Graph node -> [[node]]
263 dfsTopSortG graph =
264 map (map (gr_vertex_to_node graph) . flatten) $ dfs g (topSort g)
265 where
266 g = gr_int_graph graph
267
268 reachableG :: Graph node -> node -> [node]
269 reachableG graph from = map (gr_vertex_to_node graph) result
270 where from_vertex = expectJust "reachableG" (gr_node_to_vertex graph from)
271 result = {-# SCC "Digraph.reachable" #-} reachable (gr_int_graph graph) [from_vertex]
272
273 reachablesG :: Graph node -> [node] -> [node]
274 reachablesG graph froms = map (gr_vertex_to_node graph) result
275 where result = {-# SCC "Digraph.reachable" #-}
276 reachable (gr_int_graph graph) vs
277 vs = [ v | Just v <- map (gr_node_to_vertex graph) froms ]
278
279 hasVertexG :: Graph node -> node -> Bool
280 hasVertexG graph node = isJust $ gr_node_to_vertex graph node
281
282 verticesG :: Graph node -> [node]
283 verticesG graph = map (gr_vertex_to_node graph) $ vertices (gr_int_graph graph)
284
285 edgesG :: Graph node -> [Edge node]
286 edgesG graph = map (\(v1, v2) -> Edge (v2n v1) (v2n v2)) $ edges (gr_int_graph graph)
287 where v2n = gr_vertex_to_node graph
288
289 transposeG :: Graph node -> Graph node
290 transposeG graph = Graph (G.transposeG (gr_int_graph graph))
291 (gr_vertex_to_node graph)
292 (gr_node_to_vertex graph)
293
294 outdegreeG :: Graph node -> node -> Maybe Int
295 outdegreeG = degreeG outdegree
296
297 indegreeG :: Graph node -> node -> Maybe Int
298 indegreeG = degreeG indegree
299
300 degreeG :: (G.Graph -> Table Int) -> Graph node -> node -> Maybe Int
301 degreeG degree graph node = let table = degree (gr_int_graph graph)
302 in fmap ((!) table) $ gr_node_to_vertex graph node
303
304 vertexGroupsG :: Graph node -> [[node]]
305 vertexGroupsG graph = map (map (gr_vertex_to_node graph)) result
306 where result = vertexGroups (gr_int_graph graph)
307
308 emptyG :: Graph node -> Bool
309 emptyG g = graphEmpty (gr_int_graph g)
310
311 componentsG :: Graph node -> [[node]]
312 componentsG graph = map (map (gr_vertex_to_node graph) . flatten)
313 $ components (gr_int_graph graph)
314
315 {-
316 ************************************************************************
317 * *
318 * Showing Graphs
319 * *
320 ************************************************************************
321 -}
322
323 instance Outputable node => Outputable (Graph node) where
324 ppr graph = vcat [
325 hang (text "Vertices:") 2 (vcat (map ppr $ verticesG graph)),
326 hang (text "Edges:") 2 (vcat (map ppr $ edgesG graph))
327 ]
328
329 instance Outputable node => Outputable (Edge node) where
330 ppr (Edge from to) = ppr from <+> text "->" <+> ppr to
331
332 graphEmpty :: G.Graph -> Bool
333 graphEmpty g = lo > hi
334 where (lo, hi) = bounds g
335
336 {-
337 ************************************************************************
338 * *
339 * IntGraphs
340 * *
341 ************************************************************************
342 -}
343
344 type IntGraph = G.Graph
345
346 {-
347 ------------------------------------------------------------
348 -- Depth first search numbering
349 ------------------------------------------------------------
350 -}
351
352 -- Data.Tree has flatten for Tree, but nothing for Forest
353 preorderF :: Forest a -> [a]
354 preorderF ts = concat (map flatten ts)
355
356 {-
357 ------------------------------------------------------------
358 -- Finding reachable vertices
359 ------------------------------------------------------------
360 -}
361
362 -- This generalizes reachable which was found in Data.Graph
363 reachable :: IntGraph -> [Vertex] -> [Vertex]
364 reachable g vs = preorderF (dfs g vs)
365
366 {-
367 ------------------------------------------------------------
368 -- Total ordering on groups of vertices
369 ------------------------------------------------------------
370
371 The plan here is to extract a list of groups of elements of the graph
372 such that each group has no dependence except on nodes in previous
373 groups (i.e. in particular they may not depend on nodes in their own
374 group) and is maximal such group.
375
376 Clearly we cannot provide a solution for cyclic graphs.
377
378 We proceed by iteratively removing elements with no outgoing edges
379 and their associated edges from the graph.
380
381 This probably isn't very efficient and certainly isn't very clever.
382 -}
383
384 type Set s = STArray s Vertex Bool
385
386 mkEmpty :: Bounds -> ST s (Set s)
387 mkEmpty bnds = newArray bnds False
388
389 contains :: Set s -> Vertex -> ST s Bool
390 contains m v = readArray m v
391
392 include :: Set s -> Vertex -> ST s ()
393 include m v = writeArray m v True
394
395 vertexGroups :: IntGraph -> [[Vertex]]
396 vertexGroups g = runST (mkEmpty (bounds g) >>= \provided -> vertexGroupsS provided g next_vertices)
397 where next_vertices = noOutEdges g
398
399 noOutEdges :: IntGraph -> [Vertex]
400 noOutEdges g = [ v | v <- vertices g, null (g!v)]
401
402 vertexGroupsS :: Set s -> IntGraph -> [Vertex] -> ST s [[Vertex]]
403 vertexGroupsS provided g to_provide
404 = if null to_provide
405 then do {
406 all_provided <- allM (provided `contains`) (vertices g)
407 ; if all_provided
408 then return []
409 else error "vertexGroup: cyclic graph"
410 }
411 else do {
412 mapM_ (include provided) to_provide
413 ; to_provide' <- filterM (vertexReady provided g) (vertices g)
414 ; rest <- vertexGroupsS provided g to_provide'
415 ; return $ to_provide : rest
416 }
417
418 vertexReady :: Set s -> IntGraph -> Vertex -> ST s Bool
419 vertexReady provided g v = liftM2 (&&) (liftM not $ provided `contains` v) (allM (provided `contains`) (g!v))