bb118a65f6f6768076c1535dc44b7fe59f7d43d5
[ghc.git] / compiler / utils / Digraph.hs
1 -- (c) The University of Glasgow 2006
2
3 {-# LANGUAGE CPP, ScopedTypeVariables, ViewPatterns #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5
6 module Digraph(
7 Graph, graphFromEdgedVerticesOrd, graphFromEdgedVerticesUniq,
8
9 SCC(..), Node(..), flattenSCC, flattenSCCs,
10 stronglyConnCompG,
11 topologicalSortG,
12 verticesG, edgesG, hasVertexG,
13 reachableG, reachablesG, transposeG,
14 emptyG,
15
16 findCycle,
17
18 -- For backwards compatibility with the simpler version of Digraph
19 stronglyConnCompFromEdgedVerticesOrd,
20 stronglyConnCompFromEdgedVerticesOrdR,
21 stronglyConnCompFromEdgedVerticesUniq,
22 stronglyConnCompFromEdgedVerticesUniqR,
23
24 -- Simple way to classify edges
25 EdgeType(..), classifyEdges
26 ) where
27
28 #include "HsVersions.h"
29
30 ------------------------------------------------------------------------------
31 -- A version of the graph algorithms described in:
32 --
33 -- ``Lazy Depth-First Search and Linear IntGraph Algorithms in Haskell''
34 -- by David King and John Launchbury
35 --
36 -- Also included is some additional code for printing tree structures ...
37 --
38 -- If you ever find yourself in need of algorithms for classifying edges,
39 -- or finding connected/biconnected components, consult the history; Sigbjorn
40 -- Finne contributed some implementations in 1997, although we've since
41 -- removed them since they were not used anywhere in GHC.
42 ------------------------------------------------------------------------------
43
44
45 import GhcPrelude
46
47 import Util ( minWith, count )
48 import Outputable
49 import Maybes ( expectJust )
50
51 -- std interfaces
52 import Data.Maybe
53 import Data.Array
54 import Data.List hiding (transpose)
55 import qualified Data.Map as Map
56 import qualified Data.Set as Set
57
58 import qualified Data.Graph as G
59 import Data.Graph hiding (Graph, Edge, transposeG, reachable)
60 import Data.Tree
61 import Unique
62 import UniqFM
63
64 {-
65 ************************************************************************
66 * *
67 * Graphs and Graph Construction
68 * *
69 ************************************************************************
70
71 Note [Nodes, keys, vertices]
72 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
73 * A 'node' is a big blob of client-stuff
74
75 * Each 'node' has a unique (client) 'key', but the latter
76 is in Ord and has fast comparison
77
78 * Digraph then maps each 'key' to a Vertex (Int) which is
79 arranged densely in 0.n
80 -}
81
82 data Graph node = Graph {
83 gr_int_graph :: IntGraph,
84 gr_vertex_to_node :: Vertex -> node,
85 gr_node_to_vertex :: node -> Maybe Vertex
86 }
87
88 data Edge node = Edge node node
89
90 data Node key payload = DigraphNode {
91 node_payload :: payload,
92 node_key :: key,
93 node_dependencies :: [key] }
94 -- The payload is user data, just carried around in this module
95 -- The keys are ordered
96 -- The [key] are the dependencies of the node;
97 -- it's ok to have extra keys in the dependencies that
98 -- are not the key of any Node in the graph
99
100 instance (Outputable a, Outputable b) => Outputable (Node a b) where
101 ppr (DigraphNode a b c) = ppr (a, b, c)
102
103 emptyGraph :: Graph a
104 emptyGraph = Graph (array (1, 0) []) (error "emptyGraph") (const Nothing)
105
106 -- See Note [Deterministic SCC]
107 graphFromEdgedVertices
108 :: ReduceFn key payload
109 -> [Node key payload] -- The graph; its ok for the
110 -- out-list to contain keys which aren't
111 -- a vertex key, they are ignored
112 -> Graph (Node key payload)
113 graphFromEdgedVertices _reduceFn [] = emptyGraph
114 graphFromEdgedVertices reduceFn edged_vertices =
115 Graph graph vertex_fn (key_vertex . key_extractor)
116 where key_extractor = node_key
117 (bounds, vertex_fn, key_vertex, numbered_nodes) =
118 reduceFn edged_vertices key_extractor
119 graph = array bounds [ (v, sort $ mapMaybe key_vertex ks)
120 | (v, (node_dependencies -> ks)) <- numbered_nodes]
121 -- We normalize outgoing edges by sorting on node order, so
122 -- that the result doesn't depend on the order of the edges
123
124 -- See Note [Deterministic SCC]
125 -- See Note [reduceNodesIntoVertices implementations]
126 graphFromEdgedVerticesOrd
127 :: Ord key
128 => [Node key payload] -- The graph; its ok for the
129 -- out-list to contain keys which aren't
130 -- a vertex key, they are ignored
131 -> Graph (Node key payload)
132 graphFromEdgedVerticesOrd = graphFromEdgedVertices reduceNodesIntoVerticesOrd
133
134 -- See Note [Deterministic SCC]
135 -- See Note [reduceNodesIntoVertices implementations]
136 graphFromEdgedVerticesUniq
137 :: Uniquable key
138 => [Node key payload] -- The graph; its ok for the
139 -- out-list to contain keys which aren't
140 -- a vertex key, they are ignored
141 -> Graph (Node key payload)
142 graphFromEdgedVerticesUniq = graphFromEdgedVertices reduceNodesIntoVerticesUniq
143
144 type ReduceFn key payload =
145 [Node key payload] -> (Node key payload -> key) ->
146 (Bounds, Vertex -> Node key payload
147 , key -> Maybe Vertex, [(Vertex, Node key payload)])
148
149 {-
150 Note [reduceNodesIntoVertices implementations]
151 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
152 reduceNodesIntoVertices is parameterized by the container type.
153 This is to accomodate key types that don't have an Ord instance
154 and hence preclude the use of Data.Map. An example of such type
155 would be Unique, there's no way to implement Ord Unique
156 deterministically.
157
158 For such types, there's a version with a Uniquable constraint.
159 This leaves us with two versions of every function that depends on
160 reduceNodesIntoVertices, one with Ord constraint and the other with
161 Uniquable constraint.
162 For example: graphFromEdgedVerticesOrd and graphFromEdgedVerticesUniq.
163
164 The Uniq version should be a tiny bit more efficient since it uses
165 Data.IntMap internally.
166 -}
167 reduceNodesIntoVertices
168 :: ([(key, Vertex)] -> m)
169 -> (key -> m -> Maybe Vertex)
170 -> ReduceFn key payload
171 reduceNodesIntoVertices fromList lookup nodes key_extractor =
172 (bounds, (!) vertex_map, key_vertex, numbered_nodes)
173 where
174 max_v = length nodes - 1
175 bounds = (0, max_v) :: (Vertex, Vertex)
176
177 -- Keep the order intact to make the result depend on input order
178 -- instead of key order
179 numbered_nodes = zip [0..] nodes
180 vertex_map = array bounds numbered_nodes
181
182 key_map = fromList
183 [ (key_extractor node, v) | (v, node) <- numbered_nodes ]
184 key_vertex k = lookup k key_map
185
186 -- See Note [reduceNodesIntoVertices implementations]
187 reduceNodesIntoVerticesOrd :: Ord key => ReduceFn key payload
188 reduceNodesIntoVerticesOrd = reduceNodesIntoVertices Map.fromList Map.lookup
189
190 -- See Note [reduceNodesIntoVertices implementations]
191 reduceNodesIntoVerticesUniq :: Uniquable key => ReduceFn key payload
192 reduceNodesIntoVerticesUniq = reduceNodesIntoVertices listToUFM (flip lookupUFM)
193
194 {-
195 ************************************************************************
196 * *
197 * SCC
198 * *
199 ************************************************************************
200 -}
201
202 type WorkItem key payload
203 = (Node key payload, -- Tip of the path
204 [payload]) -- Rest of the path;
205 -- [a,b,c] means c depends on b, b depends on a
206
207 -- | Find a reasonably short cycle a->b->c->a, in a strongly
208 -- connected component. The input nodes are presumed to be
209 -- a SCC, so you can start anywhere.
210 findCycle :: forall payload key. Ord key
211 => [Node key payload] -- The nodes. The dependencies can
212 -- contain extra keys, which are ignored
213 -> Maybe [payload] -- A cycle, starting with node
214 -- so each depends on the next
215 findCycle graph
216 = go Set.empty (new_work root_deps []) []
217 where
218 env :: Map.Map key (Node key payload)
219 env = Map.fromList [ (node_key node, node) | node <- graph ]
220
221 -- Find the node with fewest dependencies among the SCC modules
222 -- This is just a heuristic to find some plausible root module
223 root :: Node key payload
224 root = fst (minWith snd [ (node, count (`Map.member` env)
225 (node_dependencies node))
226 | node <- graph ])
227 DigraphNode root_payload root_key root_deps = root
228
229
230 -- 'go' implements Dijkstra's algorithm, more or less
231 go :: Set.Set key -- Visited
232 -> [WorkItem key payload] -- Work list, items length n
233 -> [WorkItem key payload] -- Work list, items length n+1
234 -> Maybe [payload] -- Returned cycle
235 -- Invariant: in a call (go visited ps qs),
236 -- visited = union (map tail (ps ++ qs))
237
238 go _ [] [] = Nothing -- No cycles
239 go visited [] qs = go visited qs []
240 go visited (((DigraphNode payload key deps), path) : ps) qs
241 | key == root_key = Just (root_payload : reverse path)
242 | key `Set.member` visited = go visited ps qs
243 | key `Map.notMember` env = go visited ps qs
244 | otherwise = go (Set.insert key visited)
245 ps (new_qs ++ qs)
246 where
247 new_qs = new_work deps (payload : path)
248
249 new_work :: [key] -> [payload] -> [WorkItem key payload]
250 new_work deps path = [ (n, path) | Just n <- map (`Map.lookup` env) deps ]
251
252 {-
253 ************************************************************************
254 * *
255 * Strongly Connected Component wrappers for Graph
256 * *
257 ************************************************************************
258
259 Note: the components are returned topologically sorted: later components
260 depend on earlier ones, but not vice versa i.e. later components only have
261 edges going from them to earlier ones.
262 -}
263
264 {-
265 Note [Deterministic SCC]
266 ~~~~~~~~~~~~~~~~~~~~~~~~
267 stronglyConnCompFromEdgedVerticesUniq,
268 stronglyConnCompFromEdgedVerticesUniqR,
269 stronglyConnCompFromEdgedVerticesOrd and
270 stronglyConnCompFromEdgedVerticesOrdR
271 provide a following guarantee:
272 Given a deterministically ordered list of nodes it returns a deterministically
273 ordered list of strongly connected components, where the list of vertices
274 in an SCC is also deterministically ordered.
275 Note that the order of edges doesn't need to be deterministic for this to work.
276 We use the order of nodes to normalize the order of edges.
277 -}
278
279 stronglyConnCompG :: Graph node -> [SCC node]
280 stronglyConnCompG graph = decodeSccs graph forest
281 where forest = {-# SCC "Digraph.scc" #-} scc (gr_int_graph graph)
282
283 decodeSccs :: Graph node -> Forest Vertex -> [SCC node]
284 decodeSccs Graph { gr_int_graph = graph, gr_vertex_to_node = vertex_fn } forest
285 = map decode forest
286 where
287 decode (Node v []) | mentions_itself v = CyclicSCC [vertex_fn v]
288 | otherwise = AcyclicSCC (vertex_fn v)
289 decode other = CyclicSCC (dec other [])
290 where dec (Node v ts) vs = vertex_fn v : foldr dec vs ts
291 mentions_itself v = v `elem` (graph ! v)
292
293
294 -- The following two versions are provided for backwards compatibility:
295 -- See Note [Deterministic SCC]
296 -- See Note [reduceNodesIntoVertices implementations]
297 stronglyConnCompFromEdgedVerticesOrd
298 :: Ord key
299 => [Node key payload]
300 -> [SCC payload]
301 stronglyConnCompFromEdgedVerticesOrd
302 = map (fmap node_payload) . stronglyConnCompFromEdgedVerticesOrdR
303
304 -- The following two versions are provided for backwards compatibility:
305 -- See Note [Deterministic SCC]
306 -- See Note [reduceNodesIntoVertices implementations]
307 stronglyConnCompFromEdgedVerticesUniq
308 :: Uniquable key
309 => [Node key payload]
310 -> [SCC payload]
311 stronglyConnCompFromEdgedVerticesUniq
312 = map (fmap node_payload) . stronglyConnCompFromEdgedVerticesUniqR
313
314 -- The "R" interface is used when you expect to apply SCC to
315 -- (some of) the result of SCC, so you don't want to lose the dependency info
316 -- See Note [Deterministic SCC]
317 -- See Note [reduceNodesIntoVertices implementations]
318 stronglyConnCompFromEdgedVerticesOrdR
319 :: Ord key
320 => [Node key payload]
321 -> [SCC (Node key payload)]
322 stronglyConnCompFromEdgedVerticesOrdR =
323 stronglyConnCompG . graphFromEdgedVertices reduceNodesIntoVerticesOrd
324
325 -- The "R" interface is used when you expect to apply SCC to
326 -- (some of) the result of SCC, so you don't want to lose the dependency info
327 -- See Note [Deterministic SCC]
328 -- See Note [reduceNodesIntoVertices implementations]
329 stronglyConnCompFromEdgedVerticesUniqR
330 :: Uniquable key
331 => [Node key payload]
332 -> [SCC (Node key payload)]
333 stronglyConnCompFromEdgedVerticesUniqR =
334 stronglyConnCompG . graphFromEdgedVertices reduceNodesIntoVerticesUniq
335
336 {-
337 ************************************************************************
338 * *
339 * Misc wrappers for Graph
340 * *
341 ************************************************************************
342 -}
343
344 topologicalSortG :: Graph node -> [node]
345 topologicalSortG graph = map (gr_vertex_to_node graph) result
346 where result = {-# SCC "Digraph.topSort" #-} topSort (gr_int_graph graph)
347
348 reachableG :: Graph node -> node -> [node]
349 reachableG graph from = map (gr_vertex_to_node graph) result
350 where from_vertex = expectJust "reachableG" (gr_node_to_vertex graph from)
351 result = {-# SCC "Digraph.reachable" #-} reachable (gr_int_graph graph) [from_vertex]
352
353 -- | Given a list of roots return all reachable nodes.
354 reachablesG :: Graph node -> [node] -> [node]
355 reachablesG graph froms = map (gr_vertex_to_node graph) result
356 where result = {-# SCC "Digraph.reachable" #-}
357 reachable (gr_int_graph graph) vs
358 vs = [ v | Just v <- map (gr_node_to_vertex graph) froms ]
359
360 hasVertexG :: Graph node -> node -> Bool
361 hasVertexG graph node = isJust $ gr_node_to_vertex graph node
362
363 verticesG :: Graph node -> [node]
364 verticesG graph = map (gr_vertex_to_node graph) $ vertices (gr_int_graph graph)
365
366 edgesG :: Graph node -> [Edge node]
367 edgesG graph = map (\(v1, v2) -> Edge (v2n v1) (v2n v2)) $ edges (gr_int_graph graph)
368 where v2n = gr_vertex_to_node graph
369
370 transposeG :: Graph node -> Graph node
371 transposeG graph = Graph (G.transposeG (gr_int_graph graph))
372 (gr_vertex_to_node graph)
373 (gr_node_to_vertex graph)
374
375 emptyG :: Graph node -> Bool
376 emptyG g = graphEmpty (gr_int_graph g)
377
378 {-
379 ************************************************************************
380 * *
381 * Showing Graphs
382 * *
383 ************************************************************************
384 -}
385
386 instance Outputable node => Outputable (Graph node) where
387 ppr graph = vcat [
388 hang (text "Vertices:") 2 (vcat (map ppr $ verticesG graph)),
389 hang (text "Edges:") 2 (vcat (map ppr $ edgesG graph))
390 ]
391
392 instance Outputable node => Outputable (Edge node) where
393 ppr (Edge from to) = ppr from <+> text "->" <+> ppr to
394
395 graphEmpty :: G.Graph -> Bool
396 graphEmpty g = lo > hi
397 where (lo, hi) = bounds g
398
399 {-
400 ************************************************************************
401 * *
402 * IntGraphs
403 * *
404 ************************************************************************
405 -}
406
407 type IntGraph = G.Graph
408
409 {-
410 ------------------------------------------------------------
411 -- Depth first search numbering
412 ------------------------------------------------------------
413 -}
414
415 -- Data.Tree has flatten for Tree, but nothing for Forest
416 preorderF :: Forest a -> [a]
417 preorderF ts = concat (map flatten ts)
418
419 {-
420 ------------------------------------------------------------
421 -- Finding reachable vertices
422 ------------------------------------------------------------
423 -}
424
425 -- This generalizes reachable which was found in Data.Graph
426 reachable :: IntGraph -> [Vertex] -> [Vertex]
427 reachable g vs = preorderF (dfs g vs)
428
429 {-
430 ************************************************************************
431 * *
432 * Classify Edge Types
433 * *
434 ************************************************************************
435 -}
436
437 -- Remark: While we could generalize this algorithm this comes at a runtime
438 -- cost and with no advantages. If you find yourself using this with graphs
439 -- not easily represented using Int nodes please consider rewriting this
440 -- using the more general Graph type.
441
442 -- | Edge direction based on DFS Classification
443 data EdgeType
444 = Forward
445 | Cross
446 | Backward -- ^ Loop back towards the root node.
447 -- Eg backjumps in loops
448 | SelfLoop -- ^ v -> v
449 deriving (Eq,Ord)
450
451 instance Outputable EdgeType where
452 ppr Forward = text "Forward"
453 ppr Cross = text "Cross"
454 ppr Backward = text "Backward"
455 ppr SelfLoop = text "SelfLoop"
456
457 newtype Time = Time Int deriving (Eq,Ord,Num,Outputable)
458
459 --Allow for specialzation
460 {-# INLINEABLE classifyEdges #-}
461
462 -- | Given a start vertex, a way to get successors from a node
463 -- and a list of (directed) edges classify the types of edges.
464 classifyEdges :: forall key. Uniquable key => key -> (key -> [key])
465 -> [(key,key)] -> [((key, key), EdgeType)]
466 classifyEdges root getSucc edges =
467 --let uqe (from,to) = (getUnique from, getUnique to)
468 --in pprTrace "Edges:" (ppr $ map uqe edges) $
469 zip edges $ map classify edges
470 where
471 (_time, starts, ends) = addTimes (0,emptyUFM,emptyUFM) root
472 classify :: (key,key) -> EdgeType
473 classify (from,to)
474 | startFrom < startTo
475 , endFrom > endTo
476 = Forward
477 | startFrom > startTo
478 , endFrom < endTo
479 = Backward
480 | startFrom > startTo
481 , endFrom > endTo
482 = Cross
483 | getUnique from == getUnique to
484 = SelfLoop
485 | otherwise
486 = pprPanic "Failed to classify edge of Graph"
487 (ppr (getUnique from, getUnique to))
488
489 where
490 getTime event node
491 | Just time <- lookupUFM event node
492 = time
493 | otherwise
494 = pprPanic "Failed to classify edge of CFG - not not timed"
495 (text "edges" <> ppr (getUnique from, getUnique to)
496 <+> ppr starts <+> ppr ends )
497 startFrom = getTime starts from
498 startTo = getTime starts to
499 endFrom = getTime ends from
500 endTo = getTime ends to
501
502 addTimes :: (Time, UniqFM Time, UniqFM Time) -> key
503 -> (Time, UniqFM Time, UniqFM Time)
504 addTimes (time,starts,ends) n
505 --Dont reenter nodes
506 | elemUFM n starts
507 = (time,starts,ends)
508 | otherwise =
509 let
510 starts' = addToUFM starts n time
511 time' = time + 1
512 succs = getSucc n :: [key]
513 (time'',starts'',ends') = foldl' addTimes (time',starts',ends) succs
514 ends'' = addToUFM ends' n time''
515 in
516 (time'' + 1, starts'', ends'')