Revert "Batch merge"
[ghc.git] / compiler / utils / Digraph.hs
1 -- (c) The University of Glasgow 2006
2
3 {-# LANGUAGE CPP, ScopedTypeVariables, ViewPatterns #-}
4 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
5
6 module Digraph(
7 Graph, graphFromEdgedVerticesOrd, graphFromEdgedVerticesUniq,
8
9 SCC(..), Node(..), flattenSCC, flattenSCCs,
10 stronglyConnCompG,
11 topologicalSortG,
12 verticesG, edgesG, hasVertexG,
13 reachableG, reachablesG, transposeG,
14 emptyG,
15
16 findCycle,
17
18 -- For backwards compatibility with the simpler version of Digraph
19 stronglyConnCompFromEdgedVerticesOrd,
20 stronglyConnCompFromEdgedVerticesOrdR,
21 stronglyConnCompFromEdgedVerticesUniq,
22 stronglyConnCompFromEdgedVerticesUniqR,
23
24 -- Simple way to classify edges
25 EdgeType(..), classifyEdges
26 ) where
27
28 #include "HsVersions.h"
29
30 ------------------------------------------------------------------------------
31 -- A version of the graph algorithms described in:
32 --
33 -- ``Lazy Depth-First Search and Linear IntGraph Algorithms in Haskell''
34 -- by David King and John Launchbury
35 --
36 -- Also included is some additional code for printing tree structures ...
37 --
38 -- If you ever find yourself in need of algorithms for classifying edges,
39 -- or finding connected/biconnected components, consult the history; Sigbjorn
40 -- Finne contributed some implementations in 1997, although we've since
41 -- removed them since they were not used anywhere in GHC.
42 ------------------------------------------------------------------------------
43
44
45 import GhcPrelude
46
47 import Util ( minWith, count )
48 import Outputable
49 import Maybes ( expectJust )
50
51 -- std interfaces
52 import Data.Maybe
53 import Data.Array
54 import Data.List hiding (transpose)
55 import qualified Data.Map as Map
56 import qualified Data.Set as Set
57
58 import qualified Data.Graph as G
59 import Data.Graph hiding (Graph, Edge, transposeG, reachable)
60 import Data.Tree
61 import Unique
62 import UniqFM
63
64 {-
65 ************************************************************************
66 * *
67 * Graphs and Graph Construction
68 * *
69 ************************************************************************
70
71 Note [Nodes, keys, vertices]
72 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
73 * A 'node' is a big blob of client-stuff
74
75 * Each 'node' has a unique (client) 'key', but the latter
76 is in Ord and has fast comparison
77
78 * Digraph then maps each 'key' to a Vertex (Int) which is
79 arranged densely in 0.n
80 -}
81
82 data Graph node = Graph {
83 gr_int_graph :: IntGraph,
84 gr_vertex_to_node :: Vertex -> node,
85 gr_node_to_vertex :: node -> Maybe Vertex
86 }
87
88 data Edge node = Edge node node
89
90 {-| Representation for nodes of the Graph.
91
92 * The @payload@ is user data, just carried around in this module
93
94 * The @key@ is the node identifier.
95 Key has an Ord instance for performance reasons.
96
97 * The @[key]@ are the dependencies of the node;
98 it's ok to have extra keys in the dependencies that
99 are not the key of any Node in the graph
100 -}
101 data Node key payload = DigraphNode {
102 node_payload :: payload, -- ^ User data
103 node_key :: key, -- ^ User defined node id
104 node_dependencies :: [key] -- ^ Dependencies/successors of the node
105 }
106
107
108 instance (Outputable a, Outputable b) => Outputable (Node a b) where
109 ppr (DigraphNode a b c) = ppr (a, b, c)
110
111 emptyGraph :: Graph a
112 emptyGraph = Graph (array (1, 0) []) (error "emptyGraph") (const Nothing)
113
114 -- See Note [Deterministic SCC]
115 graphFromEdgedVertices
116 :: ReduceFn key payload
117 -> [Node key payload] -- The graph; its ok for the
118 -- out-list to contain keys which aren't
119 -- a vertex key, they are ignored
120 -> Graph (Node key payload)
121 graphFromEdgedVertices _reduceFn [] = emptyGraph
122 graphFromEdgedVertices reduceFn edged_vertices =
123 Graph graph vertex_fn (key_vertex . key_extractor)
124 where key_extractor = node_key
125 (bounds, vertex_fn, key_vertex, numbered_nodes) =
126 reduceFn edged_vertices key_extractor
127 graph = array bounds [ (v, sort $ mapMaybe key_vertex ks)
128 | (v, (node_dependencies -> ks)) <- numbered_nodes]
129 -- We normalize outgoing edges by sorting on node order, so
130 -- that the result doesn't depend on the order of the edges
131
132 -- See Note [Deterministic SCC]
133 -- See Note [reduceNodesIntoVertices implementations]
134 graphFromEdgedVerticesOrd
135 :: Ord key
136 => [Node key payload] -- The graph; its ok for the
137 -- out-list to contain keys which aren't
138 -- a vertex key, they are ignored
139 -> Graph (Node key payload)
140 graphFromEdgedVerticesOrd = graphFromEdgedVertices reduceNodesIntoVerticesOrd
141
142 -- See Note [Deterministic SCC]
143 -- See Note [reduceNodesIntoVertices implementations]
144 graphFromEdgedVerticesUniq
145 :: Uniquable key
146 => [Node key payload] -- The graph; its ok for the
147 -- out-list to contain keys which aren't
148 -- a vertex key, they are ignored
149 -> Graph (Node key payload)
150 graphFromEdgedVerticesUniq = graphFromEdgedVertices reduceNodesIntoVerticesUniq
151
152 type ReduceFn key payload =
153 [Node key payload] -> (Node key payload -> key) ->
154 (Bounds, Vertex -> Node key payload
155 , key -> Maybe Vertex, [(Vertex, Node key payload)])
156
157 {-
158 Note [reduceNodesIntoVertices implementations]
159 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
160 reduceNodesIntoVertices is parameterized by the container type.
161 This is to accomodate key types that don't have an Ord instance
162 and hence preclude the use of Data.Map. An example of such type
163 would be Unique, there's no way to implement Ord Unique
164 deterministically.
165
166 For such types, there's a version with a Uniquable constraint.
167 This leaves us with two versions of every function that depends on
168 reduceNodesIntoVertices, one with Ord constraint and the other with
169 Uniquable constraint.
170 For example: graphFromEdgedVerticesOrd and graphFromEdgedVerticesUniq.
171
172 The Uniq version should be a tiny bit more efficient since it uses
173 Data.IntMap internally.
174 -}
175 reduceNodesIntoVertices
176 :: ([(key, Vertex)] -> m)
177 -> (key -> m -> Maybe Vertex)
178 -> ReduceFn key payload
179 reduceNodesIntoVertices fromList lookup nodes key_extractor =
180 (bounds, (!) vertex_map, key_vertex, numbered_nodes)
181 where
182 max_v = length nodes - 1
183 bounds = (0, max_v) :: (Vertex, Vertex)
184
185 -- Keep the order intact to make the result depend on input order
186 -- instead of key order
187 numbered_nodes = zip [0..] nodes
188 vertex_map = array bounds numbered_nodes
189
190 key_map = fromList
191 [ (key_extractor node, v) | (v, node) <- numbered_nodes ]
192 key_vertex k = lookup k key_map
193
194 -- See Note [reduceNodesIntoVertices implementations]
195 reduceNodesIntoVerticesOrd :: Ord key => ReduceFn key payload
196 reduceNodesIntoVerticesOrd = reduceNodesIntoVertices Map.fromList Map.lookup
197
198 -- See Note [reduceNodesIntoVertices implementations]
199 reduceNodesIntoVerticesUniq :: Uniquable key => ReduceFn key payload
200 reduceNodesIntoVerticesUniq = reduceNodesIntoVertices listToUFM (flip lookupUFM)
201
202 {-
203 ************************************************************************
204 * *
205 * SCC
206 * *
207 ************************************************************************
208 -}
209
210 type WorkItem key payload
211 = (Node key payload, -- Tip of the path
212 [payload]) -- Rest of the path;
213 -- [a,b,c] means c depends on b, b depends on a
214
215 -- | Find a reasonably short cycle a->b->c->a, in a strongly
216 -- connected component. The input nodes are presumed to be
217 -- a SCC, so you can start anywhere.
218 findCycle :: forall payload key. Ord key
219 => [Node key payload] -- The nodes. The dependencies can
220 -- contain extra keys, which are ignored
221 -> Maybe [payload] -- A cycle, starting with node
222 -- so each depends on the next
223 findCycle graph
224 = go Set.empty (new_work root_deps []) []
225 where
226 env :: Map.Map key (Node key payload)
227 env = Map.fromList [ (node_key node, node) | node <- graph ]
228
229 -- Find the node with fewest dependencies among the SCC modules
230 -- This is just a heuristic to find some plausible root module
231 root :: Node key payload
232 root = fst (minWith snd [ (node, count (`Map.member` env)
233 (node_dependencies node))
234 | node <- graph ])
235 DigraphNode root_payload root_key root_deps = root
236
237
238 -- 'go' implements Dijkstra's algorithm, more or less
239 go :: Set.Set key -- Visited
240 -> [WorkItem key payload] -- Work list, items length n
241 -> [WorkItem key payload] -- Work list, items length n+1
242 -> Maybe [payload] -- Returned cycle
243 -- Invariant: in a call (go visited ps qs),
244 -- visited = union (map tail (ps ++ qs))
245
246 go _ [] [] = Nothing -- No cycles
247 go visited [] qs = go visited qs []
248 go visited (((DigraphNode payload key deps), path) : ps) qs
249 | key == root_key = Just (root_payload : reverse path)
250 | key `Set.member` visited = go visited ps qs
251 | key `Map.notMember` env = go visited ps qs
252 | otherwise = go (Set.insert key visited)
253 ps (new_qs ++ qs)
254 where
255 new_qs = new_work deps (payload : path)
256
257 new_work :: [key] -> [payload] -> [WorkItem key payload]
258 new_work deps path = [ (n, path) | Just n <- map (`Map.lookup` env) deps ]
259
260 {-
261 ************************************************************************
262 * *
263 * Strongly Connected Component wrappers for Graph
264 * *
265 ************************************************************************
266
267 Note: the components are returned topologically sorted: later components
268 depend on earlier ones, but not vice versa i.e. later components only have
269 edges going from them to earlier ones.
270 -}
271
272 {-
273 Note [Deterministic SCC]
274 ~~~~~~~~~~~~~~~~~~~~~~~~
275 stronglyConnCompFromEdgedVerticesUniq,
276 stronglyConnCompFromEdgedVerticesUniqR,
277 stronglyConnCompFromEdgedVerticesOrd and
278 stronglyConnCompFromEdgedVerticesOrdR
279 provide a following guarantee:
280 Given a deterministically ordered list of nodes it returns a deterministically
281 ordered list of strongly connected components, where the list of vertices
282 in an SCC is also deterministically ordered.
283 Note that the order of edges doesn't need to be deterministic for this to work.
284 We use the order of nodes to normalize the order of edges.
285 -}
286
287 stronglyConnCompG :: Graph node -> [SCC node]
288 stronglyConnCompG graph = decodeSccs graph forest
289 where forest = {-# SCC "Digraph.scc" #-} scc (gr_int_graph graph)
290
291 decodeSccs :: Graph node -> Forest Vertex -> [SCC node]
292 decodeSccs Graph { gr_int_graph = graph, gr_vertex_to_node = vertex_fn } forest
293 = map decode forest
294 where
295 decode (Node v []) | mentions_itself v = CyclicSCC [vertex_fn v]
296 | otherwise = AcyclicSCC (vertex_fn v)
297 decode other = CyclicSCC (dec other [])
298 where dec (Node v ts) vs = vertex_fn v : foldr dec vs ts
299 mentions_itself v = v `elem` (graph ! v)
300
301
302 -- The following two versions are provided for backwards compatibility:
303 -- See Note [Deterministic SCC]
304 -- See Note [reduceNodesIntoVertices implementations]
305 stronglyConnCompFromEdgedVerticesOrd
306 :: Ord key
307 => [Node key payload]
308 -> [SCC payload]
309 stronglyConnCompFromEdgedVerticesOrd
310 = map (fmap node_payload) . stronglyConnCompFromEdgedVerticesOrdR
311
312 -- The following two versions are provided for backwards compatibility:
313 -- See Note [Deterministic SCC]
314 -- See Note [reduceNodesIntoVertices implementations]
315 stronglyConnCompFromEdgedVerticesUniq
316 :: Uniquable key
317 => [Node key payload]
318 -> [SCC payload]
319 stronglyConnCompFromEdgedVerticesUniq
320 = map (fmap node_payload) . stronglyConnCompFromEdgedVerticesUniqR
321
322 -- The "R" interface is used when you expect to apply SCC to
323 -- (some of) the result of SCC, so you don't want to lose the dependency info
324 -- See Note [Deterministic SCC]
325 -- See Note [reduceNodesIntoVertices implementations]
326 stronglyConnCompFromEdgedVerticesOrdR
327 :: Ord key
328 => [Node key payload]
329 -> [SCC (Node key payload)]
330 stronglyConnCompFromEdgedVerticesOrdR =
331 stronglyConnCompG . graphFromEdgedVertices reduceNodesIntoVerticesOrd
332
333 -- The "R" interface is used when you expect to apply SCC to
334 -- (some of) the result of SCC, so you don't want to lose the dependency info
335 -- See Note [Deterministic SCC]
336 -- See Note [reduceNodesIntoVertices implementations]
337 stronglyConnCompFromEdgedVerticesUniqR
338 :: Uniquable key
339 => [Node key payload]
340 -> [SCC (Node key payload)]
341 stronglyConnCompFromEdgedVerticesUniqR =
342 stronglyConnCompG . graphFromEdgedVertices reduceNodesIntoVerticesUniq
343
344 {-
345 ************************************************************************
346 * *
347 * Misc wrappers for Graph
348 * *
349 ************************************************************************
350 -}
351
352 topologicalSortG :: Graph node -> [node]
353 topologicalSortG graph = map (gr_vertex_to_node graph) result
354 where result = {-# SCC "Digraph.topSort" #-} topSort (gr_int_graph graph)
355
356 reachableG :: Graph node -> node -> [node]
357 reachableG graph from = map (gr_vertex_to_node graph) result
358 where from_vertex = expectJust "reachableG" (gr_node_to_vertex graph from)
359 result = {-# SCC "Digraph.reachable" #-} reachable (gr_int_graph graph) [from_vertex]
360
361 -- | Given a list of roots return all reachable nodes.
362 reachablesG :: Graph node -> [node] -> [node]
363 reachablesG graph froms = map (gr_vertex_to_node graph) result
364 where result = {-# SCC "Digraph.reachable" #-}
365 reachable (gr_int_graph graph) vs
366 vs = [ v | Just v <- map (gr_node_to_vertex graph) froms ]
367
368 hasVertexG :: Graph node -> node -> Bool
369 hasVertexG graph node = isJust $ gr_node_to_vertex graph node
370
371 verticesG :: Graph node -> [node]
372 verticesG graph = map (gr_vertex_to_node graph) $ vertices (gr_int_graph graph)
373
374 edgesG :: Graph node -> [Edge node]
375 edgesG graph = map (\(v1, v2) -> Edge (v2n v1) (v2n v2)) $ edges (gr_int_graph graph)
376 where v2n = gr_vertex_to_node graph
377
378 transposeG :: Graph node -> Graph node
379 transposeG graph = Graph (G.transposeG (gr_int_graph graph))
380 (gr_vertex_to_node graph)
381 (gr_node_to_vertex graph)
382
383 emptyG :: Graph node -> Bool
384 emptyG g = graphEmpty (gr_int_graph g)
385
386 {-
387 ************************************************************************
388 * *
389 * Showing Graphs
390 * *
391 ************************************************************************
392 -}
393
394 instance Outputable node => Outputable (Graph node) where
395 ppr graph = vcat [
396 hang (text "Vertices:") 2 (vcat (map ppr $ verticesG graph)),
397 hang (text "Edges:") 2 (vcat (map ppr $ edgesG graph))
398 ]
399
400 instance Outputable node => Outputable (Edge node) where
401 ppr (Edge from to) = ppr from <+> text "->" <+> ppr to
402
403 graphEmpty :: G.Graph -> Bool
404 graphEmpty g = lo > hi
405 where (lo, hi) = bounds g
406
407 {-
408 ************************************************************************
409 * *
410 * IntGraphs
411 * *
412 ************************************************************************
413 -}
414
415 type IntGraph = G.Graph
416
417 {-
418 ------------------------------------------------------------
419 -- Depth first search numbering
420 ------------------------------------------------------------
421 -}
422
423 -- Data.Tree has flatten for Tree, but nothing for Forest
424 preorderF :: Forest a -> [a]
425 preorderF ts = concat (map flatten ts)
426
427 {-
428 ------------------------------------------------------------
429 -- Finding reachable vertices
430 ------------------------------------------------------------
431 -}
432
433 -- This generalizes reachable which was found in Data.Graph
434 reachable :: IntGraph -> [Vertex] -> [Vertex]
435 reachable g vs = preorderF (dfs g vs)
436
437 {-
438 ************************************************************************
439 * *
440 * Classify Edge Types
441 * *
442 ************************************************************************
443 -}
444
445 -- Remark: While we could generalize this algorithm this comes at a runtime
446 -- cost and with no advantages. If you find yourself using this with graphs
447 -- not easily represented using Int nodes please consider rewriting this
448 -- using the more general Graph type.
449
450 -- | Edge direction based on DFS Classification
451 data EdgeType
452 = Forward
453 | Cross
454 | Backward -- ^ Loop back towards the root node.
455 -- Eg backjumps in loops
456 | SelfLoop -- ^ v -> v
457 deriving (Eq,Ord)
458
459 instance Outputable EdgeType where
460 ppr Forward = text "Forward"
461 ppr Cross = text "Cross"
462 ppr Backward = text "Backward"
463 ppr SelfLoop = text "SelfLoop"
464
465 newtype Time = Time Int deriving (Eq,Ord,Num,Outputable)
466
467 --Allow for specialzation
468 {-# INLINEABLE classifyEdges #-}
469
470 -- | Given a start vertex, a way to get successors from a node
471 -- and a list of (directed) edges classify the types of edges.
472 classifyEdges :: forall key. Uniquable key => key -> (key -> [key])
473 -> [(key,key)] -> [((key, key), EdgeType)]
474 classifyEdges root getSucc edges =
475 --let uqe (from,to) = (getUnique from, getUnique to)
476 --in pprTrace "Edges:" (ppr $ map uqe edges) $
477 zip edges $ map classify edges
478 where
479 (_time, starts, ends) = addTimes (0,emptyUFM,emptyUFM) root
480 classify :: (key,key) -> EdgeType
481 classify (from,to)
482 | startFrom < startTo
483 , endFrom > endTo
484 = Forward
485 | startFrom > startTo
486 , endFrom < endTo
487 = Backward
488 | startFrom > startTo
489 , endFrom > endTo
490 = Cross
491 | getUnique from == getUnique to
492 = SelfLoop
493 | otherwise
494 = pprPanic "Failed to classify edge of Graph"
495 (ppr (getUnique from, getUnique to))
496
497 where
498 getTime event node
499 | Just time <- lookupUFM event node
500 = time
501 | otherwise
502 = pprPanic "Failed to classify edge of CFG - not not timed"
503 (text "edges" <> ppr (getUnique from, getUnique to)
504 <+> ppr starts <+> ppr ends )
505 startFrom = getTime starts from
506 startTo = getTime starts to
507 endFrom = getTime ends from
508 endTo = getTime ends to
509
510 addTimes :: (Time, UniqFM Time, UniqFM Time) -> key
511 -> (Time, UniqFM Time, UniqFM Time)
512 addTimes (time,starts,ends) n
513 --Dont reenter nodes
514 | elemUFM n starts
515 = (time,starts,ends)
516 | otherwise =
517 let
518 starts' = addToUFM starts n time
519 time' = time + 1
520 succs = getSucc n :: [key]
521 (time'',starts'',ends') = foldl' addTimes (time',starts',ends) succs
522 ends'' = addToUFM ends' n time''
523 in
524 (time'' + 1, starts'', ends'')