9203c604cedaa8e9ac49301dcd5880ddd9bae8ce
[packages/containers.git] / Data / Containers / ListUtils.hs
1 {-# LANGUAGE CPP #-}
2 {-# LANGUAGE BangPatterns #-}
3 #ifdef __GLASGOW_HASKELL__
4 {-# LANGUAGE Trustworthy #-}
5 #endif
6
7 -----------------------------------------------------------------------------
8 -- |
9 -- Module : Data.Containers.ListUtils
10 -- Copyright : (c) Gershom Bazerman 2018
11 -- License : BSD-style
12 -- Maintainer : libraries@haskell.org
13 -- Portability : portable
14 --
15 -- This module provides efficient containers-based functions on the list type.
16 --
17 -- In the documentation, \(n\) is the number of elements in the list while
18 -- \(d\) is the number of distinct elements in the list. \(W\) is the number
19 -- of bits in an 'Int'.
20 -----------------------------------------------------------------------------
21
22 module Data.Containers.ListUtils (
23 nubOrd,
24 nubOrdOn,
25 nubInt,
26 nubIntOn
27 ) where
28
29 import Data.Set (Set)
30 import qualified Data.Set as Set
31 import qualified Data.IntSet as IntSet
32 import Data.IntSet (IntSet)
33 #ifdef __GLASGOW_HASKELL__
34 import GHC.Exts ( build )
35 #endif
36
37 -- *** Ord-based nubbing ***
38
39
40 -- | \( O(n \log d) \). The @nubOrd@ function removes duplicate elements from a
41 -- list. In particular, it keeps only the first occurrence of each element. By
42 -- using a 'Set' internally it has better asymptotics than the standard
43 -- 'Data.List.nub' function.
44 --
45 -- ==== Strictness
46 --
47 -- @nubOrd@ is strict in the elements of the list.
48 --
49 -- ==== Efficiency note
50 --
51 -- When applicable, it is almost always better to use 'nubInt' or 'nubIntOn'
52 -- instead of this function, although it can be a little worse in certain
53 -- pathological cases. For example, to nub a list of characters, use
54 --
55 -- @ nubIntOn fromEnum xs @
56 nubOrd :: Ord a => [a] -> [a]
57 nubOrd = nubOrdOn id
58 {-# INLINE nubOrd #-}
59
60 -- | The @nubOrdOn@ function behaves just like 'nubOrd' except it performs
61 -- comparisons not on the original datatype, but a user-specified projection
62 -- from that datatype.
63 --
64 -- ==== Strictness
65 --
66 -- @nubOrdOn@ is strict in the values of the function applied to the
67 -- elements of the list.
68 nubOrdOn :: Ord b => (a -> b) -> [a] -> [a]
69 -- For some reason we need to write an explicit lambda here to allow this
70 -- to inline when only applied to a function.
71 nubOrdOn f = \xs -> nubOrdOnExcluding f Set.empty xs
72 {-# INLINE nubOrdOn #-}
73
74 -- Splitting nubOrdOn like this means that we don't have to worry about
75 -- matching specifically on Set.empty in the rewrite-back rule.
76 nubOrdOnExcluding :: Ord b => (a -> b) -> Set b -> [a] -> [a]
77 nubOrdOnExcluding f = go
78 where
79 go _ [] = []
80 go s (x:xs)
81 | fx `Set.member` s = go s xs
82 | otherwise = x : go (Set.insert fx s) xs
83 where !fx = f x
84
85 #ifdef __GLASGOW_HASKELL__
86 -- We want this inlinable to specialize to the necessary Ord instance.
87 {-# INLINABLE [1] nubOrdOnExcluding #-}
88
89 {-# RULES
90 -- Rewrite to a fusible form.
91 "nubOrdOn" [~1] forall f as s. nubOrdOnExcluding f s as =
92 build (\c n -> foldr (nubOrdOnFB f c) (constNubOn n) as s)
93
94 -- Rewrite back to a plain form
95 "nubOrdOnList" [1] forall f as s.
96 foldr (nubOrdOnFB f (:)) (constNubOn []) as s =
97 nubOrdOnExcluding f s as
98 #-}
99
100 nubOrdOnFB :: Ord b
101 => (a -> b)
102 -> (a -> r -> r)
103 -> a
104 -> (Set b -> r)
105 -> Set b
106 -> r
107 nubOrdOnFB f c x r s
108 | fx `Set.member` s = r s
109 | otherwise = x `c` r (Set.insert fx s)
110 where !fx = f x
111 {-# INLINABLE [0] nubOrdOnFB #-}
112
113 constNubOn :: a -> b -> a
114 constNubOn x _ = x
115 {-# INLINE [0] constNubOn #-}
116 #endif
117
118
119 -- *** Int-based nubbing ***
120
121
122 -- | \( O(n \min(d,W)) \). The @nubInt@ function removes duplicate 'Int'
123 -- values from a list. In particular, it keeps only the first occurrence
124 -- of each element. By using an 'IntSet' internally, it attains better
125 -- asymptotics than the standard 'Data.List.nub' function.
126 --
127 -- See also 'nubIntOn', a more widely applicable generalization.
128 --
129 -- ==== Strictness
130 --
131 -- @nubInt@ is strict in the elements of the list.
132 nubInt :: [Int] -> [Int]
133 nubInt = nubIntOn id
134 {-# INLINE nubInt #-}
135
136 -- | The @nubIntOn@ function behaves just like 'nubInt' except it performs
137 -- comparisons not on the original datatype, but a user-specified projection
138 -- from that datatype. For example, @nubIntOn 'fromEnum'@ can be used to
139 -- nub characters and typical fixed-with numerical types efficiently.
140 --
141 -- ==== Strictness
142 --
143 -- @nubIntOn@ is strict in the values of the function applied to the
144 -- elements of the list.
145 nubIntOn :: (a -> Int) -> [a] -> [a]
146 -- For some reason we need to write an explicit lambda here to allow this
147 -- to inline when only applied to a function.
148 nubIntOn f = \xs -> nubIntOnExcluding f IntSet.empty xs
149 {-# INLINE nubIntOn #-}
150
151 -- Splitting nubIntOn like this means that we don't have to worry about
152 -- matching specifically on IntSet.empty in the rewrite-back rule.
153 nubIntOnExcluding :: (a -> Int) -> IntSet -> [a] -> [a]
154 nubIntOnExcluding f = go
155 where
156 go _ [] = []
157 go s (x:xs)
158 | fx `IntSet.member` s = go s xs
159 | otherwise = x : go (IntSet.insert fx s) xs
160 where !fx = f x
161
162 #ifdef __GLASGOW_HASKELL__
163 -- We don't mark this INLINABLE because it doesn't seem obviously useful
164 -- to inline it anywhere; the elements the function operates on are actually
165 -- pulled from a list and installed in a list; the situation is very different
166 -- when fusion occurs. In this case, we let GHC make the call.
167 {-# NOINLINE [1] nubIntOnExcluding #-}
168
169 {-# RULES
170 "nubIntOn" [~1] forall f as s. nubIntOnExcluding f s as =
171 build (\c n -> foldr (nubIntOnFB f c) (constNubOn n) as s)
172 "nubIntOnList" [1] forall f as s. foldr (nubIntOnFB f (:)) (constNubOn []) as s =
173 nubIntOnExcluding f s as
174 #-}
175
176 nubIntOnFB :: (a -> Int)
177 -> (a -> r -> r)
178 -> a
179 -> (IntSet -> r)
180 -> IntSet
181 -> r
182 nubIntOnFB f c x r s
183 | fx `IntSet.member` s = r s
184 | otherwise = x `c` r (IntSet.insert fx s)
185 where !fx = f x
186 {-# INLINABLE [0] nubIntOnFB #-}
187 #endif