1 {-# LANGUAGE CPP #-}

2 {-# LANGUAGE BangPatterns #-}

3 #ifdef __GLASGOW_HASKELL__

4 {-# LANGUAGE Trustworthy #-}

5 #endif

7 -----------------------------------------------------------------------------

8 -- |

9 -- Module : Data.Containers.ListUtils

10 -- Copyright : (c) Gershom Bazerman 2018

11 -- License : BSD-style

12 -- Maintainer : libraries@haskell.org

13 -- Portability : portable

14 --

15 -- This module provides efficient containers-based functions on the list type.

16 --

17 -- In the documentation, \(n\) is the number of elements in the list while

18 -- \(d\) is the number of distinct elements in the list. \(W\) is the number

19 -- of bits in an 'Int'.

20 -----------------------------------------------------------------------------

23 nubOrd,

24 nubOrdOn,

25 nubInt,

26 nubIntOn

33 #ifdef __GLASGOW_HASKELL__

35 #endif

37 -- *** Ord-based nubbing ***

40 -- | \( O(n \log d) \). The @nubOrd@ function removes duplicate elements from a

41 -- list. In particular, it keeps only the first occurrence of each element. By

42 -- using a 'Set' internally it has better asymptotics than the standard

43 -- 'Data.List.nub' function.

44 --

45 -- ==== Strictness

46 --

47 -- @nubOrd@ is strict in the elements of the list.

48 --

49 -- ==== Efficiency note

50 --

51 -- When applicable, it is almost always better to use 'nubInt' or 'nubIntOn'

52 -- instead of this function, although it can be a little worse in certain

53 -- pathological cases. For example, to nub a list of characters, use

54 --

55 -- @ nubIntOn fromEnum xs @

58 {-# INLINE nubOrd #-}

60 -- | The @nubOrdOn@ function behaves just like 'nubOrd' except it performs

61 -- comparisons not on the original datatype, but a user-specified projection

62 -- from that datatype.

63 --

64 -- ==== Strictness

65 --

66 -- @nubOrdOn@ is strict in the values of the function applied to the

67 -- elements of the list.

69 -- For some reason we need to write an explicit lambda here to allow this

70 -- to inline when only applied to a function.

72 {-# INLINE nubOrdOn #-}

74 -- Splitting nubOrdOn like this means that we don't have to worry about

75 -- matching specifically on Set.empty in the rewrite-back rule.

77 nubOrdOnExcluding f = go

78 where

79 go _ [] = []

85 #ifdef __GLASGOW_HASKELL__

86 -- We want this inlinable to specialize to the necessary Ord instance.

87 {-# INLINABLE [1] nubOrdOnExcluding #-}

89 {-# RULES

90 -- Rewrite to a fusible form.

91 "nubOrdOn" [~1] forall f as s. nubOrdOnExcluding f s as =

92 build (\c n -> foldr (nubOrdOnFB f c) (constNubOn n) as s)

94 -- Rewrite back to a plain form

95 "nubOrdOnList" [1] forall f as s.

96 foldr (nubOrdOnFB f (:)) (constNubOn []) as s =

97 nubOrdOnExcluding f s as

98 #-}

100 nubOrdOnFB :: Ord b

103 -> a

105 -> Set b

106 -> r

107 nubOrdOnFB f c x r s

111 {-# INLINABLE [0] nubOrdOnFB #-}

114 constNubOn x _ = x

115 {-# INLINE [0] constNubOn #-}

116 #endif

119 -- *** Int-based nubbing ***

122 -- | \( O(n \min(d,W)) \). The @nubInt@ function removes duplicate 'Int'

123 -- values from a list. In particular, it keeps only the first occurrence

124 -- of each element. By using an 'IntSet' internally, it attains better

125 -- asymptotics than the standard 'Data.List.nub' function.

126 --

127 -- See also 'nubIntOn', a more widely applicable generalization.

128 --

129 -- ==== Strictness

130 --

131 -- @nubInt@ is strict in the elements of the list.

134 {-# INLINE nubInt #-}

136 -- | The @nubIntOn@ function behaves just like 'nubInt' except it performs

137 -- comparisons not on the original datatype, but a user-specified projection

138 -- from that datatype. For example, @nubIntOn 'fromEnum'@ can be used to

139 -- nub characters and typical fixed-with numerical types efficiently.

140 --

141 -- ==== Strictness

142 --

143 -- @nubIntOn@ is strict in the values of the function applied to the

144 -- elements of the list.

146 -- For some reason we need to write an explicit lambda here to allow this

147 -- to inline when only applied to a function.

149 {-# INLINE nubIntOn #-}

151 -- Splitting nubIntOn like this means that we don't have to worry about

152 -- matching specifically on IntSet.empty in the rewrite-back rule.

154 nubIntOnExcluding f = go

155 where

156 go _ [] = []

162 #ifdef __GLASGOW_HASKELL__

163 -- We don't mark this INLINABLE because it doesn't seem obviously useful

164 -- to inline it anywhere; the elements the function operates on are actually

165 -- pulled from a list and installed in a list; the situation is very different

166 -- when fusion occurs. In this case, we let GHC make the call.

167 {-# NOINLINE [1] nubIntOnExcluding #-}

169 {-# RULES

170 "nubIntOn" [~1] forall f as s. nubIntOnExcluding f s as =

171 build (\c n -> foldr (nubIntOnFB f c) (constNubOn n) as s)

172 "nubIntOnList" [1] forall f as s. foldr (nubIntOnFB f (:)) (constNubOn []) as s =

173 nubIntOnExcluding f s as

174 #-}

178 -> a

180 -> IntSet

181 -> r

182 nubIntOnFB f c x r s

186 {-# INLINABLE [0] nubIntOnFB #-}

187 #endif