Refactor treatment of wildcards
[ghc.git] / compiler / utils / Util.hs
1 -- (c) The University of Glasgow 2006
2
3 {-# LANGUAGE CPP #-}
4
5 -- | Highly random utility functions
6 --
7 module Util (
8 -- * Flags dependent on the compiler build
9 ghciSupported, debugIsOn, ncgDebugIsOn,
10 ghciTablesNextToCode,
11 isWindowsHost, isDarwinHost,
12
13 -- * General list processing
14 zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
15 zipLazy, stretchZipWith, zipWithAndUnzip,
16
17 filterByList,
18
19 unzipWith,
20
21 mapFst, mapSnd, chkAppend,
22 mapAndUnzip, mapAndUnzip3, mapAccumL2,
23 nOfThem, filterOut, partitionWith, splitEithers,
24
25 dropWhileEndLE,
26
27 foldl1', foldl2, count, all2,
28
29 lengthExceeds, lengthIs, lengthAtLeast,
30 listLengthCmp, atLength,
31 equalLength, compareLength, leLength,
32
33 isSingleton, only, singleton,
34 notNull, snocView,
35
36 isIn, isn'tIn,
37
38 -- * Tuples
39 fstOf3, sndOf3, thirdOf3,
40 firstM, first3M,
41 third3,
42 uncurry3,
43
44 -- * List operations controlled by another list
45 takeList, dropList, splitAtList, split,
46 dropTail,
47
48 -- * For loop
49 nTimes,
50
51 -- * Sorting
52 sortWith, minWith, nubSort,
53
54 -- * Comparisons
55 isEqual, eqListBy, eqMaybeBy,
56 thenCmp, cmpList,
57 removeSpaces,
58 (<&&>), (<||>),
59
60 -- * Edit distance
61 fuzzyMatch, fuzzyLookup,
62
63 -- * Transitive closures
64 transitiveClosure,
65
66 -- * Strictness
67 seqList,
68
69 -- * Module names
70 looksLikeModuleName,
71 looksLikePackageName,
72
73 -- * Argument processing
74 getCmd, toCmdArgs, toArgs,
75
76 -- * Floating point
77 readRational,
78
79 -- * read helpers
80 maybeRead, maybeReadFuzzy,
81
82 -- * IO-ish utilities
83 doesDirNameExist,
84 getModificationUTCTime,
85 modificationTimeIfExists,
86 hSetTranslit,
87
88 global, consIORef, globalM,
89
90 -- * Filenames and paths
91 Suffix,
92 splitLongestPrefix,
93 escapeSpaces,
94 Direction(..), reslash,
95 makeRelativeTo,
96
97 -- * Utils for defining Data instances
98 abstractConstr, abstractDataType, mkNoRepType,
99
100 -- * Utils for printing C code
101 charToC,
102
103 -- * Hashing
104 hashString,
105 ) where
106
107 #include "HsVersions.h"
108
109 import Exception
110 import Panic
111
112 import Data.Data
113 import Data.IORef ( IORef, newIORef, atomicModifyIORef' )
114 import System.IO.Unsafe ( unsafePerformIO )
115 import Data.List hiding (group)
116
117 import GHC.Exts
118
119 #if __GLASGOW_HASKELL__ < 709
120 import Control.Applicative (Applicative)
121 #endif
122 import Control.Applicative ( liftA2 )
123 import Control.Monad ( liftM )
124 import GHC.IO.Encoding (mkTextEncoding, textEncodingName)
125 import System.IO (Handle, hGetEncoding, hSetEncoding)
126 import System.IO.Error as IO ( isDoesNotExistError )
127 import System.Directory ( doesDirectoryExist, getModificationTime )
128 import System.FilePath
129
130 import Data.Char ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit )
131 import Data.Int
132 import Data.Ratio ( (%) )
133 import Data.Ord ( comparing )
134 import Data.Bits
135 import Data.Word
136 import qualified Data.IntMap as IM
137 import qualified Data.Set as Set
138
139 import Data.Time
140
141 infixr 9 `thenCmp`
142
143 {-
144 ************************************************************************
145 * *
146 \subsection{Is DEBUG on, are we on Windows, etc?}
147 * *
148 ************************************************************************
149
150 These booleans are global constants, set by CPP flags. They allow us to
151 recompile a single module (this one) to change whether or not debug output
152 appears. They sometimes let us avoid even running CPP elsewhere.
153
154 It's important that the flags are literal constants (True/False). Then,
155 with -0, tests of the flags in other modules will simplify to the correct
156 branch of the conditional, thereby dropping debug code altogether when
157 the flags are off.
158 -}
159
160 ghciSupported :: Bool
161 #ifdef GHCI
162 ghciSupported = True
163 #else
164 ghciSupported = False
165 #endif
166
167 debugIsOn :: Bool
168 #ifdef DEBUG
169 debugIsOn = True
170 #else
171 debugIsOn = False
172 #endif
173
174 ncgDebugIsOn :: Bool
175 #ifdef NCG_DEBUG
176 ncgDebugIsOn = True
177 #else
178 ncgDebugIsOn = False
179 #endif
180
181 ghciTablesNextToCode :: Bool
182 #ifdef GHCI_TABLES_NEXT_TO_CODE
183 ghciTablesNextToCode = True
184 #else
185 ghciTablesNextToCode = False
186 #endif
187
188 isWindowsHost :: Bool
189 #ifdef mingw32_HOST_OS
190 isWindowsHost = True
191 #else
192 isWindowsHost = False
193 #endif
194
195 isDarwinHost :: Bool
196 #ifdef darwin_HOST_OS
197 isDarwinHost = True
198 #else
199 isDarwinHost = False
200 #endif
201
202 {-
203 ************************************************************************
204 * *
205 \subsection{A for loop}
206 * *
207 ************************************************************************
208 -}
209
210 -- | Compose a function with itself n times. (nth rather than twice)
211 nTimes :: Int -> (a -> a) -> (a -> a)
212 nTimes 0 _ = id
213 nTimes 1 f = f
214 nTimes n f = f . nTimes (n-1) f
215
216 fstOf3 :: (a,b,c) -> a
217 sndOf3 :: (a,b,c) -> b
218 thirdOf3 :: (a,b,c) -> c
219 fstOf3 (a,_,_) = a
220 sndOf3 (_,b,_) = b
221 thirdOf3 (_,_,c) = c
222
223 third3 :: (c -> d) -> (a, b, c) -> (a, b, d)
224 third3 f (a, b, c) = (a, b, f c)
225
226 uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
227 uncurry3 f (a, b, c) = f a b c
228
229 firstM :: Monad m => (a -> m c) -> (a, b) -> m (c, b)
230 firstM f (x, y) = liftM (\x' -> (x', y)) (f x)
231
232 first3M :: Monad m => (a -> m d) -> (a, b, c) -> m (d, b, c)
233 first3M f (x, y, z) = liftM (\x' -> (x', y, z)) (f x)
234
235 {-
236 ************************************************************************
237 * *
238 \subsection[Utils-lists]{General list processing}
239 * *
240 ************************************************************************
241 -}
242
243 filterOut :: (a->Bool) -> [a] -> [a]
244 -- ^ Like filter, only it reverses the sense of the test
245 filterOut _ [] = []
246 filterOut p (x:xs) | p x = filterOut p xs
247 | otherwise = x : filterOut p xs
248
249 partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
250 -- ^ Uses a function to determine which of two output lists an input element should join
251 partitionWith _ [] = ([],[])
252 partitionWith f (x:xs) = case f x of
253 Left b -> (b:bs, cs)
254 Right c -> (bs, c:cs)
255 where (bs,cs) = partitionWith f xs
256
257 splitEithers :: [Either a b] -> ([a], [b])
258 -- ^ Teases a list of 'Either's apart into two lists
259 splitEithers [] = ([],[])
260 splitEithers (e : es) = case e of
261 Left x -> (x:xs, ys)
262 Right y -> (xs, y:ys)
263 where (xs,ys) = splitEithers es
264
265 chkAppend :: [a] -> [a] -> [a]
266 -- Checks for the second arguemnt being empty
267 -- Used in situations where that situation is common
268 chkAppend xs ys
269 | null ys = xs
270 | otherwise = xs ++ ys
271
272 {-
273 A paranoid @zip@ (and some @zipWith@ friends) that checks the lists
274 are of equal length. Alastair Reid thinks this should only happen if
275 DEBUGging on; hey, why not?
276 -}
277
278 zipEqual :: String -> [a] -> [b] -> [(a,b)]
279 zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c]
280 zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d]
281 zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e]
282
283 #ifndef DEBUG
284 zipEqual _ = zip
285 zipWithEqual _ = zipWith
286 zipWith3Equal _ = zipWith3
287 zipWith4Equal _ = zipWith4
288 #else
289 zipEqual _ [] [] = []
290 zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs
291 zipEqual msg _ _ = panic ("zipEqual: unequal lists:"++msg)
292
293 zipWithEqual msg z (a:as) (b:bs)= z a b : zipWithEqual msg z as bs
294 zipWithEqual _ _ [] [] = []
295 zipWithEqual msg _ _ _ = panic ("zipWithEqual: unequal lists:"++msg)
296
297 zipWith3Equal msg z (a:as) (b:bs) (c:cs)
298 = z a b c : zipWith3Equal msg z as bs cs
299 zipWith3Equal _ _ [] [] [] = []
300 zipWith3Equal msg _ _ _ _ = panic ("zipWith3Equal: unequal lists:"++msg)
301
302 zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds)
303 = z a b c d : zipWith4Equal msg z as bs cs ds
304 zipWith4Equal _ _ [] [] [] [] = []
305 zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg)
306 #endif
307
308 -- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~)
309 zipLazy :: [a] -> [b] -> [(a,b)]
310 zipLazy [] _ = []
311 zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys
312
313 -- | 'filterByList' takes a list of Bools and a list of some elements and
314 -- filters out these elements for which the corresponding value in the list of
315 -- Bools is False. This function does not check whether the lists have equal
316 -- length.
317 filterByList :: [Bool] -> [a] -> [a]
318 filterByList (True:bs) (x:xs) = x : filterByList bs xs
319 filterByList (False:bs) (_:xs) = filterByList bs xs
320 filterByList _ _ = []
321
322 stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
323 -- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in
324 -- the places where @p@ returns @True@
325
326 stretchZipWith _ _ _ [] _ = []
327 stretchZipWith p z f (x:xs) ys
328 | p x = f x z : stretchZipWith p z f xs ys
329 | otherwise = case ys of
330 [] -> []
331 (y:ys) -> f x y : stretchZipWith p z f xs ys
332
333 mapFst :: (a->c) -> [(a,b)] -> [(c,b)]
334 mapSnd :: (b->c) -> [(a,b)] -> [(a,c)]
335
336 mapFst f xys = [(f x, y) | (x,y) <- xys]
337 mapSnd f xys = [(x, f y) | (x,y) <- xys]
338
339 mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c])
340
341 mapAndUnzip _ [] = ([], [])
342 mapAndUnzip f (x:xs)
343 = let (r1, r2) = f x
344 (rs1, rs2) = mapAndUnzip f xs
345 in
346 (r1:rs1, r2:rs2)
347
348 mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d])
349
350 mapAndUnzip3 _ [] = ([], [], [])
351 mapAndUnzip3 f (x:xs)
352 = let (r1, r2, r3) = f x
353 (rs1, rs2, rs3) = mapAndUnzip3 f xs
354 in
355 (r1:rs1, r2:rs2, r3:rs3)
356
357 zipWithAndUnzip :: (a -> b -> (c,d)) -> [a] -> [b] -> ([c],[d])
358 zipWithAndUnzip f (a:as) (b:bs)
359 = let (r1, r2) = f a b
360 (rs1, rs2) = zipWithAndUnzip f as bs
361 in
362 (r1:rs1, r2:rs2)
363 zipWithAndUnzip _ _ _ = ([],[])
364
365 mapAccumL2 :: (s1 -> s2 -> a -> (s1, s2, b)) -> s1 -> s2 -> [a] -> (s1, s2, [b])
366 mapAccumL2 f s1 s2 xs = (s1', s2', ys)
367 where ((s1', s2'), ys) = mapAccumL (\(s1, s2) x -> case f s1 s2 x of
368 (s1', s2', y) -> ((s1', s2'), y))
369 (s1, s2) xs
370
371 nOfThem :: Int -> a -> [a]
372 nOfThem n thing = replicate n thing
373
374 -- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely:
375 --
376 -- @
377 -- atLength atLenPred atEndPred ls n
378 -- | n < 0 = atLenPred ls
379 -- | length ls < n = atEndPred (n - length ls)
380 -- | otherwise = atLenPred (drop n ls)
381 -- @
382 atLength :: ([a] -> b) -- Called when length ls >= n, passed (drop n ls)
383 -- NB: arg passed to this function may be []
384 -> b -- Called when length ls < n
385 -> [a]
386 -> Int
387 -> b
388 atLength atLenPred atEnd ls n
389 | n < 0 = atLenPred ls
390 | otherwise = go n ls
391 where
392 -- go's first arg n >= 0
393 go 0 ls = atLenPred ls
394 go _ [] = atEnd -- n > 0 here
395 go n (_:xs) = go (n-1) xs
396
397 -- Some special cases of atLength:
398
399 lengthExceeds :: [a] -> Int -> Bool
400 -- ^ > (lengthExceeds xs n) = (length xs > n)
401 lengthExceeds = atLength notNull False
402
403 lengthAtLeast :: [a] -> Int -> Bool
404 lengthAtLeast = atLength (const True) False
405
406 lengthIs :: [a] -> Int -> Bool
407 lengthIs = atLength null False
408
409 listLengthCmp :: [a] -> Int -> Ordering
410 listLengthCmp = atLength atLen atEnd
411 where
412 atEnd = LT -- Not yet seen 'n' elts, so list length is < n.
413
414 atLen [] = EQ
415 atLen _ = GT
416
417 equalLength :: [a] -> [b] -> Bool
418 equalLength [] [] = True
419 equalLength (_:xs) (_:ys) = equalLength xs ys
420 equalLength _ _ = False
421
422 compareLength :: [a] -> [b] -> Ordering
423 compareLength [] [] = EQ
424 compareLength (_:xs) (_:ys) = compareLength xs ys
425 compareLength [] _ = LT
426 compareLength _ [] = GT
427
428 leLength :: [a] -> [b] -> Bool
429 -- ^ True if length xs <= length ys
430 leLength xs ys = case compareLength xs ys of
431 LT -> True
432 EQ -> True
433 GT -> False
434
435 ----------------------------
436 singleton :: a -> [a]
437 singleton x = [x]
438
439 isSingleton :: [a] -> Bool
440 isSingleton [_] = True
441 isSingleton _ = False
442
443 notNull :: [a] -> Bool
444 notNull [] = False
445 notNull _ = True
446
447 only :: [a] -> a
448 #ifdef DEBUG
449 only [a] = a
450 #else
451 only (a:_) = a
452 #endif
453 only _ = panic "Util: only"
454
455 -- Debugging/specialising versions of \tr{elem} and \tr{notElem}
456
457 isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
458
459 # ifndef DEBUG
460 isIn _msg x ys = x `elem` ys
461 isn'tIn _msg x ys = x `notElem` ys
462
463 # else /* DEBUG */
464 isIn msg x ys
465 = elem100 0 x ys
466 where
467 elem100 :: Eq a => Int -> a -> [a] -> Bool
468 elem100 _ _ [] = False
469 elem100 i x (y:ys)
470 | i > 100 = trace ("Over-long elem in " ++ msg) (x `elem` (y:ys))
471 | otherwise = x == y || elem100 (i + 1) x ys
472
473 isn'tIn msg x ys
474 = notElem100 0 x ys
475 where
476 notElem100 :: Eq a => Int -> a -> [a] -> Bool
477 notElem100 _ _ [] = True
478 notElem100 i x (y:ys)
479 | i > 100 = trace ("Over-long notElem in " ++ msg) (x `notElem` (y:ys))
480 | otherwise = x /= y && notElem100 (i + 1) x ys
481 # endif /* DEBUG */
482
483 {-
484 ************************************************************************
485 * *
486 \subsubsection{Sort utils}
487 * *
488 ************************************************************************
489 -}
490
491 minWith :: Ord b => (a -> b) -> [a] -> a
492 minWith get_key xs = ASSERT( not (null xs) )
493 head (sortWith get_key xs)
494
495 nubSort :: Ord a => [a] -> [a]
496 nubSort = Set.toAscList . Set.fromList
497
498 {-
499 ************************************************************************
500 * *
501 \subsection[Utils-transitive-closure]{Transitive closure}
502 * *
503 ************************************************************************
504
505 This algorithm for transitive closure is straightforward, albeit quadratic.
506 -}
507
508 transitiveClosure :: (a -> [a]) -- Successor function
509 -> (a -> a -> Bool) -- Equality predicate
510 -> [a]
511 -> [a] -- The transitive closure
512
513 transitiveClosure succ eq xs
514 = go [] xs
515 where
516 go done [] = done
517 go done (x:xs) | x `is_in` done = go done xs
518 | otherwise = go (x:done) (succ x ++ xs)
519
520 _ `is_in` [] = False
521 x `is_in` (y:ys) | eq x y = True
522 | otherwise = x `is_in` ys
523
524 {-
525 ************************************************************************
526 * *
527 \subsection[Utils-accum]{Accumulating}
528 * *
529 ************************************************************************
530
531 A combination of foldl with zip. It works with equal length lists.
532 -}
533
534 foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc
535 foldl2 _ z [] [] = z
536 foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs
537 foldl2 _ _ _ _ = panic "Util: foldl2"
538
539 all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool
540 -- True if the lists are the same length, and
541 -- all corresponding elements satisfy the predicate
542 all2 _ [] [] = True
543 all2 p (x:xs) (y:ys) = p x y && all2 p xs ys
544 all2 _ _ _ = False
545
546 -- Count the number of times a predicate is true
547
548 count :: (a -> Bool) -> [a] -> Int
549 count _ [] = 0
550 count p (x:xs) | p x = 1 + count p xs
551 | otherwise = count p xs
552
553 {-
554 @splitAt@, @take@, and @drop@ but with length of another
555 list giving the break-off point:
556 -}
557
558 takeList :: [b] -> [a] -> [a]
559 -- (takeList as bs) trims bs to the be same length
560 -- as as, unless as is longer in which case it's a no-op
561 takeList [] _ = []
562 takeList (_:xs) ls =
563 case ls of
564 [] -> []
565 (y:ys) -> y : takeList xs ys
566
567 dropList :: [b] -> [a] -> [a]
568 dropList [] xs = xs
569 dropList _ xs@[] = xs
570 dropList (_:xs) (_:ys) = dropList xs ys
571
572
573 splitAtList :: [b] -> [a] -> ([a], [a])
574 splitAtList [] xs = ([], xs)
575 splitAtList _ xs@[] = (xs, xs)
576 splitAtList (_:xs) (y:ys) = (y:ys', ys'')
577 where
578 (ys', ys'') = splitAtList xs ys
579
580 -- drop from the end of a list
581 dropTail :: Int -> [a] -> [a]
582 -- Specification: dropTail n = reverse . drop n . reverse
583 -- Better implemention due to Joachim Breitner
584 -- http://www.joachim-breitner.de/blog/archives/600-On-taking-the-last-n-elements-of-a-list.html
585 dropTail n xs
586 = go (drop n xs) xs
587 where
588 go (_:ys) (x:xs) = x : go ys xs
589 go _ _ = [] -- Stop when ys runs out
590 -- It'll always run out before xs does
591
592 -- dropWhile from the end of a list. This is similar to Data.List.dropWhileEnd,
593 -- but is lazy in the elements and strict in the spine. For reasonably short lists,
594 -- such as path names and typical lines of text, dropWhileEndLE is generally
595 -- faster than dropWhileEnd. Its advantage is magnified when the predicate is
596 -- expensive--using dropWhileEndLE isSpace to strip the space off a line of text
597 -- is generally much faster than using dropWhileEnd isSpace for that purpose.
598 -- Specification: dropWhileEndLE p = reverse . dropWhile p . reverse
599 -- Pay attention to the short-circuit (&&)! The order of its arguments is the only
600 -- difference between dropWhileEnd and dropWhileEndLE.
601 dropWhileEndLE :: (a -> Bool) -> [a] -> [a]
602 dropWhileEndLE p = foldr (\x r -> if null r && p x then [] else x:r) []
603
604 snocView :: [a] -> Maybe ([a],a)
605 -- Split off the last element
606 snocView [] = Nothing
607 snocView xs = go [] xs
608 where
609 -- Invariant: second arg is non-empty
610 go acc [x] = Just (reverse acc, x)
611 go acc (x:xs) = go (x:acc) xs
612 go _ [] = panic "Util: snocView"
613
614 split :: Char -> String -> [String]
615 split c s = case rest of
616 [] -> [chunk]
617 _:rest -> chunk : split c rest
618 where (chunk, rest) = break (==c) s
619
620 {-
621 ************************************************************************
622 * *
623 \subsection[Utils-comparison]{Comparisons}
624 * *
625 ************************************************************************
626 -}
627
628 isEqual :: Ordering -> Bool
629 -- Often used in (isEqual (a `compare` b))
630 isEqual GT = False
631 isEqual EQ = True
632 isEqual LT = False
633
634 thenCmp :: Ordering -> Ordering -> Ordering
635 {-# INLINE thenCmp #-}
636 thenCmp EQ ordering = ordering
637 thenCmp ordering _ = ordering
638
639 eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool
640 eqListBy _ [] [] = True
641 eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys
642 eqListBy _ _ _ = False
643
644 eqMaybeBy :: (a ->a->Bool) -> Maybe a -> Maybe a -> Bool
645 eqMaybeBy _ Nothing Nothing = True
646 eqMaybeBy eq (Just x) (Just y) = eq x y
647 eqMaybeBy _ _ _ = False
648
649 cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering
650 -- `cmpList' uses a user-specified comparer
651
652 cmpList _ [] [] = EQ
653 cmpList _ [] _ = LT
654 cmpList _ _ [] = GT
655 cmpList cmp (a:as) (b:bs)
656 = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx }
657
658 removeSpaces :: String -> String
659 removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace
660
661 -- Boolean operators lifted to Applicative
662 (<&&>) :: Applicative f => f Bool -> f Bool -> f Bool
663 (<&&>) = liftA2 (&&)
664 infixr 3 <&&> -- same as (&&)
665
666 (<||>) :: Applicative f => f Bool -> f Bool -> f Bool
667 (<||>) = liftA2 (||)
668 infixr 2 <||> -- same as (||)
669
670 {-
671 ************************************************************************
672 * *
673 \subsection{Edit distance}
674 * *
675 ************************************************************************
676 -}
677
678 -- | Find the "restricted" Damerau-Levenshtein edit distance between two strings.
679 -- See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
680 -- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing
681 -- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
682 -- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and
683 -- http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
684 restrictedDamerauLevenshteinDistance :: String -> String -> Int
685 restrictedDamerauLevenshteinDistance str1 str2
686 = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
687 where
688 m = length str1
689 n = length str2
690
691 restrictedDamerauLevenshteinDistanceWithLengths
692 :: Int -> Int -> String -> String -> Int
693 restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
694 | m <= n
695 = if n <= 32 -- n must be larger so this check is sufficient
696 then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
697 else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2
698
699 | otherwise
700 = if m <= 32 -- m must be larger so this check is sufficient
701 then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
702 else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1
703
704 restrictedDamerauLevenshteinDistance'
705 :: (Bits bv, Num bv) => bv -> Int -> Int -> String -> String -> Int
706 restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
707 | [] <- str1 = n
708 | otherwise = extractAnswer $
709 foldl' (restrictedDamerauLevenshteinDistanceWorker
710 (matchVectors str1) top_bit_mask vector_mask)
711 (0, 0, m_ones, 0, m) str2
712 where
713 m_ones@vector_mask = (2 ^ m) - 1
714 top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
715 extractAnswer (_, _, _, _, distance) = distance
716
717 restrictedDamerauLevenshteinDistanceWorker
718 :: (Bits bv, Num bv) => IM.IntMap bv -> bv -> bv
719 -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
720 restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask
721 (pm, d0, vp, vn, distance) char2
722 = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $
723 seq pm' $ seq d0' $ seq vp' $ seq vn' $
724 seq distance'' $ seq char2 $
725 (pm', d0', vp', vn', distance'')
726 where
727 pm' = IM.findWithDefault 0 (ord char2) str1_mvs
728
729 d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm)
730 .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
731 -- No need to mask the shiftL because of the restricted range of pm
732
733 hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
734 hn' = d0' .&. vp
735
736 hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
737 hn'_shift = (hn' `shiftL` 1) .&. vector_mask
738 vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
739 vn' = d0' .&. hp'_shift
740
741 distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
742 distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
743
744 sizedComplement :: Bits bv => bv -> bv -> bv
745 sizedComplement vector_mask vect = vector_mask `xor` vect
746
747 matchVectors :: (Bits bv, Num bv) => String -> IM.IntMap bv
748 matchVectors = snd . foldl' go (0 :: Int, IM.empty)
749 where
750 go (ix, im) char = let ix' = ix + 1
751 im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
752 in seq ix' $ seq im' $ (ix', im')
753
754 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
755 :: Word32 -> Int -> Int -> String -> String -> Int #-}
756 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
757 :: Integer -> Int -> Int -> String -> String -> Int #-}
758
759 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
760 :: IM.IntMap Word32 -> Word32 -> Word32
761 -> (Word32, Word32, Word32, Word32, Int)
762 -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
763 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
764 :: IM.IntMap Integer -> Integer -> Integer
765 -> (Integer, Integer, Integer, Integer, Int)
766 -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
767
768 {-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
769 {-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
770
771 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
772 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
773
774 fuzzyMatch :: String -> [String] -> [String]
775 fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals]
776
777 -- | Search for possible matches to the users input in the given list,
778 -- returning a small number of ranked results
779 fuzzyLookup :: String -> [(String,a)] -> [a]
780 fuzzyLookup user_entered possibilites
781 = map fst $ take mAX_RESULTS $ sortBy (comparing snd)
782 [ (poss_val, distance) | (poss_str, poss_val) <- possibilites
783 , let distance = restrictedDamerauLevenshteinDistance
784 poss_str user_entered
785 , distance <= fuzzy_threshold ]
786 where
787 -- Work out an approriate match threshold:
788 -- We report a candidate if its edit distance is <= the threshold,
789 -- The threshhold is set to about a quarter of the # of characters the user entered
790 -- Length Threshold
791 -- 1 0 -- Don't suggest *any* candidates
792 -- 2 1 -- for single-char identifiers
793 -- 3 1
794 -- 4 1
795 -- 5 1
796 -- 6 2
797 --
798 fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational)
799 mAX_RESULTS = 3
800
801 {-
802 ************************************************************************
803 * *
804 \subsection[Utils-pairs]{Pairs}
805 * *
806 ************************************************************************
807 -}
808
809 unzipWith :: (a -> b -> c) -> [(a, b)] -> [c]
810 unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs
811
812 seqList :: [a] -> b -> b
813 seqList [] b = b
814 seqList (x:xs) b = x `seq` seqList xs b
815
816 -- Global variables:
817
818 global :: a -> IORef a
819 global a = unsafePerformIO (newIORef a)
820
821 consIORef :: IORef [a] -> a -> IO ()
822 consIORef var x = do
823 atomicModifyIORef' var (\xs -> (x:xs,()))
824
825 globalM :: IO a -> IORef a
826 globalM ma = unsafePerformIO (ma >>= newIORef)
827
828 -- Module names:
829
830 looksLikeModuleName :: String -> Bool
831 looksLikeModuleName [] = False
832 looksLikeModuleName (c:cs) = isUpper c && go cs
833 where go [] = True
834 go ('.':cs) = looksLikeModuleName cs
835 go (c:cs) = (isAlphaNum c || c == '_' || c == '\'') && go cs
836
837 -- Similar to 'parse' for Distribution.Package.PackageName,
838 -- but we don't want to depend on Cabal.
839 looksLikePackageName :: String -> Bool
840 looksLikePackageName = all (all isAlphaNum <&&> not . (all isDigit)) . split '-'
841
842 {-
843 Akin to @Prelude.words@, but acts like the Bourne shell, treating
844 quoted strings as Haskell Strings, and also parses Haskell [String]
845 syntax.
846 -}
847
848 getCmd :: String -> Either String -- Error
849 (String, String) -- (Cmd, Rest)
850 getCmd s = case break isSpace $ dropWhile isSpace s of
851 ([], _) -> Left ("Couldn't find command in " ++ show s)
852 res -> Right res
853
854 toCmdArgs :: String -> Either String -- Error
855 (String, [String]) -- (Cmd, Args)
856 toCmdArgs s = case getCmd s of
857 Left err -> Left err
858 Right (cmd, s') -> case toArgs s' of
859 Left err -> Left err
860 Right args -> Right (cmd, args)
861
862 toArgs :: String -> Either String -- Error
863 [String] -- Args
864 toArgs str
865 = case dropWhile isSpace str of
866 s@('[':_) -> case reads s of
867 [(args, spaces)]
868 | all isSpace spaces ->
869 Right args
870 _ ->
871 Left ("Couldn't read " ++ show str ++ "as [String]")
872 s -> toArgs' s
873 where
874 toArgs' :: String -> Either String [String]
875 -- Remove outer quotes:
876 -- > toArgs' "\"foo\" \"bar baz\""
877 -- Right ["foo", "bar baz"]
878 --
879 -- Keep inner quotes:
880 -- > toArgs' "-DFOO=\"bar baz\""
881 -- Right ["-DFOO=\"bar baz\""]
882 toArgs' s = case dropWhile isSpace s of
883 [] -> Right []
884 ('"' : _) -> do
885 -- readAsString removes outer quotes
886 (arg, rest) <- readAsString s
887 (arg:) `fmap` toArgs' rest
888 s' -> case break (isSpace <||> (== '"')) s' of
889 (argPart1, s''@('"':_)) -> do
890 (argPart2, rest) <- readAsString s''
891 -- show argPart2 to keep inner quotes
892 ((argPart1 ++ show argPart2):) `fmap` toArgs' rest
893 (arg, s'') -> (arg:) `fmap` toArgs' s''
894
895 readAsString :: String -> Either String (String, String)
896 readAsString s = case reads s of
897 [(arg, rest)]
898 -- rest must either be [] or start with a space
899 | all isSpace (take 1 rest) ->
900 Right (arg, rest)
901 _ ->
902 Left ("Couldn't read " ++ show s ++ "as String")
903 {-
904 -- -----------------------------------------------------------------------------
905 -- Floats
906 -}
907
908 readRational__ :: ReadS Rational -- NB: doesn't handle leading "-"
909 readRational__ r = do
910 (n,d,s) <- readFix r
911 (k,t) <- readExp s
912 return ((n%1)*10^^(k-d), t)
913 where
914 readFix r = do
915 (ds,s) <- lexDecDigits r
916 (ds',t) <- lexDotDigits s
917 return (read (ds++ds'), length ds', t)
918
919 readExp (e:s) | e `elem` "eE" = readExp' s
920 readExp s = return (0,s)
921
922 readExp' ('+':s) = readDec s
923 readExp' ('-':s) = do (k,t) <- readDec s
924 return (-k,t)
925 readExp' s = readDec s
926
927 readDec s = do
928 (ds,r) <- nonnull isDigit s
929 return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
930 r)
931
932 lexDecDigits = nonnull isDigit
933
934 lexDotDigits ('.':s) = return (span isDigit s)
935 lexDotDigits s = return ("",s)
936
937 nonnull p s = do (cs@(_:_),t) <- return (span p s)
938 return (cs,t)
939
940 readRational :: String -> Rational -- NB: *does* handle a leading "-"
941 readRational top_s
942 = case top_s of
943 '-' : xs -> - (read_me xs)
944 xs -> read_me xs
945 where
946 read_me s
947 = case (do { (x,"") <- readRational__ s ; return x }) of
948 [x] -> x
949 [] -> error ("readRational: no parse:" ++ top_s)
950 _ -> error ("readRational: ambiguous parse:" ++ top_s)
951
952
953 -----------------------------------------------------------------------------
954 -- read helpers
955
956 maybeRead :: Read a => String -> Maybe a
957 maybeRead str = case reads str of
958 [(x, "")] -> Just x
959 _ -> Nothing
960
961 maybeReadFuzzy :: Read a => String -> Maybe a
962 maybeReadFuzzy str = case reads str of
963 [(x, s)]
964 | all isSpace s ->
965 Just x
966 _ ->
967 Nothing
968
969 -----------------------------------------------------------------------------
970 -- Verify that the 'dirname' portion of a FilePath exists.
971 --
972 doesDirNameExist :: FilePath -> IO Bool
973 doesDirNameExist fpath = doesDirectoryExist (takeDirectory fpath)
974
975 -----------------------------------------------------------------------------
976 -- Backwards compatibility definition of getModificationTime
977
978 getModificationUTCTime :: FilePath -> IO UTCTime
979 getModificationUTCTime = getModificationTime
980
981 -- --------------------------------------------------------------
982 -- check existence & modification time at the same time
983
984 modificationTimeIfExists :: FilePath -> IO (Maybe UTCTime)
985 modificationTimeIfExists f = do
986 (do t <- getModificationUTCTime f; return (Just t))
987 `catchIO` \e -> if isDoesNotExistError e
988 then return Nothing
989 else ioError e
990
991 -- --------------------------------------------------------------
992 -- Change the character encoding of the given Handle to transliterate
993 -- on unsupported characters instead of throwing an exception
994
995 hSetTranslit :: Handle -> IO ()
996 hSetTranslit h = do
997 menc <- hGetEncoding h
998 case fmap textEncodingName menc of
999 Just name | '/' `notElem` name -> do
1000 enc' <- mkTextEncoding $ name ++ "//TRANSLIT"
1001 hSetEncoding h enc'
1002 _ -> return ()
1003
1004 -- split a string at the last character where 'pred' is True,
1005 -- returning a pair of strings. The first component holds the string
1006 -- up (but not including) the last character for which 'pred' returned
1007 -- True, the second whatever comes after (but also not including the
1008 -- last character).
1009 --
1010 -- If 'pred' returns False for all characters in the string, the original
1011 -- string is returned in the first component (and the second one is just
1012 -- empty).
1013 splitLongestPrefix :: String -> (Char -> Bool) -> (String,String)
1014 splitLongestPrefix str pred
1015 | null r_pre = (str, [])
1016 | otherwise = (reverse (tail r_pre), reverse r_suf)
1017 -- 'tail' drops the char satisfying 'pred'
1018 where (r_suf, r_pre) = break pred (reverse str)
1019
1020 escapeSpaces :: String -> String
1021 escapeSpaces = foldr (\c s -> if isSpace c then '\\':c:s else c:s) ""
1022
1023 type Suffix = String
1024
1025 --------------------------------------------------------------
1026 -- * Search path
1027 --------------------------------------------------------------
1028
1029 data Direction = Forwards | Backwards
1030
1031 reslash :: Direction -> FilePath -> FilePath
1032 reslash d = f
1033 where f ('/' : xs) = slash : f xs
1034 f ('\\' : xs) = slash : f xs
1035 f (x : xs) = x : f xs
1036 f "" = ""
1037 slash = case d of
1038 Forwards -> '/'
1039 Backwards -> '\\'
1040
1041 makeRelativeTo :: FilePath -> FilePath -> FilePath
1042 this `makeRelativeTo` that = directory </> thisFilename
1043 where (thisDirectory, thisFilename) = splitFileName this
1044 thatDirectory = dropFileName that
1045 directory = joinPath $ f (splitPath thisDirectory)
1046 (splitPath thatDirectory)
1047
1048 f (x : xs) (y : ys)
1049 | x == y = f xs ys
1050 f xs ys = replicate (length ys) ".." ++ xs
1051
1052 {-
1053 ************************************************************************
1054 * *
1055 \subsection[Utils-Data]{Utils for defining Data instances}
1056 * *
1057 ************************************************************************
1058
1059 These functions helps us to define Data instances for abstract types.
1060 -}
1061
1062 abstractConstr :: String -> Constr
1063 abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix
1064
1065 abstractDataType :: String -> DataType
1066 abstractDataType n = mkDataType n [abstractConstr n]
1067
1068 {-
1069 ************************************************************************
1070 * *
1071 \subsection[Utils-C]{Utils for printing C code}
1072 * *
1073 ************************************************************************
1074 -}
1075
1076 charToC :: Word8 -> String
1077 charToC w =
1078 case chr (fromIntegral w) of
1079 '\"' -> "\\\""
1080 '\'' -> "\\\'"
1081 '\\' -> "\\\\"
1082 c | c >= ' ' && c <= '~' -> [c]
1083 | otherwise -> ['\\',
1084 chr (ord '0' + ord c `div` 64),
1085 chr (ord '0' + ord c `div` 8 `mod` 8),
1086 chr (ord '0' + ord c `mod` 8)]
1087
1088 {-
1089 ************************************************************************
1090 * *
1091 \subsection[Utils-Hashing]{Utils for hashing}
1092 * *
1093 ************************************************************************
1094 -}
1095
1096 -- | A sample hash function for Strings. We keep multiplying by the
1097 -- golden ratio and adding. The implementation is:
1098 --
1099 -- > hashString = foldl' f golden
1100 -- > where f m c = fromIntegral (ord c) * magic + hashInt32 m
1101 -- > magic = 0xdeadbeef
1102 --
1103 -- Where hashInt32 works just as hashInt shown above.
1104 --
1105 -- Knuth argues that repeated multiplication by the golden ratio
1106 -- will minimize gaps in the hash space, and thus it's a good choice
1107 -- for combining together multiple keys to form one.
1108 --
1109 -- Here we know that individual characters c are often small, and this
1110 -- produces frequent collisions if we use ord c alone. A
1111 -- particular problem are the shorter low ASCII and ISO-8859-1
1112 -- character strings. We pre-multiply by a magic twiddle factor to
1113 -- obtain a good distribution. In fact, given the following test:
1114 --
1115 -- > testp :: Int32 -> Int
1116 -- > testp k = (n - ) . length . group . sort . map hs . take n $ ls
1117 -- > where ls = [] : [c : l | l <- ls, c <- ['\0'..'\xff']]
1118 -- > hs = foldl' f golden
1119 -- > f m c = fromIntegral (ord c) * k + hashInt32 m
1120 -- > n = 100000
1121 --
1122 -- We discover that testp magic = 0.
1123 hashString :: String -> Int32
1124 hashString = foldl' f golden
1125 where f m c = fromIntegral (ord c) * magic + hashInt32 m
1126 magic = fromIntegral (0xdeadbeef :: Word32)
1127
1128 golden :: Int32
1129 golden = 1013904242 -- = round ((sqrt 5 - 1) * 2^32) :: Int32
1130 -- was -1640531527 = round ((sqrt 5 - 1) * 2^31) :: Int32
1131 -- but that has bad mulHi properties (even adding 2^32 to get its inverse)
1132 -- Whereas the above works well and contains no hash duplications for
1133 -- [-32767..65536]
1134
1135 -- | A sample (and useful) hash function for Int32,
1136 -- implemented by extracting the uppermost 32 bits of the 64-bit
1137 -- result of multiplying by a 33-bit constant. The constant is from
1138 -- Knuth, derived from the golden ratio:
1139 --
1140 -- > golden = round ((sqrt 5 - 1) * 2^32)
1141 --
1142 -- We get good key uniqueness on small inputs
1143 -- (a problem with previous versions):
1144 -- (length $ group $ sort $ map hashInt32 [-32767..65536]) == 65536 + 32768
1145 --
1146 hashInt32 :: Int32 -> Int32
1147 hashInt32 x = mulHi x golden + x
1148
1149 -- hi 32 bits of a x-bit * 32 bit -> 64-bit multiply
1150 mulHi :: Int32 -> Int32 -> Int32
1151 mulHi a b = fromIntegral (r `shiftR` 32)
1152 where r :: Int64
1153 r = fromIntegral a * fromIntegral b