aa3a19b64c2758a5d86b7e367a4b8390f20c6346
[ghc.git] / compiler / utils / Util.hs
1 -- (c) The University of Glasgow 2006
2
3 {-# LANGUAGE CPP #-}
4
5 -- | Highly random utility functions
6 --
7 module Util (
8 -- * Flags dependent on the compiler build
9 ghciSupported, debugIsOn, ncgDebugIsOn,
10 ghciTablesNextToCode,
11 isWindowsHost, isDarwinHost,
12
13 -- * General list processing
14 zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
15 zipLazy, stretchZipWith, zipWithAndUnzip,
16
17 filterByList,
18
19 unzipWith,
20
21 mapFst, mapSnd, chkAppend,
22 mapAndUnzip, mapAndUnzip3, mapAccumL2,
23 nOfThem, filterOut, partitionWith, splitEithers,
24
25 dropWhileEndLE,
26
27 foldl1', foldl2, count, all2,
28
29 lengthExceeds, lengthIs, lengthAtLeast,
30 listLengthCmp, atLength,
31 equalLength, compareLength, leLength,
32
33 isSingleton, only, singleton,
34 notNull, snocView,
35
36 isIn, isn'tIn,
37
38 -- * Tuples
39 fstOf3, sndOf3, thirdOf3,
40 firstM, first3M,
41 third3,
42 uncurry3,
43
44 -- * List operations controlled by another list
45 takeList, dropList, splitAtList, split,
46 dropTail,
47
48 -- * For loop
49 nTimes,
50
51 -- * Sorting
52 sortWith, minWith, nubSort,
53
54 -- * Comparisons
55 isEqual, eqListBy, eqMaybeBy,
56 thenCmp, cmpList,
57 removeSpaces,
58
59 -- * Edit distance
60 fuzzyMatch, fuzzyLookup,
61
62 -- * Transitive closures
63 transitiveClosure,
64
65 -- * Strictness
66 seqList,
67
68 -- * Module names
69 looksLikeModuleName,
70
71 -- * Argument processing
72 getCmd, toCmdArgs, toArgs,
73
74 -- * Floating point
75 readRational,
76
77 -- * read helpers
78 maybeRead, maybeReadFuzzy,
79
80 -- * IO-ish utilities
81 doesDirNameExist,
82 getModificationUTCTime,
83 modificationTimeIfExists,
84
85 global, consIORef, globalM,
86
87 -- * Filenames and paths
88 Suffix,
89 splitLongestPrefix,
90 escapeSpaces,
91 Direction(..), reslash,
92 makeRelativeTo,
93
94 -- * Utils for defining Data instances
95 abstractConstr, abstractDataType, mkNoRepType,
96
97 -- * Utils for printing C code
98 charToC,
99
100 -- * Hashing
101 hashString,
102 ) where
103
104 #include "HsVersions.h"
105
106 import Exception
107 import Panic
108
109 import Data.Data
110 import Data.IORef ( IORef, newIORef, atomicModifyIORef )
111 import System.IO.Unsafe ( unsafePerformIO )
112 import Data.List hiding (group)
113
114 #ifdef DEBUG
115 import FastTypes
116 #endif
117
118 import Control.Monad ( liftM )
119 import System.IO.Error as IO ( isDoesNotExistError )
120 import System.Directory ( doesDirectoryExist, getModificationTime )
121 import System.FilePath
122
123 import Data.Char ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit )
124 import Data.Int
125 import Data.Ratio ( (%) )
126 import Data.Ord ( comparing )
127 import Data.Bits
128 import Data.Word
129 import qualified Data.IntMap as IM
130 import qualified Data.Set as Set
131
132 import Data.Time
133
134 infixr 9 `thenCmp`
135
136 {-
137 ************************************************************************
138 * *
139 \subsection{Is DEBUG on, are we on Windows, etc?}
140 * *
141 ************************************************************************
142
143 These booleans are global constants, set by CPP flags. They allow us to
144 recompile a single module (this one) to change whether or not debug output
145 appears. They sometimes let us avoid even running CPP elsewhere.
146
147 It's important that the flags are literal constants (True/False). Then,
148 with -0, tests of the flags in other modules will simplify to the correct
149 branch of the conditional, thereby dropping debug code altogether when
150 the flags are off.
151 -}
152
153 ghciSupported :: Bool
154 #ifdef GHCI
155 ghciSupported = True
156 #else
157 ghciSupported = False
158 #endif
159
160 debugIsOn :: Bool
161 #ifdef DEBUG
162 debugIsOn = True
163 #else
164 debugIsOn = False
165 #endif
166
167 ncgDebugIsOn :: Bool
168 #ifdef NCG_DEBUG
169 ncgDebugIsOn = True
170 #else
171 ncgDebugIsOn = False
172 #endif
173
174 ghciTablesNextToCode :: Bool
175 #ifdef GHCI_TABLES_NEXT_TO_CODE
176 ghciTablesNextToCode = True
177 #else
178 ghciTablesNextToCode = False
179 #endif
180
181 isWindowsHost :: Bool
182 #ifdef mingw32_HOST_OS
183 isWindowsHost = True
184 #else
185 isWindowsHost = False
186 #endif
187
188 isDarwinHost :: Bool
189 #ifdef darwin_HOST_OS
190 isDarwinHost = True
191 #else
192 isDarwinHost = False
193 #endif
194
195 {-
196 ************************************************************************
197 * *
198 \subsection{A for loop}
199 * *
200 ************************************************************************
201 -}
202
203 -- | Compose a function with itself n times. (nth rather than twice)
204 nTimes :: Int -> (a -> a) -> (a -> a)
205 nTimes 0 _ = id
206 nTimes 1 f = f
207 nTimes n f = f . nTimes (n-1) f
208
209 fstOf3 :: (a,b,c) -> a
210 sndOf3 :: (a,b,c) -> b
211 thirdOf3 :: (a,b,c) -> c
212 fstOf3 (a,_,_) = a
213 sndOf3 (_,b,_) = b
214 thirdOf3 (_,_,c) = c
215
216 third3 :: (c -> d) -> (a, b, c) -> (a, b, d)
217 third3 f (a, b, c) = (a, b, f c)
218
219 uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
220 uncurry3 f (a, b, c) = f a b c
221
222 firstM :: Monad m => (a -> m c) -> (a, b) -> m (c, b)
223 firstM f (x, y) = liftM (\x' -> (x', y)) (f x)
224
225 first3M :: Monad m => (a -> m d) -> (a, b, c) -> m (d, b, c)
226 first3M f (x, y, z) = liftM (\x' -> (x', y, z)) (f x)
227
228 {-
229 ************************************************************************
230 * *
231 \subsection[Utils-lists]{General list processing}
232 * *
233 ************************************************************************
234 -}
235
236 filterOut :: (a->Bool) -> [a] -> [a]
237 -- ^ Like filter, only it reverses the sense of the test
238 filterOut _ [] = []
239 filterOut p (x:xs) | p x = filterOut p xs
240 | otherwise = x : filterOut p xs
241
242 partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
243 -- ^ Uses a function to determine which of two output lists an input element should join
244 partitionWith _ [] = ([],[])
245 partitionWith f (x:xs) = case f x of
246 Left b -> (b:bs, cs)
247 Right c -> (bs, c:cs)
248 where (bs,cs) = partitionWith f xs
249
250 splitEithers :: [Either a b] -> ([a], [b])
251 -- ^ Teases a list of 'Either's apart into two lists
252 splitEithers [] = ([],[])
253 splitEithers (e : es) = case e of
254 Left x -> (x:xs, ys)
255 Right y -> (xs, y:ys)
256 where (xs,ys) = splitEithers es
257
258 chkAppend :: [a] -> [a] -> [a]
259 -- Checks for the second arguemnt being empty
260 -- Used in situations where that situation is common
261 chkAppend xs ys
262 | null ys = xs
263 | otherwise = xs ++ ys
264
265 {-
266 A paranoid @zip@ (and some @zipWith@ friends) that checks the lists
267 are of equal length. Alastair Reid thinks this should only happen if
268 DEBUGging on; hey, why not?
269 -}
270
271 zipEqual :: String -> [a] -> [b] -> [(a,b)]
272 zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c]
273 zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d]
274 zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e]
275
276 #ifndef DEBUG
277 zipEqual _ = zip
278 zipWithEqual _ = zipWith
279 zipWith3Equal _ = zipWith3
280 zipWith4Equal _ = zipWith4
281 #else
282 zipEqual _ [] [] = []
283 zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs
284 zipEqual msg _ _ = panic ("zipEqual: unequal lists:"++msg)
285
286 zipWithEqual msg z (a:as) (b:bs)= z a b : zipWithEqual msg z as bs
287 zipWithEqual _ _ [] [] = []
288 zipWithEqual msg _ _ _ = panic ("zipWithEqual: unequal lists:"++msg)
289
290 zipWith3Equal msg z (a:as) (b:bs) (c:cs)
291 = z a b c : zipWith3Equal msg z as bs cs
292 zipWith3Equal _ _ [] [] [] = []
293 zipWith3Equal msg _ _ _ _ = panic ("zipWith3Equal: unequal lists:"++msg)
294
295 zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds)
296 = z a b c d : zipWith4Equal msg z as bs cs ds
297 zipWith4Equal _ _ [] [] [] [] = []
298 zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg)
299 #endif
300
301 -- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~)
302 zipLazy :: [a] -> [b] -> [(a,b)]
303 zipLazy [] _ = []
304 zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys
305
306 -- | 'filterByList' takes a list of Bools and a list of some elements and
307 -- filters out these elements for which the corresponding value in the list of
308 -- Bools is False. This function does not check whether the lists have equal
309 -- length.
310 filterByList :: [Bool] -> [a] -> [a]
311 filterByList (True:bs) (x:xs) = x : filterByList bs xs
312 filterByList (False:bs) (_:xs) = filterByList bs xs
313 filterByList _ _ = []
314
315 stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
316 -- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in
317 -- the places where @p@ returns @True@
318
319 stretchZipWith _ _ _ [] _ = []
320 stretchZipWith p z f (x:xs) ys
321 | p x = f x z : stretchZipWith p z f xs ys
322 | otherwise = case ys of
323 [] -> []
324 (y:ys) -> f x y : stretchZipWith p z f xs ys
325
326 mapFst :: (a->c) -> [(a,b)] -> [(c,b)]
327 mapSnd :: (b->c) -> [(a,b)] -> [(a,c)]
328
329 mapFst f xys = [(f x, y) | (x,y) <- xys]
330 mapSnd f xys = [(x, f y) | (x,y) <- xys]
331
332 mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c])
333
334 mapAndUnzip _ [] = ([], [])
335 mapAndUnzip f (x:xs)
336 = let (r1, r2) = f x
337 (rs1, rs2) = mapAndUnzip f xs
338 in
339 (r1:rs1, r2:rs2)
340
341 mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d])
342
343 mapAndUnzip3 _ [] = ([], [], [])
344 mapAndUnzip3 f (x:xs)
345 = let (r1, r2, r3) = f x
346 (rs1, rs2, rs3) = mapAndUnzip3 f xs
347 in
348 (r1:rs1, r2:rs2, r3:rs3)
349
350 zipWithAndUnzip :: (a -> b -> (c,d)) -> [a] -> [b] -> ([c],[d])
351 zipWithAndUnzip f (a:as) (b:bs)
352 = let (r1, r2) = f a b
353 (rs1, rs2) = zipWithAndUnzip f as bs
354 in
355 (r1:rs1, r2:rs2)
356 zipWithAndUnzip _ _ _ = ([],[])
357
358 mapAccumL2 :: (s1 -> s2 -> a -> (s1, s2, b)) -> s1 -> s2 -> [a] -> (s1, s2, [b])
359 mapAccumL2 f s1 s2 xs = (s1', s2', ys)
360 where ((s1', s2'), ys) = mapAccumL (\(s1, s2) x -> case f s1 s2 x of
361 (s1', s2', y) -> ((s1', s2'), y))
362 (s1, s2) xs
363
364 nOfThem :: Int -> a -> [a]
365 nOfThem n thing = replicate n thing
366
367 -- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely:
368 --
369 -- @
370 -- atLength atLenPred atEndPred ls n
371 -- | n < 0 = atLenPred n
372 -- | length ls < n = atEndPred (n - length ls)
373 -- | otherwise = atLenPred (drop n ls)
374 -- @
375 atLength :: ([a] -> b)
376 -> (Int -> b)
377 -> [a]
378 -> Int
379 -> b
380 atLength atLenPred atEndPred ls n
381 | n < 0 = atEndPred n
382 | otherwise = go n ls
383 where
384 go n [] = atEndPred n
385 go 0 ls = atLenPred ls
386 go n (_:xs) = go (n-1) xs
387
388 -- Some special cases of atLength:
389
390 lengthExceeds :: [a] -> Int -> Bool
391 -- ^ > (lengthExceeds xs n) = (length xs > n)
392 lengthExceeds = atLength notNull (const False)
393
394 lengthAtLeast :: [a] -> Int -> Bool
395 lengthAtLeast = atLength notNull (== 0)
396
397 lengthIs :: [a] -> Int -> Bool
398 lengthIs = atLength null (==0)
399
400 listLengthCmp :: [a] -> Int -> Ordering
401 listLengthCmp = atLength atLen atEnd
402 where
403 atEnd 0 = EQ
404 atEnd x
405 | x > 0 = LT -- not yet seen 'n' elts, so list length is < n.
406 | otherwise = GT
407
408 atLen [] = EQ
409 atLen _ = GT
410
411 equalLength :: [a] -> [b] -> Bool
412 equalLength [] [] = True
413 equalLength (_:xs) (_:ys) = equalLength xs ys
414 equalLength _ _ = False
415
416 compareLength :: [a] -> [b] -> Ordering
417 compareLength [] [] = EQ
418 compareLength (_:xs) (_:ys) = compareLength xs ys
419 compareLength [] _ = LT
420 compareLength _ [] = GT
421
422 leLength :: [a] -> [b] -> Bool
423 -- ^ True if length xs <= length ys
424 leLength xs ys = case compareLength xs ys of
425 LT -> True
426 EQ -> True
427 GT -> False
428
429 ----------------------------
430 singleton :: a -> [a]
431 singleton x = [x]
432
433 isSingleton :: [a] -> Bool
434 isSingleton [_] = True
435 isSingleton _ = False
436
437 notNull :: [a] -> Bool
438 notNull [] = False
439 notNull _ = True
440
441 only :: [a] -> a
442 #ifdef DEBUG
443 only [a] = a
444 #else
445 only (a:_) = a
446 #endif
447 only _ = panic "Util: only"
448
449 -- Debugging/specialising versions of \tr{elem} and \tr{notElem}
450
451 isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
452
453 # ifndef DEBUG
454 isIn _msg x ys = x `elem` ys
455 isn'tIn _msg x ys = x `notElem` ys
456
457 # else /* DEBUG */
458 isIn msg x ys
459 = elem100 (_ILIT(0)) x ys
460 where
461 elem100 _ _ [] = False
462 elem100 i x (y:ys)
463 | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg)
464 (x `elem` (y:ys))
465 | otherwise = x == y || elem100 (i +# _ILIT(1)) x ys
466
467 isn'tIn msg x ys
468 = notElem100 (_ILIT(0)) x ys
469 where
470 notElem100 _ _ [] = True
471 notElem100 i x (y:ys)
472 | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg)
473 (x `notElem` (y:ys))
474 | otherwise = x /= y && notElem100 (i +# _ILIT(1)) x ys
475 # endif /* DEBUG */
476
477 {-
478 ************************************************************************
479 * *
480 \subsubsection{Sort utils}
481 * *
482 ************************************************************************
483 -}
484
485 sortWith :: Ord b => (a->b) -> [a] -> [a]
486 sortWith get_key xs = sortBy (comparing get_key) xs
487
488 minWith :: Ord b => (a -> b) -> [a] -> a
489 minWith get_key xs = ASSERT( not (null xs) )
490 head (sortWith get_key xs)
491
492 nubSort :: Ord a => [a] -> [a]
493 nubSort = Set.toAscList . Set.fromList
494
495 {-
496 ************************************************************************
497 * *
498 \subsection[Utils-transitive-closure]{Transitive closure}
499 * *
500 ************************************************************************
501
502 This algorithm for transitive closure is straightforward, albeit quadratic.
503 -}
504
505 transitiveClosure :: (a -> [a]) -- Successor function
506 -> (a -> a -> Bool) -- Equality predicate
507 -> [a]
508 -> [a] -- The transitive closure
509
510 transitiveClosure succ eq xs
511 = go [] xs
512 where
513 go done [] = done
514 go done (x:xs) | x `is_in` done = go done xs
515 | otherwise = go (x:done) (succ x ++ xs)
516
517 _ `is_in` [] = False
518 x `is_in` (y:ys) | eq x y = True
519 | otherwise = x `is_in` ys
520
521 {-
522 ************************************************************************
523 * *
524 \subsection[Utils-accum]{Accumulating}
525 * *
526 ************************************************************************
527
528 A combination of foldl with zip. It works with equal length lists.
529 -}
530
531 foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc
532 foldl2 _ z [] [] = z
533 foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs
534 foldl2 _ _ _ _ = panic "Util: foldl2"
535
536 all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool
537 -- True if the lists are the same length, and
538 -- all corresponding elements satisfy the predicate
539 all2 _ [] [] = True
540 all2 p (x:xs) (y:ys) = p x y && all2 p xs ys
541 all2 _ _ _ = False
542
543 -- Count the number of times a predicate is true
544
545 count :: (a -> Bool) -> [a] -> Int
546 count _ [] = 0
547 count p (x:xs) | p x = 1 + count p xs
548 | otherwise = count p xs
549
550 {-
551 @splitAt@, @take@, and @drop@ but with length of another
552 list giving the break-off point:
553 -}
554
555 takeList :: [b] -> [a] -> [a]
556 takeList [] _ = []
557 takeList (_:xs) ls =
558 case ls of
559 [] -> []
560 (y:ys) -> y : takeList xs ys
561
562 dropList :: [b] -> [a] -> [a]
563 dropList [] xs = xs
564 dropList _ xs@[] = xs
565 dropList (_:xs) (_:ys) = dropList xs ys
566
567
568 splitAtList :: [b] -> [a] -> ([a], [a])
569 splitAtList [] xs = ([], xs)
570 splitAtList _ xs@[] = (xs, xs)
571 splitAtList (_:xs) (y:ys) = (y:ys', ys'')
572 where
573 (ys', ys'') = splitAtList xs ys
574
575 -- drop from the end of a list
576 dropTail :: Int -> [a] -> [a]
577 -- Specification: dropTail n = reverse . drop n . reverse
578 -- Better implemention due to Joachim Breitner
579 -- http://www.joachim-breitner.de/blog/archives/600-On-taking-the-last-n-elements-of-a-list.html
580 dropTail n xs
581 = go (drop n xs) xs
582 where
583 go (_:ys) (x:xs) = x : go ys xs
584 go _ _ = [] -- Stop when ys runs out
585 -- It'll always run out before xs does
586
587 -- dropWhile from the end of a list. This is similar to Data.List.dropWhileEnd,
588 -- but is lazy in the elements and strict in the spine. For reasonably short lists,
589 -- such as path names and typical lines of text, dropWhileEndLE is generally
590 -- faster than dropWhileEnd. Its advantage is magnified when the predicate is
591 -- expensive--using dropWhileEndLE isSpace to strip the space off a line of text
592 -- is generally much faster than using dropWhileEnd isSpace for that purpose.
593 -- Specification: dropWhileEndLE p = reverse . dropWhile p . reverse
594 -- Pay attention to the short-circuit (&&)! The order of its arguments is the only
595 -- difference between dropWhileEnd and dropWhileEndLE.
596 dropWhileEndLE :: (a -> Bool) -> [a] -> [a]
597 dropWhileEndLE p = foldr (\x r -> if null r && p x then [] else x:r) []
598
599 snocView :: [a] -> Maybe ([a],a)
600 -- Split off the last element
601 snocView [] = Nothing
602 snocView xs = go [] xs
603 where
604 -- Invariant: second arg is non-empty
605 go acc [x] = Just (reverse acc, x)
606 go acc (x:xs) = go (x:acc) xs
607 go _ [] = panic "Util: snocView"
608
609 split :: Char -> String -> [String]
610 split c s = case rest of
611 [] -> [chunk]
612 _:rest -> chunk : split c rest
613 where (chunk, rest) = break (==c) s
614
615 {-
616 ************************************************************************
617 * *
618 \subsection[Utils-comparison]{Comparisons}
619 * *
620 ************************************************************************
621 -}
622
623 isEqual :: Ordering -> Bool
624 -- Often used in (isEqual (a `compare` b))
625 isEqual GT = False
626 isEqual EQ = True
627 isEqual LT = False
628
629 thenCmp :: Ordering -> Ordering -> Ordering
630 {-# INLINE thenCmp #-}
631 thenCmp EQ ordering = ordering
632 thenCmp ordering _ = ordering
633
634 eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool
635 eqListBy _ [] [] = True
636 eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys
637 eqListBy _ _ _ = False
638
639 eqMaybeBy :: (a ->a->Bool) -> Maybe a -> Maybe a -> Bool
640 eqMaybeBy _ Nothing Nothing = True
641 eqMaybeBy eq (Just x) (Just y) = eq x y
642 eqMaybeBy _ _ _ = False
643
644 cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering
645 -- `cmpList' uses a user-specified comparer
646
647 cmpList _ [] [] = EQ
648 cmpList _ [] _ = LT
649 cmpList _ _ [] = GT
650 cmpList cmp (a:as) (b:bs)
651 = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx }
652
653 removeSpaces :: String -> String
654 removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace
655
656 {-
657 ************************************************************************
658 * *
659 \subsection{Edit distance}
660 * *
661 ************************************************************************
662 -}
663
664 -- | Find the "restricted" Damerau-Levenshtein edit distance between two strings.
665 -- See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
666 -- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing
667 -- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
668 -- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and
669 -- http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
670 restrictedDamerauLevenshteinDistance :: String -> String -> Int
671 restrictedDamerauLevenshteinDistance str1 str2
672 = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
673 where
674 m = length str1
675 n = length str2
676
677 restrictedDamerauLevenshteinDistanceWithLengths
678 :: Int -> Int -> String -> String -> Int
679 restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
680 | m <= n
681 = if n <= 32 -- n must be larger so this check is sufficient
682 then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
683 else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2
684
685 | otherwise
686 = if m <= 32 -- m must be larger so this check is sufficient
687 then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
688 else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1
689
690 restrictedDamerauLevenshteinDistance'
691 :: (Bits bv, Num bv) => bv -> Int -> Int -> String -> String -> Int
692 restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
693 | [] <- str1 = n
694 | otherwise = extractAnswer $
695 foldl' (restrictedDamerauLevenshteinDistanceWorker
696 (matchVectors str1) top_bit_mask vector_mask)
697 (0, 0, m_ones, 0, m) str2
698 where
699 m_ones@vector_mask = (2 ^ m) - 1
700 top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
701 extractAnswer (_, _, _, _, distance) = distance
702
703 restrictedDamerauLevenshteinDistanceWorker
704 :: (Bits bv, Num bv) => IM.IntMap bv -> bv -> bv
705 -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
706 restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask
707 (pm, d0, vp, vn, distance) char2
708 = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $
709 seq pm' $ seq d0' $ seq vp' $ seq vn' $
710 seq distance'' $ seq char2 $
711 (pm', d0', vp', vn', distance'')
712 where
713 pm' = IM.findWithDefault 0 (ord char2) str1_mvs
714
715 d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm)
716 .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
717 -- No need to mask the shiftL because of the restricted range of pm
718
719 hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
720 hn' = d0' .&. vp
721
722 hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
723 hn'_shift = (hn' `shiftL` 1) .&. vector_mask
724 vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
725 vn' = d0' .&. hp'_shift
726
727 distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
728 distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
729
730 sizedComplement :: Bits bv => bv -> bv -> bv
731 sizedComplement vector_mask vect = vector_mask `xor` vect
732
733 matchVectors :: (Bits bv, Num bv) => String -> IM.IntMap bv
734 matchVectors = snd . foldl' go (0 :: Int, IM.empty)
735 where
736 go (ix, im) char = let ix' = ix + 1
737 im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
738 in seq ix' $ seq im' $ (ix', im')
739
740 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
741 :: Word32 -> Int -> Int -> String -> String -> Int #-}
742 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
743 :: Integer -> Int -> Int -> String -> String -> Int #-}
744
745 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
746 :: IM.IntMap Word32 -> Word32 -> Word32
747 -> (Word32, Word32, Word32, Word32, Int)
748 -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
749 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
750 :: IM.IntMap Integer -> Integer -> Integer
751 -> (Integer, Integer, Integer, Integer, Int)
752 -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
753
754 {-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
755 {-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
756
757 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
758 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
759
760 fuzzyMatch :: String -> [String] -> [String]
761 fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals]
762
763 -- | Search for possible matches to the users input in the given list,
764 -- returning a small number of ranked results
765 fuzzyLookup :: String -> [(String,a)] -> [a]
766 fuzzyLookup user_entered possibilites
767 = map fst $ take mAX_RESULTS $ sortBy (comparing snd)
768 [ (poss_val, distance) | (poss_str, poss_val) <- possibilites
769 , let distance = restrictedDamerauLevenshteinDistance
770 poss_str user_entered
771 , distance <= fuzzy_threshold ]
772 where
773 -- Work out an approriate match threshold:
774 -- We report a candidate if its edit distance is <= the threshold,
775 -- The threshhold is set to about a quarter of the # of characters the user entered
776 -- Length Threshold
777 -- 1 0 -- Don't suggest *any* candidates
778 -- 2 1 -- for single-char identifiers
779 -- 3 1
780 -- 4 1
781 -- 5 1
782 -- 6 2
783 --
784 fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational)
785 mAX_RESULTS = 3
786
787 {-
788 ************************************************************************
789 * *
790 \subsection[Utils-pairs]{Pairs}
791 * *
792 ************************************************************************
793 -}
794
795 unzipWith :: (a -> b -> c) -> [(a, b)] -> [c]
796 unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs
797
798 seqList :: [a] -> b -> b
799 seqList [] b = b
800 seqList (x:xs) b = x `seq` seqList xs b
801
802 -- Global variables:
803
804 global :: a -> IORef a
805 global a = unsafePerformIO (newIORef a)
806
807 consIORef :: IORef [a] -> a -> IO ()
808 consIORef var x = do
809 atomicModifyIORef var (\xs -> (x:xs,()))
810
811 globalM :: IO a -> IORef a
812 globalM ma = unsafePerformIO (ma >>= newIORef)
813
814 -- Module names:
815
816 looksLikeModuleName :: String -> Bool
817 looksLikeModuleName [] = False
818 looksLikeModuleName (c:cs) = isUpper c && go cs
819 where go [] = True
820 go ('.':cs) = looksLikeModuleName cs
821 go (c:cs) = (isAlphaNum c || c == '_' || c == '\'') && go cs
822
823 {-
824 Akin to @Prelude.words@, but acts like the Bourne shell, treating
825 quoted strings as Haskell Strings, and also parses Haskell [String]
826 syntax.
827 -}
828
829 getCmd :: String -> Either String -- Error
830 (String, String) -- (Cmd, Rest)
831 getCmd s = case break isSpace $ dropWhile isSpace s of
832 ([], _) -> Left ("Couldn't find command in " ++ show s)
833 res -> Right res
834
835 toCmdArgs :: String -> Either String -- Error
836 (String, [String]) -- (Cmd, Args)
837 toCmdArgs s = case getCmd s of
838 Left err -> Left err
839 Right (cmd, s') -> case toArgs s' of
840 Left err -> Left err
841 Right args -> Right (cmd, args)
842
843 toArgs :: String -> Either String -- Error
844 [String] -- Args
845 toArgs str
846 = case dropWhile isSpace str of
847 s@('[':_) -> case reads s of
848 [(args, spaces)]
849 | all isSpace spaces ->
850 Right args
851 _ ->
852 Left ("Couldn't read " ++ show str ++ "as [String]")
853 s -> toArgs' s
854 where
855 toArgs' s = case dropWhile isSpace s of
856 [] -> Right []
857 ('"' : _) -> case reads s of
858 [(arg, rest)]
859 -- rest must either be [] or start with a space
860 | all isSpace (take 1 rest) ->
861 case toArgs' rest of
862 Left err -> Left err
863 Right args -> Right (arg : args)
864 _ ->
865 Left ("Couldn't read " ++ show s ++ "as String")
866 s' -> case break isSpace s' of
867 (arg, s'') -> case toArgs' s'' of
868 Left err -> Left err
869 Right args -> Right (arg : args)
870
871 {-
872 -- -----------------------------------------------------------------------------
873 -- Floats
874 -}
875
876 readRational__ :: ReadS Rational -- NB: doesn't handle leading "-"
877 readRational__ r = do
878 (n,d,s) <- readFix r
879 (k,t) <- readExp s
880 return ((n%1)*10^^(k-d), t)
881 where
882 readFix r = do
883 (ds,s) <- lexDecDigits r
884 (ds',t) <- lexDotDigits s
885 return (read (ds++ds'), length ds', t)
886
887 readExp (e:s) | e `elem` "eE" = readExp' s
888 readExp s = return (0,s)
889
890 readExp' ('+':s) = readDec s
891 readExp' ('-':s) = do (k,t) <- readDec s
892 return (-k,t)
893 readExp' s = readDec s
894
895 readDec s = do
896 (ds,r) <- nonnull isDigit s
897 return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
898 r)
899
900 lexDecDigits = nonnull isDigit
901
902 lexDotDigits ('.':s) = return (span isDigit s)
903 lexDotDigits s = return ("",s)
904
905 nonnull p s = do (cs@(_:_),t) <- return (span p s)
906 return (cs,t)
907
908 readRational :: String -> Rational -- NB: *does* handle a leading "-"
909 readRational top_s
910 = case top_s of
911 '-' : xs -> - (read_me xs)
912 xs -> read_me xs
913 where
914 read_me s
915 = case (do { (x,"") <- readRational__ s ; return x }) of
916 [x] -> x
917 [] -> error ("readRational: no parse:" ++ top_s)
918 _ -> error ("readRational: ambiguous parse:" ++ top_s)
919
920
921 -----------------------------------------------------------------------------
922 -- read helpers
923
924 maybeRead :: Read a => String -> Maybe a
925 maybeRead str = case reads str of
926 [(x, "")] -> Just x
927 _ -> Nothing
928
929 maybeReadFuzzy :: Read a => String -> Maybe a
930 maybeReadFuzzy str = case reads str of
931 [(x, s)]
932 | all isSpace s ->
933 Just x
934 _ ->
935 Nothing
936
937 -----------------------------------------------------------------------------
938 -- Verify that the 'dirname' portion of a FilePath exists.
939 --
940 doesDirNameExist :: FilePath -> IO Bool
941 doesDirNameExist fpath = doesDirectoryExist (takeDirectory fpath)
942
943 -----------------------------------------------------------------------------
944 -- Backwards compatibility definition of getModificationTime
945
946 getModificationUTCTime :: FilePath -> IO UTCTime
947 getModificationUTCTime = getModificationTime
948
949 -- --------------------------------------------------------------
950 -- check existence & modification time at the same time
951
952 modificationTimeIfExists :: FilePath -> IO (Maybe UTCTime)
953 modificationTimeIfExists f = do
954 (do t <- getModificationUTCTime f; return (Just t))
955 `catchIO` \e -> if isDoesNotExistError e
956 then return Nothing
957 else ioError e
958
959 -- split a string at the last character where 'pred' is True,
960 -- returning a pair of strings. The first component holds the string
961 -- up (but not including) the last character for which 'pred' returned
962 -- True, the second whatever comes after (but also not including the
963 -- last character).
964 --
965 -- If 'pred' returns False for all characters in the string, the original
966 -- string is returned in the first component (and the second one is just
967 -- empty).
968 splitLongestPrefix :: String -> (Char -> Bool) -> (String,String)
969 splitLongestPrefix str pred
970 | null r_pre = (str, [])
971 | otherwise = (reverse (tail r_pre), reverse r_suf)
972 -- 'tail' drops the char satisfying 'pred'
973 where (r_suf, r_pre) = break pred (reverse str)
974
975 escapeSpaces :: String -> String
976 escapeSpaces = foldr (\c s -> if isSpace c then '\\':c:s else c:s) ""
977
978 type Suffix = String
979
980 --------------------------------------------------------------
981 -- * Search path
982 --------------------------------------------------------------
983
984 data Direction = Forwards | Backwards
985
986 reslash :: Direction -> FilePath -> FilePath
987 reslash d = f
988 where f ('/' : xs) = slash : f xs
989 f ('\\' : xs) = slash : f xs
990 f (x : xs) = x : f xs
991 f "" = ""
992 slash = case d of
993 Forwards -> '/'
994 Backwards -> '\\'
995
996 makeRelativeTo :: FilePath -> FilePath -> FilePath
997 this `makeRelativeTo` that = directory </> thisFilename
998 where (thisDirectory, thisFilename) = splitFileName this
999 thatDirectory = dropFileName that
1000 directory = joinPath $ f (splitPath thisDirectory)
1001 (splitPath thatDirectory)
1002
1003 f (x : xs) (y : ys)
1004 | x == y = f xs ys
1005 f xs ys = replicate (length ys) ".." ++ xs
1006
1007 {-
1008 ************************************************************************
1009 * *
1010 \subsection[Utils-Data]{Utils for defining Data instances}
1011 * *
1012 ************************************************************************
1013
1014 These functions helps us to define Data instances for abstract types.
1015 -}
1016
1017 abstractConstr :: String -> Constr
1018 abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix
1019
1020 abstractDataType :: String -> DataType
1021 abstractDataType n = mkDataType n [abstractConstr n]
1022
1023 {-
1024 ************************************************************************
1025 * *
1026 \subsection[Utils-C]{Utils for printing C code}
1027 * *
1028 ************************************************************************
1029 -}
1030
1031 charToC :: Word8 -> String
1032 charToC w =
1033 case chr (fromIntegral w) of
1034 '\"' -> "\\\""
1035 '\'' -> "\\\'"
1036 '\\' -> "\\\\"
1037 c | c >= ' ' && c <= '~' -> [c]
1038 | otherwise -> ['\\',
1039 chr (ord '0' + ord c `div` 64),
1040 chr (ord '0' + ord c `div` 8 `mod` 8),
1041 chr (ord '0' + ord c `mod` 8)]
1042
1043 {-
1044 ************************************************************************
1045 * *
1046 \subsection[Utils-Hashing]{Utils for hashing}
1047 * *
1048 ************************************************************************
1049 -}
1050
1051 -- | A sample hash function for Strings. We keep multiplying by the
1052 -- golden ratio and adding. The implementation is:
1053 --
1054 -- > hashString = foldl' f golden
1055 -- > where f m c = fromIntegral (ord c) * magic + hashInt32 m
1056 -- > magic = 0xdeadbeef
1057 --
1058 -- Where hashInt32 works just as hashInt shown above.
1059 --
1060 -- Knuth argues that repeated multiplication by the golden ratio
1061 -- will minimize gaps in the hash space, and thus it's a good choice
1062 -- for combining together multiple keys to form one.
1063 --
1064 -- Here we know that individual characters c are often small, and this
1065 -- produces frequent collisions if we use ord c alone. A
1066 -- particular problem are the shorter low ASCII and ISO-8859-1
1067 -- character strings. We pre-multiply by a magic twiddle factor to
1068 -- obtain a good distribution. In fact, given the following test:
1069 --
1070 -- > testp :: Int32 -> Int
1071 -- > testp k = (n - ) . length . group . sort . map hs . take n $ ls
1072 -- > where ls = [] : [c : l | l <- ls, c <- ['\0'..'\xff']]
1073 -- > hs = foldl' f golden
1074 -- > f m c = fromIntegral (ord c) * k + hashInt32 m
1075 -- > n = 100000
1076 --
1077 -- We discover that testp magic = 0.
1078 hashString :: String -> Int32
1079 hashString = foldl' f golden
1080 where f m c = fromIntegral (ord c) * magic + hashInt32 m
1081 magic = fromIntegral (0xdeadbeef :: Word32)
1082
1083 golden :: Int32
1084 golden = 1013904242 -- = round ((sqrt 5 - 1) * 2^32) :: Int32
1085 -- was -1640531527 = round ((sqrt 5 - 1) * 2^31) :: Int32
1086 -- but that has bad mulHi properties (even adding 2^32 to get its inverse)
1087 -- Whereas the above works well and contains no hash duplications for
1088 -- [-32767..65536]
1089
1090 -- | A sample (and useful) hash function for Int32,
1091 -- implemented by extracting the uppermost 32 bits of the 64-bit
1092 -- result of multiplying by a 33-bit constant. The constant is from
1093 -- Knuth, derived from the golden ratio:
1094 --
1095 -- > golden = round ((sqrt 5 - 1) * 2^32)
1096 --
1097 -- We get good key uniqueness on small inputs
1098 -- (a problem with previous versions):
1099 -- (length $ group $ sort $ map hashInt32 [-32767..65536]) == 65536 + 32768
1100 --
1101 hashInt32 :: Int32 -> Int32
1102 hashInt32 x = mulHi x golden + x
1103
1104 -- hi 32 bits of a x-bit * 32 bit -> 64-bit multiply
1105 mulHi :: Int32 -> Int32 -> Int32
1106 mulHi a b = fromIntegral (r `shiftR` 32)
1107 where r :: Int64
1108 r = fromIntegral a * fromIntegral b