ddcfe1117bb7a876402657bf44518b4e177c4eef
[ghc.git] / compiler / utils / Util.hs
1 -- (c) The University of Glasgow 2006
2
3 {-# LANGUAGE CPP #-}
4
5 -- | Highly random utility functions
6 --
7 module Util (
8 -- * Flags dependent on the compiler build
9 ghciSupported, debugIsOn, ncgDebugIsOn,
10 ghciTablesNextToCode,
11 isWindowsHost, isDarwinHost,
12
13 -- * General list processing
14 zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
15 zipLazy, stretchZipWith, zipWithAndUnzip,
16
17 filterByList,
18
19 unzipWith,
20
21 mapFst, mapSnd, chkAppend,
22 mapAndUnzip, mapAndUnzip3, mapAccumL2,
23 nOfThem, filterOut, partitionWith, splitEithers,
24
25 dropWhileEndLE,
26
27 foldl1', foldl2, count, all2,
28
29 lengthExceeds, lengthIs, lengthAtLeast,
30 listLengthCmp, atLength,
31 equalLength, compareLength, leLength,
32
33 isSingleton, only, singleton,
34 notNull, snocView,
35
36 isIn, isn'tIn,
37
38 -- * Tuples
39 fstOf3, sndOf3, thirdOf3,
40 firstM, first3M,
41 third3,
42 uncurry3,
43
44 -- * List operations controlled by another list
45 takeList, dropList, splitAtList, split,
46 dropTail,
47
48 -- * For loop
49 nTimes,
50
51 -- * Sorting
52 sortWith, minWith, nubSort,
53
54 -- * Comparisons
55 isEqual, eqListBy, eqMaybeBy,
56 thenCmp, cmpList,
57 removeSpaces,
58
59 -- * Edit distance
60 fuzzyMatch, fuzzyLookup,
61
62 -- * Transitive closures
63 transitiveClosure,
64
65 -- * Strictness
66 seqList,
67
68 -- * Module names
69 looksLikeModuleName,
70
71 -- * Argument processing
72 getCmd, toCmdArgs, toArgs,
73
74 -- * Floating point
75 readRational,
76
77 -- * read helpers
78 maybeRead, maybeReadFuzzy,
79
80 -- * IO-ish utilities
81 doesDirNameExist,
82 getModificationUTCTime,
83 modificationTimeIfExists,
84
85 global, consIORef, globalM,
86
87 -- * Filenames and paths
88 Suffix,
89 splitLongestPrefix,
90 escapeSpaces,
91 Direction(..), reslash,
92 makeRelativeTo,
93
94 -- * Utils for defining Data instances
95 abstractConstr, abstractDataType, mkNoRepType,
96
97 -- * Utils for printing C code
98 charToC,
99
100 -- * Hashing
101 hashString,
102 ) where
103
104 #include "HsVersions.h"
105
106 import Exception
107 import Panic
108
109 import Data.Data
110 import Data.IORef ( IORef, newIORef, atomicModifyIORef' )
111 import System.IO.Unsafe ( unsafePerformIO )
112 import Data.List hiding (group)
113
114 #ifdef DEBUG
115 import FastTypes
116 #endif
117
118 import Control.Monad ( liftM )
119 import System.IO.Error as IO ( isDoesNotExistError )
120 import System.Directory ( doesDirectoryExist, getModificationTime )
121 import System.FilePath
122
123 import Data.Char ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit )
124 import Data.Int
125 import Data.Ratio ( (%) )
126 import Data.Ord ( comparing )
127 import Data.Bits
128 import Data.Word
129 import qualified Data.IntMap as IM
130 import qualified Data.Set as Set
131
132 import Data.Time
133
134 infixr 9 `thenCmp`
135
136 {-
137 ************************************************************************
138 * *
139 \subsection{Is DEBUG on, are we on Windows, etc?}
140 * *
141 ************************************************************************
142
143 These booleans are global constants, set by CPP flags. They allow us to
144 recompile a single module (this one) to change whether or not debug output
145 appears. They sometimes let us avoid even running CPP elsewhere.
146
147 It's important that the flags are literal constants (True/False). Then,
148 with -0, tests of the flags in other modules will simplify to the correct
149 branch of the conditional, thereby dropping debug code altogether when
150 the flags are off.
151 -}
152
153 ghciSupported :: Bool
154 #ifdef GHCI
155 ghciSupported = True
156 #else
157 ghciSupported = False
158 #endif
159
160 debugIsOn :: Bool
161 #ifdef DEBUG
162 debugIsOn = True
163 #else
164 debugIsOn = False
165 #endif
166
167 ncgDebugIsOn :: Bool
168 #ifdef NCG_DEBUG
169 ncgDebugIsOn = True
170 #else
171 ncgDebugIsOn = False
172 #endif
173
174 ghciTablesNextToCode :: Bool
175 #ifdef GHCI_TABLES_NEXT_TO_CODE
176 ghciTablesNextToCode = True
177 #else
178 ghciTablesNextToCode = False
179 #endif
180
181 isWindowsHost :: Bool
182 #ifdef mingw32_HOST_OS
183 isWindowsHost = True
184 #else
185 isWindowsHost = False
186 #endif
187
188 isDarwinHost :: Bool
189 #ifdef darwin_HOST_OS
190 isDarwinHost = True
191 #else
192 isDarwinHost = False
193 #endif
194
195 {-
196 ************************************************************************
197 * *
198 \subsection{A for loop}
199 * *
200 ************************************************************************
201 -}
202
203 -- | Compose a function with itself n times. (nth rather than twice)
204 nTimes :: Int -> (a -> a) -> (a -> a)
205 nTimes 0 _ = id
206 nTimes 1 f = f
207 nTimes n f = f . nTimes (n-1) f
208
209 fstOf3 :: (a,b,c) -> a
210 sndOf3 :: (a,b,c) -> b
211 thirdOf3 :: (a,b,c) -> c
212 fstOf3 (a,_,_) = a
213 sndOf3 (_,b,_) = b
214 thirdOf3 (_,_,c) = c
215
216 third3 :: (c -> d) -> (a, b, c) -> (a, b, d)
217 third3 f (a, b, c) = (a, b, f c)
218
219 uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
220 uncurry3 f (a, b, c) = f a b c
221
222 firstM :: Monad m => (a -> m c) -> (a, b) -> m (c, b)
223 firstM f (x, y) = liftM (\x' -> (x', y)) (f x)
224
225 first3M :: Monad m => (a -> m d) -> (a, b, c) -> m (d, b, c)
226 first3M f (x, y, z) = liftM (\x' -> (x', y, z)) (f x)
227
228 {-
229 ************************************************************************
230 * *
231 \subsection[Utils-lists]{General list processing}
232 * *
233 ************************************************************************
234 -}
235
236 filterOut :: (a->Bool) -> [a] -> [a]
237 -- ^ Like filter, only it reverses the sense of the test
238 filterOut _ [] = []
239 filterOut p (x:xs) | p x = filterOut p xs
240 | otherwise = x : filterOut p xs
241
242 partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
243 -- ^ Uses a function to determine which of two output lists an input element should join
244 partitionWith _ [] = ([],[])
245 partitionWith f (x:xs) = case f x of
246 Left b -> (b:bs, cs)
247 Right c -> (bs, c:cs)
248 where (bs,cs) = partitionWith f xs
249
250 splitEithers :: [Either a b] -> ([a], [b])
251 -- ^ Teases a list of 'Either's apart into two lists
252 splitEithers [] = ([],[])
253 splitEithers (e : es) = case e of
254 Left x -> (x:xs, ys)
255 Right y -> (xs, y:ys)
256 where (xs,ys) = splitEithers es
257
258 chkAppend :: [a] -> [a] -> [a]
259 -- Checks for the second arguemnt being empty
260 -- Used in situations where that situation is common
261 chkAppend xs ys
262 | null ys = xs
263 | otherwise = xs ++ ys
264
265 {-
266 A paranoid @zip@ (and some @zipWith@ friends) that checks the lists
267 are of equal length. Alastair Reid thinks this should only happen if
268 DEBUGging on; hey, why not?
269 -}
270
271 zipEqual :: String -> [a] -> [b] -> [(a,b)]
272 zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c]
273 zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d]
274 zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e]
275
276 #ifndef DEBUG
277 zipEqual _ = zip
278 zipWithEqual _ = zipWith
279 zipWith3Equal _ = zipWith3
280 zipWith4Equal _ = zipWith4
281 #else
282 zipEqual _ [] [] = []
283 zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs
284 zipEqual msg _ _ = panic ("zipEqual: unequal lists:"++msg)
285
286 zipWithEqual msg z (a:as) (b:bs)= z a b : zipWithEqual msg z as bs
287 zipWithEqual _ _ [] [] = []
288 zipWithEqual msg _ _ _ = panic ("zipWithEqual: unequal lists:"++msg)
289
290 zipWith3Equal msg z (a:as) (b:bs) (c:cs)
291 = z a b c : zipWith3Equal msg z as bs cs
292 zipWith3Equal _ _ [] [] [] = []
293 zipWith3Equal msg _ _ _ _ = panic ("zipWith3Equal: unequal lists:"++msg)
294
295 zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds)
296 = z a b c d : zipWith4Equal msg z as bs cs ds
297 zipWith4Equal _ _ [] [] [] [] = []
298 zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg)
299 #endif
300
301 -- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~)
302 zipLazy :: [a] -> [b] -> [(a,b)]
303 zipLazy [] _ = []
304 zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys
305
306 -- | 'filterByList' takes a list of Bools and a list of some elements and
307 -- filters out these elements for which the corresponding value in the list of
308 -- Bools is False. This function does not check whether the lists have equal
309 -- length.
310 filterByList :: [Bool] -> [a] -> [a]
311 filterByList (True:bs) (x:xs) = x : filterByList bs xs
312 filterByList (False:bs) (_:xs) = filterByList bs xs
313 filterByList _ _ = []
314
315 stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
316 -- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in
317 -- the places where @p@ returns @True@
318
319 stretchZipWith _ _ _ [] _ = []
320 stretchZipWith p z f (x:xs) ys
321 | p x = f x z : stretchZipWith p z f xs ys
322 | otherwise = case ys of
323 [] -> []
324 (y:ys) -> f x y : stretchZipWith p z f xs ys
325
326 mapFst :: (a->c) -> [(a,b)] -> [(c,b)]
327 mapSnd :: (b->c) -> [(a,b)] -> [(a,c)]
328
329 mapFst f xys = [(f x, y) | (x,y) <- xys]
330 mapSnd f xys = [(x, f y) | (x,y) <- xys]
331
332 mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c])
333
334 mapAndUnzip _ [] = ([], [])
335 mapAndUnzip f (x:xs)
336 = let (r1, r2) = f x
337 (rs1, rs2) = mapAndUnzip f xs
338 in
339 (r1:rs1, r2:rs2)
340
341 mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d])
342
343 mapAndUnzip3 _ [] = ([], [], [])
344 mapAndUnzip3 f (x:xs)
345 = let (r1, r2, r3) = f x
346 (rs1, rs2, rs3) = mapAndUnzip3 f xs
347 in
348 (r1:rs1, r2:rs2, r3:rs3)
349
350 zipWithAndUnzip :: (a -> b -> (c,d)) -> [a] -> [b] -> ([c],[d])
351 zipWithAndUnzip f (a:as) (b:bs)
352 = let (r1, r2) = f a b
353 (rs1, rs2) = zipWithAndUnzip f as bs
354 in
355 (r1:rs1, r2:rs2)
356 zipWithAndUnzip _ _ _ = ([],[])
357
358 mapAccumL2 :: (s1 -> s2 -> a -> (s1, s2, b)) -> s1 -> s2 -> [a] -> (s1, s2, [b])
359 mapAccumL2 f s1 s2 xs = (s1', s2', ys)
360 where ((s1', s2'), ys) = mapAccumL (\(s1, s2) x -> case f s1 s2 x of
361 (s1', s2', y) -> ((s1', s2'), y))
362 (s1, s2) xs
363
364 nOfThem :: Int -> a -> [a]
365 nOfThem n thing = replicate n thing
366
367 -- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely:
368 --
369 -- @
370 -- atLength atLenPred atEndPred ls n
371 -- | n < 0 = atLenPred n
372 -- | length ls < n = atEndPred (n - length ls)
373 -- | otherwise = atLenPred (drop n ls)
374 -- @
375 atLength :: ([a] -> b)
376 -> (Int -> b)
377 -> [a]
378 -> Int
379 -> b
380 atLength atLenPred atEndPred ls n
381 | n < 0 = atEndPred n
382 | otherwise = go n ls
383 where
384 go n [] = atEndPred n
385 go 0 ls = atLenPred ls
386 go n (_:xs) = go (n-1) xs
387
388 -- Some special cases of atLength:
389
390 lengthExceeds :: [a] -> Int -> Bool
391 -- ^ > (lengthExceeds xs n) = (length xs > n)
392 lengthExceeds = atLength notNull (const False)
393
394 lengthAtLeast :: [a] -> Int -> Bool
395 lengthAtLeast = atLength notNull (== 0)
396
397 lengthIs :: [a] -> Int -> Bool
398 lengthIs = atLength null (==0)
399
400 listLengthCmp :: [a] -> Int -> Ordering
401 listLengthCmp = atLength atLen atEnd
402 where
403 atEnd 0 = EQ
404 atEnd x
405 | x > 0 = LT -- not yet seen 'n' elts, so list length is < n.
406 | otherwise = GT
407
408 atLen [] = EQ
409 atLen _ = GT
410
411 equalLength :: [a] -> [b] -> Bool
412 equalLength [] [] = True
413 equalLength (_:xs) (_:ys) = equalLength xs ys
414 equalLength _ _ = False
415
416 compareLength :: [a] -> [b] -> Ordering
417 compareLength [] [] = EQ
418 compareLength (_:xs) (_:ys) = compareLength xs ys
419 compareLength [] _ = LT
420 compareLength _ [] = GT
421
422 leLength :: [a] -> [b] -> Bool
423 -- ^ True if length xs <= length ys
424 leLength xs ys = case compareLength xs ys of
425 LT -> True
426 EQ -> True
427 GT -> False
428
429 ----------------------------
430 singleton :: a -> [a]
431 singleton x = [x]
432
433 isSingleton :: [a] -> Bool
434 isSingleton [_] = True
435 isSingleton _ = False
436
437 notNull :: [a] -> Bool
438 notNull [] = False
439 notNull _ = True
440
441 only :: [a] -> a
442 #ifdef DEBUG
443 only [a] = a
444 #else
445 only (a:_) = a
446 #endif
447 only _ = panic "Util: only"
448
449 -- Debugging/specialising versions of \tr{elem} and \tr{notElem}
450
451 isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
452
453 # ifndef DEBUG
454 isIn _msg x ys = x `elem` ys
455 isn'tIn _msg x ys = x `notElem` ys
456
457 # else /* DEBUG */
458 isIn msg x ys
459 = elem100 (_ILIT(0)) x ys
460 where
461 elem100 _ _ [] = False
462 elem100 i x (y:ys)
463 | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg)
464 (x `elem` (y:ys))
465 | otherwise = x == y || elem100 (i +# _ILIT(1)) x ys
466
467 isn'tIn msg x ys
468 = notElem100 (_ILIT(0)) x ys
469 where
470 notElem100 _ _ [] = True
471 notElem100 i x (y:ys)
472 | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg)
473 (x `notElem` (y:ys))
474 | otherwise = x /= y && notElem100 (i +# _ILIT(1)) x ys
475 # endif /* DEBUG */
476
477 {-
478 ************************************************************************
479 * *
480 \subsubsection{Sort utils}
481 * *
482 ************************************************************************
483 -}
484
485 sortWith :: Ord b => (a->b) -> [a] -> [a]
486 sortWith get_key xs = sortBy (comparing get_key) xs
487
488 minWith :: Ord b => (a -> b) -> [a] -> a
489 minWith get_key xs = ASSERT( not (null xs) )
490 head (sortWith get_key xs)
491
492 nubSort :: Ord a => [a] -> [a]
493 nubSort = Set.toAscList . Set.fromList
494
495 {-
496 ************************************************************************
497 * *
498 \subsection[Utils-transitive-closure]{Transitive closure}
499 * *
500 ************************************************************************
501
502 This algorithm for transitive closure is straightforward, albeit quadratic.
503 -}
504
505 transitiveClosure :: (a -> [a]) -- Successor function
506 -> (a -> a -> Bool) -- Equality predicate
507 -> [a]
508 -> [a] -- The transitive closure
509
510 transitiveClosure succ eq xs
511 = go [] xs
512 where
513 go done [] = done
514 go done (x:xs) | x `is_in` done = go done xs
515 | otherwise = go (x:done) (succ x ++ xs)
516
517 _ `is_in` [] = False
518 x `is_in` (y:ys) | eq x y = True
519 | otherwise = x `is_in` ys
520
521 {-
522 ************************************************************************
523 * *
524 \subsection[Utils-accum]{Accumulating}
525 * *
526 ************************************************************************
527
528 A combination of foldl with zip. It works with equal length lists.
529 -}
530
531 foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc
532 foldl2 _ z [] [] = z
533 foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs
534 foldl2 _ _ _ _ = panic "Util: foldl2"
535
536 all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool
537 -- True if the lists are the same length, and
538 -- all corresponding elements satisfy the predicate
539 all2 _ [] [] = True
540 all2 p (x:xs) (y:ys) = p x y && all2 p xs ys
541 all2 _ _ _ = False
542
543 -- Count the number of times a predicate is true
544
545 count :: (a -> Bool) -> [a] -> Int
546 count _ [] = 0
547 count p (x:xs) | p x = 1 + count p xs
548 | otherwise = count p xs
549
550 {-
551 @splitAt@, @take@, and @drop@ but with length of another
552 list giving the break-off point:
553 -}
554
555 takeList :: [b] -> [a] -> [a]
556 -- (takeList as bs) trims bs to the be same length
557 -- as as, unless as is longer in which case it's a no-op
558 takeList [] _ = []
559 takeList (_:xs) ls =
560 case ls of
561 [] -> []
562 (y:ys) -> y : takeList xs ys
563
564 dropList :: [b] -> [a] -> [a]
565 dropList [] xs = xs
566 dropList _ xs@[] = xs
567 dropList (_:xs) (_:ys) = dropList xs ys
568
569
570 splitAtList :: [b] -> [a] -> ([a], [a])
571 splitAtList [] xs = ([], xs)
572 splitAtList _ xs@[] = (xs, xs)
573 splitAtList (_:xs) (y:ys) = (y:ys', ys'')
574 where
575 (ys', ys'') = splitAtList xs ys
576
577 -- drop from the end of a list
578 dropTail :: Int -> [a] -> [a]
579 -- Specification: dropTail n = reverse . drop n . reverse
580 -- Better implemention due to Joachim Breitner
581 -- http://www.joachim-breitner.de/blog/archives/600-On-taking-the-last-n-elements-of-a-list.html
582 dropTail n xs
583 = go (drop n xs) xs
584 where
585 go (_:ys) (x:xs) = x : go ys xs
586 go _ _ = [] -- Stop when ys runs out
587 -- It'll always run out before xs does
588
589 -- dropWhile from the end of a list. This is similar to Data.List.dropWhileEnd,
590 -- but is lazy in the elements and strict in the spine. For reasonably short lists,
591 -- such as path names and typical lines of text, dropWhileEndLE is generally
592 -- faster than dropWhileEnd. Its advantage is magnified when the predicate is
593 -- expensive--using dropWhileEndLE isSpace to strip the space off a line of text
594 -- is generally much faster than using dropWhileEnd isSpace for that purpose.
595 -- Specification: dropWhileEndLE p = reverse . dropWhile p . reverse
596 -- Pay attention to the short-circuit (&&)! The order of its arguments is the only
597 -- difference between dropWhileEnd and dropWhileEndLE.
598 dropWhileEndLE :: (a -> Bool) -> [a] -> [a]
599 dropWhileEndLE p = foldr (\x r -> if null r && p x then [] else x:r) []
600
601 snocView :: [a] -> Maybe ([a],a)
602 -- Split off the last element
603 snocView [] = Nothing
604 snocView xs = go [] xs
605 where
606 -- Invariant: second arg is non-empty
607 go acc [x] = Just (reverse acc, x)
608 go acc (x:xs) = go (x:acc) xs
609 go _ [] = panic "Util: snocView"
610
611 split :: Char -> String -> [String]
612 split c s = case rest of
613 [] -> [chunk]
614 _:rest -> chunk : split c rest
615 where (chunk, rest) = break (==c) s
616
617 {-
618 ************************************************************************
619 * *
620 \subsection[Utils-comparison]{Comparisons}
621 * *
622 ************************************************************************
623 -}
624
625 isEqual :: Ordering -> Bool
626 -- Often used in (isEqual (a `compare` b))
627 isEqual GT = False
628 isEqual EQ = True
629 isEqual LT = False
630
631 thenCmp :: Ordering -> Ordering -> Ordering
632 {-# INLINE thenCmp #-}
633 thenCmp EQ ordering = ordering
634 thenCmp ordering _ = ordering
635
636 eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool
637 eqListBy _ [] [] = True
638 eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys
639 eqListBy _ _ _ = False
640
641 eqMaybeBy :: (a ->a->Bool) -> Maybe a -> Maybe a -> Bool
642 eqMaybeBy _ Nothing Nothing = True
643 eqMaybeBy eq (Just x) (Just y) = eq x y
644 eqMaybeBy _ _ _ = False
645
646 cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering
647 -- `cmpList' uses a user-specified comparer
648
649 cmpList _ [] [] = EQ
650 cmpList _ [] _ = LT
651 cmpList _ _ [] = GT
652 cmpList cmp (a:as) (b:bs)
653 = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx }
654
655 removeSpaces :: String -> String
656 removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace
657
658 {-
659 ************************************************************************
660 * *
661 \subsection{Edit distance}
662 * *
663 ************************************************************************
664 -}
665
666 -- | Find the "restricted" Damerau-Levenshtein edit distance between two strings.
667 -- See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
668 -- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing
669 -- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
670 -- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and
671 -- http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
672 restrictedDamerauLevenshteinDistance :: String -> String -> Int
673 restrictedDamerauLevenshteinDistance str1 str2
674 = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
675 where
676 m = length str1
677 n = length str2
678
679 restrictedDamerauLevenshteinDistanceWithLengths
680 :: Int -> Int -> String -> String -> Int
681 restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
682 | m <= n
683 = if n <= 32 -- n must be larger so this check is sufficient
684 then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
685 else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2
686
687 | otherwise
688 = if m <= 32 -- m must be larger so this check is sufficient
689 then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
690 else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1
691
692 restrictedDamerauLevenshteinDistance'
693 :: (Bits bv, Num bv) => bv -> Int -> Int -> String -> String -> Int
694 restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
695 | [] <- str1 = n
696 | otherwise = extractAnswer $
697 foldl' (restrictedDamerauLevenshteinDistanceWorker
698 (matchVectors str1) top_bit_mask vector_mask)
699 (0, 0, m_ones, 0, m) str2
700 where
701 m_ones@vector_mask = (2 ^ m) - 1
702 top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
703 extractAnswer (_, _, _, _, distance) = distance
704
705 restrictedDamerauLevenshteinDistanceWorker
706 :: (Bits bv, Num bv) => IM.IntMap bv -> bv -> bv
707 -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
708 restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask
709 (pm, d0, vp, vn, distance) char2
710 = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $
711 seq pm' $ seq d0' $ seq vp' $ seq vn' $
712 seq distance'' $ seq char2 $
713 (pm', d0', vp', vn', distance'')
714 where
715 pm' = IM.findWithDefault 0 (ord char2) str1_mvs
716
717 d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm)
718 .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
719 -- No need to mask the shiftL because of the restricted range of pm
720
721 hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
722 hn' = d0' .&. vp
723
724 hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
725 hn'_shift = (hn' `shiftL` 1) .&. vector_mask
726 vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
727 vn' = d0' .&. hp'_shift
728
729 distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
730 distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
731
732 sizedComplement :: Bits bv => bv -> bv -> bv
733 sizedComplement vector_mask vect = vector_mask `xor` vect
734
735 matchVectors :: (Bits bv, Num bv) => String -> IM.IntMap bv
736 matchVectors = snd . foldl' go (0 :: Int, IM.empty)
737 where
738 go (ix, im) char = let ix' = ix + 1
739 im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
740 in seq ix' $ seq im' $ (ix', im')
741
742 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
743 :: Word32 -> Int -> Int -> String -> String -> Int #-}
744 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
745 :: Integer -> Int -> Int -> String -> String -> Int #-}
746
747 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
748 :: IM.IntMap Word32 -> Word32 -> Word32
749 -> (Word32, Word32, Word32, Word32, Int)
750 -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
751 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
752 :: IM.IntMap Integer -> Integer -> Integer
753 -> (Integer, Integer, Integer, Integer, Int)
754 -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
755
756 {-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
757 {-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
758
759 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
760 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
761
762 fuzzyMatch :: String -> [String] -> [String]
763 fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals]
764
765 -- | Search for possible matches to the users input in the given list,
766 -- returning a small number of ranked results
767 fuzzyLookup :: String -> [(String,a)] -> [a]
768 fuzzyLookup user_entered possibilites
769 = map fst $ take mAX_RESULTS $ sortBy (comparing snd)
770 [ (poss_val, distance) | (poss_str, poss_val) <- possibilites
771 , let distance = restrictedDamerauLevenshteinDistance
772 poss_str user_entered
773 , distance <= fuzzy_threshold ]
774 where
775 -- Work out an approriate match threshold:
776 -- We report a candidate if its edit distance is <= the threshold,
777 -- The threshhold is set to about a quarter of the # of characters the user entered
778 -- Length Threshold
779 -- 1 0 -- Don't suggest *any* candidates
780 -- 2 1 -- for single-char identifiers
781 -- 3 1
782 -- 4 1
783 -- 5 1
784 -- 6 2
785 --
786 fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational)
787 mAX_RESULTS = 3
788
789 {-
790 ************************************************************************
791 * *
792 \subsection[Utils-pairs]{Pairs}
793 * *
794 ************************************************************************
795 -}
796
797 unzipWith :: (a -> b -> c) -> [(a, b)] -> [c]
798 unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs
799
800 seqList :: [a] -> b -> b
801 seqList [] b = b
802 seqList (x:xs) b = x `seq` seqList xs b
803
804 -- Global variables:
805
806 global :: a -> IORef a
807 global a = unsafePerformIO (newIORef a)
808
809 consIORef :: IORef [a] -> a -> IO ()
810 consIORef var x = do
811 atomicModifyIORef' var (\xs -> (x:xs,()))
812
813 globalM :: IO a -> IORef a
814 globalM ma = unsafePerformIO (ma >>= newIORef)
815
816 -- Module names:
817
818 looksLikeModuleName :: String -> Bool
819 looksLikeModuleName [] = False
820 looksLikeModuleName (c:cs) = isUpper c && go cs
821 where go [] = True
822 go ('.':cs) = looksLikeModuleName cs
823 go (c:cs) = (isAlphaNum c || c == '_' || c == '\'') && go cs
824
825 {-
826 Akin to @Prelude.words@, but acts like the Bourne shell, treating
827 quoted strings as Haskell Strings, and also parses Haskell [String]
828 syntax.
829 -}
830
831 getCmd :: String -> Either String -- Error
832 (String, String) -- (Cmd, Rest)
833 getCmd s = case break isSpace $ dropWhile isSpace s of
834 ([], _) -> Left ("Couldn't find command in " ++ show s)
835 res -> Right res
836
837 toCmdArgs :: String -> Either String -- Error
838 (String, [String]) -- (Cmd, Args)
839 toCmdArgs s = case getCmd s of
840 Left err -> Left err
841 Right (cmd, s') -> case toArgs s' of
842 Left err -> Left err
843 Right args -> Right (cmd, args)
844
845 toArgs :: String -> Either String -- Error
846 [String] -- Args
847 toArgs str
848 = case dropWhile isSpace str of
849 s@('[':_) -> case reads s of
850 [(args, spaces)]
851 | all isSpace spaces ->
852 Right args
853 _ ->
854 Left ("Couldn't read " ++ show str ++ "as [String]")
855 s -> toArgs' s
856 where
857 toArgs' s = case dropWhile isSpace s of
858 [] -> Right []
859 ('"' : _) -> case reads s of
860 [(arg, rest)]
861 -- rest must either be [] or start with a space
862 | all isSpace (take 1 rest) ->
863 case toArgs' rest of
864 Left err -> Left err
865 Right args -> Right (arg : args)
866 _ ->
867 Left ("Couldn't read " ++ show s ++ "as String")
868 s' -> case break isSpace s' of
869 (arg, s'') -> case toArgs' s'' of
870 Left err -> Left err
871 Right args -> Right (arg : args)
872
873 {-
874 -- -----------------------------------------------------------------------------
875 -- Floats
876 -}
877
878 readRational__ :: ReadS Rational -- NB: doesn't handle leading "-"
879 readRational__ r = do
880 (n,d,s) <- readFix r
881 (k,t) <- readExp s
882 return ((n%1)*10^^(k-d), t)
883 where
884 readFix r = do
885 (ds,s) <- lexDecDigits r
886 (ds',t) <- lexDotDigits s
887 return (read (ds++ds'), length ds', t)
888
889 readExp (e:s) | e `elem` "eE" = readExp' s
890 readExp s = return (0,s)
891
892 readExp' ('+':s) = readDec s
893 readExp' ('-':s) = do (k,t) <- readDec s
894 return (-k,t)
895 readExp' s = readDec s
896
897 readDec s = do
898 (ds,r) <- nonnull isDigit s
899 return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
900 r)
901
902 lexDecDigits = nonnull isDigit
903
904 lexDotDigits ('.':s) = return (span isDigit s)
905 lexDotDigits s = return ("",s)
906
907 nonnull p s = do (cs@(_:_),t) <- return (span p s)
908 return (cs,t)
909
910 readRational :: String -> Rational -- NB: *does* handle a leading "-"
911 readRational top_s
912 = case top_s of
913 '-' : xs -> - (read_me xs)
914 xs -> read_me xs
915 where
916 read_me s
917 = case (do { (x,"") <- readRational__ s ; return x }) of
918 [x] -> x
919 [] -> error ("readRational: no parse:" ++ top_s)
920 _ -> error ("readRational: ambiguous parse:" ++ top_s)
921
922
923 -----------------------------------------------------------------------------
924 -- read helpers
925
926 maybeRead :: Read a => String -> Maybe a
927 maybeRead str = case reads str of
928 [(x, "")] -> Just x
929 _ -> Nothing
930
931 maybeReadFuzzy :: Read a => String -> Maybe a
932 maybeReadFuzzy str = case reads str of
933 [(x, s)]
934 | all isSpace s ->
935 Just x
936 _ ->
937 Nothing
938
939 -----------------------------------------------------------------------------
940 -- Verify that the 'dirname' portion of a FilePath exists.
941 --
942 doesDirNameExist :: FilePath -> IO Bool
943 doesDirNameExist fpath = doesDirectoryExist (takeDirectory fpath)
944
945 -----------------------------------------------------------------------------
946 -- Backwards compatibility definition of getModificationTime
947
948 getModificationUTCTime :: FilePath -> IO UTCTime
949 getModificationUTCTime = getModificationTime
950
951 -- --------------------------------------------------------------
952 -- check existence & modification time at the same time
953
954 modificationTimeIfExists :: FilePath -> IO (Maybe UTCTime)
955 modificationTimeIfExists f = do
956 (do t <- getModificationUTCTime f; return (Just t))
957 `catchIO` \e -> if isDoesNotExistError e
958 then return Nothing
959 else ioError e
960
961 -- split a string at the last character where 'pred' is True,
962 -- returning a pair of strings. The first component holds the string
963 -- up (but not including) the last character for which 'pred' returned
964 -- True, the second whatever comes after (but also not including the
965 -- last character).
966 --
967 -- If 'pred' returns False for all characters in the string, the original
968 -- string is returned in the first component (and the second one is just
969 -- empty).
970 splitLongestPrefix :: String -> (Char -> Bool) -> (String,String)
971 splitLongestPrefix str pred
972 | null r_pre = (str, [])
973 | otherwise = (reverse (tail r_pre), reverse r_suf)
974 -- 'tail' drops the char satisfying 'pred'
975 where (r_suf, r_pre) = break pred (reverse str)
976
977 escapeSpaces :: String -> String
978 escapeSpaces = foldr (\c s -> if isSpace c then '\\':c:s else c:s) ""
979
980 type Suffix = String
981
982 --------------------------------------------------------------
983 -- * Search path
984 --------------------------------------------------------------
985
986 data Direction = Forwards | Backwards
987
988 reslash :: Direction -> FilePath -> FilePath
989 reslash d = f
990 where f ('/' : xs) = slash : f xs
991 f ('\\' : xs) = slash : f xs
992 f (x : xs) = x : f xs
993 f "" = ""
994 slash = case d of
995 Forwards -> '/'
996 Backwards -> '\\'
997
998 makeRelativeTo :: FilePath -> FilePath -> FilePath
999 this `makeRelativeTo` that = directory </> thisFilename
1000 where (thisDirectory, thisFilename) = splitFileName this
1001 thatDirectory = dropFileName that
1002 directory = joinPath $ f (splitPath thisDirectory)
1003 (splitPath thatDirectory)
1004
1005 f (x : xs) (y : ys)
1006 | x == y = f xs ys
1007 f xs ys = replicate (length ys) ".." ++ xs
1008
1009 {-
1010 ************************************************************************
1011 * *
1012 \subsection[Utils-Data]{Utils for defining Data instances}
1013 * *
1014 ************************************************************************
1015
1016 These functions helps us to define Data instances for abstract types.
1017 -}
1018
1019 abstractConstr :: String -> Constr
1020 abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix
1021
1022 abstractDataType :: String -> DataType
1023 abstractDataType n = mkDataType n [abstractConstr n]
1024
1025 {-
1026 ************************************************************************
1027 * *
1028 \subsection[Utils-C]{Utils for printing C code}
1029 * *
1030 ************************************************************************
1031 -}
1032
1033 charToC :: Word8 -> String
1034 charToC w =
1035 case chr (fromIntegral w) of
1036 '\"' -> "\\\""
1037 '\'' -> "\\\'"
1038 '\\' -> "\\\\"
1039 c | c >= ' ' && c <= '~' -> [c]
1040 | otherwise -> ['\\',
1041 chr (ord '0' + ord c `div` 64),
1042 chr (ord '0' + ord c `div` 8 `mod` 8),
1043 chr (ord '0' + ord c `mod` 8)]
1044
1045 {-
1046 ************************************************************************
1047 * *
1048 \subsection[Utils-Hashing]{Utils for hashing}
1049 * *
1050 ************************************************************************
1051 -}
1052
1053 -- | A sample hash function for Strings. We keep multiplying by the
1054 -- golden ratio and adding. The implementation is:
1055 --
1056 -- > hashString = foldl' f golden
1057 -- > where f m c = fromIntegral (ord c) * magic + hashInt32 m
1058 -- > magic = 0xdeadbeef
1059 --
1060 -- Where hashInt32 works just as hashInt shown above.
1061 --
1062 -- Knuth argues that repeated multiplication by the golden ratio
1063 -- will minimize gaps in the hash space, and thus it's a good choice
1064 -- for combining together multiple keys to form one.
1065 --
1066 -- Here we know that individual characters c are often small, and this
1067 -- produces frequent collisions if we use ord c alone. A
1068 -- particular problem are the shorter low ASCII and ISO-8859-1
1069 -- character strings. We pre-multiply by a magic twiddle factor to
1070 -- obtain a good distribution. In fact, given the following test:
1071 --
1072 -- > testp :: Int32 -> Int
1073 -- > testp k = (n - ) . length . group . sort . map hs . take n $ ls
1074 -- > where ls = [] : [c : l | l <- ls, c <- ['\0'..'\xff']]
1075 -- > hs = foldl' f golden
1076 -- > f m c = fromIntegral (ord c) * k + hashInt32 m
1077 -- > n = 100000
1078 --
1079 -- We discover that testp magic = 0.
1080 hashString :: String -> Int32
1081 hashString = foldl' f golden
1082 where f m c = fromIntegral (ord c) * magic + hashInt32 m
1083 magic = fromIntegral (0xdeadbeef :: Word32)
1084
1085 golden :: Int32
1086 golden = 1013904242 -- = round ((sqrt 5 - 1) * 2^32) :: Int32
1087 -- was -1640531527 = round ((sqrt 5 - 1) * 2^31) :: Int32
1088 -- but that has bad mulHi properties (even adding 2^32 to get its inverse)
1089 -- Whereas the above works well and contains no hash duplications for
1090 -- [-32767..65536]
1091
1092 -- | A sample (and useful) hash function for Int32,
1093 -- implemented by extracting the uppermost 32 bits of the 64-bit
1094 -- result of multiplying by a 33-bit constant. The constant is from
1095 -- Knuth, derived from the golden ratio:
1096 --
1097 -- > golden = round ((sqrt 5 - 1) * 2^32)
1098 --
1099 -- We get good key uniqueness on small inputs
1100 -- (a problem with previous versions):
1101 -- (length $ group $ sort $ map hashInt32 [-32767..65536]) == 65536 + 32768
1102 --
1103 hashInt32 :: Int32 -> Int32
1104 hashInt32 x = mulHi x golden + x
1105
1106 -- hi 32 bits of a x-bit * 32 bit -> 64-bit multiply
1107 mulHi :: Int32 -> Int32 -> Int32
1108 mulHi a b = fromIntegral (r `shiftR` 32)
1109 where r :: Int64
1110 r = fromIntegral a * fromIntegral b