Syntax check package-qualified imports (#9225)
[ghc.git] / compiler / utils / Util.hs
1 -- (c) The University of Glasgow 2006
2
3 {-# LANGUAGE CPP #-}
4
5 -- | Highly random utility functions
6 --
7 module Util (
8 -- * Flags dependent on the compiler build
9 ghciSupported, debugIsOn, ncgDebugIsOn,
10 ghciTablesNextToCode,
11 isWindowsHost, isDarwinHost,
12
13 -- * General list processing
14 zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
15 zipLazy, stretchZipWith, zipWithAndUnzip,
16
17 filterByList,
18
19 unzipWith,
20
21 mapFst, mapSnd, chkAppend,
22 mapAndUnzip, mapAndUnzip3, mapAccumL2,
23 nOfThem, filterOut, partitionWith, splitEithers,
24
25 dropWhileEndLE,
26
27 foldl1', foldl2, count, all2,
28
29 lengthExceeds, lengthIs, lengthAtLeast,
30 listLengthCmp, atLength,
31 equalLength, compareLength, leLength,
32
33 isSingleton, only, singleton,
34 notNull, snocView,
35
36 isIn, isn'tIn,
37
38 -- * Tuples
39 fstOf3, sndOf3, thirdOf3,
40 firstM, first3M,
41 third3,
42 uncurry3,
43
44 -- * List operations controlled by another list
45 takeList, dropList, splitAtList, split,
46 dropTail,
47
48 -- * For loop
49 nTimes,
50
51 -- * Sorting
52 sortWith, minWith, nubSort,
53
54 -- * Comparisons
55 isEqual, eqListBy, eqMaybeBy,
56 thenCmp, cmpList,
57 removeSpaces,
58
59 -- * Edit distance
60 fuzzyMatch, fuzzyLookup,
61
62 -- * Transitive closures
63 transitiveClosure,
64
65 -- * Strictness
66 seqList,
67
68 -- * Module names
69 looksLikeModuleName,
70 looksLikePackageName,
71
72 -- * Argument processing
73 getCmd, toCmdArgs, toArgs,
74
75 -- * Floating point
76 readRational,
77
78 -- * read helpers
79 maybeRead, maybeReadFuzzy,
80
81 -- * IO-ish utilities
82 doesDirNameExist,
83 getModificationUTCTime,
84 modificationTimeIfExists,
85
86 global, consIORef, globalM,
87
88 -- * Filenames and paths
89 Suffix,
90 splitLongestPrefix,
91 escapeSpaces,
92 Direction(..), reslash,
93 makeRelativeTo,
94
95 -- * Utils for defining Data instances
96 abstractConstr, abstractDataType, mkNoRepType,
97
98 -- * Utils for printing C code
99 charToC,
100
101 -- * Hashing
102 hashString,
103 ) where
104
105 #include "HsVersions.h"
106
107 import Exception
108 import Panic
109
110 import Data.Data
111 import Data.IORef ( IORef, newIORef, atomicModifyIORef' )
112 import System.IO.Unsafe ( unsafePerformIO )
113 import Data.List hiding (group)
114
115 #ifdef DEBUG
116 import FastTypes
117 #endif
118
119 #if __GLASGOW_HASKELL__ < 709
120 import Control.Applicative (Applicative)
121 #endif
122 import Control.Applicative ( liftA2 )
123 import Control.Monad ( liftM )
124 import System.IO.Error as IO ( isDoesNotExistError )
125 import System.Directory ( doesDirectoryExist, getModificationTime )
126 import System.FilePath
127
128 import Data.Char ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit )
129 import Data.Int
130 import Data.Ratio ( (%) )
131 import Data.Ord ( comparing )
132 import Data.Bits
133 import Data.Word
134 import qualified Data.IntMap as IM
135 import qualified Data.Set as Set
136
137 import Data.Time
138
139 infixr 9 `thenCmp`
140
141 {-
142 ************************************************************************
143 * *
144 \subsection{Is DEBUG on, are we on Windows, etc?}
145 * *
146 ************************************************************************
147
148 These booleans are global constants, set by CPP flags. They allow us to
149 recompile a single module (this one) to change whether or not debug output
150 appears. They sometimes let us avoid even running CPP elsewhere.
151
152 It's important that the flags are literal constants (True/False). Then,
153 with -0, tests of the flags in other modules will simplify to the correct
154 branch of the conditional, thereby dropping debug code altogether when
155 the flags are off.
156 -}
157
158 ghciSupported :: Bool
159 #ifdef GHCI
160 ghciSupported = True
161 #else
162 ghciSupported = False
163 #endif
164
165 debugIsOn :: Bool
166 #ifdef DEBUG
167 debugIsOn = True
168 #else
169 debugIsOn = False
170 #endif
171
172 ncgDebugIsOn :: Bool
173 #ifdef NCG_DEBUG
174 ncgDebugIsOn = True
175 #else
176 ncgDebugIsOn = False
177 #endif
178
179 ghciTablesNextToCode :: Bool
180 #ifdef GHCI_TABLES_NEXT_TO_CODE
181 ghciTablesNextToCode = True
182 #else
183 ghciTablesNextToCode = False
184 #endif
185
186 isWindowsHost :: Bool
187 #ifdef mingw32_HOST_OS
188 isWindowsHost = True
189 #else
190 isWindowsHost = False
191 #endif
192
193 isDarwinHost :: Bool
194 #ifdef darwin_HOST_OS
195 isDarwinHost = True
196 #else
197 isDarwinHost = False
198 #endif
199
200 {-
201 ************************************************************************
202 * *
203 \subsection{A for loop}
204 * *
205 ************************************************************************
206 -}
207
208 -- | Compose a function with itself n times. (nth rather than twice)
209 nTimes :: Int -> (a -> a) -> (a -> a)
210 nTimes 0 _ = id
211 nTimes 1 f = f
212 nTimes n f = f . nTimes (n-1) f
213
214 fstOf3 :: (a,b,c) -> a
215 sndOf3 :: (a,b,c) -> b
216 thirdOf3 :: (a,b,c) -> c
217 fstOf3 (a,_,_) = a
218 sndOf3 (_,b,_) = b
219 thirdOf3 (_,_,c) = c
220
221 third3 :: (c -> d) -> (a, b, c) -> (a, b, d)
222 third3 f (a, b, c) = (a, b, f c)
223
224 uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
225 uncurry3 f (a, b, c) = f a b c
226
227 firstM :: Monad m => (a -> m c) -> (a, b) -> m (c, b)
228 firstM f (x, y) = liftM (\x' -> (x', y)) (f x)
229
230 first3M :: Monad m => (a -> m d) -> (a, b, c) -> m (d, b, c)
231 first3M f (x, y, z) = liftM (\x' -> (x', y, z)) (f x)
232
233 {-
234 ************************************************************************
235 * *
236 \subsection[Utils-lists]{General list processing}
237 * *
238 ************************************************************************
239 -}
240
241 filterOut :: (a->Bool) -> [a] -> [a]
242 -- ^ Like filter, only it reverses the sense of the test
243 filterOut _ [] = []
244 filterOut p (x:xs) | p x = filterOut p xs
245 | otherwise = x : filterOut p xs
246
247 partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
248 -- ^ Uses a function to determine which of two output lists an input element should join
249 partitionWith _ [] = ([],[])
250 partitionWith f (x:xs) = case f x of
251 Left b -> (b:bs, cs)
252 Right c -> (bs, c:cs)
253 where (bs,cs) = partitionWith f xs
254
255 splitEithers :: [Either a b] -> ([a], [b])
256 -- ^ Teases a list of 'Either's apart into two lists
257 splitEithers [] = ([],[])
258 splitEithers (e : es) = case e of
259 Left x -> (x:xs, ys)
260 Right y -> (xs, y:ys)
261 where (xs,ys) = splitEithers es
262
263 chkAppend :: [a] -> [a] -> [a]
264 -- Checks for the second arguemnt being empty
265 -- Used in situations where that situation is common
266 chkAppend xs ys
267 | null ys = xs
268 | otherwise = xs ++ ys
269
270 {-
271 A paranoid @zip@ (and some @zipWith@ friends) that checks the lists
272 are of equal length. Alastair Reid thinks this should only happen if
273 DEBUGging on; hey, why not?
274 -}
275
276 zipEqual :: String -> [a] -> [b] -> [(a,b)]
277 zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c]
278 zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d]
279 zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e]
280
281 #ifndef DEBUG
282 zipEqual _ = zip
283 zipWithEqual _ = zipWith
284 zipWith3Equal _ = zipWith3
285 zipWith4Equal _ = zipWith4
286 #else
287 zipEqual _ [] [] = []
288 zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs
289 zipEqual msg _ _ = panic ("zipEqual: unequal lists:"++msg)
290
291 zipWithEqual msg z (a:as) (b:bs)= z a b : zipWithEqual msg z as bs
292 zipWithEqual _ _ [] [] = []
293 zipWithEqual msg _ _ _ = panic ("zipWithEqual: unequal lists:"++msg)
294
295 zipWith3Equal msg z (a:as) (b:bs) (c:cs)
296 = z a b c : zipWith3Equal msg z as bs cs
297 zipWith3Equal _ _ [] [] [] = []
298 zipWith3Equal msg _ _ _ _ = panic ("zipWith3Equal: unequal lists:"++msg)
299
300 zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds)
301 = z a b c d : zipWith4Equal msg z as bs cs ds
302 zipWith4Equal _ _ [] [] [] [] = []
303 zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg)
304 #endif
305
306 -- | 'zipLazy' is a kind of 'zip' that is lazy in the second list (observe the ~)
307 zipLazy :: [a] -> [b] -> [(a,b)]
308 zipLazy [] _ = []
309 zipLazy (x:xs) ~(y:ys) = (x,y) : zipLazy xs ys
310
311 -- | 'filterByList' takes a list of Bools and a list of some elements and
312 -- filters out these elements for which the corresponding value in the list of
313 -- Bools is False. This function does not check whether the lists have equal
314 -- length.
315 filterByList :: [Bool] -> [a] -> [a]
316 filterByList (True:bs) (x:xs) = x : filterByList bs xs
317 filterByList (False:bs) (_:xs) = filterByList bs xs
318 filterByList _ _ = []
319
320 stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
321 -- ^ @stretchZipWith p z f xs ys@ stretches @ys@ by inserting @z@ in
322 -- the places where @p@ returns @True@
323
324 stretchZipWith _ _ _ [] _ = []
325 stretchZipWith p z f (x:xs) ys
326 | p x = f x z : stretchZipWith p z f xs ys
327 | otherwise = case ys of
328 [] -> []
329 (y:ys) -> f x y : stretchZipWith p z f xs ys
330
331 mapFst :: (a->c) -> [(a,b)] -> [(c,b)]
332 mapSnd :: (b->c) -> [(a,b)] -> [(a,c)]
333
334 mapFst f xys = [(f x, y) | (x,y) <- xys]
335 mapSnd f xys = [(x, f y) | (x,y) <- xys]
336
337 mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c])
338
339 mapAndUnzip _ [] = ([], [])
340 mapAndUnzip f (x:xs)
341 = let (r1, r2) = f x
342 (rs1, rs2) = mapAndUnzip f xs
343 in
344 (r1:rs1, r2:rs2)
345
346 mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d])
347
348 mapAndUnzip3 _ [] = ([], [], [])
349 mapAndUnzip3 f (x:xs)
350 = let (r1, r2, r3) = f x
351 (rs1, rs2, rs3) = mapAndUnzip3 f xs
352 in
353 (r1:rs1, r2:rs2, r3:rs3)
354
355 zipWithAndUnzip :: (a -> b -> (c,d)) -> [a] -> [b] -> ([c],[d])
356 zipWithAndUnzip f (a:as) (b:bs)
357 = let (r1, r2) = f a b
358 (rs1, rs2) = zipWithAndUnzip f as bs
359 in
360 (r1:rs1, r2:rs2)
361 zipWithAndUnzip _ _ _ = ([],[])
362
363 mapAccumL2 :: (s1 -> s2 -> a -> (s1, s2, b)) -> s1 -> s2 -> [a] -> (s1, s2, [b])
364 mapAccumL2 f s1 s2 xs = (s1', s2', ys)
365 where ((s1', s2'), ys) = mapAccumL (\(s1, s2) x -> case f s1 s2 x of
366 (s1', s2', y) -> ((s1', s2'), y))
367 (s1, s2) xs
368
369 nOfThem :: Int -> a -> [a]
370 nOfThem n thing = replicate n thing
371
372 -- | @atLength atLen atEnd ls n@ unravels list @ls@ to position @n@. Precisely:
373 --
374 -- @
375 -- atLength atLenPred atEndPred ls n
376 -- | n < 0 = atLenPred n
377 -- | length ls < n = atEndPred (n - length ls)
378 -- | otherwise = atLenPred (drop n ls)
379 -- @
380 atLength :: ([a] -> b)
381 -> (Int -> b)
382 -> [a]
383 -> Int
384 -> b
385 atLength atLenPred atEndPred ls n
386 | n < 0 = atEndPred n
387 | otherwise = go n ls
388 where
389 go n [] = atEndPred n
390 go 0 ls = atLenPred ls
391 go n (_:xs) = go (n-1) xs
392
393 -- Some special cases of atLength:
394
395 lengthExceeds :: [a] -> Int -> Bool
396 -- ^ > (lengthExceeds xs n) = (length xs > n)
397 lengthExceeds = atLength notNull (const False)
398
399 lengthAtLeast :: [a] -> Int -> Bool
400 lengthAtLeast = atLength notNull (== 0)
401
402 lengthIs :: [a] -> Int -> Bool
403 lengthIs = atLength null (==0)
404
405 listLengthCmp :: [a] -> Int -> Ordering
406 listLengthCmp = atLength atLen atEnd
407 where
408 atEnd 0 = EQ
409 atEnd x
410 | x > 0 = LT -- not yet seen 'n' elts, so list length is < n.
411 | otherwise = GT
412
413 atLen [] = EQ
414 atLen _ = GT
415
416 equalLength :: [a] -> [b] -> Bool
417 equalLength [] [] = True
418 equalLength (_:xs) (_:ys) = equalLength xs ys
419 equalLength _ _ = False
420
421 compareLength :: [a] -> [b] -> Ordering
422 compareLength [] [] = EQ
423 compareLength (_:xs) (_:ys) = compareLength xs ys
424 compareLength [] _ = LT
425 compareLength _ [] = GT
426
427 leLength :: [a] -> [b] -> Bool
428 -- ^ True if length xs <= length ys
429 leLength xs ys = case compareLength xs ys of
430 LT -> True
431 EQ -> True
432 GT -> False
433
434 ----------------------------
435 singleton :: a -> [a]
436 singleton x = [x]
437
438 isSingleton :: [a] -> Bool
439 isSingleton [_] = True
440 isSingleton _ = False
441
442 notNull :: [a] -> Bool
443 notNull [] = False
444 notNull _ = True
445
446 only :: [a] -> a
447 #ifdef DEBUG
448 only [a] = a
449 #else
450 only (a:_) = a
451 #endif
452 only _ = panic "Util: only"
453
454 -- Debugging/specialising versions of \tr{elem} and \tr{notElem}
455
456 isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
457
458 # ifndef DEBUG
459 isIn _msg x ys = x `elem` ys
460 isn'tIn _msg x ys = x `notElem` ys
461
462 # else /* DEBUG */
463 isIn msg x ys
464 = elem100 (_ILIT(0)) x ys
465 where
466 elem100 _ _ [] = False
467 elem100 i x (y:ys)
468 | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg)
469 (x `elem` (y:ys))
470 | otherwise = x == y || elem100 (i +# _ILIT(1)) x ys
471
472 isn'tIn msg x ys
473 = notElem100 (_ILIT(0)) x ys
474 where
475 notElem100 _ _ [] = True
476 notElem100 i x (y:ys)
477 | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg)
478 (x `notElem` (y:ys))
479 | otherwise = x /= y && notElem100 (i +# _ILIT(1)) x ys
480 # endif /* DEBUG */
481
482 {-
483 ************************************************************************
484 * *
485 \subsubsection{Sort utils}
486 * *
487 ************************************************************************
488 -}
489
490 sortWith :: Ord b => (a->b) -> [a] -> [a]
491 sortWith get_key xs = sortBy (comparing get_key) xs
492
493 minWith :: Ord b => (a -> b) -> [a] -> a
494 minWith get_key xs = ASSERT( not (null xs) )
495 head (sortWith get_key xs)
496
497 nubSort :: Ord a => [a] -> [a]
498 nubSort = Set.toAscList . Set.fromList
499
500 {-
501 ************************************************************************
502 * *
503 \subsection[Utils-transitive-closure]{Transitive closure}
504 * *
505 ************************************************************************
506
507 This algorithm for transitive closure is straightforward, albeit quadratic.
508 -}
509
510 transitiveClosure :: (a -> [a]) -- Successor function
511 -> (a -> a -> Bool) -- Equality predicate
512 -> [a]
513 -> [a] -- The transitive closure
514
515 transitiveClosure succ eq xs
516 = go [] xs
517 where
518 go done [] = done
519 go done (x:xs) | x `is_in` done = go done xs
520 | otherwise = go (x:done) (succ x ++ xs)
521
522 _ `is_in` [] = False
523 x `is_in` (y:ys) | eq x y = True
524 | otherwise = x `is_in` ys
525
526 {-
527 ************************************************************************
528 * *
529 \subsection[Utils-accum]{Accumulating}
530 * *
531 ************************************************************************
532
533 A combination of foldl with zip. It works with equal length lists.
534 -}
535
536 foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc
537 foldl2 _ z [] [] = z
538 foldl2 k z (a:as) (b:bs) = foldl2 k (k z a b) as bs
539 foldl2 _ _ _ _ = panic "Util: foldl2"
540
541 all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool
542 -- True if the lists are the same length, and
543 -- all corresponding elements satisfy the predicate
544 all2 _ [] [] = True
545 all2 p (x:xs) (y:ys) = p x y && all2 p xs ys
546 all2 _ _ _ = False
547
548 -- Count the number of times a predicate is true
549
550 count :: (a -> Bool) -> [a] -> Int
551 count _ [] = 0
552 count p (x:xs) | p x = 1 + count p xs
553 | otherwise = count p xs
554
555 {-
556 @splitAt@, @take@, and @drop@ but with length of another
557 list giving the break-off point:
558 -}
559
560 takeList :: [b] -> [a] -> [a]
561 -- (takeList as bs) trims bs to the be same length
562 -- as as, unless as is longer in which case it's a no-op
563 takeList [] _ = []
564 takeList (_:xs) ls =
565 case ls of
566 [] -> []
567 (y:ys) -> y : takeList xs ys
568
569 dropList :: [b] -> [a] -> [a]
570 dropList [] xs = xs
571 dropList _ xs@[] = xs
572 dropList (_:xs) (_:ys) = dropList xs ys
573
574
575 splitAtList :: [b] -> [a] -> ([a], [a])
576 splitAtList [] xs = ([], xs)
577 splitAtList _ xs@[] = (xs, xs)
578 splitAtList (_:xs) (y:ys) = (y:ys', ys'')
579 where
580 (ys', ys'') = splitAtList xs ys
581
582 -- drop from the end of a list
583 dropTail :: Int -> [a] -> [a]
584 -- Specification: dropTail n = reverse . drop n . reverse
585 -- Better implemention due to Joachim Breitner
586 -- http://www.joachim-breitner.de/blog/archives/600-On-taking-the-last-n-elements-of-a-list.html
587 dropTail n xs
588 = go (drop n xs) xs
589 where
590 go (_:ys) (x:xs) = x : go ys xs
591 go _ _ = [] -- Stop when ys runs out
592 -- It'll always run out before xs does
593
594 -- dropWhile from the end of a list. This is similar to Data.List.dropWhileEnd,
595 -- but is lazy in the elements and strict in the spine. For reasonably short lists,
596 -- such as path names and typical lines of text, dropWhileEndLE is generally
597 -- faster than dropWhileEnd. Its advantage is magnified when the predicate is
598 -- expensive--using dropWhileEndLE isSpace to strip the space off a line of text
599 -- is generally much faster than using dropWhileEnd isSpace for that purpose.
600 -- Specification: dropWhileEndLE p = reverse . dropWhile p . reverse
601 -- Pay attention to the short-circuit (&&)! The order of its arguments is the only
602 -- difference between dropWhileEnd and dropWhileEndLE.
603 dropWhileEndLE :: (a -> Bool) -> [a] -> [a]
604 dropWhileEndLE p = foldr (\x r -> if null r && p x then [] else x:r) []
605
606 snocView :: [a] -> Maybe ([a],a)
607 -- Split off the last element
608 snocView [] = Nothing
609 snocView xs = go [] xs
610 where
611 -- Invariant: second arg is non-empty
612 go acc [x] = Just (reverse acc, x)
613 go acc (x:xs) = go (x:acc) xs
614 go _ [] = panic "Util: snocView"
615
616 split :: Char -> String -> [String]
617 split c s = case rest of
618 [] -> [chunk]
619 _:rest -> chunk : split c rest
620 where (chunk, rest) = break (==c) s
621
622 {-
623 ************************************************************************
624 * *
625 \subsection[Utils-comparison]{Comparisons}
626 * *
627 ************************************************************************
628 -}
629
630 isEqual :: Ordering -> Bool
631 -- Often used in (isEqual (a `compare` b))
632 isEqual GT = False
633 isEqual EQ = True
634 isEqual LT = False
635
636 thenCmp :: Ordering -> Ordering -> Ordering
637 {-# INLINE thenCmp #-}
638 thenCmp EQ ordering = ordering
639 thenCmp ordering _ = ordering
640
641 eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool
642 eqListBy _ [] [] = True
643 eqListBy eq (x:xs) (y:ys) = eq x y && eqListBy eq xs ys
644 eqListBy _ _ _ = False
645
646 eqMaybeBy :: (a ->a->Bool) -> Maybe a -> Maybe a -> Bool
647 eqMaybeBy _ Nothing Nothing = True
648 eqMaybeBy eq (Just x) (Just y) = eq x y
649 eqMaybeBy _ _ _ = False
650
651 cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering
652 -- `cmpList' uses a user-specified comparer
653
654 cmpList _ [] [] = EQ
655 cmpList _ [] _ = LT
656 cmpList _ _ [] = GT
657 cmpList cmp (a:as) (b:bs)
658 = case cmp a b of { EQ -> cmpList cmp as bs; xxx -> xxx }
659
660 removeSpaces :: String -> String
661 removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace
662
663 -- Boolean operators lifted to Applicative
664 (<&&>) :: Applicative f => f Bool -> f Bool -> f Bool
665 (<&&>) = liftA2 (&&)
666 infixr 3 <&&> -- same as (&&)
667
668 {-
669 ************************************************************************
670 * *
671 \subsection{Edit distance}
672 * *
673 ************************************************************************
674 -}
675
676 -- | Find the "restricted" Damerau-Levenshtein edit distance between two strings.
677 -- See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
678 -- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing
679 -- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
680 -- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and
681 -- http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
682 restrictedDamerauLevenshteinDistance :: String -> String -> Int
683 restrictedDamerauLevenshteinDistance str1 str2
684 = restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
685 where
686 m = length str1
687 n = length str2
688
689 restrictedDamerauLevenshteinDistanceWithLengths
690 :: Int -> Int -> String -> String -> Int
691 restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
692 | m <= n
693 = if n <= 32 -- n must be larger so this check is sufficient
694 then restrictedDamerauLevenshteinDistance' (undefined :: Word32) m n str1 str2
695 else restrictedDamerauLevenshteinDistance' (undefined :: Integer) m n str1 str2
696
697 | otherwise
698 = if m <= 32 -- m must be larger so this check is sufficient
699 then restrictedDamerauLevenshteinDistance' (undefined :: Word32) n m str2 str1
700 else restrictedDamerauLevenshteinDistance' (undefined :: Integer) n m str2 str1
701
702 restrictedDamerauLevenshteinDistance'
703 :: (Bits bv, Num bv) => bv -> Int -> Int -> String -> String -> Int
704 restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
705 | [] <- str1 = n
706 | otherwise = extractAnswer $
707 foldl' (restrictedDamerauLevenshteinDistanceWorker
708 (matchVectors str1) top_bit_mask vector_mask)
709 (0, 0, m_ones, 0, m) str2
710 where
711 m_ones@vector_mask = (2 ^ m) - 1
712 top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
713 extractAnswer (_, _, _, _, distance) = distance
714
715 restrictedDamerauLevenshteinDistanceWorker
716 :: (Bits bv, Num bv) => IM.IntMap bv -> bv -> bv
717 -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
718 restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask
719 (pm, d0, vp, vn, distance) char2
720 = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $
721 seq pm' $ seq d0' $ seq vp' $ seq vn' $
722 seq distance'' $ seq char2 $
723 (pm', d0', vp', vn', distance'')
724 where
725 pm' = IM.findWithDefault 0 (ord char2) str1_mvs
726
727 d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm)
728 .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
729 -- No need to mask the shiftL because of the restricted range of pm
730
731 hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
732 hn' = d0' .&. vp
733
734 hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
735 hn'_shift = (hn' `shiftL` 1) .&. vector_mask
736 vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
737 vn' = d0' .&. hp'_shift
738
739 distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
740 distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
741
742 sizedComplement :: Bits bv => bv -> bv -> bv
743 sizedComplement vector_mask vect = vector_mask `xor` vect
744
745 matchVectors :: (Bits bv, Num bv) => String -> IM.IntMap bv
746 matchVectors = snd . foldl' go (0 :: Int, IM.empty)
747 where
748 go (ix, im) char = let ix' = ix + 1
749 im' = IM.insertWith (.|.) (ord char) (2 ^ ix) im
750 in seq ix' $ seq im' $ (ix', im')
751
752 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
753 :: Word32 -> Int -> Int -> String -> String -> Int #-}
754 {-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
755 :: Integer -> Int -> Int -> String -> String -> Int #-}
756
757 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
758 :: IM.IntMap Word32 -> Word32 -> Word32
759 -> (Word32, Word32, Word32, Word32, Int)
760 -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
761 {-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
762 :: IM.IntMap Integer -> Integer -> Integer
763 -> (Integer, Integer, Integer, Integer, Int)
764 -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
765
766 {-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
767 {-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
768
769 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
770 {-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
771
772 fuzzyMatch :: String -> [String] -> [String]
773 fuzzyMatch key vals = fuzzyLookup key [(v,v) | v <- vals]
774
775 -- | Search for possible matches to the users input in the given list,
776 -- returning a small number of ranked results
777 fuzzyLookup :: String -> [(String,a)] -> [a]
778 fuzzyLookup user_entered possibilites
779 = map fst $ take mAX_RESULTS $ sortBy (comparing snd)
780 [ (poss_val, distance) | (poss_str, poss_val) <- possibilites
781 , let distance = restrictedDamerauLevenshteinDistance
782 poss_str user_entered
783 , distance <= fuzzy_threshold ]
784 where
785 -- Work out an approriate match threshold:
786 -- We report a candidate if its edit distance is <= the threshold,
787 -- The threshhold is set to about a quarter of the # of characters the user entered
788 -- Length Threshold
789 -- 1 0 -- Don't suggest *any* candidates
790 -- 2 1 -- for single-char identifiers
791 -- 3 1
792 -- 4 1
793 -- 5 1
794 -- 6 2
795 --
796 fuzzy_threshold = truncate $ fromIntegral (length user_entered + 2) / (4 :: Rational)
797 mAX_RESULTS = 3
798
799 {-
800 ************************************************************************
801 * *
802 \subsection[Utils-pairs]{Pairs}
803 * *
804 ************************************************************************
805 -}
806
807 unzipWith :: (a -> b -> c) -> [(a, b)] -> [c]
808 unzipWith f pairs = map ( \ (a, b) -> f a b ) pairs
809
810 seqList :: [a] -> b -> b
811 seqList [] b = b
812 seqList (x:xs) b = x `seq` seqList xs b
813
814 -- Global variables:
815
816 global :: a -> IORef a
817 global a = unsafePerformIO (newIORef a)
818
819 consIORef :: IORef [a] -> a -> IO ()
820 consIORef var x = do
821 atomicModifyIORef' var (\xs -> (x:xs,()))
822
823 globalM :: IO a -> IORef a
824 globalM ma = unsafePerformIO (ma >>= newIORef)
825
826 -- Module names:
827
828 looksLikeModuleName :: String -> Bool
829 looksLikeModuleName [] = False
830 looksLikeModuleName (c:cs) = isUpper c && go cs
831 where go [] = True
832 go ('.':cs) = looksLikeModuleName cs
833 go (c:cs) = (isAlphaNum c || c == '_' || c == '\'') && go cs
834
835 -- Similar to 'parse' for Distribution.Package.PackageName,
836 -- but we don't want to depend on Cabal.
837 looksLikePackageName :: String -> Bool
838 looksLikePackageName = all (all isAlphaNum <&&> not . (all isDigit)) . split '-'
839
840 {-
841 Akin to @Prelude.words@, but acts like the Bourne shell, treating
842 quoted strings as Haskell Strings, and also parses Haskell [String]
843 syntax.
844 -}
845
846 getCmd :: String -> Either String -- Error
847 (String, String) -- (Cmd, Rest)
848 getCmd s = case break isSpace $ dropWhile isSpace s of
849 ([], _) -> Left ("Couldn't find command in " ++ show s)
850 res -> Right res
851
852 toCmdArgs :: String -> Either String -- Error
853 (String, [String]) -- (Cmd, Args)
854 toCmdArgs s = case getCmd s of
855 Left err -> Left err
856 Right (cmd, s') -> case toArgs s' of
857 Left err -> Left err
858 Right args -> Right (cmd, args)
859
860 toArgs :: String -> Either String -- Error
861 [String] -- Args
862 toArgs str
863 = case dropWhile isSpace str of
864 s@('[':_) -> case reads s of
865 [(args, spaces)]
866 | all isSpace spaces ->
867 Right args
868 _ ->
869 Left ("Couldn't read " ++ show str ++ "as [String]")
870 s -> toArgs' s
871 where
872 toArgs' s = case dropWhile isSpace s of
873 [] -> Right []
874 ('"' : _) -> case reads s of
875 [(arg, rest)]
876 -- rest must either be [] or start with a space
877 | all isSpace (take 1 rest) ->
878 case toArgs' rest of
879 Left err -> Left err
880 Right args -> Right (arg : args)
881 _ ->
882 Left ("Couldn't read " ++ show s ++ "as String")
883 s' -> case break isSpace s' of
884 (arg, s'') -> case toArgs' s'' of
885 Left err -> Left err
886 Right args -> Right (arg : args)
887
888 {-
889 -- -----------------------------------------------------------------------------
890 -- Floats
891 -}
892
893 readRational__ :: ReadS Rational -- NB: doesn't handle leading "-"
894 readRational__ r = do
895 (n,d,s) <- readFix r
896 (k,t) <- readExp s
897 return ((n%1)*10^^(k-d), t)
898 where
899 readFix r = do
900 (ds,s) <- lexDecDigits r
901 (ds',t) <- lexDotDigits s
902 return (read (ds++ds'), length ds', t)
903
904 readExp (e:s) | e `elem` "eE" = readExp' s
905 readExp s = return (0,s)
906
907 readExp' ('+':s) = readDec s
908 readExp' ('-':s) = do (k,t) <- readDec s
909 return (-k,t)
910 readExp' s = readDec s
911
912 readDec s = do
913 (ds,r) <- nonnull isDigit s
914 return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
915 r)
916
917 lexDecDigits = nonnull isDigit
918
919 lexDotDigits ('.':s) = return (span isDigit s)
920 lexDotDigits s = return ("",s)
921
922 nonnull p s = do (cs@(_:_),t) <- return (span p s)
923 return (cs,t)
924
925 readRational :: String -> Rational -- NB: *does* handle a leading "-"
926 readRational top_s
927 = case top_s of
928 '-' : xs -> - (read_me xs)
929 xs -> read_me xs
930 where
931 read_me s
932 = case (do { (x,"") <- readRational__ s ; return x }) of
933 [x] -> x
934 [] -> error ("readRational: no parse:" ++ top_s)
935 _ -> error ("readRational: ambiguous parse:" ++ top_s)
936
937
938 -----------------------------------------------------------------------------
939 -- read helpers
940
941 maybeRead :: Read a => String -> Maybe a
942 maybeRead str = case reads str of
943 [(x, "")] -> Just x
944 _ -> Nothing
945
946 maybeReadFuzzy :: Read a => String -> Maybe a
947 maybeReadFuzzy str = case reads str of
948 [(x, s)]
949 | all isSpace s ->
950 Just x
951 _ ->
952 Nothing
953
954 -----------------------------------------------------------------------------
955 -- Verify that the 'dirname' portion of a FilePath exists.
956 --
957 doesDirNameExist :: FilePath -> IO Bool
958 doesDirNameExist fpath = doesDirectoryExist (takeDirectory fpath)
959
960 -----------------------------------------------------------------------------
961 -- Backwards compatibility definition of getModificationTime
962
963 getModificationUTCTime :: FilePath -> IO UTCTime
964 getModificationUTCTime = getModificationTime
965
966 -- --------------------------------------------------------------
967 -- check existence & modification time at the same time
968
969 modificationTimeIfExists :: FilePath -> IO (Maybe UTCTime)
970 modificationTimeIfExists f = do
971 (do t <- getModificationUTCTime f; return (Just t))
972 `catchIO` \e -> if isDoesNotExistError e
973 then return Nothing
974 else ioError e
975
976 -- split a string at the last character where 'pred' is True,
977 -- returning a pair of strings. The first component holds the string
978 -- up (but not including) the last character for which 'pred' returned
979 -- True, the second whatever comes after (but also not including the
980 -- last character).
981 --
982 -- If 'pred' returns False for all characters in the string, the original
983 -- string is returned in the first component (and the second one is just
984 -- empty).
985 splitLongestPrefix :: String -> (Char -> Bool) -> (String,String)
986 splitLongestPrefix str pred
987 | null r_pre = (str, [])
988 | otherwise = (reverse (tail r_pre), reverse r_suf)
989 -- 'tail' drops the char satisfying 'pred'
990 where (r_suf, r_pre) = break pred (reverse str)
991
992 escapeSpaces :: String -> String
993 escapeSpaces = foldr (\c s -> if isSpace c then '\\':c:s else c:s) ""
994
995 type Suffix = String
996
997 --------------------------------------------------------------
998 -- * Search path
999 --------------------------------------------------------------
1000
1001 data Direction = Forwards | Backwards
1002
1003 reslash :: Direction -> FilePath -> FilePath
1004 reslash d = f
1005 where f ('/' : xs) = slash : f xs
1006 f ('\\' : xs) = slash : f xs
1007 f (x : xs) = x : f xs
1008 f "" = ""
1009 slash = case d of
1010 Forwards -> '/'
1011 Backwards -> '\\'
1012
1013 makeRelativeTo :: FilePath -> FilePath -> FilePath
1014 this `makeRelativeTo` that = directory </> thisFilename
1015 where (thisDirectory, thisFilename) = splitFileName this
1016 thatDirectory = dropFileName that
1017 directory = joinPath $ f (splitPath thisDirectory)
1018 (splitPath thatDirectory)
1019
1020 f (x : xs) (y : ys)
1021 | x == y = f xs ys
1022 f xs ys = replicate (length ys) ".." ++ xs
1023
1024 {-
1025 ************************************************************************
1026 * *
1027 \subsection[Utils-Data]{Utils for defining Data instances}
1028 * *
1029 ************************************************************************
1030
1031 These functions helps us to define Data instances for abstract types.
1032 -}
1033
1034 abstractConstr :: String -> Constr
1035 abstractConstr n = mkConstr (abstractDataType n) ("{abstract:"++n++"}") [] Prefix
1036
1037 abstractDataType :: String -> DataType
1038 abstractDataType n = mkDataType n [abstractConstr n]
1039
1040 {-
1041 ************************************************************************
1042 * *
1043 \subsection[Utils-C]{Utils for printing C code}
1044 * *
1045 ************************************************************************
1046 -}
1047
1048 charToC :: Word8 -> String
1049 charToC w =
1050 case chr (fromIntegral w) of
1051 '\"' -> "\\\""
1052 '\'' -> "\\\'"
1053 '\\' -> "\\\\"
1054 c | c >= ' ' && c <= '~' -> [c]
1055 | otherwise -> ['\\',
1056 chr (ord '0' + ord c `div` 64),
1057 chr (ord '0' + ord c `div` 8 `mod` 8),
1058 chr (ord '0' + ord c `mod` 8)]
1059
1060 {-
1061 ************************************************************************
1062 * *
1063 \subsection[Utils-Hashing]{Utils for hashing}
1064 * *
1065 ************************************************************************
1066 -}
1067
1068 -- | A sample hash function for Strings. We keep multiplying by the
1069 -- golden ratio and adding. The implementation is:
1070 --
1071 -- > hashString = foldl' f golden
1072 -- > where f m c = fromIntegral (ord c) * magic + hashInt32 m
1073 -- > magic = 0xdeadbeef
1074 --
1075 -- Where hashInt32 works just as hashInt shown above.
1076 --
1077 -- Knuth argues that repeated multiplication by the golden ratio
1078 -- will minimize gaps in the hash space, and thus it's a good choice
1079 -- for combining together multiple keys to form one.
1080 --
1081 -- Here we know that individual characters c are often small, and this
1082 -- produces frequent collisions if we use ord c alone. A
1083 -- particular problem are the shorter low ASCII and ISO-8859-1
1084 -- character strings. We pre-multiply by a magic twiddle factor to
1085 -- obtain a good distribution. In fact, given the following test:
1086 --
1087 -- > testp :: Int32 -> Int
1088 -- > testp k = (n - ) . length . group . sort . map hs . take n $ ls
1089 -- > where ls = [] : [c : l | l <- ls, c <- ['\0'..'\xff']]
1090 -- > hs = foldl' f golden
1091 -- > f m c = fromIntegral (ord c) * k + hashInt32 m
1092 -- > n = 100000
1093 --
1094 -- We discover that testp magic = 0.
1095 hashString :: String -> Int32
1096 hashString = foldl' f golden
1097 where f m c = fromIntegral (ord c) * magic + hashInt32 m
1098 magic = fromIntegral (0xdeadbeef :: Word32)
1099
1100 golden :: Int32
1101 golden = 1013904242 -- = round ((sqrt 5 - 1) * 2^32) :: Int32
1102 -- was -1640531527 = round ((sqrt 5 - 1) * 2^31) :: Int32
1103 -- but that has bad mulHi properties (even adding 2^32 to get its inverse)
1104 -- Whereas the above works well and contains no hash duplications for
1105 -- [-32767..65536]
1106
1107 -- | A sample (and useful) hash function for Int32,
1108 -- implemented by extracting the uppermost 32 bits of the 64-bit
1109 -- result of multiplying by a 33-bit constant. The constant is from
1110 -- Knuth, derived from the golden ratio:
1111 --
1112 -- > golden = round ((sqrt 5 - 1) * 2^32)
1113 --
1114 -- We get good key uniqueness on small inputs
1115 -- (a problem with previous versions):
1116 -- (length $ group $ sort $ map hashInt32 [-32767..65536]) == 65536 + 32768
1117 --
1118 hashInt32 :: Int32 -> Int32
1119 hashInt32 x = mulHi x golden + x
1120
1121 -- hi 32 bits of a x-bit * 32 bit -> 64-bit multiply
1122 mulHi :: Int32 -> Int32 -> Int32
1123 mulHi a b = fromIntegral (r `shiftR` 32)
1124 where r :: Int64
1125 r = fromIntegral a * fromIntegral b