Fix associativity of size in caseConvert
[packages/text.git] / Data / Text / Internal / Fusion / Common.hs
1 {-# LANGUAGE PatternGuards, BangPatterns, MagicHash, Rank2Types #-}
2 -- |
3 -- Module : Data.Text.Internal.Fusion.Common
4 -- Copyright : (c) Bryan O'Sullivan 2009, 2012
5 --
6 -- License : BSD-style
7 -- Maintainer : bos@serpentine.com
8 -- Stability : experimental
9 -- Portability : GHC
10 --
11 -- /Warning/: this is an internal module, and does not have a stable
12 -- API or name. Functions in this module may not check or enforce
13 -- preconditions expected by public modules. Use at your own risk!
14 --
15 -- Common stream fusion functionality for text.
16
17 module Data.Text.Internal.Fusion.Common
18 (
19 -- * Creation and elimination
20 singleton
21 , streamList
22 , unstreamList
23 , streamCString#
24
25 -- * Basic interface
26 , cons
27 , snoc
28 , append
29 , head
30 , uncons
31 , last
32 , tail
33 , init
34 , null
35 , lengthI
36 , compareLengthI
37 , isSingleton
38
39 -- * Transformations
40 , map
41 , intercalate
42 , intersperse
43
44 -- ** Case conversion
45 -- $case
46 , toCaseFold
47 , toLower
48 , toTitle
49 , toUpper
50
51 -- ** Justification
52 , justifyLeftI
53
54 -- * Folds
55 , foldl
56 , foldl'
57 , foldl1
58 , foldl1'
59 , foldr
60 , foldr1
61
62 -- ** Special folds
63 , concat
64 , concatMap
65 , any
66 , all
67 , maximum
68 , minimum
69
70 -- * Construction
71 -- ** Scans
72 , scanl
73
74 -- ** Generation and unfolding
75 , replicateCharI
76 , replicateI
77 , unfoldr
78 , unfoldrNI
79
80 -- * Substrings
81 -- ** Breaking strings
82 , take
83 , drop
84 , takeWhile
85 , dropWhile
86
87 -- * Predicates
88 , isPrefixOf
89
90 -- * Searching
91 , elem
92 , filter
93
94 -- * Indexing
95 , findBy
96 , indexI
97 , findIndexI
98 , countCharI
99
100 -- * Zipping and unzipping
101 , zipWith
102 ) where
103
104 import Prelude (Bool(..), Char, Eq(..), Int, Integral, Maybe(..),
105 Ord(..), Ordering(..), String, (.), ($), (+), (-), (*), (++),
106 (&&), fromIntegral, otherwise)
107 import qualified Data.List as L
108 import qualified Prelude as P
109 import Data.Bits (shiftL)
110 import Data.Char (isLetter, isSpace)
111 import Data.Int (Int64)
112 import Data.Text.Internal.Fusion.Types
113 import Data.Text.Internal.Fusion.CaseMapping (foldMapping, lowerMapping, titleMapping,
114 upperMapping)
115 import Data.Text.Internal.Fusion.Size
116 import GHC.Prim (Addr#, chr#, indexCharOffAddr#, ord#)
117 import GHC.Types (Char(..), Int(..))
118
119 singleton :: Char -> Stream Char
120 singleton c = Stream next False (codePointsSize 1)
121 where next False = Yield c True
122 next True = Done
123 {-# INLINE [0] singleton #-}
124
125 streamList :: [a] -> Stream a
126 {-# INLINE [0] streamList #-}
127 streamList s = Stream next s unknownSize
128 where next [] = Done
129 next (x:xs) = Yield x xs
130
131 unstreamList :: Stream a -> [a]
132 unstreamList (Stream next s0 _len) = unfold s0
133 where unfold !s = case next s of
134 Done -> []
135 Skip s' -> unfold s'
136 Yield x s' -> x : unfold s'
137 {-# INLINE [0] unstreamList #-}
138
139 {-# RULES "STREAM streamList/unstreamList fusion" forall s. streamList (unstreamList s) = s #-}
140
141 -- | Stream the UTF-8-like packed encoding used by GHC to represent
142 -- constant strings in generated code.
143 --
144 -- This encoding uses the byte sequence "\xc0\x80" to represent NUL,
145 -- and the string is NUL-terminated.
146 streamCString# :: Addr# -> Stream Char
147 streamCString# addr = Stream step 0 unknownSize
148 where
149 step !i
150 | b == 0 = Done
151 | b <= 0x7f = Yield (C# b#) (i+1)
152 | b <= 0xdf = let !c = chr $ ((b-0xc0) `shiftL` 6) + next 1
153 in Yield c (i+2)
154 | b <= 0xef = let !c = chr $ ((b-0xe0) `shiftL` 12) +
155 (next 1 `shiftL` 6) +
156 next 2
157 in Yield c (i+3)
158 | otherwise = let !c = chr $ ((b-0xf0) `shiftL` 18) +
159 (next 1 `shiftL` 12) +
160 (next 2 `shiftL` 6) +
161 next 3
162 in Yield c (i+4)
163 where b = I# (ord# b#)
164 next n = I# (ord# (at# (i+n))) - 0x80
165 !b# = at# i
166 at# (I# i#) = indexCharOffAddr# addr i#
167 chr (I# i#) = C# (chr# i#)
168 {-# INLINE [0] streamCString# #-}
169
170 -- ----------------------------------------------------------------------------
171 -- * Basic stream functions
172
173 data C s = C0 !s
174 | C1 !s
175
176 -- | /O(n)/ Adds a character to the front of a Stream Char.
177 cons :: Char -> Stream Char -> Stream Char
178 cons !w (Stream next0 s0 len) = Stream next (C1 s0) (len + codePointsSize 1)
179 where
180 next (C1 s) = Yield w (C0 s)
181 next (C0 s) = case next0 s of
182 Done -> Done
183 Skip s' -> Skip (C0 s')
184 Yield x s' -> Yield x (C0 s')
185 {-# INLINE [0] cons #-}
186
187 data Snoc a = N
188 | J !a
189
190 -- | /O(n)/ Adds a character to the end of a stream.
191 snoc :: Stream Char -> Char -> Stream Char
192 snoc (Stream next0 xs0 len) w = Stream next (J xs0) (len + codePointsSize 1)
193 where
194 next (J xs) = case next0 xs of
195 Done -> Yield w N
196 Skip xs' -> Skip (J xs')
197 Yield x xs' -> Yield x (J xs')
198 next N = Done
199 {-# INLINE [0] snoc #-}
200
201 data E l r = L !l
202 | R !r
203
204 -- | /O(n)/ Appends one Stream to the other.
205 append :: Stream Char -> Stream Char -> Stream Char
206 append (Stream next0 s01 len1) (Stream next1 s02 len2) =
207 Stream next (L s01) (len1 + len2)
208 where
209 next (L s1) = case next0 s1 of
210 Done -> Skip (R s02)
211 Skip s1' -> Skip (L s1')
212 Yield x s1' -> Yield x (L s1')
213 next (R s2) = case next1 s2 of
214 Done -> Done
215 Skip s2' -> Skip (R s2')
216 Yield x s2' -> Yield x (R s2')
217 {-# INLINE [0] append #-}
218
219 -- | /O(1)/ Returns the first character of a Text, which must be non-empty.
220 -- Subject to array fusion.
221 head :: Stream Char -> Char
222 head (Stream next s0 _len) = loop_head s0
223 where
224 loop_head !s = case next s of
225 Yield x _ -> x
226 Skip s' -> loop_head s'
227 Done -> head_empty
228 {-# INLINE [0] head #-}
229
230 head_empty :: a
231 head_empty = streamError "head" "Empty stream"
232 {-# NOINLINE head_empty #-}
233
234 -- | /O(1)/ Returns the first character and remainder of a 'Stream
235 -- Char', or 'Nothing' if empty. Subject to array fusion.
236 uncons :: Stream Char -> Maybe (Char, Stream Char)
237 uncons (Stream next s0 len) = loop_uncons s0
238 where
239 loop_uncons !s = case next s of
240 Yield x s1 -> Just (x, Stream next s1 (len - codePointsSize 1))
241 Skip s' -> loop_uncons s'
242 Done -> Nothing
243 {-# INLINE [0] uncons #-}
244
245 -- | /O(n)/ Returns the last character of a 'Stream Char', which must
246 -- be non-empty.
247 last :: Stream Char -> Char
248 last (Stream next s0 _len) = loop0_last s0
249 where
250 loop0_last !s = case next s of
251 Done -> emptyError "last"
252 Skip s' -> loop0_last s'
253 Yield x s' -> loop_last x s'
254 loop_last !x !s = case next s of
255 Done -> x
256 Skip s' -> loop_last x s'
257 Yield x' s' -> loop_last x' s'
258 {-# INLINE[0] last #-}
259
260 -- | /O(1)/ Returns all characters after the head of a Stream Char, which must
261 -- be non-empty.
262 tail :: Stream Char -> Stream Char
263 tail (Stream next0 s0 len) = Stream next (C0 s0) (len - codePointsSize 1)
264 where
265 next (C0 s) = case next0 s of
266 Done -> emptyError "tail"
267 Skip s' -> Skip (C0 s')
268 Yield _ s' -> Skip (C1 s')
269 next (C1 s) = case next0 s of
270 Done -> Done
271 Skip s' -> Skip (C1 s')
272 Yield x s' -> Yield x (C1 s')
273 {-# INLINE [0] tail #-}
274
275 data Init s = Init0 !s
276 | Init1 {-# UNPACK #-} !Char !s
277
278 -- | /O(1)/ Returns all but the last character of a Stream Char, which
279 -- must be non-empty.
280 init :: Stream Char -> Stream Char
281 init (Stream next0 s0 len) = Stream next (Init0 s0) (len - codePointsSize 1)
282 where
283 next (Init0 s) = case next0 s of
284 Done -> emptyError "init"
285 Skip s' -> Skip (Init0 s')
286 Yield x s' -> Skip (Init1 x s')
287 next (Init1 x s) = case next0 s of
288 Done -> Done
289 Skip s' -> Skip (Init1 x s')
290 Yield x' s' -> Yield x (Init1 x' s')
291 {-# INLINE [0] init #-}
292
293 -- | /O(1)/ Tests whether a Stream Char is empty or not.
294 null :: Stream Char -> Bool
295 null (Stream next s0 _len) = loop_null s0
296 where
297 loop_null !s = case next s of
298 Done -> True
299 Yield _ _ -> False
300 Skip s' -> loop_null s'
301 {-# INLINE[0] null #-}
302
303 -- | /O(n)/ Returns the number of characters in a string.
304 lengthI :: Integral a => Stream Char -> a
305 lengthI (Stream next s0 _len) = loop_length 0 s0
306 where
307 loop_length !z s = case next s of
308 Done -> z
309 Skip s' -> loop_length z s'
310 Yield _ s' -> loop_length (z + 1) s'
311 {-# INLINE[0] lengthI #-}
312
313 -- | /O(n)/ Compares the count of characters in a string to a number.
314 -- Subject to fusion.
315 --
316 -- This function gives the same answer as comparing against the result
317 -- of 'lengthI', but can short circuit if the count of characters is
318 -- greater than the number or if the stream can't possibly be as long
319 -- as the number supplied, and hence be more efficient.
320 compareLengthI :: Integral a => Stream Char -> a -> Ordering
321 compareLengthI (Stream next s0 len) n
322 -- Note that @len@ tracks code units whereas we want to compare the length
323 -- in code points. Specifically, a stream with hint @len@ may consist of
324 -- anywhere from @len/2@ to @len@ code points.
325 | Just r <- compareSize len n' = r
326 | otherwise = loop_cmp 0 s0
327 where
328 n' = codePointsSize $ fromIntegral n
329 loop_cmp !z s = case next s of
330 Done -> compare z n
331 Skip s' -> loop_cmp z s'
332 Yield _ s' | z > n -> GT
333 | otherwise -> loop_cmp (z + 1) s'
334 {-# INLINE[0] compareLengthI #-}
335
336 -- | /O(n)/ Indicate whether a string contains exactly one element.
337 isSingleton :: Stream Char -> Bool
338 isSingleton (Stream next s0 _len) = loop 0 s0
339 where
340 loop !z s = case next s of
341 Done -> z == (1::Int)
342 Skip s' -> loop z s'
343 Yield _ s'
344 | z >= 1 -> False
345 | otherwise -> loop (z+1) s'
346 {-# INLINE[0] isSingleton #-}
347
348 -- ----------------------------------------------------------------------------
349 -- * Stream transformations
350
351 -- | /O(n)/ 'map' @f @xs is the Stream Char obtained by applying @f@
352 -- to each element of @xs@.
353 map :: (Char -> Char) -> Stream Char -> Stream Char
354 map f (Stream next0 s0 len) = Stream next s0 len
355 where
356 next !s = case next0 s of
357 Done -> Done
358 Skip s' -> Skip s'
359 Yield x s' -> Yield (f x) s'
360 {-# INLINE [0] map #-}
361
362 {-#
363 RULES "STREAM map/map fusion" forall f g s.
364 map f (map g s) = map (\x -> f (g x)) s
365 #-}
366
367 data I s = I1 !s
368 | I2 !s {-# UNPACK #-} !Char
369 | I3 !s
370
371 -- | /O(n)/ Take a character and place it between each of the
372 -- characters of a 'Stream Char'.
373 intersperse :: Char -> Stream Char -> Stream Char
374 intersperse c (Stream next0 s0 len) = Stream next (I1 s0) (len + unknownSize)
375 where
376 next (I1 s) = case next0 s of
377 Done -> Done
378 Skip s' -> Skip (I1 s')
379 Yield x s' -> Skip (I2 s' x)
380 next (I2 s x) = Yield x (I3 s)
381 next (I3 s) = case next0 s of
382 Done -> Done
383 Skip s' -> Skip (I3 s')
384 Yield x s' -> Yield c (I2 s' x)
385 {-# INLINE [0] intersperse #-}
386
387 -- ----------------------------------------------------------------------------
388 -- ** Case conversions (folds)
389
390 -- $case
391 --
392 -- With Unicode text, it is incorrect to use combinators like @map
393 -- toUpper@ to case convert each character of a string individually.
394 -- Instead, use the whole-string case conversion functions from this
395 -- module. For correctness in different writing systems, these
396 -- functions may map one input character to two or three output
397 -- characters.
398
399 -- | Map a 'Stream' through the given case-mapping function.
400 caseConvert :: (forall s. Char -> s -> Step (CC s) Char)
401 -> Stream Char -> Stream Char
402 caseConvert remap (Stream next0 s0 len) =
403 Stream next (CC s0 '\0' '\0') (len `unionSize` (3*len))
404 where
405 next (CC s '\0' _) =
406 case next0 s of
407 Done -> Done
408 Skip s' -> Skip (CC s' '\0' '\0')
409 Yield c s' -> remap c s'
410 next (CC s a b) = Yield a (CC s b '\0')
411
412 -- | /O(n)/ Convert a string to folded case. This function is mainly
413 -- useful for performing caseless (or case insensitive) string
414 -- comparisons.
415 --
416 -- A string @x@ is a caseless match for a string @y@ if and only if:
417 --
418 -- @toCaseFold x == toCaseFold y@
419 --
420 -- The result string may be longer than the input string, and may
421 -- differ from applying 'toLower' to the input string. For instance,
422 -- the Armenian small ligature men now (U+FB13) is case folded to the
423 -- bigram men now (U+0574 U+0576), while the micro sign (U+00B5) is
424 -- case folded to the Greek small letter letter mu (U+03BC) instead of
425 -- itself.
426 toCaseFold :: Stream Char -> Stream Char
427 toCaseFold = caseConvert foldMapping
428 {-# INLINE [0] toCaseFold #-}
429
430 -- | /O(n)/ Convert a string to upper case, using simple case
431 -- conversion. The result string may be longer than the input string.
432 -- For instance, the German eszett (U+00DF) maps to the two-letter
433 -- sequence SS.
434 toUpper :: Stream Char -> Stream Char
435 toUpper = caseConvert upperMapping
436 {-# INLINE [0] toUpper #-}
437
438 -- | /O(n)/ Convert a string to lower case, using simple case
439 -- conversion. The result string may be longer than the input string.
440 -- For instance, the Latin capital letter I with dot above (U+0130)
441 -- maps to the sequence Latin small letter i (U+0069) followed by
442 -- combining dot above (U+0307).
443 toLower :: Stream Char -> Stream Char
444 toLower = caseConvert lowerMapping
445 {-# INLINE [0] toLower #-}
446
447 -- | /O(n)/ Convert a string to title case, using simple case
448 -- conversion.
449 --
450 -- The first letter of the input is converted to title case, as is
451 -- every subsequent letter that immediately follows a non-letter.
452 -- Every letter that immediately follows another letter is converted
453 -- to lower case.
454 --
455 -- The result string may be longer than the input string. For example,
456 -- the Latin small ligature &#xfb02; (U+FB02) is converted to the
457 -- sequence Latin capital letter F (U+0046) followed by Latin small
458 -- letter l (U+006C).
459 --
460 -- /Note/: this function does not take language or culture specific
461 -- rules into account. For instance, in English, different style
462 -- guides disagree on whether the book name \"The Hill of the Red
463 -- Fox\" is correctly title cased&#x2014;but this function will
464 -- capitalize /every/ word.
465 toTitle :: Stream Char -> Stream Char
466 toTitle (Stream next0 s0 len) = Stream next (CC (False :*: s0) '\0' '\0') (len + unknownSize)
467 where
468 next (CC (letter :*: s) '\0' _) =
469 case next0 s of
470 Done -> Done
471 Skip s' -> Skip (CC (letter :*: s') '\0' '\0')
472 Yield c s'
473 | nonSpace -> if letter
474 then lowerMapping c (nonSpace :*: s')
475 else titleMapping c (letter' :*: s')
476 | otherwise -> Yield c (CC (letter' :*: s') '\0' '\0')
477 where nonSpace = P.not (isSpace c)
478 letter' = isLetter c
479 next (CC s a b) = Yield a (CC s b '\0')
480 {-# INLINE [0] toTitle #-}
481
482 data Justify i s = Just1 !i !s
483 | Just2 !i !s
484
485 justifyLeftI :: Integral a => a -> Char -> Stream Char -> Stream Char
486 justifyLeftI k c (Stream next0 s0 len) =
487 Stream next (Just1 0 s0) (larger (fromIntegral k * charSize c + len) len)
488 where
489 next (Just1 n s) =
490 case next0 s of
491 Done -> next (Just2 n s)
492 Skip s' -> Skip (Just1 n s')
493 Yield x s' -> Yield x (Just1 (n+1) s')
494 next (Just2 n s)
495 | n < k = Yield c (Just2 (n+1) s)
496 | otherwise = Done
497 {-# INLINE next #-}
498 {-# INLINE [0] justifyLeftI #-}
499
500 -- ----------------------------------------------------------------------------
501 -- * Reducing Streams (folds)
502
503 -- | foldl, applied to a binary operator, a starting value (typically the
504 -- left-identity of the operator), and a Stream, reduces the Stream using the
505 -- binary operator, from left to right.
506 foldl :: (b -> Char -> b) -> b -> Stream Char -> b
507 foldl f z0 (Stream next s0 _len) = loop_foldl z0 s0
508 where
509 loop_foldl z !s = case next s of
510 Done -> z
511 Skip s' -> loop_foldl z s'
512 Yield x s' -> loop_foldl (f z x) s'
513 {-# INLINE [0] foldl #-}
514
515 -- | A strict version of foldl.
516 foldl' :: (b -> Char -> b) -> b -> Stream Char -> b
517 foldl' f z0 (Stream next s0 _len) = loop_foldl' z0 s0
518 where
519 loop_foldl' !z !s = case next s of
520 Done -> z
521 Skip s' -> loop_foldl' z s'
522 Yield x s' -> loop_foldl' (f z x) s'
523 {-# INLINE [0] foldl' #-}
524
525 -- | foldl1 is a variant of foldl that has no starting value argument,
526 -- and thus must be applied to non-empty Streams.
527 foldl1 :: (Char -> Char -> Char) -> Stream Char -> Char
528 foldl1 f (Stream next s0 _len) = loop0_foldl1 s0
529 where
530 loop0_foldl1 !s = case next s of
531 Skip s' -> loop0_foldl1 s'
532 Yield x s' -> loop_foldl1 x s'
533 Done -> emptyError "foldl1"
534 loop_foldl1 z !s = case next s of
535 Done -> z
536 Skip s' -> loop_foldl1 z s'
537 Yield x s' -> loop_foldl1 (f z x) s'
538 {-# INLINE [0] foldl1 #-}
539
540 -- | A strict version of foldl1.
541 foldl1' :: (Char -> Char -> Char) -> Stream Char -> Char
542 foldl1' f (Stream next s0 _len) = loop0_foldl1' s0
543 where
544 loop0_foldl1' !s = case next s of
545 Skip s' -> loop0_foldl1' s'
546 Yield x s' -> loop_foldl1' x s'
547 Done -> emptyError "foldl1"
548 loop_foldl1' !z !s = case next s of
549 Done -> z
550 Skip s' -> loop_foldl1' z s'
551 Yield x s' -> loop_foldl1' (f z x) s'
552 {-# INLINE [0] foldl1' #-}
553
554 -- | 'foldr', applied to a binary operator, a starting value (typically the
555 -- right-identity of the operator), and a stream, reduces the stream using the
556 -- binary operator, from right to left.
557 foldr :: (Char -> b -> b) -> b -> Stream Char -> b
558 foldr f z (Stream next s0 _len) = loop_foldr s0
559 where
560 loop_foldr !s = case next s of
561 Done -> z
562 Skip s' -> loop_foldr s'
563 Yield x s' -> f x (loop_foldr s')
564 {-# INLINE [0] foldr #-}
565
566 -- | foldr1 is a variant of 'foldr' that has no starting value argument,
567 -- and thus must be applied to non-empty streams.
568 -- Subject to array fusion.
569 foldr1 :: (Char -> Char -> Char) -> Stream Char -> Char
570 foldr1 f (Stream next s0 _len) = loop0_foldr1 s0
571 where
572 loop0_foldr1 !s = case next s of
573 Done -> emptyError "foldr1"
574 Skip s' -> loop0_foldr1 s'
575 Yield x s' -> loop_foldr1 x s'
576
577 loop_foldr1 x !s = case next s of
578 Done -> x
579 Skip s' -> loop_foldr1 x s'
580 Yield x' s' -> f x (loop_foldr1 x' s')
581 {-# INLINE [0] foldr1 #-}
582
583 intercalate :: Stream Char -> [Stream Char] -> Stream Char
584 intercalate s = concat . (L.intersperse s)
585 {-# INLINE [0] intercalate #-}
586
587 -- ----------------------------------------------------------------------------
588 -- ** Special folds
589
590 -- | /O(n)/ Concatenate a list of streams. Subject to array fusion.
591 concat :: [Stream Char] -> Stream Char
592 concat = L.foldr append empty
593 {-# INLINE [0] concat #-}
594
595 -- | Map a function over a stream that results in a stream and concatenate the
596 -- results.
597 concatMap :: (Char -> Stream Char) -> Stream Char -> Stream Char
598 concatMap f = foldr (append . f) empty
599 {-# INLINE [0] concatMap #-}
600
601 -- | /O(n)/ any @p @xs determines if any character in the stream
602 -- @xs@ satisfies the predicate @p@.
603 any :: (Char -> Bool) -> Stream Char -> Bool
604 any p (Stream next0 s0 _len) = loop_any s0
605 where
606 loop_any !s = case next0 s of
607 Done -> False
608 Skip s' -> loop_any s'
609 Yield x s' | p x -> True
610 | otherwise -> loop_any s'
611 {-# INLINE [0] any #-}
612
613 -- | /O(n)/ all @p @xs determines if all characters in the 'Text'
614 -- @xs@ satisfy the predicate @p@.
615 all :: (Char -> Bool) -> Stream Char -> Bool
616 all p (Stream next0 s0 _len) = loop_all s0
617 where
618 loop_all !s = case next0 s of
619 Done -> True
620 Skip s' -> loop_all s'
621 Yield x s' | p x -> loop_all s'
622 | otherwise -> False
623 {-# INLINE [0] all #-}
624
625 -- | /O(n)/ maximum returns the maximum value from a stream, which must be
626 -- non-empty.
627 maximum :: Stream Char -> Char
628 maximum (Stream next0 s0 _len) = loop0_maximum s0
629 where
630 loop0_maximum !s = case next0 s of
631 Done -> emptyError "maximum"
632 Skip s' -> loop0_maximum s'
633 Yield x s' -> loop_maximum x s'
634 loop_maximum !z !s = case next0 s of
635 Done -> z
636 Skip s' -> loop_maximum z s'
637 Yield x s'
638 | x > z -> loop_maximum x s'
639 | otherwise -> loop_maximum z s'
640 {-# INLINE [0] maximum #-}
641
642 -- | /O(n)/ minimum returns the minimum value from a 'Text', which must be
643 -- non-empty.
644 minimum :: Stream Char -> Char
645 minimum (Stream next0 s0 _len) = loop0_minimum s0
646 where
647 loop0_minimum !s = case next0 s of
648 Done -> emptyError "minimum"
649 Skip s' -> loop0_minimum s'
650 Yield x s' -> loop_minimum x s'
651 loop_minimum !z !s = case next0 s of
652 Done -> z
653 Skip s' -> loop_minimum z s'
654 Yield x s'
655 | x < z -> loop_minimum x s'
656 | otherwise -> loop_minimum z s'
657 {-# INLINE [0] minimum #-}
658
659 -- -----------------------------------------------------------------------------
660 -- * Building streams
661
662 scanl :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
663 scanl f z0 (Stream next0 s0 len) = Stream next (Scan1 z0 s0) (len+1) -- HINT maybe too low
664 where
665 {-# INLINE next #-}
666 next (Scan1 z s) = Yield z (Scan2 z s)
667 next (Scan2 z s) = case next0 s of
668 Yield x s' -> let !x' = f z x
669 in Yield x' (Scan2 x' s')
670 Skip s' -> Skip (Scan2 z s')
671 Done -> Done
672 {-# INLINE [0] scanl #-}
673
674 -- -----------------------------------------------------------------------------
675 -- ** Generating and unfolding streams
676
677 replicateCharI :: Integral a => a -> Char -> Stream Char
678 replicateCharI !n !c
679 | n < 0 = empty
680 | otherwise = Stream next 0 (fromIntegral n) -- HINT maybe too low
681 where
682 next !i | i >= n = Done
683 | otherwise = Yield c (i + 1)
684 {-# INLINE [0] replicateCharI #-}
685
686 data RI s = RI !s {-# UNPACK #-} !Int64
687
688 replicateI :: Int64 -> Stream Char -> Stream Char
689 replicateI n (Stream next0 s0 len) =
690 Stream next (RI s0 0) (fromIntegral (max 0 n) * len)
691 where
692 next (RI s k)
693 | k >= n = Done
694 | otherwise = case next0 s of
695 Done -> Skip (RI s0 (k+1))
696 Skip s' -> Skip (RI s' k)
697 Yield x s' -> Yield x (RI s' k)
698 {-# INLINE [0] replicateI #-}
699
700 -- | /O(n)/, where @n@ is the length of the result. The unfoldr function
701 -- is analogous to the List 'unfoldr'. unfoldr builds a stream
702 -- from a seed value. The function takes the element and returns
703 -- Nothing if it is done producing the stream or returns Just
704 -- (a,b), in which case, a is the next Char in the string, and b is
705 -- the seed value for further production.
706 unfoldr :: (a -> Maybe (Char,a)) -> a -> Stream Char
707 unfoldr f s0 = Stream next s0 unknownSize
708 where
709 {-# INLINE next #-}
710 next !s = case f s of
711 Nothing -> Done
712 Just (w, s') -> Yield w s'
713 {-# INLINE [0] unfoldr #-}
714
715 -- | /O(n)/ Like 'unfoldr', 'unfoldrNI' builds a stream from a seed
716 -- value. However, the length of the result is limited by the
717 -- first argument to 'unfoldrNI'. This function is more efficient than
718 -- 'unfoldr' when the length of the result is known.
719 unfoldrNI :: Integral a => a -> (b -> Maybe (Char,b)) -> b -> Stream Char
720 unfoldrNI n f s0 | n < 0 = empty
721 | otherwise = Stream next (0 :*: s0) (maxSize $ fromIntegral (n*2))
722 where
723 {-# INLINE next #-}
724 next (z :*: s) = case f s of
725 Nothing -> Done
726 Just (w, s') | z >= n -> Done
727 | otherwise -> Yield w ((z + 1) :*: s')
728 {-# INLINE unfoldrNI #-}
729
730 -------------------------------------------------------------------------------
731 -- * Substreams
732
733 -- | /O(n)/ @'take' n@, applied to a stream, returns the prefix of the
734 -- stream of length @n@, or the stream itself if @n@ is greater than the
735 -- length of the stream.
736 take :: Integral a => a -> Stream Char -> Stream Char
737 take n0 (Stream next0 s0 len) =
738 Stream next (n0' :*: s0) (smaller len (codePointsSize $ fromIntegral n0'))
739 where
740 n0' = max n0 0
741
742 {-# INLINE next #-}
743 next (n :*: s) | n <= 0 = Done
744 | otherwise = case next0 s of
745 Done -> Done
746 Skip s' -> Skip (n :*: s')
747 Yield x s' -> Yield x ((n-1) :*: s')
748 {-# INLINE [0] take #-}
749
750 data Drop a s = NS !s
751 | JS !a !s
752
753 -- | /O(n)/ @'drop' n@, applied to a stream, returns the suffix of the
754 -- stream after the first @n@ characters, or the empty stream if @n@
755 -- is greater than the length of the stream.
756 drop :: Integral a => a -> Stream Char -> Stream Char
757 drop n0 (Stream next0 s0 len) =
758 Stream next (JS n0' s0) (len - codePointsSize (fromIntegral n0'))
759 where
760 n0' = max n0 0
761
762 {-# INLINE next #-}
763 next (JS n s)
764 | n <= 0 = Skip (NS s)
765 | otherwise = case next0 s of
766 Done -> Done
767 Skip s' -> Skip (JS n s')
768 Yield _ s' -> Skip (JS (n-1) s')
769 next (NS s) = case next0 s of
770 Done -> Done
771 Skip s' -> Skip (NS s')
772 Yield x s' -> Yield x (NS s')
773 {-# INLINE [0] drop #-}
774
775 -- | 'takeWhile', applied to a predicate @p@ and a stream, returns the
776 -- longest prefix (possibly empty) of elements that satisfy @p@.
777 takeWhile :: (Char -> Bool) -> Stream Char -> Stream Char
778 takeWhile p (Stream next0 s0 len) = Stream next s0 (len - unknownSize)
779 where
780 {-# INLINE next #-}
781 next !s = case next0 s of
782 Done -> Done
783 Skip s' -> Skip s'
784 Yield x s' | p x -> Yield x s'
785 | otherwise -> Done
786 {-# INLINE [0] takeWhile #-}
787
788 -- | @'dropWhile' p xs@ returns the suffix remaining after @'takeWhile' p xs@.
789 dropWhile :: (Char -> Bool) -> Stream Char -> Stream Char
790 dropWhile p (Stream next0 s0 len) = Stream next (L s0) (len - unknownSize)
791 where
792 {-# INLINE next #-}
793 next (L s) = case next0 s of
794 Done -> Done
795 Skip s' -> Skip (L s')
796 Yield x s' | p x -> Skip (L s')
797 | otherwise -> Yield x (R s')
798 next (R s) = case next0 s of
799 Done -> Done
800 Skip s' -> Skip (R s')
801 Yield x s' -> Yield x (R s')
802 {-# INLINE [0] dropWhile #-}
803
804 -- | /O(n)/ The 'isPrefixOf' function takes two 'Stream's and returns
805 -- 'True' iff the first is a prefix of the second.
806 isPrefixOf :: (Eq a) => Stream a -> Stream a -> Bool
807 isPrefixOf (Stream next1 s1 _) (Stream next2 s2 _) = loop (next1 s1) (next2 s2)
808 where
809 loop Done _ = True
810 loop _ Done = False
811 loop (Skip s1') (Skip s2') = loop (next1 s1') (next2 s2')
812 loop (Skip s1') x2 = loop (next1 s1') x2
813 loop x1 (Skip s2') = loop x1 (next2 s2')
814 loop (Yield x1 s1') (Yield x2 s2') = x1 == x2 &&
815 loop (next1 s1') (next2 s2')
816 {-# INLINE [0] isPrefixOf #-}
817
818 -- ----------------------------------------------------------------------------
819 -- * Searching
820
821 -------------------------------------------------------------------------------
822 -- ** Searching by equality
823
824 -- | /O(n)/ 'elem' is the stream membership predicate.
825 elem :: Char -> Stream Char -> Bool
826 elem w (Stream next s0 _len) = loop_elem s0
827 where
828 loop_elem !s = case next s of
829 Done -> False
830 Skip s' -> loop_elem s'
831 Yield x s' | x == w -> True
832 | otherwise -> loop_elem s'
833 {-# INLINE [0] elem #-}
834
835 -------------------------------------------------------------------------------
836 -- ** Searching with a predicate
837
838 -- | /O(n)/ The 'findBy' function takes a predicate and a stream,
839 -- and returns the first element in matching the predicate, or 'Nothing'
840 -- if there is no such element.
841
842 findBy :: (Char -> Bool) -> Stream Char -> Maybe Char
843 findBy p (Stream next s0 _len) = loop_find s0
844 where
845 loop_find !s = case next s of
846 Done -> Nothing
847 Skip s' -> loop_find s'
848 Yield x s' | p x -> Just x
849 | otherwise -> loop_find s'
850 {-# INLINE [0] findBy #-}
851
852 -- | /O(n)/ Stream index (subscript) operator, starting from 0.
853 indexI :: Integral a => Stream Char -> a -> Char
854 indexI (Stream next s0 _len) n0
855 | n0 < 0 = streamError "index" "Negative index"
856 | otherwise = loop_index n0 s0
857 where
858 loop_index !n !s = case next s of
859 Done -> streamError "index" "Index too large"
860 Skip s' -> loop_index n s'
861 Yield x s' | n == 0 -> x
862 | otherwise -> loop_index (n-1) s'
863 {-# INLINE [0] indexI #-}
864
865 -- | /O(n)/ 'filter', applied to a predicate and a stream,
866 -- returns a stream containing those characters that satisfy the
867 -- predicate.
868 filter :: (Char -> Bool) -> Stream Char -> Stream Char
869 filter p (Stream next0 s0 len) =
870 Stream next s0 (len - unknownSize) -- HINT maybe too high
871 where
872 next !s = case next0 s of
873 Done -> Done
874 Skip s' -> Skip s'
875 Yield x s' | p x -> Yield x s'
876 | otherwise -> Skip s'
877 {-# INLINE [0] filter #-}
878
879 {-# RULES
880 "STREAM filter/filter fusion" forall p q s.
881 filter p (filter q s) = filter (\x -> q x && p x) s
882 #-}
883
884 -- | The 'findIndexI' function takes a predicate and a stream and
885 -- returns the index of the first element in the stream satisfying the
886 -- predicate.
887 findIndexI :: Integral a => (Char -> Bool) -> Stream Char -> Maybe a
888 findIndexI p s = case findIndicesI p s of
889 (i:_) -> Just i
890 _ -> Nothing
891 {-# INLINE [0] findIndexI #-}
892
893 -- | The 'findIndicesI' function takes a predicate and a stream and
894 -- returns all indices of the elements in the stream satisfying the
895 -- predicate.
896 findIndicesI :: Integral a => (Char -> Bool) -> Stream Char -> [a]
897 findIndicesI p (Stream next s0 _len) = loop_findIndex 0 s0
898 where
899 loop_findIndex !i !s = case next s of
900 Done -> []
901 Skip s' -> loop_findIndex i s' -- hmm. not caught by QC
902 Yield x s' | p x -> i : loop_findIndex (i+1) s'
903 | otherwise -> loop_findIndex (i+1) s'
904 {-# INLINE [0] findIndicesI #-}
905
906 -------------------------------------------------------------------------------
907 -- * Zipping
908
909 -- | Strict triple.
910 data Zip a b m = Z1 !a !b
911 | Z2 !a !b !m
912
913 -- | zipWith generalises 'zip' by zipping with the function given as
914 -- the first argument, instead of a tupling function.
915 zipWith :: (a -> a -> b) -> Stream a -> Stream a -> Stream b
916 zipWith f (Stream next0 sa0 len1) (Stream next1 sb0 len2) =
917 Stream next (Z1 sa0 sb0) (smaller len1 len2)
918 where
919 next (Z1 sa sb) = case next0 sa of
920 Done -> Done
921 Skip sa' -> Skip (Z1 sa' sb)
922 Yield a sa' -> Skip (Z2 sa' sb a)
923
924 next (Z2 sa' sb a) = case next1 sb of
925 Done -> Done
926 Skip sb' -> Skip (Z2 sa' sb' a)
927 Yield b sb' -> Yield (f a b) (Z1 sa' sb')
928 {-# INLINE [0] zipWith #-}
929
930 -- | /O(n)/ The 'countCharI' function returns the number of times the
931 -- query element appears in the given stream.
932 countCharI :: Integral a => Char -> Stream Char -> a
933 countCharI a (Stream next s0 _len) = loop 0 s0
934 where
935 loop !i !s = case next s of
936 Done -> i
937 Skip s' -> loop i s'
938 Yield x s' | a == x -> loop (i+1) s'
939 | otherwise -> loop i s'
940 {-# INLINE [0] countCharI #-}
941
942 streamError :: String -> String -> a
943 streamError func msg = P.error $ "Data.Text.Internal.Fusion.Common." ++ func ++ ": " ++ msg
944
945 emptyError :: String -> a
946 emptyError func = internalError func "Empty input"
947
948 internalError :: String -> a
949 internalError func = streamError func "Internal error"