1 {-# LANGUAGE Trustworthy #-}

2 {-# LANGUAGE CPP, NoImplicitPrelude, ScopedTypeVariables,

3 MagicHash, BangPatterns #-}

5 -----------------------------------------------------------------------------

6 -- |

7 -- Module : Data.List

8 -- Copyright : (c) The University of Glasgow 2001

9 -- License : BSD-style (see the file libraries/base/LICENSE)

10 --

11 -- Maintainer : libraries@haskell.org

12 -- Stability : stable

13 -- Portability : portable

14 --

15 -- Operations on lists.

16 --

17 -----------------------------------------------------------------------------

20 (

21 -- * Basic functions

23 (++)

28 , uncons

32 -- * List transformations

37 , intercalate

40 , subsequences

41 , permutations

43 -- * Reducing lists (folds)

52 -- ** Special folds

65 -- * Building lists

67 -- ** Scans

74 -- ** Accumulating maps

78 -- ** Infinite lists

84 -- ** Unfolding

87 -- * Sublists

89 -- ** Extracting sublists

96 , dropWhileEnd

97 , span

100 , stripPrefix

107 -- ** Predicates

110 , isInfixOf

112 -- * Searching lists

114 -- ** Searching by equality

119 -- ** Searching with a predicate

124 -- * Indexing lists

125 -- | These functions treat a list @xs@ as an indexed collection,

126 -- with indices ranging from 0 to @'length' xs - 1@.

128 , (!!)

136 -- * Zipping and unzipping lists

150 -- * Special lists

152 -- ** Functions on strings

158 -- ** \"Set\" operations

168 -- ** Ordered lists

170 , sortOn

173 -- * Generalized functions

175 -- ** The \"@By@\" operations

176 -- | By convention, overloaded functions have a non-overloaded

177 -- counterpart whose name is suffixed with \`@By@\'.

178 --

179 -- It is often convenient to use these functions together with

180 -- 'Data.Function.on', for instance @'sortBy' ('compare'

181 -- \`on\` 'fst')@.

183 -- *** User-supplied equality (replacing an @Eq@ context)

184 -- | The predicate is assumed to define an equivalence.

187 , deleteFirstsBy

192 -- *** User-supplied comparison (replacing an @Ord@ context)

193 -- | The function is assumed to define a total ordering.

199 -- ** The \"@generic@\" operations

200 -- | The prefix \`@generic@\' indicates an overloaded function that

201 -- is a generalized version of a "Prelude" function.

223 infix 5 \\ -- comment to fool cpp: https://www.haskell.org/ghc/docs/latest/html/users_guide/options-phases.html#cpp-string-gaps

225 -- -----------------------------------------------------------------------------

226 -- List functions

228 -- | The 'dropWhileEnd' function drops the largest suffix of a list

229 -- in which the given predicate holds for all elements. For example:

230 --

231 -- > dropWhileEnd isSpace "foo\n" == "foo"

232 -- > dropWhileEnd isSpace "foo bar" == "foo bar"

233 -- > dropWhileEnd isSpace ("foo\n" ++ undefined) == "foo" ++ undefined

234 --

235 -- @since 4.5.0.0

239 -- | The 'stripPrefix' function drops the given prefix from a list.

240 -- It returns 'Nothing' if the list did not start with the prefix

241 -- given, or 'Just' the list after the prefix, if it does.

242 --

243 -- > stripPrefix "foo" "foobar" == Just "bar"

244 -- > stripPrefix "foo" "foo" == Just ""

245 -- > stripPrefix "foo" "barfoo" == Nothing

246 -- > stripPrefix "foo" "barfoobaz" == Nothing

251 stripPrefix _ _ = Nothing

253 -- | The 'elemIndex' function returns the index of the first element

254 -- in the given list which is equal (by '==') to the query element,

255 -- or 'Nothing' if there is no such element.

259 -- | The 'elemIndices' function extends 'elemIndex', by returning the

260 -- indices of all elements equal to the query element, in ascending order.

264 -- | The 'find' function takes a predicate and a list and returns the

265 -- first element in the list matching the predicate, or 'Nothing' if

266 -- there is no such element.

270 -- | The 'findIndex' function takes a predicate and a list and returns

271 -- the index of the first element in the list satisfying the predicate,

272 -- or 'Nothing' if there is no such element.

276 -- | The 'findIndices' function extends 'findIndex', by returning the

277 -- indices of all elements satisfying the predicate, in ascending order.

282 -- Efficient definition, adapted from Data.Sequence

283 {-# INLINE findIndices #-}

290 -- | The 'isPrefixOf' function takes two lists and returns 'True'

291 -- iff the first list is a prefix of the second.

297 -- | The 'isSuffixOf' function takes two lists and returns 'True' iff

298 -- the first list is a suffix of the second. The second list must be

299 -- finite.

302 delta <- dropLengthMaybe ns hs

304 -- Since dropLengthMaybe ns hs succeeded, we know that (if hs is finite)

305 -- length ns + length delta = length hs

306 -- so dropping the length of delta from hs will yield a suffix exactly

307 -- the length of ns.

309 -- A version of drop that drops the length of the first argument from the

310 -- second argument. If xs is longer than ys, xs will not be traversed in its

311 -- entirety. dropLength is also generally faster than (drop . length)

312 -- Both this and dropLengthMaybe could be written as folds over their first

313 -- arguments, but this reduces clarity with no benefit to isSuffixOf.

316 dropLength _ [] = []

319 -- A version of dropLength that returns Nothing if the second list runs out of

320 -- elements before the first.

323 dropLengthMaybe _ [] = Nothing

326 -- | The 'isInfixOf' function takes two lists and returns 'True'

327 -- iff the first list is contained, wholly and intact,

328 -- anywhere within the second.

329 --

330 -- Example:

331 --

332 -- > isInfixOf "Haskell" "I really like Haskell." == True

333 -- > isInfixOf "Ial" "I really like Haskell." == False

337 -- | /O(n^2)/. The 'nub' function removes duplicate elements from a list.

338 -- In particular, it keeps only the first occurrence of each element.

339 -- (The name 'nub' means \`essence\'.)

340 -- It is a special case of 'nubBy', which allows the programmer to supply

341 -- their own equality test.

345 -- | The 'nubBy' function behaves just like 'nub', except it uses a

346 -- user-supplied equality predicate instead of the overloaded '=='

347 -- function.

353 -- stolen from HBC

355 where

361 -- Not exported:

362 -- Note that we keep the call to `eq` with arguments in the

363 -- same order as in the reference (prelude) implementation,

364 -- and that this order is different from how `elem` calls (==).

365 -- See #2528, #3280 and #7913.

366 -- 'xs' is the list of things we've seen so far,

367 -- 'y' is the potential new element

371 #endif

374 -- | 'delete' @x@ removes the first occurrence of @x@ from its list argument.

375 -- For example,

376 --

377 -- > delete 'a' "banana" == "bnana"

378 --

379 -- It is a special case of 'deleteBy', which allows the programmer to

380 -- supply their own equality test.

385 -- | The 'deleteBy' function behaves like 'delete', but takes a

386 -- user-supplied equality predicate.

391 -- | The '\\' function is list difference (non-associative).

392 -- In the result of @xs@ '\\' @ys@, the first occurrence of each element of

393 -- @ys@ in turn (if any) has been removed from @xs@. Thus

394 --

395 -- > (xs ++ ys) \\ xs == ys.

396 --

397 -- It is a special case of 'deleteFirstsBy', which allows the programmer

398 -- to supply their own equality test.

403 -- | The 'union' function returns the list union of the two lists.

404 -- For example,

405 --

406 -- > "dog" `union` "cow" == "dogcw"

407 --

408 -- Duplicates, and elements of the first list, are removed from the

409 -- second list, but if the first list contains duplicates, so will

410 -- the result.

411 -- It is a special case of 'unionBy', which allows the programmer to supply

412 -- their own equality test.

417 -- | The 'unionBy' function is the non-overloaded version of 'union'.

421 -- | The 'intersect' function takes the list intersection of two lists.

422 -- For example,

423 --

424 -- > [1,2,3,4] `intersect` [2,4,6,8] == [2,4]

425 --

426 -- If the first list contains duplicates, so will the result.

427 --

428 -- > [1,2,2,3,4] `intersect` [6,4,4,2] == [2,2,4]

429 --

430 -- It is a special case of 'intersectBy', which allows the programmer to

431 -- supply their own equality test. If the element is found in both the first

432 -- and the second list, the element from the first list will be used.

437 -- | The 'intersectBy' function is the non-overloaded version of 'intersect'.

443 -- | The 'intersperse' function takes an element and a list and

444 -- \`intersperses\' that element between the elements of the list.

445 -- For example,

446 --

447 -- > intersperse ',' "abcde" == "a,b,c,d,e"

454 -- Not exported:

455 -- We want to make every element in the 'intersperse'd list available

456 -- as soon as possible to avoid space leaks. Experiments suggested that

457 -- a separate top-level helper is more efficient than a local worker.

459 prependToAll _ [] = []

462 -- | 'intercalate' @xs xss@ is equivalent to @('concat' ('intersperse' xs xss))@.

463 -- It inserts the list @xs@ in between the lists in @xss@ and concatenates the

464 -- result.

468 -- | The 'transpose' function transposes the rows and columns of its argument.

469 -- For example,

470 --

471 -- > transpose [[1,2,3],[4,5,6]] == [[1,4],[2,5],[3,6]]

472 --

473 -- If some of the rows are shorter than the following rows, their elements are skipped:

474 --

475 -- > transpose [[10,11],[20],[],[30,31,32]] == [[10,20,30],[11,31],[32]]

483 -- | The 'partition' function takes a predicate and a list and returns

484 -- the pair of lists of elements which do and do not satisfy the

485 -- predicate, respectively; i.e.,

486 --

487 -- > partition p xs == (filter p xs, filter (not . p) xs)

490 {-# INLINE partition #-}

497 -- | The 'mapAccumL' function behaves like a combination of 'map' and

498 -- 'foldl'; it applies a function to each element of a list, passing

499 -- an accumulating parameter from left to right, and returning a final

500 -- value of this accumulator together with the new list.

502 -- and accumulator, returning new

503 -- accumulator and elt of result list

507 {-# NOINLINE [1] mapAccumL #-}

513 {-# RULES

514 "mapAccumL" [~1] forall f s xs . mapAccumL f s xs = foldr (mapAccumLF f) pairWithNil xs s

515 "mapAccumLList" [1] forall f s xs . foldr (mapAccumLF f) pairWithNil xs s = mapAccumL f s xs

516 #-}

519 {-# INLINE [0] pairWithNil #-}

523 {-# INLINE [0] mapAccumLF #-}

528 -- See Note [Left folds via right fold]

531 -- | The 'mapAccumR' function behaves like a combination of 'map' and

532 -- 'foldr'; it applies a function to each element of a list, passing

533 -- an accumulating parameter from right to left, and returning a final

534 -- value of this accumulator together with the new list.

536 -- and accumulator, returning new

537 -- accumulator and elt of result list

546 -- | The 'insert' function takes an element and a list and inserts the

547 -- element into the list at the first position where it is less

548 -- than or equal to the next element. In particular, if the list

549 -- is sorted before the call, the result will also be sorted.

550 -- It is a special case of 'insertBy', which allows the programmer to

551 -- supply their own comparison function.

555 -- | The non-overloaded version of 'insert'.

563 -- | The 'maximumBy' function takes a comparison function and a list

564 -- and returns the greatest element of the list by the comparison function.

565 -- The list must be finite and non-empty.

569 where

571 GT -> x

572 _ -> y

574 -- | The 'minimumBy' function takes a comparison function and a list

575 -- and returns the least element of the list by the comparison function.

576 -- The list must be finite and non-empty.

580 where

582 GT -> y

583 _ -> x

585 -- | The 'genericLength' function is an overloaded version of 'length'. In

586 -- particular, instead of returning an 'Int', it returns any type which is

587 -- an instance of 'Num'. It is, however, less efficient than 'length'.

589 {-# NOINLINE [1] genericLength #-}

593 {-# RULES

594 "genericLengthInt" genericLength = (strictGenericLength :: [a] -> Int);

595 "genericLengthInteger" genericLength = (strictGenericLength :: [a] -> Integer);

596 #-}

600 where

604 -- | The 'genericTake' function is an overloaded version of 'take', which

605 -- accepts any 'Integral' value as the number of elements to take.

611 -- | The 'genericDrop' function is an overloaded version of 'drop', which

612 -- accepts any 'Integral' value as the number of elements to drop.

619 -- | The 'genericSplitAt' function is an overloaded version of 'splitAt', which

620 -- accepts any 'Integral' value as the position at which to split.

627 -- | The 'genericIndex' function is an overloaded version of '!!', which

628 -- accepts any 'Integral' value as the index.

636 -- | The 'genericReplicate' function is an overloaded version of 'replicate',

637 -- which accepts any 'Integral' value as the number of repetitions to make.

641 -- | The 'zip4' function takes four lists and returns a list of

642 -- quadruples, analogous to 'zip'.

646 -- | The 'zip5' function takes five lists and returns a list of

647 -- five-tuples, analogous to 'zip'.

651 -- | The 'zip6' function takes six lists and returns a list of six-tuples,

652 -- analogous to 'zip'.

657 -- | The 'zip7' function takes seven lists and returns a list of

658 -- seven-tuples, analogous to 'zip'.

663 -- | The 'zipWith4' function takes a function which combines four

664 -- elements, as well as four lists and returns a list of their point-wise

665 -- combination, analogous to 'zipWith'.

671 -- | The 'zipWith5' function takes a function which combines five

672 -- elements, as well as five lists and returns a list of their point-wise

673 -- combination, analogous to 'zipWith'.

680 -- | The 'zipWith6' function takes a function which combines six

681 -- elements, as well as six lists and returns a list of their point-wise

682 -- combination, analogous to 'zipWith'.

689 -- | The 'zipWith7' function takes a function which combines seven

690 -- elements, as well as seven lists and returns a list of their point-wise

691 -- combination, analogous to 'zipWith'.

698 -- | The 'unzip4' function takes a list of quadruples and returns four

699 -- lists, analogous to 'unzip'.

703 ([],[],[],[])

705 -- | The 'unzip5' function takes a list of five-tuples and returns five

706 -- lists, analogous to 'unzip'.

710 ([],[],[],[],[])

712 -- | The 'unzip6' function takes a list of six-tuples and returns six

713 -- lists, analogous to 'unzip'.

717 ([],[],[],[],[],[])

719 -- | The 'unzip7' function takes a list of seven-tuples and returns

720 -- seven lists, analogous to 'unzip'.

724 ([],[],[],[],[],[],[])

727 -- | The 'deleteFirstsBy' function takes a predicate and two lists and

728 -- returns the first list with the first occurrence of each element of

729 -- the second list removed.

733 -- | The 'group' function takes a list and returns a list of lists such

734 -- that the concatenation of the result is equal to the argument. Moreover,

735 -- each sublist in the result contains only equal elements. For example,

736 --

737 -- > group "Mississippi" == ["M","i","ss","i","ss","i","pp","i"]

738 --

739 -- It is a special case of 'groupBy', which allows the programmer to supply

740 -- their own equality test.

744 -- | The 'groupBy' function is the non-overloaded version of 'group'.

750 -- | The 'inits' function returns all initial segments of the argument,

751 -- shortest first. For example,

752 --

753 -- > inits "abc" == ["","a","ab","abc"]

754 --

755 -- Note that 'inits' has the following strictness property:

756 -- @inits (xs ++ _|_) = inits xs ++ _|_@

757 --

758 -- In particular,

759 -- @inits _|_ = [] : _|_@

762 {-# NOINLINE inits #-}

764 -- We do not allow inits to inline, because it plays havoc with Call Arity

765 -- if it fuses with a consumer, and it would generally lead to serious

766 -- loss of sharing if allowed to fuse with a producer.

768 -- | The 'tails' function returns all final segments of the argument,

769 -- longest first. For example,

770 --

771 -- > tails "abc" == ["abc", "bc", "c",""]

772 --

773 -- Note that 'tails' has the following strictness property:

774 -- @tails _|_ = _|_ : _|_@

776 {-# INLINABLE tails #-}

779 [] -> n

783 -- | The 'subsequences' function returns the list of all subsequences of the argument.

784 --

785 -- > subsequences "abc" == ["","a","b","ab","c","ac","bc","abc"]

787 subsequences xs = [] : nonEmptySubsequences xs

789 -- | The 'nonEmptySubsequences' function returns the list of all subsequences of the argument,

790 -- except for the empty list.

791 --

792 -- > nonEmptySubsequences "abc" == ["a","b","ab","c","ac","bc","abc"]

794 nonEmptySubsequences [] = []

799 -- | The 'permutations' function returns the list of all permutations of the argument.

800 --

801 -- > permutations "abc" == ["abc","bac","cba","bca","cab","acb"]

804 where

813 ------------------------------------------------------------------------------

814 -- Quick Sort algorithm taken from HBC's QSort library.

816 -- | The 'sort' function implements a stable sorting algorithm.

817 -- It is a special case of 'sortBy', which allows the programmer to supply

818 -- their own comparison function.

819 --

820 -- Elements are arranged from lowest to highest, keeping duplicates in

821 -- the order they appeared in the input.

824 -- | The 'sortBy' function is the non-overloaded version of 'sort'.

832 {-

833 GHC's mergesort replaced by a better implementation, 24/12/2009.

834 This code originally contributed to the nhc12 compiler by Thomas Nordin

835 in 2002. Rumoured to have been based on code by Lennart Augustsson, e.g.

836 http://www.mail-archive.com/haskell@haskell.org/msg01822.html

837 and possibly to bear similarities to a 1982 paper by Richard O'Keefe:

838 "A smooth applicative merge sort".

840 Benchmarks show it to be often 2x the speed of the previous implementation.

841 Fixes ticket http://ghc.haskell.org/trac/ghc/ticket/2143

842 -}

846 where

866 mergePairs xs = xs

874 {-

875 sortBy cmp l = mergesort cmp l

876 sort l = mergesort compare l

878 Quicksort replaced by mergesort, 14/5/2002.

880 From: Ian Lynagh <igloo@earth.li>

882 I am curious as to why the List.sort implementation in GHC is a

883 quicksort algorithm rather than an algorithm that guarantees n log n

884 time in the worst case? I have attached a mergesort implementation along

885 with a few scripts to time its performance, the results of which are

886 shown below (* means it didn't finish successfully - in all cases this

887 was due to a stack overflow).

889 If I heap profile the random_list case with only 10000 then I see

890 random_list peaks at using about 2.5M of memory, whereas in the same

891 program using List.sort it uses only 100k.

893 Input style Input length Sort data Sort alg User time

894 stdin 10000 random_list sort 2.82

895 stdin 10000 random_list mergesort 2.96

896 stdin 10000 sorted sort 31.37

897 stdin 10000 sorted mergesort 1.90

898 stdin 10000 revsorted sort 31.21

899 stdin 10000 revsorted mergesort 1.88

900 stdin 100000 random_list sort *

901 stdin 100000 random_list mergesort *

902 stdin 100000 sorted sort *

903 stdin 100000 sorted mergesort *

904 stdin 100000 revsorted sort *

905 stdin 100000 revsorted mergesort *

906 func 10000 random_list sort 0.31

907 func 10000 random_list mergesort 0.91

908 func 10000 sorted sort 19.09

909 func 10000 sorted mergesort 0.15

910 func 10000 revsorted sort 19.17

911 func 10000 revsorted mergesort 0.16

912 func 100000 random_list sort 3.85

913 func 100000 random_list mergesort *

914 func 100000 sorted sort 5831.47

915 func 100000 sorted mergesort 2.23

916 func 100000 revsorted sort 5872.34

917 func 100000 revsorted mergesort 2.24

919 mergesort :: (a -> a -> Ordering) -> [a] -> [a]

920 mergesort cmp = mergesort' cmp . map wrap

922 mergesort' :: (a -> a -> Ordering) -> [[a]] -> [a]

923 mergesort' _ [] = []

924 mergesort' _ [xs] = xs

925 mergesort' cmp xss = mergesort' cmp (merge_pairs cmp xss)

927 merge_pairs :: (a -> a -> Ordering) -> [[a]] -> [[a]]

928 merge_pairs _ [] = []

929 merge_pairs _ [xs] = [xs]

930 merge_pairs cmp (xs:ys:xss) = merge cmp xs ys : merge_pairs cmp xss

932 merge :: (a -> a -> Ordering) -> [a] -> [a] -> [a]

933 merge _ [] ys = ys

934 merge _ xs [] = xs

935 merge cmp (x:xs) (y:ys)

936 = case x `cmp` y of

937 GT -> y : merge cmp (x:xs) ys

938 _ -> x : merge cmp xs (y:ys)

940 wrap :: a -> [a]

941 wrap x = [x]

945 OLDER: qsort version

947 -- qsort is stable and does not concatenate.

948 qsort :: (a -> a -> Ordering) -> [a] -> [a] -> [a]

949 qsort _ [] r = r

950 qsort _ [x] r = x:r

951 qsort cmp (x:xs) r = qpart cmp x xs [] [] r

953 -- qpart partitions and sorts the sublists

954 qpart :: (a -> a -> Ordering) -> a -> [a] -> [a] -> [a] -> [a] -> [a]

955 qpart cmp x [] rlt rge r =

956 -- rlt and rge are in reverse order and must be sorted with an

957 -- anti-stable sorting

958 rqsort cmp rlt (x:rqsort cmp rge r)

959 qpart cmp x (y:ys) rlt rge r =

960 case cmp x y of

961 GT -> qpart cmp x ys (y:rlt) rge r

962 _ -> qpart cmp x ys rlt (y:rge) r

964 -- rqsort is as qsort but anti-stable, i.e. reverses equal elements

965 rqsort :: (a -> a -> Ordering) -> [a] -> [a] -> [a]

966 rqsort _ [] r = r

967 rqsort _ [x] r = x:r

968 rqsort cmp (x:xs) r = rqpart cmp x xs [] [] r

970 rqpart :: (a -> a -> Ordering) -> a -> [a] -> [a] -> [a] -> [a] -> [a]

971 rqpart cmp x [] rle rgt r =

972 qsort cmp rle (x:qsort cmp rgt r)

973 rqpart cmp x (y:ys) rle rgt r =

974 case cmp y x of

975 GT -> rqpart cmp x ys rle (y:rgt) r

976 _ -> rqpart cmp x ys (y:rle) rgt r

977 -}

981 -- | Sort a list by comparing the results of a key function applied to each

982 -- element. @sortOn f@ is equivalent to @sortBy (comparing f)@, but has the

983 -- performance advantage of only evaluating @f@ once for each element in the

984 -- input list. This is called the decorate-sort-undecorate paradigm, or

985 -- Schwartzian transform.

986 --

987 -- Elements are arranged from lowest to highest, keeping duplicates in

988 -- the order they appeared in the input.

989 --

990 -- @since 4.8.0.0

992 sortOn f =

995 -- | The 'unfoldr' function is a \`dual\' to 'foldr': while 'foldr'

996 -- reduces a list to a summary value, 'unfoldr' builds a list from

997 -- a seed value. The function takes the element and returns 'Nothing'

998 -- if it is done producing the list or returns 'Just' @(a,b)@, in which

999 -- case, @a@ is prepended to the list and @b@ is used as the next

1000 -- element in a recursive call. For example,

1001 --

1002 -- > iterate f == unfoldr (\x -> Just (x, f x))

1003 --

1004 -- In some cases, 'unfoldr' can undo a 'foldr' operation:

1005 --

1006 -- > unfoldr f' (foldr f z xs) == xs

1007 --

1008 -- if the following holds:

1009 --

1010 -- > f' (f x y) = Just (x,y)

1011 -- > f' z = Nothing

1012 --

1013 -- A simple use of unfoldr:

1014 --

1015 -- > unfoldr (\b -> if b == 0 then Nothing else Just (b, b-1)) 10

1016 -- > [10,9,8,7,6,5,4,3,2,1]

1017 --

1019 -- Note [INLINE unfoldr]

1020 -- We treat unfoldr a little differently from some other forms for list fusion

1021 -- for two reasons:

1022 --

1023 -- 1. We don't want to use a rule to rewrite a basic form to a fusible

1024 -- form because this would inline before constant floating. As Simon Peyton-

1025 -- Jones and others have pointed out, this could reduce sharing in some cases

1026 -- where sharing is beneficial. Thus we simply INLINE it, which is, for

1027 -- example, how enumFromTo::Int becomes eftInt. Unfortunately, we don't seem

1028 -- to get enough of an inlining discount to get a version of eftInt based on

1029 -- unfoldr to inline as readily as the usual one. We know that all the Maybe

1030 -- nonsense will go away, but the compiler does not.

1031 --

1032 -- 2. The benefit of inlining unfoldr is likely to be huge in many common cases,

1033 -- even apart from list fusion. In particular, inlining unfoldr often

1034 -- allows GHC to erase all the Maybes. This appears to be critical if unfoldr

1035 -- is to be used in high-performance code. A small increase in code size

1036 -- in the relatively rare cases when this does not happen looks like a very

1037 -- small price to pay.

1038 --

1039 -- Doing a back-and-forth dance doesn't seem to accomplish anything if the

1040 -- final form has to be inlined in any case.

1048 Nothing -> n

1051 -- -----------------------------------------------------------------------------

1052 -- Functions on strings

1054 -- | 'lines' breaks a string up into a list of strings at newline

1055 -- characters. The resulting strings do not contain newlines.

1056 --

1057 -- Note that after splitting the string at newline characters, the

1058 -- last part of the string is considered a line even if it doesn't end

1059 -- with a newline. For example,

1060 --

1061 -- > lines "" == []

1062 -- > lines "\n" == [""]

1063 -- > lines "one" == ["one"]

1064 -- > lines "one\n" == ["one"]

1065 -- > lines "one\n\n" == ["one",""]

1066 -- > lines "one\ntwo" == ["one","two"]

1067 -- > lines "one\ntwo\n" == ["one","two"]

1068 --

1069 -- Thus @'lines' s@ contains at least as many elements as newlines in @s@.

1072 -- Somehow GHC doesn't detect the selector thunks in the below code,

1073 -- so s' keeps a reference to the first line via the pair and we have

1074 -- a space leak (cf. #4334).

1075 -- So we need to make GHC see the selector thunks with a trick.

1078 [] -> []

1080 where

1083 -- | 'unlines' is an inverse operation to 'lines'.

1084 -- It joins lines, after appending a terminating newline to each.

1089 -- HBC version (stolen)

1090 -- here's a more efficient version

1093 #endif

1095 -- | 'words' breaks a string up into a list of words, which were delimited

1096 -- by white space.

1098 {-# NOINLINE [1] words #-}

1105 {-# RULES

1106 "words" [~1] forall s . words s = build (\c n -> wordsFB c n s)

1107 "wordsList" [1] wordsFB (:) [] = words

1108 #-}

1111 wordsFB c n = go

1112 where

1118 -- | 'unwords' is an inverse operation to 'words'.

1119 -- It joins words with separating spaces.

1125 -- Here's a lazier version that can get the last element of a

1126 -- _|_-terminated list.

1127 {-# NOINLINE [1] unwords #-}

1130 where

1134 -- In general, the foldr-based version is probably slightly worse

1135 -- than the HBC version, because it adds an extra space and then takes

1136 -- it back off again. But when it fuses, it reduces allocation. How much

1137 -- depends entirely on the average word length--it's most effective when

1138 -- the words are on the short side.

1139 {-# RULES

1140 "unwords" [~1] forall ws .

1141 unwords ws = tailUnwords (foldr unwordsFB "" ws)

1142 "unwordsList" [1] forall ws .

1143 tailUnwords (foldr unwordsFB "" ws) = unwords ws

1144 #-}

1146 {-# INLINE [0] tailUnwords #-}

1148 tailUnwords [] = []

1151 {-# INLINE [0] unwordsFB #-}

1154 #endif

1156 {- A "SnocBuilder" is a version of Chris Okasaki's banker's queue that supports

1157 toListSB instead of uncons. In single-threaded use, its performance

1158 characteristics are similar to John Hughes's functional difference lists, but

1159 likely somewhat worse. In heavily persistent settings, however, it does much

1160 better, because it takes advantage of sharing. The banker's queue guarantees

1161 (amortized) O(1) snoc and O(1) uncons, meaning that we can think of toListSB as

1162 an O(1) conversion to a list-like structure a constant factor slower than

1163 normal lists--we pay the O(n) cost incrementally as we consume the list. Using

1164 functional difference lists, on the other hand, we would have to pay the whole

1165 cost up front for each output list. -}

1167 {- We store a front list, a rear list, and the length of the queue. Because we

1168 only snoc onto the queue and never uncons, we know it's time to rotate when the

1169 length of the queue plus 1 is a power of 2. Note that we rely on the value of

1170 the length field only for performance. In the unlikely event of overflow, the

1171 performance will suffer but the semantics will remain correct. -}

1175 {- Smart constructor that rotates the builder when lp is one less than a power of

1176 2. Does not rotate very small builders because doing so is not worth the

1177 trouble. The lp < 255 test goes first because the power-of-2 test gives awful

1178 branch prediction for very small n (there are 5 powers of 2 between 1 and

1179 16). Putting the well-predicted lp < 255 test first avoids branching on the

1180 power-of-2 test until powers of 2 have become sufficiently rare to be predicted

1181 well. -}

1183 {-# INLINE sb #-}

1185 sb lp f r

1189 -- The empty builder

1191 emptySB :: SnocBuilder a

1194 -- Add an element to the end of a queue.

1199 -- Convert a builder to a list