1 {-# LANGUAGE Trustworthy #-}

2 {-# LANGUAGE CPP, NoImplicitPrelude, ScopedTypeVariables, MagicHash #-}

4 -----------------------------------------------------------------------------

5 -- |

6 -- Module : Data.List

7 -- Copyright : (c) The University of Glasgow 2001

8 -- License : BSD-style (see the file libraries/base/LICENSE)

9 --

10 -- Maintainer : libraries@haskell.org

11 -- Stability : stable

12 -- Portability : portable

13 --

14 -- Operations on lists.

15 --

16 -----------------------------------------------------------------------------

19 (

20 -- * Basic functions

22 (++)

27 , uncons

31 -- * List transformations

36 , intercalate

39 , subsequences

40 , permutations

42 -- * Reducing lists (folds)

51 -- ** Special folds

64 -- * Building lists

66 -- ** Scans

73 -- ** Accumulating maps

77 -- ** Infinite lists

83 -- ** Unfolding

86 -- * Sublists

88 -- ** Extracting sublists

95 , dropWhileEnd

96 , span

99 , stripPrefix

106 -- ** Predicates

109 , isInfixOf

111 -- * Searching lists

113 -- ** Searching by equality

118 -- ** Searching with a predicate

123 -- * Indexing lists

124 -- | These functions treat a list @xs@ as an indexed collection,

125 -- with indices ranging from 0 to @'length' xs - 1@.

127 , (!!)

135 -- * Zipping and unzipping lists

149 -- * Special lists

151 -- ** Functions on strings

157 -- ** \"Set\" operations

167 -- ** Ordered lists

169 , sortOn

172 -- * Generalized functions

174 -- ** The \"@By@\" operations

175 -- | By convention, overloaded functions have a non-overloaded

176 -- counterpart whose name is suffixed with \`@By@\'.

177 --

178 -- It is often convenient to use these functions together with

179 -- 'Data.Function.on', for instance @'sortBy' ('compare'

180 -- \`on\` 'fst')@.

182 -- *** User-supplied equality (replacing an @Eq@ context)

183 -- | The predicate is assumed to define an equivalence.

186 , deleteFirstsBy

191 -- *** User-supplied comparison (replacing an @Ord@ context)

192 -- | The function is assumed to define a total ordering.

198 -- ** The \"@generic@\" operations

199 -- | The prefix \`@generic@\' indicates an overloaded function that

200 -- is a generalized version of a "Prelude" function.

222 infix 5 \\ -- comment to fool cpp: https://www.haskell.org/ghc/docs/latest/html/users_guide/options-phases.html#cpp-string-gaps

224 -- -----------------------------------------------------------------------------

225 -- List functions

227 -- | The 'dropWhileEnd' function drops the largest suffix of a list

228 -- in which the given predicate holds for all elements. For example:

229 --

230 -- > dropWhileEnd isSpace "foo\n" == "foo"

231 -- > dropWhileEnd isSpace "foo bar" == "foo bar"

232 -- > dropWhileEnd isSpace ("foo\n" ++ undefined) == "foo" ++ undefined

233 --

234 -- @since 4.5.0.0

238 -- | The 'stripPrefix' function drops the given prefix from a list.

239 -- It returns 'Nothing' if the list did not start with the prefix

240 -- given, or 'Just' the list after the prefix, if it does.

241 --

242 -- > stripPrefix "foo" "foobar" == Just "bar"

243 -- > stripPrefix "foo" "foo" == Just ""

244 -- > stripPrefix "foo" "barfoo" == Nothing

245 -- > stripPrefix "foo" "barfoobaz" == Nothing

250 stripPrefix _ _ = Nothing

252 -- | The 'elemIndex' function returns the index of the first element

253 -- in the given list which is equal (by '==') to the query element,

254 -- or 'Nothing' if there is no such element.

258 -- | The 'elemIndices' function extends 'elemIndex', by returning the

259 -- indices of all elements equal to the query element, in ascending order.

263 -- | The 'find' function takes a predicate and a list and returns the

264 -- first element in the list matching the predicate, or 'Nothing' if

265 -- there is no such element.

269 -- | The 'findIndex' function takes a predicate and a list and returns

270 -- the index of the first element in the list satisfying the predicate,

271 -- or 'Nothing' if there is no such element.

275 -- | The 'findIndices' function extends 'findIndex', by returning the

276 -- indices of all elements satisfying the predicate, in ascending order.

278 #ifdef USE_REPORT_PRELUDE

281 -- Efficient definition, adapted from Data.Sequence

282 {-# INLINE findIndices #-}

289 -- | The 'isPrefixOf' function takes two lists and returns 'True'

290 -- iff the first list is a prefix of the second.

296 -- | The 'isSuffixOf' function takes two lists and returns 'True' iff

297 -- the first list is a suffix of the second. The second list must be

298 -- finite.

301 delta <- dropLengthMaybe ns hs

303 -- Since dropLengthMaybe ns hs succeeded, we know that (if hs is finite)

304 -- length ns + length delta = length hs

305 -- so dropping the length of delta from hs will yield a suffix exactly

306 -- the length of ns.

308 -- A version of drop that drops the length of the first argument from the

309 -- second argument. If xs is longer than ys, xs will not be traversed in its

310 -- entirety. dropLength is also generally faster than (drop . length)

311 -- Both this and dropLengthMaybe could be written as folds over their first

312 -- arguments, but this reduces clarity with no benefit to isSuffixOf.

315 dropLength _ [] = []

318 -- A version of dropLength that returns Nothing if the second list runs out of

319 -- elements before the first.

322 dropLengthMaybe _ [] = Nothing

325 -- | The 'isInfixOf' function takes two lists and returns 'True'

326 -- iff the first list is contained, wholly and intact,

327 -- anywhere within the second.

328 --

329 -- Example:

330 --

331 -- > isInfixOf "Haskell" "I really like Haskell." == True

332 -- > isInfixOf "Ial" "I really like Haskell." == False

336 -- | /O(n^2)/. The 'nub' function removes duplicate elements from a list.

337 -- In particular, it keeps only the first occurrence of each element.

338 -- (The name 'nub' means \`essence\'.)

339 -- It is a special case of 'nubBy', which allows the programmer to supply

340 -- their own equality test.

344 -- | The 'nubBy' function behaves just like 'nub', except it uses a

345 -- user-supplied equality predicate instead of the overloaded '=='

346 -- function.

348 #ifdef USE_REPORT_PRELUDE

352 -- stolen from HBC

354 where

360 -- Not exported:

361 -- Note that we keep the call to `eq` with arguments in the

362 -- same order as in the reference (prelude) implementation,

363 -- and that this order is different from how `elem` calls (==).

364 -- See #2528, #3280 and #7913.

365 -- 'xs' is the list of things we've seen so far,

366 -- 'y' is the potential new element

370 #endif

373 -- | 'delete' @x@ removes the first occurrence of @x@ from its list argument.

374 -- For example,

375 --

376 -- > delete 'a' "banana" == "bnana"

377 --

378 -- It is a special case of 'deleteBy', which allows the programmer to

379 -- supply their own equality test.

384 -- | The 'deleteBy' function behaves like 'delete', but takes a

385 -- user-supplied equality predicate.

390 -- | The '\\' function is list difference (non-associative).

391 -- In the result of @xs@ '\\' @ys@, the first occurrence of each element of

392 -- @ys@ in turn (if any) has been removed from @xs@. Thus

393 --

394 -- > (xs ++ ys) \\ xs == ys.

395 --

396 -- It is a special case of 'deleteFirstsBy', which allows the programmer

397 -- to supply their own equality test.

402 -- | The 'union' function returns the list union of the two lists.

403 -- For example,

404 --

405 -- > "dog" `union` "cow" == "dogcw"

406 --

407 -- Duplicates, and elements of the first list, are removed from the

408 -- second list, but if the first list contains duplicates, so will

409 -- the result.

410 -- It is a special case of 'unionBy', which allows the programmer to supply

411 -- their own equality test.

416 -- | The 'unionBy' function is the non-overloaded version of 'union'.

420 -- | The 'intersect' function takes the list intersection of two lists.

421 -- For example,

422 --

423 -- > [1,2,3,4] `intersect` [2,4,6,8] == [2,4]

424 --

425 -- If the first list contains duplicates, so will the result.

426 --

427 -- > [1,2,2,3,4] `intersect` [6,4,4,2] == [2,2,4]

428 --

429 -- It is a special case of 'intersectBy', which allows the programmer to

430 -- supply their own equality test. If the element is found in both the first

431 -- and the second list, the element from the first list will be used.

436 -- | The 'intersectBy' function is the non-overloaded version of 'intersect'.

442 -- | The 'intersperse' function takes an element and a list and

443 -- \`intersperses\' that element between the elements of the list.

444 -- For example,

445 --

446 -- > intersperse ',' "abcde" == "a,b,c,d,e"

453 -- Not exported:

454 -- We want to make every element in the 'intersperse'd list available

455 -- as soon as possible to avoid space leaks. Experiments suggested that

456 -- a separate top-level helper is more efficient than a local worker.

458 prependToAll _ [] = []

461 -- | 'intercalate' @xs xss@ is equivalent to @('concat' ('intersperse' xs xss))@.

462 -- It inserts the list @xs@ in between the lists in @xss@ and concatenates the

463 -- result.

467 -- | The 'transpose' function transposes the rows and columns of its argument.

468 -- For example,

469 --

470 -- > transpose [[1,2,3],[4,5,6]] == [[1,4],[2,5],[3,6]]

471 --

472 -- If some of the rows are shorter than the following rows, their elements are skipped:

473 --

474 -- > transpose [[10,11],[20],[],[30,31,32]] == [[10,20,30],[11,31],[32]]

482 -- | The 'partition' function takes a predicate and a list and returns

483 -- the pair of lists of elements which do and do not satisfy the

484 -- predicate, respectively; i.e.,

485 --

486 -- > partition p xs == (filter p xs, filter (not . p) xs)

489 {-# INLINE partition #-}

496 -- | The 'mapAccumL' function behaves like a combination of 'map' and

497 -- 'foldl'; it applies a function to each element of a list, passing

498 -- an accumulating parameter from left to right, and returning a final

499 -- value of this accumulator together with the new list.

501 -- and accumulator, returning new

502 -- accumulator and elt of result list

506 {-# NOINLINE [1] mapAccumL #-}

512 {-# RULES

513 "mapAccumL" [~1] forall f s xs . mapAccumL f s xs = foldr (mapAccumLF f) pairWithNil xs s

514 "mapAccumLList" [1] forall f s xs . foldr (mapAccumLF f) pairWithNil xs s = mapAccumL f s xs

515 #-}

518 {-# INLINE [0] pairWithNil #-}

522 {-# INLINE [0] mapAccumLF #-}

527 -- See Note [Left folds via right fold]

530 -- | The 'mapAccumR' function behaves like a combination of 'map' and

531 -- 'foldr'; it applies a function to each element of a list, passing

532 -- an accumulating parameter from right to left, and returning a final

533 -- value of this accumulator together with the new list.

535 -- and accumulator, returning new

536 -- accumulator and elt of result list

545 -- | The 'insert' function takes an element and a list and inserts the

546 -- element into the list at the first position where it is less

547 -- than or equal to the next element. In particular, if the list

548 -- is sorted before the call, the result will also be sorted.

549 -- It is a special case of 'insertBy', which allows the programmer to

550 -- supply their own comparison function.

554 -- | The non-overloaded version of 'insert'.

562 -- | The 'maximumBy' function takes a comparison function and a list

563 -- and returns the greatest element of the list by the comparison function.

564 -- The list must be finite and non-empty.

568 where

570 GT -> x

571 _ -> y

573 -- | The 'minimumBy' function takes a comparison function and a list

574 -- and returns the least element of the list by the comparison function.

575 -- The list must be finite and non-empty.

579 where

581 GT -> y

582 _ -> x

584 -- | The 'genericLength' function is an overloaded version of 'length'. In

585 -- particular, instead of returning an 'Int', it returns any type which is

586 -- an instance of 'Num'. It is, however, less efficient than 'length'.

588 {-# NOINLINE [1] genericLength #-}

592 {-# RULES

593 "genericLengthInt" genericLength = (strictGenericLength :: [a] -> Int);

594 "genericLengthInteger" genericLength = (strictGenericLength :: [a] -> Integer);

595 #-}

599 where

603 -- | The 'genericTake' function is an overloaded version of 'take', which

604 -- accepts any 'Integral' value as the number of elements to take.

610 -- | The 'genericDrop' function is an overloaded version of 'drop', which

611 -- accepts any 'Integral' value as the number of elements to drop.

618 -- | The 'genericSplitAt' function is an overloaded version of 'splitAt', which

619 -- accepts any 'Integral' value as the position at which to split.

626 -- | The 'genericIndex' function is an overloaded version of '!!', which

627 -- accepts any 'Integral' value as the index.

635 -- | The 'genericReplicate' function is an overloaded version of 'replicate',

636 -- which accepts any 'Integral' value as the number of repetitions to make.

640 -- | The 'zip4' function takes four lists and returns a list of

641 -- quadruples, analogous to 'zip'.

645 -- | The 'zip5' function takes five lists and returns a list of

646 -- five-tuples, analogous to 'zip'.

650 -- | The 'zip6' function takes six lists and returns a list of six-tuples,

651 -- analogous to 'zip'.

656 -- | The 'zip7' function takes seven lists and returns a list of

657 -- seven-tuples, analogous to 'zip'.

662 -- | The 'zipWith4' function takes a function which combines four

663 -- elements, as well as four lists and returns a list of their point-wise

664 -- combination, analogous to 'zipWith'.

670 -- | The 'zipWith5' function takes a function which combines five

671 -- elements, as well as five lists and returns a list of their point-wise

672 -- combination, analogous to 'zipWith'.

679 -- | The 'zipWith6' function takes a function which combines six

680 -- elements, as well as six lists and returns a list of their point-wise

681 -- combination, analogous to 'zipWith'.

688 -- | The 'zipWith7' function takes a function which combines seven

689 -- elements, as well as seven lists and returns a list of their point-wise

690 -- combination, analogous to 'zipWith'.

697 -- | The 'unzip4' function takes a list of quadruples and returns four

698 -- lists, analogous to 'unzip'.

702 ([],[],[],[])

704 -- | The 'unzip5' function takes a list of five-tuples and returns five

705 -- lists, analogous to 'unzip'.

709 ([],[],[],[],[])

711 -- | The 'unzip6' function takes a list of six-tuples and returns six

712 -- lists, analogous to 'unzip'.

716 ([],[],[],[],[],[])

718 -- | The 'unzip7' function takes a list of seven-tuples and returns

719 -- seven lists, analogous to 'unzip'.

723 ([],[],[],[],[],[],[])

726 -- | The 'deleteFirstsBy' function takes a predicate and two lists and

727 -- returns the first list with the first occurrence of each element of

728 -- the second list removed.

732 -- | The 'group' function takes a list and returns a list of lists such

733 -- that the concatenation of the result is equal to the argument. Moreover,

734 -- each sublist in the result contains only equal elements. For example,

735 --

736 -- > group "Mississippi" == ["M","i","ss","i","ss","i","pp","i"]

737 --

738 -- It is a special case of 'groupBy', which allows the programmer to supply

739 -- their own equality test.

743 -- | The 'groupBy' function is the non-overloaded version of 'group'.

749 -- | The 'inits' function returns all initial segments of the argument,

750 -- shortest first. For example,

751 --

752 -- > inits "abc" == ["","a","ab","abc"]

753 --

754 -- Note that 'inits' has the following strictness property:

755 -- @inits (xs ++ _|_) = inits xs ++ _|_@

756 --

757 -- In particular,

758 -- @inits _|_ = [] : _|_@

761 {-# NOINLINE inits #-}

763 -- We do not allow inits to inline, because it plays havoc with Call Arity

764 -- if it fuses with a consumer, and it would generally lead to serious

765 -- loss of sharing if allowed to fuse with a producer.

767 -- | The 'tails' function returns all final segments of the argument,

768 -- longest first. For example,

769 --

770 -- > tails "abc" == ["abc", "bc", "c",""]

771 --

772 -- Note that 'tails' has the following strictness property:

773 -- @tails _|_ = _|_ : _|_@

775 {-# INLINABLE tails #-}

778 [] -> n

782 -- | The 'subsequences' function returns the list of all subsequences of the argument.

783 --

784 -- > subsequences "abc" == ["","a","b","ab","c","ac","bc","abc"]

786 subsequences xs = [] : nonEmptySubsequences xs

788 -- | The 'nonEmptySubsequences' function returns the list of all subsequences of the argument,

789 -- except for the empty list.

790 --

791 -- > nonEmptySubsequences "abc" == ["a","b","ab","c","ac","bc","abc"]

793 nonEmptySubsequences [] = []

798 -- | The 'permutations' function returns the list of all permutations of the argument.

799 --

800 -- > permutations "abc" == ["abc","bac","cba","bca","cab","acb"]

803 where

812 ------------------------------------------------------------------------------

813 -- Quick Sort algorithm taken from HBC's QSort library.

815 -- | The 'sort' function implements a stable sorting algorithm.

816 -- It is a special case of 'sortBy', which allows the programmer to supply

817 -- their own comparison function.

820 -- | The 'sortBy' function is the non-overloaded version of 'sort'.

823 #ifdef USE_REPORT_PRELUDE

828 {-

829 GHC's mergesort replaced by a better implementation, 24/12/2009.

830 This code originally contributed to the nhc12 compiler by Thomas Nordin

831 in 2002. Rumoured to have been based on code by Lennart Augustsson, e.g.

832 http://www.mail-archive.com/haskell@haskell.org/msg01822.html

833 and possibly to bear similarities to a 1982 paper by Richard O'Keefe:

834 "A smooth applicative merge sort".

836 Benchmarks show it to be often 2x the speed of the previous implementation.

837 Fixes ticket http://ghc.haskell.org/trac/ghc/ticket/2143

838 -}

842 where

860 mergePairs xs = xs

868 {-

869 sortBy cmp l = mergesort cmp l

870 sort l = mergesort compare l

872 Quicksort replaced by mergesort, 14/5/2002.

874 From: Ian Lynagh <igloo@earth.li>

876 I am curious as to why the List.sort implementation in GHC is a

877 quicksort algorithm rather than an algorithm that guarantees n log n

878 time in the worst case? I have attached a mergesort implementation along

879 with a few scripts to time its performance, the results of which are

880 shown below (* means it didn't finish successfully - in all cases this

881 was due to a stack overflow).

883 If I heap profile the random_list case with only 10000 then I see

884 random_list peaks at using about 2.5M of memory, whereas in the same

885 program using List.sort it uses only 100k.

887 Input style Input length Sort data Sort alg User time

888 stdin 10000 random_list sort 2.82

889 stdin 10000 random_list mergesort 2.96

890 stdin 10000 sorted sort 31.37

891 stdin 10000 sorted mergesort 1.90

892 stdin 10000 revsorted sort 31.21

893 stdin 10000 revsorted mergesort 1.88

894 stdin 100000 random_list sort *

895 stdin 100000 random_list mergesort *

896 stdin 100000 sorted sort *

897 stdin 100000 sorted mergesort *

898 stdin 100000 revsorted sort *

899 stdin 100000 revsorted mergesort *

900 func 10000 random_list sort 0.31

901 func 10000 random_list mergesort 0.91

902 func 10000 sorted sort 19.09

903 func 10000 sorted mergesort 0.15

904 func 10000 revsorted sort 19.17

905 func 10000 revsorted mergesort 0.16

906 func 100000 random_list sort 3.85

907 func 100000 random_list mergesort *

908 func 100000 sorted sort 5831.47

909 func 100000 sorted mergesort 2.23

910 func 100000 revsorted sort 5872.34

911 func 100000 revsorted mergesort 2.24

913 mergesort :: (a -> a -> Ordering) -> [a] -> [a]

914 mergesort cmp = mergesort' cmp . map wrap

916 mergesort' :: (a -> a -> Ordering) -> [[a]] -> [a]

917 mergesort' _ [] = []

918 mergesort' _ [xs] = xs

919 mergesort' cmp xss = mergesort' cmp (merge_pairs cmp xss)

921 merge_pairs :: (a -> a -> Ordering) -> [[a]] -> [[a]]

922 merge_pairs _ [] = []

923 merge_pairs _ [xs] = [xs]

924 merge_pairs cmp (xs:ys:xss) = merge cmp xs ys : merge_pairs cmp xss

926 merge :: (a -> a -> Ordering) -> [a] -> [a] -> [a]

927 merge _ [] ys = ys

928 merge _ xs [] = xs

929 merge cmp (x:xs) (y:ys)

930 = case x `cmp` y of

931 GT -> y : merge cmp (x:xs) ys

932 _ -> x : merge cmp xs (y:ys)

934 wrap :: a -> [a]

935 wrap x = [x]

939 OLDER: qsort version

941 -- qsort is stable and does not concatenate.

942 qsort :: (a -> a -> Ordering) -> [a] -> [a] -> [a]

943 qsort _ [] r = r

944 qsort _ [x] r = x:r

945 qsort cmp (x:xs) r = qpart cmp x xs [] [] r

947 -- qpart partitions and sorts the sublists

948 qpart :: (a -> a -> Ordering) -> a -> [a] -> [a] -> [a] -> [a] -> [a]

949 qpart cmp x [] rlt rge r =

950 -- rlt and rge are in reverse order and must be sorted with an

951 -- anti-stable sorting

952 rqsort cmp rlt (x:rqsort cmp rge r)

953 qpart cmp x (y:ys) rlt rge r =

954 case cmp x y of

955 GT -> qpart cmp x ys (y:rlt) rge r

956 _ -> qpart cmp x ys rlt (y:rge) r

958 -- rqsort is as qsort but anti-stable, i.e. reverses equal elements

959 rqsort :: (a -> a -> Ordering) -> [a] -> [a] -> [a]

960 rqsort _ [] r = r

961 rqsort _ [x] r = x:r

962 rqsort cmp (x:xs) r = rqpart cmp x xs [] [] r

964 rqpart :: (a -> a -> Ordering) -> a -> [a] -> [a] -> [a] -> [a] -> [a]

965 rqpart cmp x [] rle rgt r =

966 qsort cmp rle (x:qsort cmp rgt r)

967 rqpart cmp x (y:ys) rle rgt r =

968 case cmp y x of

969 GT -> rqpart cmp x ys rle (y:rgt) r

970 _ -> rqpart cmp x ys (y:rle) rgt r

971 -}

975 -- | Sort a list by comparing the results of a key function applied to each

976 -- element. @sortOn f@ is equivalent to @sortBy (comparing f)@, but has the

977 -- performance advantage of only evaluating @f@ once for each element in the

978 -- input list. This is called the decorate-sort-undecorate paradigm, or

979 -- Schwartzian transform.

980 --

981 -- @since 4.8.0.0

983 sortOn f =

986 -- | The 'unfoldr' function is a \`dual\' to 'foldr': while 'foldr'

987 -- reduces a list to a summary value, 'unfoldr' builds a list from

988 -- a seed value. The function takes the element and returns 'Nothing'

989 -- if it is done producing the list or returns 'Just' @(a,b)@, in which

990 -- case, @a@ is prepended to the list and @b@ is used as the next

991 -- element in a recursive call. For example,

992 --

993 -- > iterate f == unfoldr (\x -> Just (x, f x))

994 --

995 -- In some cases, 'unfoldr' can undo a 'foldr' operation:

996 --

997 -- > unfoldr f' (foldr f z xs) == xs

998 --

999 -- if the following holds:

1000 --

1001 -- > f' (f x y) = Just (x,y)

1002 -- > f' z = Nothing

1003 --

1004 -- A simple use of unfoldr:

1005 --

1006 -- > unfoldr (\b -> if b == 0 then Nothing else Just (b, b-1)) 10

1007 -- > [10,9,8,7,6,5,4,3,2,1]

1008 --

1010 -- Note [INLINE unfoldr]

1011 -- We treat unfoldr a little differently from some other forms for list fusion

1012 -- for two reasons:

1013 --

1014 -- 1. We don't want to use a rule to rewrite a basic form to a fusible

1015 -- form because this would inline before constant floating. As Simon Peyton-

1016 -- Jones and others have pointed out, this could reduce sharing in some cases

1017 -- where sharing is beneficial. Thus we simply INLINE it, which is, for

1018 -- example, how enumFromTo::Int becomes eftInt. Unfortunately, we don't seem

1019 -- to get enough of an inlining discount to get a version of eftInt based on

1020 -- unfoldr to inline as readily as the usual one. We know that all the Maybe

1021 -- nonsense will go away, but the compiler does not.

1022 --

1023 -- 2. The benefit of inlining unfoldr is likely to be huge in many common cases,

1024 -- even apart from list fusion. In particular, inlining unfoldr often

1025 -- allows GHC to erase all the Maybes. This appears to be critical if unfoldr

1026 -- is to be used in high-performance code. A small increase in code size

1027 -- in the relatively rare cases when this does not happen looks like a very

1028 -- small price to pay.

1029 --

1030 -- Doing a back-and-forth dance doesn't seem to accomplish anything if the

1031 -- final form has to be inlined in any case.

1039 Nothing -> n

1042 -- -----------------------------------------------------------------------------

1043 -- Functions on strings

1045 -- | 'lines' breaks a string up into a list of strings at newline

1046 -- characters. The resulting strings do not contain newlines.

1049 -- Somehow GHC doesn't detect the selector thunks in the below code,

1050 -- so s' keeps a reference to the first line via the pair and we have

1051 -- a space leak (cf. #4334).

1052 -- So we need to make GHC see the selector thunks with a trick.

1055 [] -> []

1057 where

1060 -- | 'unlines' is an inverse operation to 'lines'.

1061 -- It joins lines, after appending a terminating newline to each.

1063 #ifdef USE_REPORT_PRELUDE

1066 -- HBC version (stolen)

1067 -- here's a more efficient version

1070 #endif

1072 -- | 'words' breaks a string up into a list of words, which were delimited

1073 -- by white space.

1075 {-# NOINLINE [1] words #-}

1082 {-# RULES

1083 "words" [~1] forall s . words s = build (\c n -> wordsFB c n s)

1084 "wordsList" [1] wordsFB (:) [] = words

1085 #-}

1087 {-# NOINLINE [0] wordsFB #-}

1088 wordsFB c n = go

1089 where

1095 -- | 'unwords' is an inverse operation to 'words'.

1096 -- It joins words with separating spaces.

1098 #ifdef USE_REPORT_PRELUDE

1102 -- Here's a lazier version that can get the last element of a

1103 -- _|_-terminated list.

1104 {-# NOINLINE [1] unwords #-}

1107 where

1111 -- In general, the foldr-based version is probably slightly worse

1112 -- than the HBC version, because it adds an extra space and then takes

1113 -- it back off again. But when it fuses, it reduces allocation. How much

1114 -- depends entirely on the average word length--it's most effective when

1115 -- the words are on the short side.

1116 {-# RULES

1117 "unwords" [~1] forall ws .

1118 unwords ws = tailUnwords (foldr unwordsFB "" ws)

1119 "unwordsList" [1] forall ws .

1120 tailUnwords (foldr unwordsFB "" ws) = unwords ws

1121 #-}

1123 {-# INLINE [0] tailUnwords #-}

1125 tailUnwords [] = []

1128 {-# INLINE [0] unwordsFB #-}

1131 #endif

1133 {- A "SnocBuilder" is a version of Chris Okasaki's banker's queue that supports

1134 toListSB instead of uncons. In single-threaded use, its performance

1135 characteristics are similar to John Hughes's functional difference lists, but

1136 likely somewhat worse. In heavily persistent settings, however, it does much

1137 better, because it takes advantage of sharing. The banker's queue guarantees

1138 (amortized) O(1) snoc and O(1) uncons, meaning that we can think of toListSB as

1139 an O(1) conversion to a list-like structure a constant factor slower than

1140 normal lists--we pay the O(n) cost incrementally as we consume the list. Using

1141 functional difference lists, on the other hand, we would have to pay the whole

1142 cost up front for each output list. -}

1144 {- We store a front list, a rear list, and the length of the queue. Because we

1145 only snoc onto the queue and never uncons, we know it's time to rotate when the

1146 length of the queue plus 1 is a power of 2. Note that we rely on the value of

1147 the length field only for performance. In the unlikely event of overflow, the

1148 performance will suffer but the semantics will remain correct. -}

1152 {- Smart constructor that rotates the builder when lp is one less than a power of

1153 2. Does not rotate very small builders because doing so is not worth the

1154 trouble. The lp < 255 test goes first because the power-of-2 test gives awful

1155 branch prediction for very small n (there are 5 powers of 2 between 1 and

1156 16). Putting the well-predicted lp < 255 test first avoids branching on the

1157 power-of-2 test until powers of 2 have become sufficiently rare to be predicted

1158 well. -}

1160 {-# INLINE sb #-}

1162 sb lp f r

1166 -- The empty builder

1168 emptySB :: SnocBuilder a

1171 -- Add an element to the end of a queue.

1176 -- Convert a builder to a list