[project @ 2004-09-08 11:10:08 by ross]
[ghc.git] / libraries / base / Data / PackedString.hs
1 -----------------------------------------------------------------------------
2 -- |
3 -- Module : Data.PackedString
4 -- Copyright : (c) The University of Glasgow 2001
5 -- License : BSD-style (see the file libraries/base/LICENSE)
6 --
7 -- Maintainer : libraries@haskell.org
8 -- Stability : experimental
9 -- Portability : portable
10 --
11 -- An efficient implementation of strings.
12 --
13 -----------------------------------------------------------------------------
14
15 -- Original GHC implementation by Bryan O\'Sullivan,
16 -- rewritten to use UArray by Simon Marlow.
17
18 module Data.PackedString (
19 -- * The @PackedString@ type
20 PackedString, -- abstract, instances: Eq, Ord, Show, Typeable
21
22 -- * Converting to and from @PackedString@s
23 packString, -- :: String -> PackedString
24 unpackPS, -- :: PackedString -> String
25
26 #ifndef __NHC__
27 -- * I\/O with @PackedString@s
28 hPutPS, -- :: Handle -> PackedString -> IO ()
29 hGetPS, -- :: Handle -> Int -> IO PackedString
30 #endif
31
32 -- * List-like manipulation functions
33 nilPS, -- :: PackedString
34 consPS, -- :: Char -> PackedString -> PackedString
35 headPS, -- :: PackedString -> Char
36 tailPS, -- :: PackedString -> PackedString
37 nullPS, -- :: PackedString -> Bool
38 appendPS, -- :: PackedString -> PackedString -> PackedString
39 lengthPS, -- :: PackedString -> Int
40 indexPS, -- :: PackedString -> Int -> Char
41 mapPS, -- :: (Char -> Char) -> PackedString -> PackedString
42 filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString
43 reversePS, -- :: PackedString -> PackedString
44 concatPS, -- :: [PackedString] -> PackedString
45 elemPS, -- :: Char -> PackedString -> Bool
46 substrPS, -- :: PackedString -> Int -> Int -> PackedString
47 takePS, -- :: Int -> PackedString -> PackedString
48 dropPS, -- :: Int -> PackedString -> PackedString
49 splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString)
50
51 foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a
52 foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a
53 takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
54 dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
55 spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
56 breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
57 linesPS, -- :: PackedString -> [PackedString]
58 unlinesPS, -- :: [PackedString] -> PackedString
59 wordsPS, -- :: PackedString -> [PackedString]
60 unwordsPS, -- :: [PackedString] -> PackedString
61 splitPS, -- :: Char -> PackedString -> [PackedString]
62 splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString]
63
64 joinPS, -- :: PackedString -> [PackedString] -> PackedString
65
66 ) where
67
68 import Prelude
69
70 #ifndef __NHC__
71
72 import Data.Array.Unboxed
73 import Data.Array.IO
74 import Data.Typeable
75 import Data.Char
76
77 import System.IO
78
79 -- -----------------------------------------------------------------------------
80 -- PackedString type declaration
81
82 -- | A space-efficient representation of a 'String', which supports various
83 -- efficient operations. A 'PackedString' contains full Unicode 'Char's.
84 newtype PackedString = PS (UArray Int Char)
85
86 -- ToDo: we could support "slices", i.e. include offset and length fields into
87 -- the string, so that operations like take/drop could be O(1). Perhaps making
88 -- a slice should be conditional on the ratio of the slice/string size to
89 -- limit memory leaks.
90
91 instance Eq PackedString where
92 (PS x) == (PS y) = x == y
93
94 instance Ord PackedString where
95 compare (PS x) (PS y) = compare x y
96
97 --instance Read PackedString: ToDo
98
99 instance Show PackedString where
100 showsPrec p ps r = showsPrec p (unpackPS ps) r
101
102 #include "Typeable.h"
103 INSTANCE_TYPEABLE0(PackedString,packedStringTc,"PackedString")
104
105 -- -----------------------------------------------------------------------------
106 -- Constructor functions
107
108 -- | The 'nilPS' value is the empty string.
109 nilPS :: PackedString
110 nilPS = PS (array (0,-1) [])
111
112 -- | The 'consPS' function prepends the given character to the
113 -- given string.
114 consPS :: Char -> PackedString -> PackedString
115 consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better
116
117 -- | Convert a 'String' into a 'PackedString'
118 packString :: String -> PackedString
119 packString str = packNChars (length str) str
120
121 -- | The 'packNChars' function creates a 'PackedString' out of the
122 -- first @len@ elements of the given 'String'.
123 packNChars :: Int -> [Char] -> PackedString
124 packNChars len str = PS (listArray (0,len-1) str)
125
126 -- -----------------------------------------------------------------------------
127 -- Destructor functions (taking PackedStrings apart)
128
129 -- | Convert a 'PackedString' into a 'String'
130 unpackPS :: PackedString -> String
131 unpackPS (PS ps) = elems ps
132
133 -- -----------------------------------------------------------------------------
134 -- List-mimicking functions for PackedStrings
135
136 -- | The 'lengthPS' function returns the length of the input list. Analogous to 'length'.
137 lengthPS :: PackedString -> Int
138 lengthPS (PS ps) = rangeSize (bounds ps)
139
140 -- | The 'indexPS' function returns the character in the string at the given position.
141 indexPS :: PackedString -> Int -> Char
142 indexPS (PS ps) i = ps ! i
143
144 -- | The 'headPS' function returns the first element of a 'PackedString' or throws an
145 -- error if the string is empty.
146 headPS :: PackedString -> Char
147 headPS ps
148 | nullPS ps = error "Data.PackedString.headPS: head []"
149 | otherwise = indexPS ps 0
150
151 -- | The 'tailPS' function returns the tail of a 'PackedString' or throws an error
152 -- if the string is empty.
153 tailPS :: PackedString -> PackedString
154 tailPS ps
155 | len <= 0 = error "Data.PackedString.tailPS: tail []"
156 | len == 1 = nilPS
157 | otherwise = substrPS ps 1 (len - 1)
158 where
159 len = lengthPS ps
160
161 -- | The 'nullPS' function returns True iff the argument is null.
162 nullPS :: PackedString -> Bool
163 nullPS (PS ps) = rangeSize (bounds ps) == 0
164
165 -- | The 'appendPS' function appends the second string onto the first.
166 appendPS :: PackedString -> PackedString -> PackedString
167 appendPS xs ys
168 | nullPS xs = ys
169 | nullPS ys = xs
170 | otherwise = concatPS [xs,ys]
171
172 -- | The 'mapPS' function applies a function to each character in the string.
173 mapPS :: (Char -> Char) -> PackedString -> PackedString
174 mapPS f (PS ps) = PS (amap f ps)
175
176 -- | The 'filterPS' function filters out the appropriate substring.
177 filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
178 filterPS pred ps = packString (filter pred (unpackPS ps))
179
180 -- | The 'foldlPS' function behaves like 'foldl' on 'PackedString's.
181 foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
182 foldlPS f b ps = foldl f b (unpackPS ps)
183
184 -- | The 'foldrPS' function behaves like 'foldr' on 'PackedString's.
185 foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
186 foldrPS f v ps = foldr f v (unpackPS ps)
187
188 -- | The 'takePS' function takes the first @n@ characters of a 'PackedString'.
189 takePS :: Int -> PackedString -> PackedString
190 takePS n ps = substrPS ps 0 (n-1)
191
192 -- | The 'dropPS' function drops the first @n@ characters of a 'PackedString'.
193 dropPS :: Int -> PackedString -> PackedString
194 dropPS n ps = substrPS ps n (lengthPS ps - 1)
195
196 -- | The 'splitWithPS' function splits a 'PackedString' at a given index.
197 splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
198 splitAtPS n ps = (takePS n ps, dropPS n ps)
199
200 -- | The 'takeWhilePS' function is analogous to the 'takeWhile' function.
201 takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
202 takeWhilePS pred ps = packString (takeWhile pred (unpackPS ps))
203
204 -- | The 'dropWhilePS' function is analogous to the 'dropWhile' function.
205 dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
206 dropWhilePS pred ps = packString (dropWhile pred (unpackPS ps))
207
208 -- | The 'elemPS' function returns True iff the given element is in the string.
209 elemPS :: Char -> PackedString -> Bool
210 elemPS c ps = c `elem` unpackPS ps
211
212 -- | The 'spanPS' function returns a pair containing the result of
213 -- running both 'takeWhilePS' and 'dropWhilePS'.
214 spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
215 spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps)
216
217 -- | The 'breakPS' function breaks a string at the first position which
218 -- satisfies the predicate.
219 breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
220 breakPS p ps = spanPS (not . p) ps
221
222 -- | The 'linesPS' function splits the input on line-breaks.
223 linesPS :: PackedString -> [PackedString]
224 linesPS ps = splitPS '\n' ps
225
226 -- | The 'unlinesPS' function concatenates the input list after
227 -- interspersing newlines.
228 unlinesPS :: [PackedString] -> PackedString
229 unlinesPS = joinPS (packString "\n")
230
231 -- | The 'wordsPS' function is analogous to the 'words' function.
232 wordsPS :: PackedString -> [PackedString]
233 wordsPS ps = filter (not.nullPS) (splitWithPS isSpace ps)
234
235 -- | The 'unwordsPS' function is analogous to the 'unwords' function.
236 unwordsPS :: [PackedString] -> PackedString
237 unwordsPS = joinPS (packString " ")
238
239 -- | The 'reversePS' function reverses the string.
240 reversePS :: PackedString -> PackedString
241 reversePS ps = packString (reverse (unpackPS ps))
242
243 -- | The 'concatPS' function concatenates a list of 'PackedString's.
244 concatPS :: [PackedString] -> PackedString
245 concatPS pss = packString (concat (map unpackPS pss))
246
247 ------------------------------------------------------------
248
249 -- | The 'joinPS' function takes a 'PackedString' and a list of 'PackedString's
250 -- and concatenates the list after interspersing the first argument between
251 -- each element of the list.
252 joinPS :: PackedString -> [PackedString] -> PackedString
253 joinPS filler pss = concatPS (splice pss)
254 where
255 splice [] = []
256 splice [x] = [x]
257 splice (x:y:xs) = x:filler:splice (y:xs)
258
259 -- ToDo: the obvious generalisation
260 {-
261 Some properties that hold:
262
263 * splitPS x ls = ls'
264 where False = any (map (x `elemPS`) ls')
265
266 * joinPS (packString [x]) (splitPS x ls) = ls
267 -}
268
269 -- | The 'splitPS' function splits the input string on each occurrence of the given 'Char'.
270 splitPS :: Char -> PackedString -> [PackedString]
271 splitPS c = splitWithPS (== c)
272
273 -- | The 'splitWithPS' function takes a character predicate and splits the input string
274 -- at each character which satisfies the predicate.
275 splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
276 splitWithPS pred (PS ps) =
277 splitify 0
278 where
279 len = lengthPS (PS ps)
280
281 splitify n
282 | n >= len = []
283 | otherwise =
284 let
285 break_pt = first_pos_that_satisfies pred ps len n
286 in
287 if break_pt == n then -- immediate match, empty substring
288 nilPS
289 : splitify (break_pt + 1)
290 else
291 substrPS (PS ps) n (break_pt - 1) -- leave out the matching character
292 : splitify (break_pt + 1)
293
294 first_pos_that_satisfies pred ps len n =
295 case [ m | m <- [n..len-1], pred (ps ! m) ] of
296 [] -> len
297 (m:_) -> m
298
299 -- -----------------------------------------------------------------------------
300 -- Local utility functions
301
302 -- The definition of @_substrPS@ is essentially:
303 -- @take (end - begin + 1) (drop begin str)@.
304
305 -- | The 'substrPS' function takes a 'PackedString' and two indices
306 -- and returns the substring of the input string between (and including)
307 -- these indices.
308 substrPS :: PackedString -> Int -> Int -> PackedString
309 substrPS (PS ps) begin end = packString [ ps ! i | i <- [begin..end] ]
310
311 -- -----------------------------------------------------------------------------
312 -- hPutPS
313
314 -- | Outputs a 'PackedString' to the specified 'Handle'.
315 --
316 -- NOTE: the representation of the 'PackedString' in the file is assumed to
317 -- be in the ISO-8859-1 encoding. In other words, only the least significant
318 -- byte is taken from each character in the 'PackedString'.
319 hPutPS :: Handle -> PackedString -> IO ()
320 hPutPS h (PS ps) = do
321 let l = lengthPS (PS ps)
322 arr <- newArray_ (0, l-1)
323 sequence_ [ writeArray arr i (fromIntegral (ord (ps ! i))) | i <- [0..l-1] ]
324 hPutArray h arr l
325
326 -- -----------------------------------------------------------------------------
327 -- hGetPS
328
329 -- | Read a 'PackedString' directly from the specified 'Handle'.
330 -- This is far more efficient than reading the characters into a 'String'
331 -- and then using 'packString'.
332 --
333 -- NOTE: as with 'hPutPS', the string representation in the file is
334 -- assumed to be ISO-8859-1.
335 hGetPS :: Handle -> Int -> IO PackedString
336 hGetPS h i = do
337 arr <- newArray_ (0, i-1)
338 l <- hGetArray h arr i
339 chars <- mapM (\i -> readArray arr i >>= return.chr.fromIntegral) [0..l-1]
340 return (packNChars l chars)
341
342 #else /* __NHC__ */
343
344 --import Prelude hiding (append, break, concat, cons, drop, dropWhile,
345 -- filter, foldl, foldr, head, length, lines, map,
346 -- nil, null, reverse, span, splitAt, subst, tail,
347 -- take, takeWhile, unlines, unwords, words)
348 -- also hiding: Ix(..), Functor(..)
349 import qualified NHC.PackedString
350 import NHC.PackedString (PackedString,packString,unpackPS)
351 import List (intersperse)
352
353
354 nilPS :: PackedString
355 consPS :: Char -> PackedString -> PackedString
356 headPS :: PackedString -> Char
357 tailPS :: PackedString -> PackedString
358 nullPS :: PackedString -> Bool
359 appendPS :: PackedString -> PackedString -> PackedString
360 lengthPS :: PackedString -> Int
361 indexPS :: PackedString -> Int -> Char
362 mapPS :: (Char -> Char) -> PackedString -> PackedString
363 filterPS :: (Char -> Bool) -> PackedString -> PackedString
364 reversePS :: PackedString -> PackedString
365 concatPS :: [PackedString] -> PackedString
366 elemPS :: Char -> PackedString -> Bool
367 substrPS :: PackedString -> Int -> Int -> PackedString
368 takePS :: Int -> PackedString -> PackedString
369 dropPS :: Int -> PackedString -> PackedString
370 splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
371
372 foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
373 foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
374 takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
375 dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
376 spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
377 breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
378 linesPS :: PackedString -> [PackedString]
379 unlinesPS :: [PackedString] -> PackedString
380
381 wordsPS :: PackedString -> [PackedString]
382 unwordsPS :: [PackedString] -> PackedString
383 splitPS :: Char -> PackedString -> [PackedString]
384 splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
385 joinPS :: PackedString -> [PackedString] -> PackedString
386
387 nilPS = NHC.PackedString.nil
388 consPS = NHC.PackedString.cons
389 headPS = NHC.PackedString.head
390 tailPS = NHC.PackedString.tail
391 nullPS = NHC.PackedString.null
392 appendPS = NHC.PackedString.append
393 lengthPS = NHC.PackedString.length
394 indexPS p i = (unpackPS p) !! i
395 mapPS = NHC.PackedString.map
396 filterPS = NHC.PackedString.filter
397 reversePS = NHC.PackedString.reverse
398 concatPS = NHC.PackedString.concat
399 elemPS c p = c `elem` unpackPS p
400 substrPS = NHC.PackedString.substr
401 takePS = NHC.PackedString.take
402 dropPS = NHC.PackedString.drop
403 splitAtPS = NHC.PackedString.splitAt
404
405 foldlPS = NHC.PackedString.foldl
406 foldrPS = NHC.PackedString.foldr
407 takeWhilePS = NHC.PackedString.takeWhile
408 dropWhilePS = NHC.PackedString.dropWhile
409 spanPS = NHC.PackedString.span
410 breakPS = NHC.PackedString.break
411 linesPS = NHC.PackedString.lines
412 unlinesPS = NHC.PackedString.unlines
413
414 wordsPS = NHC.PackedString.words
415 unwordsPS = NHC.PackedString.unwords
416 splitPS c = splitWithPS (==c)
417 splitWithPS p =
418 map packString . split' p [] . unpackPS
419 where
420 split' :: (Char->Bool) -> String -> String -> [String]
421 split' pred [] [] = []
422 split' pred acc [] = [reverse acc]
423 split' pred acc (x:xs) | pred x = reverse acc: split' pred [] xs
424 | otherwise = split' pred (x:acc) xs
425
426 joinPS sep = concatPS . intersperse sep
427
428 #endif