[project @ 2002-10-09 17:08:18 by malcolm]
[packages/base.git] / Data / PackedString.hs
1 -----------------------------------------------------------------------------
2 -- |
3 -- Module : Data.PackedString
4 -- Copyright : (c) The University of Glasgow 2001
5 -- License : BSD-style (see the file libraries/base/LICENSE)
6 --
7 -- Maintainer : libraries@haskell.org
8 -- Stability : experimental
9 -- Portability : portable
10 --
11 -- An efficient implementation of strings.
12 --
13 -----------------------------------------------------------------------------
14
15 -- Original GHC implementation by Bryan O\'Sullivan,
16 -- rewritten to use UArray by Simon Marlow.
17
18 module Data.PackedString (
19 -- * The @PackedString@ type
20 PackedString, -- abstract, instances: Eq, Ord, Show, Typeable
21
22 -- * Converting to and from @PackedString@s
23 packString, -- :: String -> PackedString
24 unpackPS, -- :: PackedString -> String
25
26 #ifndef __NHC__
27 -- * I\/O with @PackedString@s
28 hPutPS, -- :: Handle -> PackedString -> IO ()
29 hGetPS, -- :: Handle -> Int -> IO PackedString
30 #endif
31
32 -- * List-like manipulation functions
33 nilPS, -- :: PackedString
34 consPS, -- :: Char -> PackedString -> PackedString
35 headPS, -- :: PackedString -> Char
36 tailPS, -- :: PackedString -> PackedString
37 nullPS, -- :: PackedString -> Bool
38 appendPS, -- :: PackedString -> PackedString -> PackedString
39 lengthPS, -- :: PackedString -> Int
40 indexPS, -- :: PackedString -> Int -> Char
41 mapPS, -- :: (Char -> Char) -> PackedString -> PackedString
42 filterPS, -- :: (Char -> Bool) -> PackedString -> PackedString
43 reversePS, -- :: PackedString -> PackedString
44 concatPS, -- :: [PackedString] -> PackedString
45 elemPS, -- :: Char -> PackedString -> Bool
46 substrPS, -- :: PackedString -> Int -> Int -> PackedString
47 takePS, -- :: Int -> PackedString -> PackedString
48 dropPS, -- :: Int -> PackedString -> PackedString
49 splitAtPS, -- :: Int -> PackedString -> (PackedString, PackedString)
50
51 foldlPS, -- :: (a -> Char -> a) -> a -> PackedString -> a
52 foldrPS, -- :: (Char -> a -> a) -> a -> PackedString -> a
53 takeWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
54 dropWhilePS, -- :: (Char -> Bool) -> PackedString -> PackedString
55 spanPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
56 breakPS, -- :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
57 linesPS, -- :: PackedString -> [PackedString]
58
59 wordsPS, -- :: PackedString -> [PackedString]
60 splitPS, -- :: Char -> PackedString -> [PackedString]
61 splitWithPS, -- :: (Char -> Bool) -> PackedString -> [PackedString]
62
63 -- joinPS, -- :: PackedString -> [PackedString] -> PackedString
64
65 ) where
66
67 import Prelude
68
69 #ifndef __NHC__
70
71 import Data.Array.Unboxed
72 import Data.Array.IO
73 import Data.Dynamic
74 import Data.Char
75
76 import System.IO
77
78 -- -----------------------------------------------------------------------------
79 -- PackedString type declaration
80
81 -- | A space-efficient representation of a 'String', which supports various
82 -- efficient operations. A 'PackedString' contains full Unicode 'Char's.
83 newtype PackedString = PS (UArray Int Char)
84
85 instance Eq PackedString where
86 (PS x) == (PS y) = x == y
87
88 instance Ord PackedString where
89 compare (PS x) (PS y) = compare x y
90
91 --instance Read PackedString: ToDo
92
93 instance Show PackedString where
94 showsPrec p ps r = showsPrec p (unpackPS ps) r
95
96 #include "Dynamic.h"
97 INSTANCE_TYPEABLE0(PackedString,packedStringTc,"PackedString")
98
99 -- -----------------------------------------------------------------------------
100 -- Constructor functions
101
102 nilPS :: PackedString
103 nilPS = PS (array (0,-1) [])
104
105 consPS :: Char -> PackedString -> PackedString
106 consPS c cs = packString (c : (unpackPS cs)) -- ToDo:better
107
108 -- | Convert a 'String' into a 'PackedString'
109 packString :: String -> PackedString
110 packString str = packNChars (length str) str
111
112 packNChars :: Int -> [Char] -> PackedString
113 packNChars len str = PS (array (0,len-1) (zip [0..] str))
114
115 -- -----------------------------------------------------------------------------
116 -- Destructor functions (taking PackedStrings apart)
117
118 -- | Convert a 'PackedString' into a 'String'
119 unpackPS :: PackedString -> String
120 unpackPS (PS ps) = elems ps
121
122 -- -----------------------------------------------------------------------------
123 -- List-mimicking functions for PackedStrings
124
125 lengthPS :: PackedString -> Int
126 lengthPS (PS ps) = rangeSize (bounds ps)
127
128 indexPS :: PackedString -> Int -> Char
129 indexPS (PS ps) i = ps ! i
130
131 headPS :: PackedString -> Char
132 headPS ps
133 | nullPS ps = error "Data.PackedString.headPS: head []"
134 | otherwise = indexPS ps 0
135
136 tailPS :: PackedString -> PackedString
137 tailPS ps
138 | len <= 0 = error "Data.PackedString.tailPS: tail []"
139 | len == 1 = nilPS
140 | otherwise = substrPS ps 1 (len - 1)
141 where
142 len = lengthPS ps
143
144 nullPS :: PackedString -> Bool
145 nullPS (PS ps) = rangeSize (bounds ps) == 0
146
147 appendPS :: PackedString -> PackedString -> PackedString
148 appendPS xs ys
149 | nullPS xs = ys
150 | nullPS ys = xs
151 | otherwise = concatPS [xs,ys]
152
153 mapPS :: (Char -> Char) -> PackedString -> PackedString
154 mapPS f (PS ps) = PS (amap f ps)
155
156 filterPS :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
157 filterPS pred ps = packString (filter pred (unpackPS ps))
158
159 foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
160 foldlPS f b ps = foldl f b (unpackPS ps)
161
162 foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
163 foldrPS f v ps = foldr f v (unpackPS ps)
164
165 takePS :: Int -> PackedString -> PackedString
166 takePS n ps = substrPS ps 0 (n-1)
167
168 dropPS :: Int -> PackedString -> PackedString
169 dropPS n ps = substrPS ps n (lengthPS ps - 1)
170
171 splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
172 splitAtPS n ps = (takePS n ps, dropPS n ps)
173
174 takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
175 takeWhilePS pred ps = packString (takeWhile pred (unpackPS ps))
176
177 dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
178 dropWhilePS pred ps = packString (dropWhile pred (unpackPS ps))
179
180 elemPS :: Char -> PackedString -> Bool
181 elemPS c ps = c `elem` unpackPS ps
182
183 spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
184 spanPS p ps = (takeWhilePS p ps, dropWhilePS p ps)
185
186 breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
187 breakPS p ps = spanPS (not . p) ps
188
189 linesPS :: PackedString -> [PackedString]
190 linesPS ps = splitPS '\n' ps
191
192 wordsPS :: PackedString -> [PackedString]
193 wordsPS ps = splitWithPS isSpace ps
194
195 reversePS :: PackedString -> PackedString
196 reversePS ps = packString (reverse (unpackPS ps))
197
198 concatPS :: [PackedString] -> PackedString
199 concatPS pss = packString (concat (map unpackPS pss))
200
201 ------------------------------------------------------------
202 {-
203 joinPS :: PackedString -> [PackedString] -> PackedString
204 joinPS filler pss = concatPS (splice pss)
205 where
206 splice [] = []
207 splice [x] = [x]
208 splice (x:y:xs) = x:filler:splice (y:xs)
209
210 -- ToDo: the obvious generalisation
211 {-
212 Some properties that hold:
213
214 * splitPS x ls = ls'
215 where False = any (map (x `elemPS`) ls')
216 False = any (map (nullPS) ls')
217
218 * all x's have been chopped out.
219 * no empty PackedStrings in returned list. A conseq.
220 of this is:
221 splitPS x nilPS = []
222
223
224 * joinPS (packString [x]) (_splitPS x ls) = ls
225
226 -}
227 -}
228
229 splitPS :: Char -> PackedString -> [PackedString]
230 splitPS c = splitWithPS (== c)
231
232 splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
233 splitWithPS pred (PS ps) =
234 splitify 0
235 where
236 len = lengthPS (PS ps)
237
238 splitify n
239 | n >= len = []
240 | otherwise =
241 let
242 break_pt = first_pos_that_satisfies pred ps len n
243 in
244 if break_pt == n then -- immediate match, no substring to cut out.
245 splitify (break_pt + 1)
246 else
247 substrPS (PS ps) n (break_pt - 1) -- leave out the matching character
248 : splitify (break_pt + 1)
249
250 first_pos_that_satisfies pred ps len n =
251 case [ m | m <- [n..len], pred (ps ! m) ] of
252 [] -> len
253 (m:_) -> m
254
255 -- -----------------------------------------------------------------------------
256 -- Local utility functions
257
258 -- The definition of @_substrPS@ is essentially:
259 -- @take (end - begin + 1) (drop begin str)@.
260
261 substrPS :: PackedString -> Int -> Int -> PackedString
262 substrPS (PS ps) begin end = packString [ ps ! i | i <- [begin..end] ]
263
264 -- -----------------------------------------------------------------------------
265 -- hPutPS
266
267 -- | Outputs a 'PackedString' to the specified 'Handle'.
268 --
269 -- NOTE: the representation of the 'PackedString' in the file is assumed to
270 -- be in the ISO-8859-1 encoding. In other words, only the least signficant
271 -- byte is taken from each character in the 'PackedString'.
272 hPutPS :: Handle -> PackedString -> IO ()
273 hPutPS h (PS ps) = do
274 let l = lengthPS (PS ps)
275 arr <- newArray_ (0, l-1)
276 sequence_ [ writeArray arr i (fromIntegral (ord (ps ! i))) | i <- [0..l-1] ]
277 hPutArray h arr l
278
279 -- -----------------------------------------------------------------------------
280 -- hGetPS
281
282 -- | Read a 'PackedString' directly from the specified 'Handle'. This
283 -- is far more efficient than reading the characters into a 'String'
284 -- and then using 'packString'.
285 --
286 -- NOTE: as with 'hPutPS', the string representation in the file is
287 -- assumed to be ISO-8859-1.
288 hGetPS :: Handle -> Int -> IO PackedString
289 hGetPS h i = do
290 arr <- newArray_ (0, i-1)
291 l <- hGetArray h arr i
292 chars <- mapM (\i -> readArray arr i >>= return.chr.fromIntegral) [0..l-1]
293 return (packString chars)
294
295 #else /* __NHC__ */
296
297 --import Prelude hiding (append, break, concat, cons, drop, dropWhile,
298 -- filter, foldl, foldr, head, length, lines, map,
299 -- nil, null, reverse, span, splitAt, subst, tail,
300 -- take, takeWhile, unlines, unwords, words)
301 -- also hiding: Ix(..), Functor(..)
302 import NHC.PackedString
303
304
305 nilPS :: PackedString
306 consPS :: Char -> PackedString -> PackedString
307 headPS :: PackedString -> Char
308 tailPS :: PackedString -> PackedString
309 nullPS :: PackedString -> Bool
310 appendPS :: PackedString -> PackedString -> PackedString
311 lengthPS :: PackedString -> Int
312 indexPS :: PackedString -> Int -> Char
313 mapPS :: (Char -> Char) -> PackedString -> PackedString
314 filterPS :: (Char -> Bool) -> PackedString -> PackedString
315 reversePS :: PackedString -> PackedString
316 concatPS :: [PackedString] -> PackedString
317 elemPS :: Char -> PackedString -> Bool
318 substrPS :: PackedString -> Int -> Int -> PackedString
319 takePS :: Int -> PackedString -> PackedString
320 dropPS :: Int -> PackedString -> PackedString
321 splitAtPS :: Int -> PackedString -> (PackedString, PackedString)
322
323 foldlPS :: (a -> Char -> a) -> a -> PackedString -> a
324 foldrPS :: (Char -> a -> a) -> a -> PackedString -> a
325 takeWhilePS :: (Char -> Bool) -> PackedString -> PackedString
326 dropWhilePS :: (Char -> Bool) -> PackedString -> PackedString
327 spanPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
328 breakPS :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
329 linesPS :: PackedString -> [PackedString]
330
331 wordsPS :: PackedString -> [PackedString]
332 splitPS :: Char -> PackedString -> [PackedString]
333 splitWithPS :: (Char -> Bool) -> PackedString -> [PackedString]
334
335 nilPS = NHC.PackedString.nil
336 consPS = NHC.PackedString.cons
337 headPS = NHC.PackedString.head
338 tailPS = NHC.PackedString.tail
339 nullPS = NHC.PackedString.null
340 appendPS = NHC.PackedString.append
341 lengthPS = NHC.PackedString.length
342 indexPS p i = (unpackPS p) !! i
343 mapPS = NHC.PackedString.map
344 filterPS = NHC.PackedString.filter
345 reversePS = NHC.PackedString.reverse
346 concatPS = NHC.PackedString.concat
347 elemPS c p = c `elem` unpackPS p
348 substrPS = NHC.PackedString.substr
349 takePS = NHC.PackedString.take
350 dropPS = NHC.PackedString.drop
351 splitAtPS = NHC.PackedString.splitAt
352
353 foldlPS = NHC.PackedString.foldl
354 foldrPS = NHC.PackedString.foldr
355 takeWhilePS = NHC.PackedString.takeWhile
356 dropWhilePS = NHC.PackedString.dropWhile
357 spanPS = NHC.PackedString.span
358 breakPS = NHC.PackedString.break
359 linesPS = NHC.PackedString.lines
360
361 wordsPS = NHC.PackedString.words
362 splitPS c = splitWithPS (==c)
363 splitWithPS = error "Data.PackedString: splitWithPS not implemented"
364
365 #endif