Get rid of some compiler warnings.
[packages/text.git] / Data / Text / Lazy / Encoding / Fusion.hs
1 {-# LANGUAGE BangPatterns #-}
2
3 -- |
4 -- Module : Data.Text.Lazy.Encoding.Fusion
5 -- Copyright : (c) Bryan O'Sullivan 2009
6 --
7 -- License : BSD-style
8 -- Maintainer : rtharper@aftereternity.co.uk, bos@serpentine.com,
9 -- duncan@haskell.org
10 -- Stability : experimental
11 -- Portability : portable
12 --
13 -- Fusible 'Stream'-oriented functions for converting between lazy
14 -- 'Text' and several common encodings.
15
16 module Data.Text.Lazy.Encoding.Fusion
17 (
18 -- * Streaming
19 -- streamASCII
20 streamUtf8
21 --, streamUtf16LE
22 --, streamUtf16BE
23 --, streamUtf32LE
24 --, streamUtf32BE
25
26 -- * Unstreaming
27 , unstream
28
29 , module Data.Text.Encoding.Fusion.Common
30 ) where
31
32 import Data.ByteString.Lazy.Internal (ByteString(..), defaultChunkSize)
33 import qualified Data.ByteString as B
34 import qualified Data.ByteString.Unsafe as B
35 import Data.Text.Encoding.Fusion.Common
36 import Data.Text.Fusion (Step(..), Stream(..))
37 import Data.Text.Fusion.Internal (M(..), PairS(..), S(..))
38 import Data.Text.UnsafeChar (unsafeChr8)
39 import Data.Word (Word8)
40 import qualified Data.Text.Encoding.Utf8 as U8
41 import System.IO.Unsafe (unsafePerformIO)
42 import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
43 import Foreign.Storable (pokeByteOff)
44 import Data.ByteString.Internal (mallocByteString, memcpy)
45 import Control.Exception (assert)
46 import qualified Data.ByteString.Internal as B
47
48 unknownLength :: Int
49 unknownLength = 4
50
51 -- | /O(n)/ Convert a lazy 'ByteString' into a 'Stream Char', using
52 -- UTF-8 encoding.
53 streamUtf8 :: ByteString -> Stream Char
54 streamUtf8 bs0 = Stream next (bs0 :!: S N N N N :!: 0) unknownLength
55 where
56 {-# INLINE next #-}
57 next st@(bs :!: s :!: i) =
58 case s of
59 S (J a) N _ _ | U8.validate1 a ->
60 Yield (unsafeChr8 a) es
61 S (J a) (J b) N _ | U8.validate2 a b ->
62 Yield (U8.chr2 a b) es
63 S (J a) (J b) (J c) N | U8.validate3 a b c ->
64 Yield (U8.chr3 a b c) es
65 S (J a) (J b) (J c) (J d) | U8.validate4 a b c d ->
66 Yield (U8.chr4 a b c d) es
67 _ -> consume st
68 where es = bs :!: S N N N N :!: i
69 {-# INLINE consume #-}
70 consume (bs@(Chunk ps rest) :!: s :!: i)
71 | i >= len = consume (rest :!: s :!: 0)
72 | otherwise = next (bs :!: s' :!: i+1)
73 where s' = case s of
74 S N _ _ _ -> S x N N N
75 S a N _ _ -> S a x N N
76 S a b N _ -> S a b x N
77 S a b c N -> S a b c x
78 _ -> encodingError "streamUtf8" "UTF-8"
79 x = J (B.unsafeIndex ps i)
80 len = B.length ps
81 consume (Empty :!: S N _ _ _ :!: _) = Done
82 consume _ = encodingError "streamUtf8" "UTF-8"
83 {-# INLINE [0] streamUtf8 #-}
84
85 -- | /O(n)/ Convert a 'Stream' 'Word8' to a lazy 'ByteString'.
86 unstreamChunks :: Int -> Stream Word8 -> ByteString
87 unstreamChunks chunkSize (Stream next s0 len0) = chunk s0 len0
88 where chunk s1 len1 = unsafePerformIO $ do
89 let len = min (max len1 unknownLength) chunkSize
90 mallocByteString len >>= loop len 0 s1
91 where
92 loop !n !off !s fp = case next s of
93 Done | off == 0 -> return Empty
94 | otherwise -> do
95 bs <- trimUp fp off
96 return $! Chunk bs Empty
97 Skip s' -> loop n off s' fp
98 Yield x s'
99 | off == chunkSize -> do
100 bs <- trimUp fp off
101 return (Chunk bs (chunk s (n - B.length bs)))
102 | off == n -> realloc fp n off s' x
103 | otherwise -> do
104 withForeignPtr fp $ \p -> pokeByteOff p off x
105 loop n (off+1) s' fp
106 {-# NOINLINE realloc #-}
107 realloc fp n off s x = do
108 let n' = min (n+n) chunkSize
109 fp' <- copy0 fp n n'
110 withForeignPtr fp' $ \p -> pokeByteOff p off x
111 loop n' (off+1) s fp'
112 {-# NOINLINE trimUp #-}
113 trimUp fp off = return $! B.PS fp 0 off
114 copy0 :: ForeignPtr Word8 -> Int -> Int -> IO (ForeignPtr Word8)
115 copy0 !src !srcLen !destLen = assert (srcLen <= destLen) $ do
116 dest <- mallocByteString destLen
117 withForeignPtr src $ \src' ->
118 withForeignPtr dest $ \dest' ->
119 memcpy dest' src' (fromIntegral srcLen)
120 return dest
121
122 -- | /O(n)/ Convert a 'Stream' 'Word8' to a lazy 'ByteString'.
123 unstream :: Stream Word8 -> ByteString
124 unstream = unstreamChunks defaultChunkSize
125
126 encodingError :: String -> String -> a
127 encodingError func encoding =
128 error $ "Data.Text.Lazy.Encoding.Fusion." ++ func ++ ": Bad " ++
129 encoding ++ " stream"