Decode lazy UTF-8 ByteStrings
[packages/text.git] / Data / Text / Lazy / Encoding / Fusion.hs
1 -- |
2 -- Module : Data.Text.Lazy.Encoding.Fusion
3 -- Copyright : (c) Bryan O'Sullivan 2009
4 --
5 -- License : BSD-style
6 -- Maintainer : rtharper@aftereternity.co.uk, bos@serpentine.com,
7 -- duncan@haskell.org
8 -- Stability : experimental
9 -- Portability : portable
10 --
11 -- Fusible 'Stream'-oriented functions for converting between lazy
12 -- 'Text' and several common encodings.
13
14 module Data.Text.Lazy.Encoding.Fusion
15 (
16 -- * Streaming
17 -- streamASCII
18 streamUtf8
19 --, streamUtf16LE
20 --, streamUtf16BE
21 --, streamUtf32LE
22 --, streamUtf32BE
23
24 -- * Unstreaming
25 --, unstream
26
27 , module Data.Text.Encoding.Fusion.Common
28 ) where
29
30 import Data.ByteString.Lazy.Internal (ByteString(..))
31 import qualified Data.ByteString as B
32 import qualified Data.ByteString.Unsafe as B
33 import Data.Text.Encoding.Fusion.Common
34 import Data.Text.Fusion (Step(..), Stream(..))
35 import Data.Text.Fusion.Internal (PairS(..))
36 import Data.Text.UnsafeChar (unsafeChr, unsafeChr8, unsafeChr32)
37 import qualified Data.Text.Encoding.Utf8 as U8
38
-- | Value passed as the final argument of 'Stream' by 'streamUtf8'.
-- Presumably a placeholder size hint, since the number of decoded
-- characters is not computed up front -- TODO confirm against the
-- definition of 'Stream'.
unknownLength :: Int
unknownLength = 4
41
-- | /O(n)/ Convert a lazy 'ByteString' into a 'Stream Char', using UTF-8
-- encoding.
--
-- A multi-byte sequence may be split across two or more chunks of the
-- lazy 'ByteString'.  When the bytes left in the current chunk do not
-- decode on their own and fewer than four of them remain, they are
-- spliced together with the leading bytes of the following chunk(s)
-- and decoding is retried on the combined bytes.
--
-- Input that is not valid UTF-8, including a sequence cut short by the
-- end of the input, is reported through 'encodingError'.
streamUtf8 :: ByteString -> Stream Char
streamUtf8 bs0 = Stream next (bs0 :!: 0) unknownLength
  where
    {-# INLINE next #-}
    next (c@(Chunk bs rest) :!: i)
      -- Current chunk exhausted: move on without recursing, so that
      -- 'next' stays non-recursive.
      | i >= l = Skip (rest :!: 0)
      | U8.validate1 x1 = Yield (unsafeChr8 x1) (c :!: i+1)
      | i+1 < l && U8.validate2 x1 x2 = Yield (U8.chr2 x1 x2) (c :!: i+2)
      | i+2 < l && U8.validate3 x1 x2 x3 = Yield (U8.chr3 x1 x2 x3) (c :!: i+3)
      | i+3 < l && U8.validate4 x1 x2 x3 x4 = Yield (U8.chr4 x1 x2 x3 x4) (c :!: i+4)
      -- Nothing decoded, but fewer than four bytes were available: the
      -- sequence may continue in the next chunk.
      | avail < 4 = splice rest
      | otherwise = encodingError "UTF-8"
      where
        x1 = idx i
        x2 = idx (i + 1)
        x3 = idx (i + 2)
        x4 = idx (i + 3)
        idx = B.unsafeIndex bs
        l = B.length bs
        -- Number of unread bytes left in the current chunk.
        avail = l - i
        -- Build a replacement head chunk holding the unread bytes plus
        -- enough bytes from the next chunk to bring the total up to four
        -- (the longest sequence the guards above decode).  Only that
        -- handful of bytes is copied.  A next chunk that is used up
        -- entirely is dropped, so every splice either reaches four
        -- available bytes or shortens the chunk list, which guarantees
        -- termination.
        splice (Chunk nbs more)
          | B.null post = Skip (Chunk joined more :!: 0)
          | otherwise = Skip (Chunk joined (Chunk post more) :!: 0)
          where
            (pre, post) = B.splitAt (4 - avail) nbs
            joined = B.append (B.drop i bs) pre
        -- The input ended in the middle of a sequence.
        splice Empty = encodingError "UTF-8"
    next (Empty :!: _) = Done
{-# INLINE [0] streamUtf8 #-}
64
-- | Abort with a call to 'error' whose message names the encoding that
-- could not be decoded.
encodingError :: String -> a
encodingError encoding = error message
  where
    message =
      concat ["Data.Text.Lazy.Encoding.Fusion: Bad ", encoding, " stream"]