Prefer #if defined to #ifdef
[ghc.git] / libraries / base / GHC / IO / Buffer.hs
1 {-# LANGUAGE Trustworthy #-}
2 {-# LANGUAGE CPP, NoImplicitPrelude #-}
3 {-# OPTIONS_GHC -funbox-strict-fields #-}
4
5 -----------------------------------------------------------------------------
6 -- |
7 -- Module : GHC.IO.Buffer
8 -- Copyright : (c) The University of Glasgow 2008
9 -- License : see libraries/base/LICENSE
10 --
11 -- Maintainer : cvs-ghc@haskell.org
12 -- Stability : internal
13 -- Portability : non-portable (GHC Extensions)
14 --
15 -- Buffers used in the IO system
16 --
17 -----------------------------------------------------------------------------
18
19 module GHC.IO.Buffer (
20 -- * Buffers of any element
21 Buffer(..), BufferState(..), CharBuffer, CharBufElem,
22
23 -- ** Creation
24 newByteBuffer,
25 newCharBuffer,
26 newBuffer,
27 emptyBuffer,
28
29 -- ** Insertion/removal
30 bufferRemove,
31 bufferAdd,
32 slideContents,
33 bufferAdjustL,
34
35 -- ** Inspecting
36 isEmptyBuffer,
37 isFullBuffer,
38 isFullCharBuffer,
39 isWriteBuffer,
40 bufferElems,
41 bufferAvailable,
42 summaryBuffer,
43
44 -- ** Operating on the raw buffer as a Ptr
45 withBuffer,
46 withRawBuffer,
47
48 -- ** Assertions
49 checkBuffer,
50
51 -- * Raw buffers
52 RawBuffer,
53 readWord8Buf,
54 writeWord8Buf,
55 RawCharBuffer,
56 peekCharBuf,
57 readCharBuf,
58 writeCharBuf,
59 readCharBufPtr,
60 writeCharBufPtr,
61 charSize,
62 ) where
63
64 import GHC.Base
65 -- import GHC.IO
66 import GHC.Num
67 import GHC.Ptr
68 import GHC.Word
69 import GHC.Show
70 import GHC.Real
71 import Foreign.C.Types
72 import Foreign.ForeignPtr
73 import Foreign.Storable
74
75 -- Char buffers use either UTF-16 or UTF-32, with the endianness matching
76 -- the endianness of the host.
77 --
78 -- Invariants:
79 -- * a Char buffer consists of *valid* UTF-16 or UTF-32
80 -- * only whole characters: no partial surrogate pairs
81
82 #define CHARBUF_UTF32
83
84 -- #define CHARBUF_UTF16
85 --
86 -- NB. it won't work to just change this to CHARBUF_UTF16. Some of
87 -- the code to make this work is there, and it has been tested with
88 -- the Iconv codec, but there are some pieces that are known to be
89 -- broken. In particular, the built-in codecs
90 -- e.g. GHC.IO.Encoding.UTF{8,16,32} need to use isFullCharBuffer or
91 -- similar in place of the ow >= os comparisons.
92
93 -- ---------------------------------------------------------------------------
94 -- Raw blocks of data
95
96 type RawBuffer e = ForeignPtr e
97
98 readWord8Buf :: RawBuffer Word8 -> Int -> IO Word8
99 readWord8Buf arr ix = withForeignPtr arr $ \p -> peekByteOff p ix
100
101 writeWord8Buf :: RawBuffer Word8 -> Int -> Word8 -> IO ()
102 writeWord8Buf arr ix w = withForeignPtr arr $ \p -> pokeByteOff p ix w
103
104 #if defined(CHARBUF_UTF16)
105 type CharBufElem = Word16
106 #else
107 type CharBufElem = Char
108 #endif
109
110 type RawCharBuffer = RawBuffer CharBufElem
111
112 peekCharBuf :: RawCharBuffer -> Int -> IO Char
113 peekCharBuf arr ix = withForeignPtr arr $ \p -> do
114 (c,_) <- readCharBufPtr p ix
115 return c
116
117 {-# INLINE readCharBuf #-}
118 readCharBuf :: RawCharBuffer -> Int -> IO (Char, Int)
119 readCharBuf arr ix = withForeignPtr arr $ \p -> readCharBufPtr p ix
120
121 {-# INLINE writeCharBuf #-}
122 writeCharBuf :: RawCharBuffer -> Int -> Char -> IO Int
123 writeCharBuf arr ix c = withForeignPtr arr $ \p -> writeCharBufPtr p ix c
124
125 {-# INLINE readCharBufPtr #-}
126 readCharBufPtr :: Ptr CharBufElem -> Int -> IO (Char, Int)
127 #if defined(CHARBUF_UTF16)
128 readCharBufPtr p ix = do
129 c1 <- peekElemOff p ix
130 if (c1 < 0xd800 || c1 > 0xdbff)
131 then return (chr (fromIntegral c1), ix+1)
132 else do c2 <- peekElemOff p (ix+1)
133 return (unsafeChr ((fromIntegral c1 - 0xd800)*0x400 +
134 (fromIntegral c2 - 0xdc00) + 0x10000), ix+2)
135 #else
136 readCharBufPtr p ix = do c <- peekElemOff (castPtr p) ix; return (c, ix+1)
137 #endif
138
139 {-# INLINE writeCharBufPtr #-}
140 writeCharBufPtr :: Ptr CharBufElem -> Int -> Char -> IO Int
141 #if defined(CHARBUF_UTF16)
142 writeCharBufPtr p ix ch
143 | c < 0x10000 = do pokeElemOff p ix (fromIntegral c)
144 return (ix+1)
145 | otherwise = do let c' = c - 0x10000
146 pokeElemOff p ix (fromIntegral (c' `div` 0x400 + 0xd800))
147 pokeElemOff p (ix+1) (fromIntegral (c' `mod` 0x400 + 0xdc00))
148 return (ix+2)
149 where
150 c = ord ch
151 #else
152 writeCharBufPtr p ix ch = do pokeElemOff (castPtr p) ix ch; return (ix+1)
153 #endif
154
155 charSize :: Int
156 #if defined(CHARBUF_UTF16)
157 charSize = 2
158 #else
159 charSize = 4
160 #endif
161
162 -- ---------------------------------------------------------------------------
163 -- Buffers
164
165 -- | A mutable array of bytes that can be passed to foreign functions.
166 --
167 -- The buffer is represented by a record, where the record contains
168 -- the raw buffer and the start/end points of the filled portion. The
169 -- buffer contents itself is mutable, but the rest of the record is
170 -- immutable. This is a slightly odd mix, but it turns out to be
171 -- quite practical: by making all the buffer metadata immutable, we
172 -- can have operations on buffer metadata outside of the IO monad.
173 --
174 -- The "live" elements of the buffer are those between the 'bufL' and
175 -- 'bufR' offsets. In an empty buffer, 'bufL' is equal to 'bufR', but
176 -- they might not be zero: for example, the buffer might correspond to
177 -- a memory-mapped file and in which case 'bufL' will point to the
178 -- next location to be written, which is not necessarily the beginning
179 -- of the file.
180 data Buffer e
181 = Buffer {
182 bufRaw :: !(RawBuffer e),
183 bufState :: BufferState,
184 bufSize :: !Int, -- in elements, not bytes
185 bufL :: !Int, -- offset of first item in the buffer
186 bufR :: !Int -- offset of last item + 1
187 }
188
189 #if defined(CHARBUF_UTF16)
190 type CharBuffer = Buffer Word16
191 #else
192 type CharBuffer = Buffer Char
193 #endif
194
195 data BufferState = ReadBuffer | WriteBuffer deriving (Eq)
196
197 withBuffer :: Buffer e -> (Ptr e -> IO a) -> IO a
198 withBuffer Buffer{ bufRaw=raw } f = withForeignPtr (castForeignPtr raw) f
199
200 withRawBuffer :: RawBuffer e -> (Ptr e -> IO a) -> IO a
201 withRawBuffer raw f = withForeignPtr (castForeignPtr raw) f
202
203 isEmptyBuffer :: Buffer e -> Bool
204 isEmptyBuffer Buffer{ bufL=l, bufR=r } = l == r
205
206 isFullBuffer :: Buffer e -> Bool
207 isFullBuffer Buffer{ bufR=w, bufSize=s } = s == w
208
209 -- if a Char buffer does not have room for a surrogate pair, it is "full"
210 isFullCharBuffer :: Buffer e -> Bool
211 #if defined(CHARBUF_UTF16)
212 isFullCharBuffer buf = bufferAvailable buf < 2
213 #else
214 isFullCharBuffer = isFullBuffer
215 #endif
216
217 isWriteBuffer :: Buffer e -> Bool
218 isWriteBuffer buf = case bufState buf of
219 WriteBuffer -> True
220 ReadBuffer -> False
221
222 bufferElems :: Buffer e -> Int
223 bufferElems Buffer{ bufR=w, bufL=r } = w - r
224
225 bufferAvailable :: Buffer e -> Int
226 bufferAvailable Buffer{ bufR=w, bufSize=s } = s - w
227
228 bufferRemove :: Int -> Buffer e -> Buffer e
229 bufferRemove i buf@Buffer{ bufL=r } = bufferAdjustL (r+i) buf
230
231 bufferAdjustL :: Int -> Buffer e -> Buffer e
232 bufferAdjustL l buf@Buffer{ bufR=w }
233 | l == w = buf{ bufL=0, bufR=0 }
234 | otherwise = buf{ bufL=l, bufR=w }
235
236 bufferAdd :: Int -> Buffer e -> Buffer e
237 bufferAdd i buf@Buffer{ bufR=w } = buf{ bufR=w+i }
238
239 emptyBuffer :: RawBuffer e -> Int -> BufferState -> Buffer e
240 emptyBuffer raw sz state =
241 Buffer{ bufRaw=raw, bufState=state, bufR=0, bufL=0, bufSize=sz }
242
243 newByteBuffer :: Int -> BufferState -> IO (Buffer Word8)
244 newByteBuffer c st = newBuffer c c st
245
246 newCharBuffer :: Int -> BufferState -> IO CharBuffer
247 newCharBuffer c st = newBuffer (c * charSize) c st
248
249 newBuffer :: Int -> Int -> BufferState -> IO (Buffer e)
250 newBuffer bytes sz state = do
251 fp <- mallocForeignPtrBytes bytes
252 return (emptyBuffer fp sz state)
253
254 -- | slides the contents of the buffer to the beginning
255 slideContents :: Buffer Word8 -> IO (Buffer Word8)
256 slideContents buf@Buffer{ bufL=l, bufR=r, bufRaw=raw } = do
257 let elems = r - l
258 withRawBuffer raw $ \p ->
259 do _ <- memmove p (p `plusPtr` l) (fromIntegral elems)
260 return ()
261 return buf{ bufL=0, bufR=elems }
262
263 foreign import ccall unsafe "memmove"
264 memmove :: Ptr a -> Ptr a -> CSize -> IO (Ptr a)
265
266 summaryBuffer :: Buffer a -> String
267 summaryBuffer buf = "buf" ++ show (bufSize buf) ++ "(" ++ show (bufL buf) ++ "-" ++ show (bufR buf) ++ ")"
268
269 -- INVARIANTS on Buffers:
270 -- * r <= w
271 -- * if r == w, and the buffer is for reading, then r == 0 && w == 0
272 -- * a write buffer is never full. If an operation
273 -- fills up the buffer, it will always flush it before
274 -- returning.
275 -- * a read buffer may be full as a result of hLookAhead. In normal
276 -- operation, a read buffer always has at least one character of space.
277
278 checkBuffer :: Buffer a -> IO ()
279 checkBuffer buf@Buffer{ bufState = state, bufL=r, bufR=w, bufSize=size } = do
280 check buf (
281 size > 0
282 && r <= w
283 && w <= size
284 && ( r /= w || state == WriteBuffer || (r == 0 && w == 0) )
285 && ( state /= WriteBuffer || w < size ) -- write buffer is never full
286 )
287
288 check :: Buffer a -> Bool -> IO ()
289 check _ True = return ()
290 check buf False = errorWithoutStackTrace ("buffer invariant violation: " ++ summaryBuffer buf)
291