defa33bbca2c04a34aca21711acd7d75d12e4c69
[ghc.git] / libraries / base / GHC / IO / Handle / Types.hs
1 {-# LANGUAGE Trustworthy #-}
2 {-# LANGUAGE CPP
3 , NoImplicitPrelude
4 , ExistentialQuantification
5 , AutoDeriveTypeable
6 #-}
7 {-# OPTIONS_GHC -funbox-strict-fields #-}
8 {-# OPTIONS_HADDOCK hide #-}
9
10 -----------------------------------------------------------------------------
11 -- |
12 -- Module : GHC.IO.Handle.Types
13 -- Copyright : (c) The University of Glasgow, 1994-2009
14 -- License : see libraries/base/LICENSE
15 --
16 -- Maintainer : libraries@haskell.org
17 -- Stability : internal
18 -- Portability : non-portable
19 --
20 -- Basic types for the implementation of IO Handles.
21 --
22 -----------------------------------------------------------------------------
23
24 module GHC.IO.Handle.Types (
25 Handle(..), Handle__(..), showHandle,
26 checkHandleInvariants,
27 BufferList(..),
28 HandleType(..),
29 isReadableHandleType, isWritableHandleType, isReadWriteHandleType,
30 BufferMode(..),
31 BufferCodec(..),
32 NewlineMode(..), Newline(..), nativeNewline,
33 universalNewlineMode, noNewlineTranslation, nativeNewlineMode
34 ) where
35
36 #undef DEBUG
37
38 import GHC.Base
39 import GHC.MVar
40 import GHC.IO
41 import GHC.IO.Buffer
42 import GHC.IO.BufferedIO
43 import GHC.IO.Encoding.Types
44 import GHC.IORef
45 import Data.Maybe
46 import GHC.Show
47 import GHC.Read
48 import GHC.Word
49 import GHC.IO.Device
50 import Data.Typeable
51 #ifdef DEBUG
52 import Control.Monad
53 #endif
54
55 -- ---------------------------------------------------------------------------
56 -- Handle type
57
58 -- A Handle is represented by (a reference to) a record
59 -- containing the state of the I/O port/device. We record
60 -- the following pieces of info:
61
62 -- * type (read,write,closed etc.)
63 -- * the underlying file descriptor
64 -- * buffering mode
65 -- * buffer, and spare buffers
66 -- * user-friendly name (usually the
67 -- FilePath used when IO.openFile was called)
68
69 -- Note: when a Handle is garbage collected, we want to flush its buffer
70 -- and close the OS file handle, so as to free up a (precious) resource.
71
72 -- | Haskell defines operations to read and write characters from and to files,
73 -- represented by values of type @Handle@. Each value of this type is a
74 -- /handle/: a record used by the Haskell run-time system to /manage/ I\/O
75 -- with file system objects. A handle has at least the following properties:
76 --
77 -- * whether it manages input or output or both;
78 --
79 -- * whether it is /open/, /closed/ or /semi-closed/;
80 --
81 -- * whether the object is seekable;
82 --
83 -- * whether buffering is disabled, or enabled on a line or block basis;
84 --
85 -- * a buffer (whose length may be zero).
86 --
87 -- Most handles will also have a current I\/O position indicating where the next
88 -- input or output operation will occur. A handle is /readable/ if it
89 -- manages only input or both input and output; likewise, it is /writable/ if
90 -- it manages only output or both input and output. A handle is /open/ when
91 -- first allocated.
92 -- Once it is closed it can no longer be used for either input or output,
93 -- though an implementation cannot re-use its storage while references
94 -- remain to it. Handles are in the 'Show' and 'Eq' classes. The string
95 -- produced by showing a handle is system dependent; it should include
96 -- enough information to identify the handle for debugging. A handle is
97 -- equal according to '==' only to itself; no attempt
98 -- is made to compare the internal state of different handles for equality.
99
data Handle
  = FileHandle                 -- An ordinary handle onto a file.
        FilePath               --   name of the file; only ever consulted
                               --   when building error messages
        !(MVar Handle__)       --   the handle's state record

  | DuplexHandle               -- A handle onto a read/write stream.
        FilePath               --   the FIFO's file name or, failing that,
                               --   some descriptive string (again only
                               --   used in error messages)
        !(MVar Handle__)       --   state of the read side
        !(MVar Handle__)       --   state of the write side

  deriving Typeable
114
115 -- NOTES:
116 -- * A 'FileHandle' is seekable. A 'DuplexHandle' may or may not be
117 -- seekable.
118
-- Two handles are equal only when they are built with the same
-- constructor and share the same (first) state cell; the file name is
-- never compared, and for duplex handles only the read side is looked at.
instance Eq Handle where
  lhs == rhs =
    case (lhs, rhs) of
      (FileHandle _ a,     FileHandle _ b)     -> a == b
      (DuplexHandle _ a _, DuplexHandle _ b _) -> a == b
      _                                        -> False
123
-- The mutable state behind a Handle.  The device type and the
-- encoder/decoder state types are existentially quantified, so they are
-- hidden from users of the record.
data Handle__
  = forall dev enc_state dec_state .
      (IODevice dev, BufferedIO dev, Typeable dev) =>
    Handle__
      { haDevice     :: !dev
          -- the underlying device
      , haType       :: HandleType
          -- kind of handle (read, write, append, ...)
      , haByteBuffer :: !(IORef (Buffer Word8))
          -- the byte buffer
      , haBufferMode :: BufferMode
          -- buffering mode currently in force
      , haLastDecode :: !(IORef (dec_state, Buffer Word8))
          -- a decoder state paired with a byte buffer; presumably the
          -- snapshot taken before the last decode, as described in
          -- [note Buffer Flushing] -- TODO confirm
      , haCharBuffer :: !(IORef (Buffer CharBufElem))
          -- the current Char buffer
      , haBuffers    :: !(IORef (BufferList CharBufElem))
          -- spare Char buffers
      , haEncoder    :: Maybe (TextEncoder enc_state)
          -- text encoder, if any
      , haDecoder    :: Maybe (TextDecoder dec_state)
          -- text decoder, if any
      , haCodec      :: Maybe TextEncoding
          -- text encoding, if any
      , haInputNL    :: Newline
          -- newline representation for input
      , haOutputNL   :: Newline
          -- newline representation for output
      , haOtherSide  :: Maybe (MVar Handle__)
          -- pointer to the write side of a duplex handle
      }
  deriving Typeable
143
144 -- we keep a few spare buffers around in a handle to avoid allocating
145 -- a new one for each hPutStr. These buffers are *guaranteed* to be the
146 -- same size as the main buffer.
data BufferList e
  = BufferListNil
      -- no spare buffers
  | BufferListCons (RawBuffer e) (BufferList e)
      -- one spare raw buffer followed by the remaining spares
150
151 -- Internally, we classify handles as being one
152 -- of the following:
153
data HandleType
  = ClosedHandle       -- shown as "closed"
  | SemiClosedHandle   -- shown as "semi-closed"
  | ReadHandle         -- shown as "readable"
  | WriteHandle        -- shown as "writable"
  | AppendHandle       -- shown as "writable (append)"
  | ReadWriteHandle    -- shown as "read-writable"
161
-- | 'True' exactly for the handle types that permit reading:
-- 'ReadHandle' and 'ReadWriteHandle'.
isReadableHandleType :: HandleType -> Bool
isReadableHandleType ty =
  case ty of
    ReadHandle      -> True
    ReadWriteHandle -> True
    _               -> False
166
-- | 'True' exactly for the handle types that permit writing:
-- 'AppendHandle', 'WriteHandle' and 'ReadWriteHandle'.
isWritableHandleType :: HandleType -> Bool
isWritableHandleType ty =
  case ty of
    AppendHandle    -> True
    WriteHandle     -> True
    ReadWriteHandle -> True
    _               -> False
172
-- | 'True' only for 'ReadWriteHandle'.
isReadWriteHandleType :: HandleType -> Bool
isReadWriteHandleType ty =
  case ty of
    ReadWriteHandle -> True
    _               -> False
176
177 -- INVARIANTS on Handles:
178 --
179 -- * A handle *always* has a buffer, even if it is only 1 character long
180 -- (an unbuffered handle needs a 1 character buffer in order to support
181 -- hLookAhead and hIsEOF).
182 -- * In a read Handle, the byte buffer is always empty (we decode when reading)
--   * In a write Handle, the Char buffer is always empty (we encode when writing)
184 --
-- Debug-only sanity check on a handle's buffers.  It does nothing unless
-- this module is compiled with DEBUG defined.
checkHandleInvariants :: Handle__ -> IO ()
#if defined(DEBUG)
checkHandleInvariants h_ = do
  byteBuf <- readIORef (haByteBuffer h_)
  checkBuffer byteBuf
  charBuf <- readIORef (haCharBuffer h_)
  checkBuffer charBuf
  let -- both buffers, summarised for the error message
      summaries = summaryBuffer byteBuf ++ ", " ++ summaryBuffer charBuf
      violated what = error ("checkHandleInvariants: " ++ what ++ summaries)
  -- a Char buffer that is in write mode must be empty
  when (isWriteBuffer charBuf && not (isEmptyBuffer charBuf)) $
    violated "char write buffer non-empty: "
  -- the byte and Char buffers must agree on being read or write buffers
  when (isWriteBuffer byteBuf /= isWriteBuffer charBuf) $
    violated "buffer modes differ: "

#else
checkHandleInvariants _ = return ()
#endif
202
203 -- ---------------------------------------------------------------------------
204 -- Buffering modes
205
206 -- | Three kinds of buffering are supported: line-buffering,
207 -- block-buffering or no-buffering. These modes have the following
208 -- effects. For output, items are written out, or /flushed/,
209 -- from the internal buffer according to the buffer mode:
210 --
211 -- * /line-buffering/: the entire output buffer is flushed
212 -- whenever a newline is output, the buffer overflows,
213 -- a 'System.IO.hFlush' is issued, or the handle is closed.
214 --
215 -- * /block-buffering/: the entire buffer is written out whenever it
216 -- overflows, a 'System.IO.hFlush' is issued, or the handle is closed.
217 --
218 -- * /no-buffering/: output is written immediately, and never stored
219 -- in the buffer.
220 --
221 -- An implementation is free to flush the buffer more frequently,
222 -- but not less frequently, than specified above.
223 -- The output buffer is emptied as soon as it has been written out.
224 --
225 -- Similarly, input occurs according to the buffer mode for the handle:
226 --
227 -- * /line-buffering/: when the buffer for the handle is not empty,
228 -- the next item is obtained from the buffer; otherwise, when the
229 -- buffer is empty, characters up to and including the next newline
230 -- character are read into the buffer. No characters are available
231 -- until the newline character is available or the buffer is full.
232 --
233 -- * /block-buffering/: when the buffer for the handle becomes empty,
234 -- the next block of data is read into the buffer.
235 --
236 -- * /no-buffering/: the next input item is read and returned.
237 -- The 'System.IO.hLookAhead' operation implies that even a no-buffered
238 -- handle may require a one-character buffer.
239 --
240 -- The default buffering mode when a handle is opened is
241 -- implementation-dependent and may depend on the file system object
242 -- which is attached to that handle.
243 -- For most implementations, physical files will normally be block-buffered
244 -- and terminals will normally be line-buffered.
245
data BufferMode
  = NoBuffering
      -- ^ buffering is disabled if possible.
  | LineBuffering
      -- ^ line-buffering should be enabled if possible.
  | BlockBuffering (Maybe Int)
      -- ^ block-buffering should be enabled if possible.
      -- With 'Just' @n@ the buffer holds @n@ items; with 'Nothing'
      -- the size is implementation-dependent.
  deriving (Eq, Ord, Read, Show)
255
256 {-
257 [note Buffering Implementation]
258
259 Each Handle has two buffers: a byte buffer (haByteBuffer) and a Char
260 buffer (haCharBuffer).
261
262 [note Buffered Reading]
263
264 For read Handles, bytes are read into the byte buffer, and immediately
265 decoded into the Char buffer (see
266 GHC.IO.Handle.Internals.readTextDevice). The only way there might be
267 some data left in the byte buffer is if there is a partial multi-byte
268 character sequence that cannot be decoded into a full character.
269
270 Note that the buffering mode (haBufferMode) makes no difference when
271 reading data into a Handle. When reading, we can always just read all
272 the data there is available without blocking, decode it into the Char
273 buffer, and then provide it immediately to the caller.
274
275 [note Buffered Writing]
276
Characters are written into the Char buffer by e.g. hPutStr.  At the
end of the operation, or when the char buffer is full, the buffer is
encoded to the byte buffer (see writeCharBuffer).  This is so that we
can detect encoding errors at the right point.
281
282 Hence, the Char buffer is always empty between Handle operations.
283
284 [note Buffer Sizing]
285
286 The char buffer is always a default size (dEFAULT_CHAR_BUFFER_SIZE).
287 The byte buffer size is chosen by the underlying device (via its
288 IODevice.newBuffer). Hence the size of these buffers is not under
289 user control.
290
291 There are certain minimum sizes for these buffers imposed by the
292 library (but not checked):
293
294 - we must be able to buffer at least one character, so that
295 hLookAhead can work
296
297 - the byte buffer must be able to store at least one encoded
298 character in the current encoding (6 bytes?)
299
300 - when reading, the char buffer must have room for two characters, so
301 that we can spot the \r\n sequence.
302
303 How do we implement hSetBuffering?
304
305 For reading, we have never used the user-supplied buffer size, because
306 there's no point: we always pass all available data to the reader
307 immediately. Buffering would imply waiting until a certain amount of
308 data is available, which has no advantages. So hSetBuffering is
309 essentially a no-op for read handles, except that it turns on/off raw
310 mode for the underlying device if necessary.
311
312 For writing, the buffering mode is handled by the write operations
313 themselves (hPutChar and hPutStr). Every write ends with
314 writeCharBuffer, which checks whether the buffer should be flushed
315 according to the current buffering mode. Additionally, we look for
316 newlines and flush if the mode is LineBuffering.
317
318 [note Buffer Flushing]
319
320 ** Flushing the Char buffer
321
322 We must be able to flush the Char buffer, in order to implement
323 hSetEncoding, and things like hGetBuf which want to read raw bytes.
324
325 Flushing the Char buffer on a write Handle is easy: it is always empty.
326
327 Flushing the Char buffer on a read Handle involves rewinding the byte
328 buffer to the point representing the next Char in the Char buffer.
329 This is done by
330
331 - remembering the state of the byte buffer *before* the last decode
332
333 - re-decoding the bytes that represent the chars already read from the
334 Char buffer. This gives us the point in the byte buffer that
335 represents the *next* Char to be read.
336
337 In order for this to work, after readTextHandle we must NOT MODIFY THE
338 CONTENTS OF THE BYTE OR CHAR BUFFERS, except to remove characters from
339 the Char buffer.
340
341 ** Flushing the byte buffer
342
343 The byte buffer can be flushed if the Char buffer has already been
344 flushed (see above). For a read Handle, flushing the byte buffer
345 means seeking the device back by the number of bytes in the buffer,
346 and hence it is only possible on a seekable Handle.
347
348 -}
349
350 -- ---------------------------------------------------------------------------
351 -- Newline translation
352
-- | How a newline is represented in the external file or stream.
data Newline
  = LF    -- ^ '\n'
  | CRLF  -- ^ '\r\n'
  deriving (Eq, Ord, Read, Show)
357
-- | Describes how newline characters are translated, if at all, between
-- internal Strings and the external file or stream.  Haskell Strings
-- are assumed to represent a newline with the '\n' character; the
-- newline mode says what '\n' is translated to on output, and what is
-- translated into '\n' on input.
data NewlineMode
  = NewlineMode
      { inputNL  :: Newline
          -- ^ the representation of newlines on input
      , outputNL :: Newline
          -- ^ the representation of newlines on output
      }
  deriving (Eq, Ord, Read, Show)
370
-- | The newline representation that is native to the current platform:
-- 'CRLF' on Windows, 'LF' on Unix systems.
nativeNewline :: Newline
#if defined(mingw32_HOST_OS)
nativeNewline = CRLF
#else
nativeNewline = LF
#endif
379
-- | Map '\r\n' into '\n' on input, and '\n' to the native newline
-- representation on output.  This mode can be used on any platform, and
-- works with text files using any newline convention.  The downside is
-- that @readFile >>= writeFile@ might yield a different file.
--
-- > universalNewlineMode = NewlineMode { inputNL  = CRLF,
-- >                                      outputNL = nativeNewline }
--
universalNewlineMode :: NewlineMode
universalNewlineMode =
  NewlineMode
    { inputNL  = CRLF
    , outputNL = nativeNewline
    }
391
-- | Use the native newline representation on both input and output.
--
-- > nativeNewlineMode = NewlineMode { inputNL  = nativeNewline,
-- >                                   outputNL = nativeNewline }
--
nativeNewlineMode :: NewlineMode
nativeNewlineMode =
  NewlineMode
    { inputNL  = nativeNewline
    , outputNL = nativeNewline
    }
400
-- | Perform no newline translation at all: newlines are 'LF' on both
-- input and output.
--
-- > noNewlineTranslation = NewlineMode { inputNL = LF, outputNL = LF }
--
noNewlineTranslation :: NewlineMode
noNewlineTranslation =
  NewlineMode
    { inputNL  = LF
    , outputNL = LF
    }
407
408 -- ---------------------------------------------------------------------------
409 -- Show instance for Handles
410
411 -- handle types are 'show'n when printing error msgs, so
412 -- we provide a more user-friendly Show instance for it
413 -- than the derived one.
414
instance Show HandleType where
  -- One equation per constructor; the precedence argument is ignored
  -- because every rendering is a plain phrase.
  showsPrec _ ClosedHandle     = showString "closed"
  showsPrec _ SemiClosedHandle = showString "semi-closed"
  showsPrec _ ReadHandle       = showString "readable"
  showsPrec _ WriteHandle      = showString "writable"
  showsPrec _ AppendHandle     = showString "writable (append)"
  showsPrec _ ReadWriteHandle  = showString "read-writable"
424
-- A handle is shown through its descriptive name alone; the state cells
-- are never inspected.
instance Show Handle where
  showsPrec _ h =
    case h of
      FileHandle   name _   -> showHandle name
      DuplexHandle name _ _ -> showHandle name
428
-- | Render a handle's name in the form @{handle: NAME}@, prepending it
-- to the string passed as the second argument.
showHandle :: FilePath -> String -> String
showHandle file rest = "{handle: " ++ file ++ "}" ++ rest
431