99db84af596e001d3254ec080d11fcdc712e01da
[ghc.git] / libraries / base / tests / IO / encoding005.hs
1 import Control.Monad
2 import Data.Word (Word8)
3 import Foreign.Ptr
4 import Foreign.Marshal.Array
5 import GHC.Foreign (peekCStringLen, withCStringLen)
6 import GHC.IO.Encoding.Failure (CodingFailureMode(..))
7 import qualified GHC.IO.Encoding.Latin1 as Latin1
8 import System.IO
9 import System.IO.Error
10
11 -- Tests for single-byte encodings that map directly to Unicode
12 -- (module GHC.IO.Encoding.Latin1)
13
14 eitherToMaybe :: Either a b -> Maybe b
15 eitherToMaybe (Left _) = Nothing
16 eitherToMaybe (Right b) = Just b
17
18 decode :: TextEncoding -> [Word8] -> IO (Maybe String)
19 decode enc xs = fmap eitherToMaybe . tryIOError $ withArrayLen xs (\sz p -> peekCStringLen enc (castPtr p, sz))
20
21 encode :: TextEncoding -> String -> IO (Maybe [Word8])
22 encode enc cs = fmap eitherToMaybe . tryIOError $ withCStringLen enc cs (\(p, sz) -> peekArray sz (castPtr p))
23
24 testIO :: (Eq a, Show a) => IO a -> a -> IO ()
25 testIO action expected = do
26 result <- action
27 when (result /= expected) $
28 putStrLn $ "Test failed: expected " ++ show expected ++ ", but got " ++ show result
29
30 -- Test char8-like encodings
31 test_char8 :: TextEncoding -> IO ()
32 test_char8 enc = do
33 testIO (decode enc [0..0xff]) $ Just ['\0'..'\xff']
34
35 testIO (encode enc ['\0'..'\x200']) $ Just ([0..0xff] ++ [0..0xff] ++ [0])
36
37 -- Test latin1-like encodings
38 test_latin1 :: CodingFailureMode -> TextEncoding -> IO ()
39 test_latin1 cfm enc = do
40 testIO (decode enc [0..0xff]) $ Just ['\0'..'\xff']
41
42 testIO (encode enc ['\0'..'\xff']) $ Just [0..0xff]
43 testIO (encode enc "\xfe\xff\x100\x101\x100\xff\xfe") $ case cfm of
44 ErrorOnCodingFailure -> Nothing
45 IgnoreCodingFailure -> Just [0xfe,0xff,0xff,0xfe]
46 TransliterateCodingFailure -> Just [0xfe,0xff,0x3f,0x3f,0x3f,0xff,0xfe]
47 -- N.B. The argument "LATIN1//TRANSLIT" to mkTextEncoding does not
48 -- correspond to "LATIN1//TRANSLIT" in iconv! Instead GHC asks iconv
49 -- to encode to "LATIN1" and uses its own "evil hack" to insert '?'
50 -- (ASCII 0x3f) in place of failures. See GHC.IO.Encoding.recoverEncode.
51 --
52 -- U+0100 is LATIN CAPITAL LETTER A WITH MACRON, which iconv would
53 -- transliterate to 'A' (ASCII 0x41). Similarly iconv would
54 -- transliterate U+0101 LATIN SMALL LETTER A WITH MACRON to 'a'
55 -- (ASCII 0x61).
56 RoundtripFailure -> Nothing
57
58 test_ascii :: CodingFailureMode -> TextEncoding -> IO ()
59 test_ascii cfm enc = do
60 testIO (decode enc [0..0x7f]) $ Just ['\0'..'\x7f']
61 testIO (decode enc [0x7e,0x7f,0x80,0x81,0x80,0x7f,0x7e]) $ case cfm of
62 ErrorOnCodingFailure -> Nothing
63 IgnoreCodingFailure -> Just "\x7e\x7f\x7f\x7e"
64 TransliterateCodingFailure -> Just "\x7e\x7f\xfffd\xfffd\xfffd\x7f\x7e"
65 -- Another GHC special: decode invalid input to the Char U+FFFD
66 -- REPLACEMENT CHARACTER.
67 RoundtripFailure -> Just "\x7e\x7f\xdc80\xdc81\xdc80\x7f\x7e"
68 -- GHC's PEP383-style String-encoding of invalid input,
69 -- see Note [Roundtripping]
70
71 testIO (encode enc ['\0'..'\x7f']) $ Just [0..0x7f]
72 testIO (encode enc "\x7e\x7f\x80\x81\x80\x7f\xe9") $ case cfm of
73 ErrorOnCodingFailure -> Nothing
74 IgnoreCodingFailure -> Just [0x7e,0x7f,0x7f]
75 TransliterateCodingFailure -> Just [0x7e,0x7f,0x3f,0x3f,0x3f,0x7f,0x3f]
76 -- See comment in test_latin1. iconv -t ASCII//TRANSLIT would encode
77 -- U+00E9 LATIN SMALL LETTER E WITH ACUTE as 'e' (ASCII 0x65).
78 RoundtripFailure -> Nothing
79
80 -- Test roundtripping for good measure
81 case cfm of
82 RoundtripFailure -> do
83 Just s <- decode enc [0..0xff]
84 testIO (encode enc s) $ Just [0..0xff]
85 _ -> return ()
86
87 main = do
88 putStrLn "char8 tests"
89 test_char8 char8 -- char8 never fails in either direction
90
91 -- These use GHC's own implementation
92 putStrLn "Latin1.ascii tests"
93 test_ascii ErrorOnCodingFailure (Latin1.ascii)
94 test_ascii IgnoreCodingFailure (Latin1.mkAscii IgnoreCodingFailure)
95 test_ascii TransliterateCodingFailure (Latin1.mkAscii TransliterateCodingFailure)
96 test_ascii RoundtripFailure (Latin1.mkAscii RoundtripFailure)
97
98 putStrLn "Latin1.latin1_checked tests"
99 test_latin1 ErrorOnCodingFailure (Latin1.latin1_checked)
100 test_latin1 IgnoreCodingFailure (Latin1.mkLatin1_checked IgnoreCodingFailure)
101 test_latin1 TransliterateCodingFailure (Latin1.mkLatin1_checked TransliterateCodingFailure)
102 test_latin1 RoundtripFailure (Latin1.mkLatin1_checked RoundtripFailure)
103
104 -- These use iconv (normally, unless it is broken)
105 putStrLn "mkTextEncoding ASCII tests"
106 test_ascii ErrorOnCodingFailure =<< mkTextEncoding "ASCII"
107 test_ascii IgnoreCodingFailure =<< mkTextEncoding "ASCII//IGNORE"
108 test_ascii TransliterateCodingFailure =<< mkTextEncoding "ASCII//TRANSLIT"
109 test_ascii RoundtripFailure =<< mkTextEncoding "ASCII//ROUNDTRIP"
110
111 putStrLn "mkTextEncoding LATIN1 tests"
112 test_latin1 ErrorOnCodingFailure =<< mkTextEncoding "LATIN1"
113 test_latin1 IgnoreCodingFailure =<< mkTextEncoding "LATIN1//IGNORE"
114 test_latin1 TransliterateCodingFailure =<< mkTextEncoding "LATIN1//TRANSLIT"
115 test_latin1 RoundtripFailure =<< mkTextEncoding "LATIN1//ROUNDTRIP"