Add PlainPanic for throwing exceptions without depending on pprint
[ghc.git] / compiler / utils / FastString.hs
1 -- (c) The University of Glasgow, 1997-2006
2
3 {-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples,
4 GeneralizedNewtypeDeriving #-}
5 {-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
6 -- We always optimise this, otherwise performance of a non-optimised
7 -- compiler is severely affected
8
9 -- |
10 -- There are two principal string types used internally by GHC:
11 --
12 -- ['FastString']
13 --
14 -- * A compact, hash-consed, representation of character strings.
15 -- * Comparison is O(1), and you can get a 'Unique.Unique' from them.
16 -- * Generated by 'fsLit'.
17 -- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
18 --
19 -- ['PtrString']
20 --
21 -- * Pointer and size of a Latin-1 encoded string.
22 -- * Practically no operations.
23 -- * Outputing them is fast.
24 -- * Generated by 'sLit'.
25 -- * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
26 -- * Requires manual memory management.
27 -- Improper use may lead to memory leaks or dangling pointers.
28 -- * It assumes Latin-1 as the encoding, therefore it cannot represent
29 -- arbitrary Unicode strings.
30 --
31 -- Use 'PtrString' unless you want the facilities of 'FastString'.
32 module FastString
33 (
34 -- * ByteString
35 bytesFS, -- :: FastString -> ByteString
36 fastStringToByteString, -- = bytesFS (kept for haddock)
37 mkFastStringByteString,
38 fastZStringToByteString,
39 unsafeMkByteString,
40
41 -- * FastZString
42 FastZString,
43 hPutFZS,
44 zString,
45 lengthFZS,
46
47 -- * FastStrings
48 FastString(..), -- not abstract, for now.
49
50 -- ** Construction
51 fsLit,
52 mkFastString,
53 mkFastStringBytes,
54 mkFastStringByteList,
55 mkFastStringForeignPtr,
56 mkFastString#,
57
58 -- ** Deconstruction
59 unpackFS, -- :: FastString -> String
60
61 -- ** Encoding
62 zEncodeFS,
63
64 -- ** Operations
65 uniqueOfFS,
66 lengthFS,
67 nullFS,
68 appendFS,
69 headFS,
70 tailFS,
71 concatFS,
72 consFS,
73 nilFS,
74 isUnderscoreFS,
75
76 -- ** Outputing
77 hPutFS,
78
79 -- ** Internal
80 getFastStringTable,
81 hasZEncoding,
82
83 -- * PtrStrings
84 PtrString (..),
85
86 -- ** Construction
87 sLit,
88 mkPtrString#,
89 mkPtrString,
90
91 -- ** Deconstruction
92 unpackPtrString,
93
94 -- ** Operations
95 lengthPS
96 ) where
97
98 #include "HsVersions.h"
99
100 import GhcPrelude as Prelude
101
102 import Encoding
103 import FastFunctions
104 import PlainPanic
105 import Util
106
107 import Control.Concurrent.MVar
108 import Control.DeepSeq
109 import Control.Monad
110 import Data.ByteString (ByteString)
111 import qualified Data.ByteString as BS
112 import qualified Data.ByteString.Char8 as BSC
113 import qualified Data.ByteString.Internal as BS
114 import qualified Data.ByteString.Unsafe as BS
115 import Foreign.C
116 import GHC.Exts
117 import System.IO
118 import Data.Data
119 import Data.IORef
120 import Data.Maybe ( isJust )
121 import Data.Char
122 import Data.Semigroup as Semi
123
124 import GHC.IO
125
126 import Foreign
127
128 #if STAGE >= 2
129 import GHC.Conc.Sync (sharedCAF)
130 #endif
131
132 import GHC.Base ( unpackCString#, unpackNBytes# )
133
134
135 -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
136 bytesFS :: FastString -> ByteString
137 bytesFS f = fs_bs f
138
139 {-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-}
140 fastStringToByteString :: FastString -> ByteString
141 fastStringToByteString = bytesFS
142
143 fastZStringToByteString :: FastZString -> ByteString
144 fastZStringToByteString (FastZString bs) = bs
145
146 -- This will drop information if any character > '\xFF'
147 unsafeMkByteString :: String -> ByteString
148 unsafeMkByteString = BSC.pack
149
150 hashFastString :: FastString -> Int
151 hashFastString (FastString _ _ bs _)
152 = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
153 return $ hashStr (castPtr ptr) len
154
155 -- -----------------------------------------------------------------------------
156
157 newtype FastZString = FastZString ByteString
158 deriving NFData
159
160 hPutFZS :: Handle -> FastZString -> IO ()
161 hPutFZS handle (FastZString bs) = BS.hPut handle bs
162
163 zString :: FastZString -> String
164 zString (FastZString bs) =
165 inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen
166
167 lengthFZS :: FastZString -> Int
168 lengthFZS (FastZString bs) = BS.length bs
169
170 mkFastZStringString :: String -> FastZString
171 mkFastZStringString str = FastZString (BSC.pack str)
172
173 -- -----------------------------------------------------------------------------
174
175 {-|
176 A 'FastString' is an array of bytes, hashed to support fast O(1)
177 comparison. It is also associated with a character encoding, so that
178 we know how to convert a 'FastString' to the local encoding, or to the
179 Z-encoding used by the compiler internally.
180
181 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
182 -}
183
184 data FastString = FastString {
185 uniq :: {-# UNPACK #-} !Int, -- unique id
186 n_chars :: {-# UNPACK #-} !Int, -- number of chars
187 fs_bs :: {-# UNPACK #-} !ByteString,
188 fs_ref :: {-# UNPACK #-} !(IORef (Maybe FastZString))
189 }
190
191 instance Eq FastString where
192 f1 == f2 = uniq f1 == uniq f2
193
194 instance Ord FastString where
195 -- Compares lexicographically, not by unique
196 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
197 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
198 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
199 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
200 max x y | x >= y = x
201 | otherwise = y
202 min x y | x <= y = x
203 | otherwise = y
204 compare a b = cmpFS a b
205
206 instance IsString FastString where
207 fromString = fsLit
208
209 instance Semi.Semigroup FastString where
210 (<>) = appendFS
211
212 instance Monoid FastString where
213 mempty = nilFS
214 mappend = (Semi.<>)
215 mconcat = concatFS
216
217 instance Show FastString where
218 show fs = show (unpackFS fs)
219
220 instance Data FastString where
221 -- don't traverse?
222 toConstr _ = abstractConstr "FastString"
223 gunfold _ _ = error "gunfold"
224 dataTypeOf _ = mkNoRepType "FastString"
225
226 cmpFS :: FastString -> FastString -> Ordering
227 cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
228 if u1 == u2 then EQ else
229 compare (bytesFS f1) (bytesFS f2)
230
231 foreign import ccall unsafe "memcmp"
232 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
233
234 -- -----------------------------------------------------------------------------
235 -- Construction
236
237 {-
238 Internally, the compiler will maintain a fast string symbol table, providing
239 sharing and fast comparison. Creation of new @FastString@s then covertly does a
240 lookup, re-using the @FastString@ if there was a hit.
241
242 The design of the FastString hash table allows for lockless concurrent reads
243 and updates to multiple buckets with low synchronization overhead.
244
245 See Note [Updating the FastString table] on how it's updated.
246 -}
247 data FastStringTable = FastStringTable
248 {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets
249 (Array# (IORef FastStringTableSegment)) -- concurrent segments
250
251 data FastStringTableSegment = FastStringTableSegment
252 {-# UNPACK #-} !(MVar ()) -- the lock for write in each segment
253 {-# UNPACK #-} !(IORef Int) -- the number of elements
254 (MutableArray# RealWorld [FastString]) -- buckets in this segment
255
256 {-
257 Following parameters are determined based on:
258
259 * Benchmark based on testsuite/tests/utils/should_run/T14854.hs
260 * Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
261 on 2018-10-24, we have 13920 entries.
262 -}
263 segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
264 segmentBits = 8
265 numSegments = 256 -- bit segmentBits
266 segmentMask = 0xff -- bit segmentBits - 1
267 initialNumBuckets = 64
268
269 hashToSegment# :: Int# -> Int#
270 hashToSegment# hash# = hash# `andI#` segmentMask#
271 where
272 !(I# segmentMask#) = segmentMask
273
274 hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
275 hashToIndex# buckets# hash# =
276 (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
277 where
278 !(I# segmentBits#) = segmentBits
279 size# = sizeofMutableArray# buckets#
280
281 maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
282 maybeResizeSegment segmentRef = do
283 segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
284 let oldSize# = sizeofMutableArray# old#
285 newSize# = oldSize# *# 2#
286 (I# n#) <- readIORef counter
287 if isTrue# (n# <# newSize#) -- maximum load of 1
288 then return segment
289 else do
290 resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
291 case newArray# newSize# [] s1# of
292 (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
293 forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
294 fsList <- IO $ readArray# old# i#
295 forM_ fsList $ \fs -> do
296 let -- Shall we store in hash value in FastString instead?
297 !(I# hash#) = hashFastString fs
298 idx# = hashToIndex# new# hash#
299 IO $ \s1# ->
300 case readArray# new# idx# s1# of
301 (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
302 s3# -> (# s3#, () #)
303 writeIORef segmentRef resizedSegment
304 return resizedSegment
305
306 {-# NOINLINE stringTable #-}
307 stringTable :: FastStringTable
308 stringTable = unsafePerformIO $ do
309 let !(I# numSegments#) = numSegments
310 !(I# initialNumBuckets#) = initialNumBuckets
311 loop a# i# s1#
312 | isTrue# (i# ==# numSegments#) = s1#
313 | otherwise = case newMVar () `unIO` s1# of
314 (# s2#, lock #) -> case newIORef 0 `unIO` s2# of
315 (# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
316 (# s4#, buckets# #) -> case newIORef
317 (FastStringTableSegment lock counter buckets#) `unIO` s4# of
318 (# s5#, segment #) -> case writeArray# a# i# segment s5# of
319 s6# -> loop a# (i# +# 1#) s6#
320 uid <- newIORef 603979776 -- ord '$' * 0x01000000
321 tab <- IO $ \s1# ->
322 case newArray# numSegments# (panic "string_table") s1# of
323 (# s2#, arr# #) -> case loop arr# 0# s2# of
324 s3# -> case unsafeFreezeArray# arr# s3# of
325 (# s4#, segments# #) -> (# s4#, FastStringTable uid segments# #)
326
327 -- use the support wired into the RTS to share this CAF among all images of
328 -- libHSghc
329 #if STAGE < 2
330 return tab
331 #else
332 sharedCAF tab getOrSetLibHSghcFastStringTable
333
334 -- from the RTS; thus we cannot use this mechanism when STAGE<2; the previous
335 -- RTS might not have this symbol
336 foreign import ccall unsafe "getOrSetLibHSghcFastStringTable"
337 getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a)
338 #endif
339
340 {-
341
342 We include the FastString table in the `sharedCAF` mechanism because we'd like
343 FastStrings created by a Core plugin to have the same uniques as corresponding
344 strings created by the host compiler itself. For example, this allows plugins
345 to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or
346 even re-invoke the parser.
347
348 In particular, the following little sanity test was failing in a plugin
349 prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not
350 be looked up /by the plugin/.
351
352 let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT"
353 putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts
354
355 `mkTcOcc` involves the lookup (or creation) of a FastString. Since the
356 plugin's FastString.string_table is empty, constructing the RdrName also
357 allocates new uniques for the FastStrings "GHC.NT.Type" and "NT". These
358 uniques are almost certainly unequal to the ones that the host compiler
359 originally assigned to those FastStrings. Thus the lookup fails since the
360 domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's
361 unique.
362
363 Maintaining synchronization of the two instances of this global is rather
364 difficult because of the uses of `unsafePerformIO` in this module. Not
365 synchronizing them risks breaking the rather major invariant that two
366 FastStrings with the same unique have the same string. Thus we use the
367 lower-level `sharedCAF` mechanism that relies on Globals.c.
368
369 -}
370
371 mkFastString# :: Addr# -> FastString
372 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
373 where ptr = Ptr a#
374
375 {- Note [Updating the FastString table]
376
377 We use a concurrent hashtable which contains multiple segments, each hash value
378 always maps to the same segment. Read is lock-free, write to the a segment
379 should acquire a lock for that segment to avoid race condition, writes to
380 different segments are independent.
381
382 The procedure goes like this:
383
384 1. Find out which segment to operate on based on the hash value
385 2. Read the relevant bucket and perform a look up of the string.
386 3. If it exists, return it.
387 4. Otherwise grab a unique ID, create a new FastString and atomically attempt
388 to update the relevant segment with this FastString:
389
390 * Resize the segment by doubling the number of buckets when the number of
391 FastStrings in this segment grows beyond the threshold.
392 * Double check that the string is not in the bucket. Another thread may have
393 inserted it while we were creating our string.
394 * Return the existing FastString if it exists. The one we preemptively
395 created will get GCed.
396 * Otherwise, insert and return the string we created.
397 -}
398
399 mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString
400 mkFastStringWith mk_fs !ptr !len = do
401 FastStringTableSegment lock _ buckets# <- readIORef segmentRef
402 let idx# = hashToIndex# buckets# hash#
403 bucket <- IO $ readArray# buckets# idx#
404 res <- bucket_match bucket len ptr
405 case res of
406 Just found -> return found
407 Nothing -> do
408 -- The withMVar below is not dupable. It can lead to deadlock if it is
409 -- only run partially and putMVar is not called after takeMVar.
410 noDuplicate
411 n <- get_uid
412 new_fs <- mk_fs n
413 withMVar lock $ \_ -> insert new_fs
414 where
415 !(FastStringTable uid segments#) = stringTable
416 get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
417
418 !(I# hash#) = hashStr ptr len
419 (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
420 insert fs = do
421 FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
422 let idx# = hashToIndex# buckets# hash#
423 bucket <- IO $ readArray# buckets# idx#
424 res <- bucket_match bucket len ptr
425 case res of
426 -- The FastString was added by another thread after previous read and
427 -- before we acquired the write lock.
428 Just found -> return found
429 Nothing -> do
430 IO $ \s1# ->
431 case writeArray# buckets# idx# (fs: bucket) s1# of
432 s2# -> (# s2#, () #)
433 modifyIORef' counter succ
434 return fs
435
436 bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
437 bucket_match [] _ _ = return Nothing
438 bucket_match (v@(FastString _ _ bs _):ls) len ptr
439 | len == BS.length bs = do
440 b <- BS.unsafeUseAsCString bs $ \buf ->
441 cmpStringPrefix ptr (castPtr buf) len
442 if b then return (Just v)
443 else bucket_match ls len ptr
444 | otherwise =
445 bucket_match ls len ptr
446
447 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
448 mkFastStringBytes !ptr !len =
449 -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
450 -- idempotent.
451 unsafeDupablePerformIO $
452 mkFastStringWith (copyNewFastString ptr len) ptr len
453
454 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
455 -- between this and 'mkFastStringBytes' is that we don't have to copy
456 -- the bytes if the string is new to the table.
457 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
458 mkFastStringForeignPtr ptr !fp len
459 = mkFastStringWith (mkNewFastString fp ptr len) ptr len
460
461 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
462 -- between this and 'mkFastStringBytes' is that we don't have to copy
463 -- the bytes if the string is new to the table.
464 mkFastStringByteString :: ByteString -> FastString
465 mkFastStringByteString bs =
466 inlinePerformIO $
467 BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
468 let ptr' = castPtr ptr
469 mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
470
471 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
472 mkFastString :: String -> FastString
473 mkFastString str =
474 inlinePerformIO $ do
475 let l = utf8EncodedLength str
476 buf <- mallocForeignPtrBytes l
477 withForeignPtr buf $ \ptr -> do
478 utf8EncodeString ptr str
479 mkFastStringForeignPtr ptr buf l
480
481 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
482 mkFastStringByteList :: [Word8] -> FastString
483 mkFastStringByteList str = mkFastStringByteString (BS.pack str)
484
485 -- | Creates a Z-encoded 'FastString' from a 'String'
486 mkZFastString :: String -> FastZString
487 mkZFastString = mkFastZStringString
488
489 mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
490 -> IO FastString
491 mkNewFastString fp ptr len uid = do
492 ref <- newIORef Nothing
493 n_chars <- countUTF8Chars ptr len
494 return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
495
496 mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
497 -> IO FastString
498 mkNewFastStringByteString bs ptr len uid = do
499 ref <- newIORef Nothing
500 n_chars <- countUTF8Chars ptr len
501 return (FastString uid n_chars bs ref)
502
503 copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString
504 copyNewFastString ptr len uid = do
505 fp <- copyBytesToForeignPtr ptr len
506 ref <- newIORef Nothing
507 n_chars <- countUTF8Chars ptr len
508 return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
509
510 copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
511 copyBytesToForeignPtr ptr len = do
512 fp <- mallocForeignPtrBytes len
513 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
514 return fp
515
516 cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool
517 cmpStringPrefix ptr1 ptr2 len =
518 do r <- memcmp ptr1 ptr2 len
519 return (r == 0)
520
521
522 hashStr :: Ptr Word8 -> Int -> Int
523 -- use the Addr to produce a hash value between 0 & m (inclusive)
524 hashStr (Ptr a#) (I# len#) = loop 0# 0#
525 where
526 loop h n | isTrue# (n ==# len#) = I# h
527 | otherwise = loop h2 (n +# 1#)
528 where
529 !c = ord# (indexCharOffAddr# a# n)
530 !h2 = (h *# 16777619#) `xorI#` c
531
532 -- -----------------------------------------------------------------------------
533 -- Operations
534
535 -- | Returns the length of the 'FastString' in characters
536 lengthFS :: FastString -> Int
537 lengthFS f = n_chars f
538
539 -- | Returns @True@ if this 'FastString' is not Z-encoded but already has
540 -- a Z-encoding cached (used in producing stats).
541 hasZEncoding :: FastString -> Bool
542 hasZEncoding (FastString _ _ _ ref) =
543 inlinePerformIO $ do
544 m <- readIORef ref
545 return (isJust m)
546
547 -- | Returns @True@ if the 'FastString' is empty
548 nullFS :: FastString -> Bool
549 nullFS f = BS.null (fs_bs f)
550
551 -- | Unpacks and decodes the FastString
552 unpackFS :: FastString -> String
553 unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
554
555 -- | Returns a Z-encoded version of a 'FastString'. This might be the
556 -- original, if it was already Z-encoded. The first time this
557 -- function is applied to a particular 'FastString', the results are
558 -- memoized.
559 --
560 zEncodeFS :: FastString -> FastZString
561 zEncodeFS fs@(FastString _ _ _ ref) =
562 inlinePerformIO $ do
563 m <- readIORef ref
564 case m of
565 Just zfs -> return zfs
566 Nothing -> do
567 atomicModifyIORef' ref $ \m' -> case m' of
568 Nothing -> let zfs = mkZFastString (zEncodeString (unpackFS fs))
569 in (Just zfs, zfs)
570 Just zfs -> (m', zfs)
571
572 appendFS :: FastString -> FastString -> FastString
573 appendFS fs1 fs2 = mkFastStringByteString
574 $ BS.append (bytesFS fs1) (bytesFS fs2)
575
576 concatFS :: [FastString] -> FastString
577 concatFS = mkFastStringByteString . BS.concat . map fs_bs
578
579 headFS :: FastString -> Char
580 headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString"
581 headFS (FastString _ _ bs _) =
582 inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
583 return (fst (utf8DecodeChar (castPtr ptr)))
584
585 tailFS :: FastString -> FastString
586 tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
587 tailFS (FastString _ _ bs _) =
588 inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
589 do let (_, n) = utf8DecodeChar (castPtr ptr)
590 return $! mkFastStringByteString (BS.drop n bs)
591
592 consFS :: Char -> FastString -> FastString
593 consFS c fs = mkFastString (c : unpackFS fs)
594
595 uniqueOfFS :: FastString -> Int
596 uniqueOfFS (FastString u _ _ _) = u
597
598 nilFS :: FastString
599 nilFS = mkFastString ""
600
601 isUnderscoreFS :: FastString -> Bool
602 isUnderscoreFS fs = fs == fsLit "_"
603
604 -- -----------------------------------------------------------------------------
605 -- Stats
606
607 getFastStringTable :: IO [[[FastString]]]
608 getFastStringTable =
609 forM [0 .. numSegments - 1] $ \(I# i#) -> do
610 let (# segmentRef #) = indexArray# segments# i#
611 FastStringTableSegment _ _ buckets# <- readIORef segmentRef
612 let bucketSize = I# (sizeofMutableArray# buckets#)
613 forM [0 .. bucketSize - 1] $ \(I# j#) ->
614 IO $ readArray# buckets# j#
615 where
616 !(FastStringTable _ segments#) = stringTable
617
618 -- -----------------------------------------------------------------------------
619 -- Outputting 'FastString's
620
621 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
622 -- get the actual bytes in the 'FastString' written to the 'Handle'.
623 hPutFS :: Handle -> FastString -> IO ()
624 hPutFS handle fs = BS.hPut handle $ bytesFS fs
625
626 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
627 -- in the current locale's encoding (for error messages and suchlike).
628
629 -- -----------------------------------------------------------------------------
630 -- PtrStrings, here for convenience only.
631
632 -- | A 'PtrString' is a pointer to some array of Latin-1 encoded chars.
633 data PtrString = PtrString !(Ptr Word8) !Int
634
635 -- | Wrap an unboxed address into a 'PtrString'.
636 mkPtrString# :: Addr# -> PtrString
637 mkPtrString# a# = PtrString (Ptr a#) (ptrStrLength (Ptr a#))
638
639 -- | Encode a 'String' into a newly allocated 'PtrString' using Latin-1
640 -- encoding. The original string must not contain non-Latin-1 characters
641 -- (above codepoint @0xff@).
642 {-# INLINE mkPtrString #-}
643 mkPtrString :: String -> PtrString
644 mkPtrString s =
645 -- we don't use `unsafeDupablePerformIO` here to avoid potential memory leaks
646 -- and because someone might be using `eqAddr#` to check for string equality.
647 unsafePerformIO (do
648 let len = length s
649 p <- mallocBytes len
650 let
651 loop :: Int -> String -> IO ()
652 loop !_ [] = return ()
653 loop n (c:cs) = do
654 pokeByteOff p n (fromIntegral (ord c) :: Word8)
655 loop (1+n) cs
656 loop 0 s
657 return (PtrString p len)
658 )
659
660 -- | Decode a 'PtrString' back into a 'String' using Latin-1 encoding.
661 -- This does not free the memory associated with 'PtrString'.
662 unpackPtrString :: PtrString -> String
663 unpackPtrString (PtrString (Ptr p#) (I# n#)) = unpackNBytes# p# n#
664
665 -- | Return the length of a 'PtrString'
666 lengthPS :: PtrString -> Int
667 lengthPS (PtrString _ n) = n
668
669 -- -----------------------------------------------------------------------------
670 -- under the carpet
671
672 foreign import ccall unsafe "strlen"
673 ptrStrLength :: Ptr Word8 -> Int
674
675 {-# NOINLINE sLit #-}
676 sLit :: String -> PtrString
677 sLit x = mkPtrString x
678
679 {-# NOINLINE fsLit #-}
680 fsLit :: String -> FastString
681 fsLit x = mkFastString x
682
683 {-# RULES "slit"
684 forall x . sLit (unpackCString# x) = mkPtrString# x #-}
685 {-# RULES "fslit"
686 forall x . fsLit (unpackCString# x) = mkFastString# x #-}