Rename literal constructors
[ghc.git] / compiler / utils / FastString.hs
1 -- (c) The University of Glasgow, 1997-2006
2
3 {-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples,
4 GeneralizedNewtypeDeriving #-}
5 {-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
6 -- We always optimise this, otherwise performance of a non-optimised
7 -- compiler is severely affected
8
9 -- |
10 -- There are two principal string types used internally by GHC:
11 --
12 -- ['FastString']
13 --
14 -- * A compact, hash-consed, representation of character strings.
15 -- * Comparison is O(1), and you can get a 'Unique.Unique' from them.
16 -- * Generated by 'fsLit'.
17 -- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
18 --
19 -- ['PtrString']
20 --
21 -- * Pointer and size of a Latin-1 encoded string.
22 -- * Practically no operations.
23 -- * Outputing them is fast.
24 -- * Generated by 'sLit'.
25 -- * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
26 -- * Requires manual memory management.
27 -- Improper use may lead to memory leaks or dangling pointers.
28 -- * It assumes Latin-1 as the encoding, therefore it cannot represent
29 -- arbitrary Unicode strings.
30 --
31 -- Use 'PtrString' unless you want the facilities of 'FastString'.
32 module FastString
33 (
34 -- * ByteString
35 fastStringToByteString,
36 mkFastStringByteString,
37 fastZStringToByteString,
38 unsafeMkByteString,
39
40 -- * FastZString
41 FastZString,
42 hPutFZS,
43 zString,
44 lengthFZS,
45
46 -- * FastStrings
47 FastString(..), -- not abstract, for now.
48
49 -- ** Construction
50 fsLit,
51 mkFastString,
52 mkFastStringBytes,
53 mkFastStringByteList,
54 mkFastStringForeignPtr,
55 mkFastString#,
56
57 -- ** Deconstruction
58 unpackFS, -- :: FastString -> String
59 bytesFS, -- :: FastString -> [Word8]
60
61 -- ** Encoding
62 zEncodeFS,
63
64 -- ** Operations
65 uniqueOfFS,
66 lengthFS,
67 nullFS,
68 appendFS,
69 headFS,
70 tailFS,
71 concatFS,
72 consFS,
73 nilFS,
74
75 -- ** Outputing
76 hPutFS,
77
78 -- ** Internal
79 getFastStringTable,
80 hasZEncoding,
81
82 -- * PtrStrings
83 PtrString (..),
84
85 -- ** Construction
86 sLit,
87 mkPtrString#,
88 mkPtrString,
89
90 -- ** Deconstruction
91 unpackPtrString,
92
93 -- ** Operations
94 lengthPS
95 ) where
96
97 #include "HsVersions.h"
98
99 import GhcPrelude as Prelude
100
101 import Encoding
102 import FastFunctions
103 import Panic
104 import Util
105
106 import Control.Concurrent.MVar
107 import Control.DeepSeq
108 import Control.Monad
109 import Data.ByteString (ByteString)
110 import qualified Data.ByteString as BS
111 import qualified Data.ByteString.Char8 as BSC
112 import qualified Data.ByteString.Internal as BS
113 import qualified Data.ByteString.Unsafe as BS
114 import Foreign.C
115 import GHC.Exts
116 import System.IO
117 import System.IO.Unsafe ( unsafePerformIO )
118 import Data.Data
119 import Data.IORef
120 import Data.Maybe ( isJust )
121 import Data.Char
122 import Data.Semigroup as Semi
123
124 import GHC.IO ( IO(..), unIO, unsafeDupablePerformIO )
125
126 import Foreign
127
128 #if STAGE >= 2
129 import GHC.Conc.Sync (sharedCAF)
130 #endif
131
132 import GHC.Base ( unpackCString#, unpackNBytes# )
133
134
135 fastStringToByteString :: FastString -> ByteString
136 fastStringToByteString f = fs_bs f
137
138 fastZStringToByteString :: FastZString -> ByteString
139 fastZStringToByteString (FastZString bs) = bs
140
141 -- This will drop information if any character > '\xFF'
142 unsafeMkByteString :: String -> ByteString
143 unsafeMkByteString = BSC.pack
144
145 hashFastString :: FastString -> Int
146 hashFastString (FastString _ _ bs _)
147 = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
148 return $ hashStr (castPtr ptr) len
149
150 -- -----------------------------------------------------------------------------
151
152 newtype FastZString = FastZString ByteString
153 deriving NFData
154
155 hPutFZS :: Handle -> FastZString -> IO ()
156 hPutFZS handle (FastZString bs) = BS.hPut handle bs
157
158 zString :: FastZString -> String
159 zString (FastZString bs) =
160 inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen
161
162 lengthFZS :: FastZString -> Int
163 lengthFZS (FastZString bs) = BS.length bs
164
165 mkFastZStringString :: String -> FastZString
166 mkFastZStringString str = FastZString (BSC.pack str)
167
168 -- -----------------------------------------------------------------------------
169
170 {-|
171 A 'FastString' is an array of bytes, hashed to support fast O(1)
172 comparison. It is also associated with a character encoding, so that
173 we know how to convert a 'FastString' to the local encoding, or to the
174 Z-encoding used by the compiler internally.
175
176 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
177 -}
178
179 data FastString = FastString {
180 uniq :: {-# UNPACK #-} !Int, -- unique id
181 n_chars :: {-# UNPACK #-} !Int, -- number of chars
182 fs_bs :: {-# UNPACK #-} !ByteString,
183 fs_ref :: {-# UNPACK #-} !(IORef (Maybe FastZString))
184 }
185
186 instance Eq FastString where
187 f1 == f2 = uniq f1 == uniq f2
188
189 instance Ord FastString where
190 -- Compares lexicographically, not by unique
191 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
192 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
193 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
194 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
195 max x y | x >= y = x
196 | otherwise = y
197 min x y | x <= y = x
198 | otherwise = y
199 compare a b = cmpFS a b
200
201 instance IsString FastString where
202 fromString = fsLit
203
204 instance Semi.Semigroup FastString where
205 (<>) = appendFS
206
207 instance Monoid FastString where
208 mempty = nilFS
209 mappend = (Semi.<>)
210 mconcat = concatFS
211
212 instance Show FastString where
213 show fs = show (unpackFS fs)
214
215 instance Data FastString where
216 -- don't traverse?
217 toConstr _ = abstractConstr "FastString"
218 gunfold _ _ = error "gunfold"
219 dataTypeOf _ = mkNoRepType "FastString"
220
221 cmpFS :: FastString -> FastString -> Ordering
222 cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
223 if u1 == u2 then EQ else
224 compare (fastStringToByteString f1) (fastStringToByteString f2)
225
226 foreign import ccall unsafe "memcmp"
227 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
228
229 -- -----------------------------------------------------------------------------
230 -- Construction
231
232 {-
233 Internally, the compiler will maintain a fast string symbol table, providing
234 sharing and fast comparison. Creation of new @FastString@s then covertly does a
235 lookup, re-using the @FastString@ if there was a hit.
236
237 The design of the FastString hash table allows for lockless concurrent reads
238 and updates to multiple buckets with low synchronization overhead.
239
240 See Note [Updating the FastString table] on how it's updated.
241 -}
242 data FastStringTable = FastStringTable
243 {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets
244 (Array# (IORef FastStringTableSegment)) -- concurrent segments
245
246 data FastStringTableSegment = FastStringTableSegment
247 {-# UNPACK #-} !(MVar ()) -- the lock for write in each segment
248 {-# UNPACK #-} !(IORef Int) -- the number of elements
249 (MutableArray# RealWorld [FastString]) -- buckets in this segment
250
251 {-
252 Following parameters are determined based on:
253
254 * Benchmark based on testsuite/tests/utils/should_run/T14854.hs
255 * Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
256 on 2018-10-24, we have 13920 entries.
257 -}
258 segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
259 segmentBits = 8
260 numSegments = 256 -- bit segmentBits
261 segmentMask = 0xff -- bit segmentBits - 1
262 initialNumBuckets = 64
263
264 hashToSegment# :: Int# -> Int#
265 hashToSegment# hash# = hash# `andI#` segmentMask#
266 where
267 !(I# segmentMask#) = segmentMask
268
269 hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
270 hashToIndex# buckets# hash# =
271 (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
272 where
273 !(I# segmentBits#) = segmentBits
274 size# = sizeofMutableArray# buckets#
275
276 maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
277 maybeResizeSegment segmentRef = do
278 segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
279 let oldSize# = sizeofMutableArray# old#
280 newSize# = oldSize# *# 2#
281 (I# n#) <- readIORef counter
282 if isTrue# (n# <# newSize#) -- maximum load of 1
283 then return segment
284 else do
285 resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
286 case newArray# newSize# [] s1# of
287 (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
288 forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
289 fsList <- IO $ readArray# old# i#
290 forM_ fsList $ \fs -> do
291 let -- Shall we store in hash value in FastString instead?
292 !(I# hash#) = hashFastString fs
293 idx# = hashToIndex# new# hash#
294 IO $ \s1# ->
295 case readArray# new# idx# s1# of
296 (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
297 s3# -> (# s3#, () #)
298 writeIORef segmentRef resizedSegment
299 return resizedSegment
300
301 {-# NOINLINE stringTable #-}
302 stringTable :: FastStringTable
303 stringTable = unsafePerformIO $ do
304 let !(I# numSegments#) = numSegments
305 !(I# initialNumBuckets#) = initialNumBuckets
306 loop a# i# s1#
307 | isTrue# (i# ==# numSegments#) = s1#
308 | otherwise = case newMVar () `unIO` s1# of
309 (# s2#, lock #) -> case newIORef 0 `unIO` s2# of
310 (# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
311 (# s4#, buckets# #) -> case newIORef
312 (FastStringTableSegment lock counter buckets#) `unIO` s4# of
313 (# s5#, segment #) -> case writeArray# a# i# segment s5# of
314 s6# -> loop a# (i# +# 1#) s6#
315 uid <- newIORef 603979776 -- ord '$' * 0x01000000
316 tab <- IO $ \s1# ->
317 case newArray# numSegments# (panic "string_table") s1# of
318 (# s2#, arr# #) -> case loop arr# 0# s2# of
319 s3# -> case unsafeFreezeArray# arr# s3# of
320 (# s4#, segments# #) -> (# s4#, FastStringTable uid segments# #)
321
322 -- use the support wired into the RTS to share this CAF among all images of
323 -- libHSghc
324 #if STAGE < 2
325 return tab
326 #else
327 sharedCAF tab getOrSetLibHSghcFastStringTable
328
329 -- from the RTS; thus we cannot use this mechanism when STAGE<2; the previous
330 -- RTS might not have this symbol
331 foreign import ccall unsafe "getOrSetLibHSghcFastStringTable"
332 getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a)
333 #endif
334
335 {-
336
337 We include the FastString table in the `sharedCAF` mechanism because we'd like
338 FastStrings created by a Core plugin to have the same uniques as corresponding
339 strings created by the host compiler itself. For example, this allows plugins
340 to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or
341 even re-invoke the parser.
342
343 In particular, the following little sanity test was failing in a plugin
344 prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not
345 be looked up /by the plugin/.
346
347 let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT"
348 putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts
349
350 `mkTcOcc` involves the lookup (or creation) of a FastString. Since the
351 plugin's FastString.string_table is empty, constructing the RdrName also
352 allocates new uniques for the FastStrings "GHC.NT.Type" and "NT". These
353 uniques are almost certainly unequal to the ones that the host compiler
354 originally assigned to those FastStrings. Thus the lookup fails since the
355 domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's
356 unique.
357
358 Maintaining synchronization of the two instances of this global is rather
359 difficult because of the uses of `unsafePerformIO` in this module. Not
360 synchronizing them risks breaking the rather major invariant that two
361 FastStrings with the same unique have the same string. Thus we use the
362 lower-level `sharedCAF` mechanism that relies on Globals.c.
363
364 -}
365
366 mkFastString# :: Addr# -> FastString
367 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
368 where ptr = Ptr a#
369
370 {- Note [Updating the FastString table]
371
372 We use a concurrent hashtable which contains multiple segments, each hash value
373 always maps to the same segment. Read is lock-free, write to the a segment
374 should acquire a lock for that segment to avoid race condition, writes to
375 different segments are independent.
376
377 The procedure goes like this:
378
379 1. Find out which segment to operate on based on the hash value
380 2. Read the relevant bucket and perform a look up of the string.
381 3. If it exists, return it.
382 4. Otherwise grab a unique ID, create a new FastString and atomically attempt
383 to update the relevant segment with this FastString:
384
385 * Resize the segment by doubling the number of buckets when the number of
386 FastStrings in this segment grows beyond the threshold.
387 * Double check that the string is not in the bucket. Another thread may have
388 inserted it while we were creating our string.
389 * Return the existing FastString if it exists. The one we preemptively
390 created will get GCed.
391 * Otherwise, insert and return the string we created.
392 -}
393
394 mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString
395 mkFastStringWith mk_fs !ptr !len = do
396 FastStringTableSegment lock _ buckets# <- readIORef segmentRef
397 let idx# = hashToIndex# buckets# hash#
398 bucket <- IO $ readArray# buckets# idx#
399 res <- bucket_match bucket len ptr
400 case res of
401 Just found -> return found
402 Nothing -> do
403 n <- get_uid
404 new_fs <- mk_fs n
405 withMVar lock $ \_ -> insert new_fs
406 where
407 !(FastStringTable uid segments#) = stringTable
408 get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
409
410 !(I# hash#) = hashStr ptr len
411 (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
412 insert fs = do
413 FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
414 let idx# = hashToIndex# buckets# hash#
415 bucket <- IO $ readArray# buckets# idx#
416 res <- bucket_match bucket len ptr
417 case res of
418 -- The FastString was added by another thread after previous read and
419 -- before we acquired the write lock.
420 Just found -> return found
421 Nothing -> do
422 IO $ \s1# ->
423 case writeArray# buckets# idx# (fs: bucket) s1# of
424 s2# -> (# s2#, () #)
425 modifyIORef' counter succ
426 return fs
427
428 bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
429 bucket_match [] _ _ = return Nothing
430 bucket_match (v@(FastString _ _ bs _):ls) len ptr
431 | len == BS.length bs = do
432 b <- BS.unsafeUseAsCString bs $ \buf ->
433 cmpStringPrefix ptr (castPtr buf) len
434 if b then return (Just v)
435 else bucket_match ls len ptr
436 | otherwise =
437 bucket_match ls len ptr
438
439 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
440 mkFastStringBytes !ptr !len =
441 -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
442 -- idempotent.
443 unsafeDupablePerformIO $
444 mkFastStringWith (copyNewFastString ptr len) ptr len
445
446 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
447 -- between this and 'mkFastStringBytes' is that we don't have to copy
448 -- the bytes if the string is new to the table.
449 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
450 mkFastStringForeignPtr ptr !fp len
451 = mkFastStringWith (mkNewFastString fp ptr len) ptr len
452
453 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
454 -- between this and 'mkFastStringBytes' is that we don't have to copy
455 -- the bytes if the string is new to the table.
456 mkFastStringByteString :: ByteString -> FastString
457 mkFastStringByteString bs =
458 inlinePerformIO $
459 BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
460 let ptr' = castPtr ptr
461 mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
462
463 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
464 mkFastString :: String -> FastString
465 mkFastString str =
466 inlinePerformIO $ do
467 let l = utf8EncodedLength str
468 buf <- mallocForeignPtrBytes l
469 withForeignPtr buf $ \ptr -> do
470 utf8EncodeString ptr str
471 mkFastStringForeignPtr ptr buf l
472
473 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
474 mkFastStringByteList :: [Word8] -> FastString
475 mkFastStringByteList str =
476 inlinePerformIO $ do
477 let l = Prelude.length str
478 buf <- mallocForeignPtrBytes l
479 withForeignPtr buf $ \ptr -> do
480 pokeArray (castPtr ptr) str
481 mkFastStringForeignPtr ptr buf l
482
483 -- | Creates a Z-encoded 'FastString' from a 'String'
484 mkZFastString :: String -> FastZString
485 mkZFastString = mkFastZStringString
486
487 mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
488 -> IO FastString
489 mkNewFastString fp ptr len uid = do
490 ref <- newIORef Nothing
491 n_chars <- countUTF8Chars ptr len
492 return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
493
494 mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
495 -> IO FastString
496 mkNewFastStringByteString bs ptr len uid = do
497 ref <- newIORef Nothing
498 n_chars <- countUTF8Chars ptr len
499 return (FastString uid n_chars bs ref)
500
501 copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString
502 copyNewFastString ptr len uid = do
503 fp <- copyBytesToForeignPtr ptr len
504 ref <- newIORef Nothing
505 n_chars <- countUTF8Chars ptr len
506 return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
507
508 copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
509 copyBytesToForeignPtr ptr len = do
510 fp <- mallocForeignPtrBytes len
511 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
512 return fp
513
514 cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool
515 cmpStringPrefix ptr1 ptr2 len =
516 do r <- memcmp ptr1 ptr2 len
517 return (r == 0)
518
519
520 hashStr :: Ptr Word8 -> Int -> Int
521 -- use the Addr to produce a hash value between 0 & m (inclusive)
522 hashStr (Ptr a#) (I# len#) = loop 0# 0#
523 where
524 loop h n | isTrue# (n ==# len#) = I# h
525 | otherwise = loop h2 (n +# 1#)
526 where
527 !c = ord# (indexCharOffAddr# a# n)
528 !h2 = (h *# 16777619#) `xorI#` c
529
530 -- -----------------------------------------------------------------------------
531 -- Operations
532
533 -- | Returns the length of the 'FastString' in characters
534 lengthFS :: FastString -> Int
535 lengthFS f = n_chars f
536
537 -- | Returns @True@ if this 'FastString' is not Z-encoded but already has
538 -- a Z-encoding cached (used in producing stats).
539 hasZEncoding :: FastString -> Bool
540 hasZEncoding (FastString _ _ _ ref) =
541 inlinePerformIO $ do
542 m <- readIORef ref
543 return (isJust m)
544
545 -- | Returns @True@ if the 'FastString' is empty
546 nullFS :: FastString -> Bool
547 nullFS f = BS.null (fs_bs f)
548
549 -- | Unpacks and decodes the FastString
550 unpackFS :: FastString -> String
551 unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
552
553 -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
554 bytesFS :: FastString -> [Word8]
555 bytesFS fs = BS.unpack $ fastStringToByteString fs
556
557 -- | Returns a Z-encoded version of a 'FastString'. This might be the
558 -- original, if it was already Z-encoded. The first time this
559 -- function is applied to a particular 'FastString', the results are
560 -- memoized.
561 --
562 zEncodeFS :: FastString -> FastZString
563 zEncodeFS fs@(FastString _ _ _ ref) =
564 inlinePerformIO $ do
565 m <- readIORef ref
566 case m of
567 Just zfs -> return zfs
568 Nothing -> do
569 atomicModifyIORef' ref $ \m' -> case m' of
570 Nothing -> let zfs = mkZFastString (zEncodeString (unpackFS fs))
571 in (Just zfs, zfs)
572 Just zfs -> (m', zfs)
573
574 appendFS :: FastString -> FastString -> FastString
575 appendFS fs1 fs2 = mkFastStringByteString
576 $ BS.append (fastStringToByteString fs1)
577 (fastStringToByteString fs2)
578
579 concatFS :: [FastString] -> FastString
580 concatFS = mkFastStringByteString . BS.concat . map fs_bs
581
582 headFS :: FastString -> Char
583 headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString"
584 headFS (FastString _ _ bs _) =
585 inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
586 return (fst (utf8DecodeChar (castPtr ptr)))
587
588 tailFS :: FastString -> FastString
589 tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
590 tailFS (FastString _ _ bs _) =
591 inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
592 do let (_, n) = utf8DecodeChar (castPtr ptr)
593 return $! mkFastStringByteString (BS.drop n bs)
594
595 consFS :: Char -> FastString -> FastString
596 consFS c fs = mkFastString (c : unpackFS fs)
597
598 uniqueOfFS :: FastString -> Int
599 uniqueOfFS (FastString u _ _ _) = u
600
601 nilFS :: FastString
602 nilFS = mkFastString ""
603
604 -- -----------------------------------------------------------------------------
605 -- Stats
606
607 getFastStringTable :: IO [[[FastString]]]
608 getFastStringTable =
609 forM [0 .. numSegments - 1] $ \(I# i#) -> do
610 let (# segmentRef #) = indexArray# segments# i#
611 FastStringTableSegment _ _ buckets# <- readIORef segmentRef
612 let bucketSize = I# (sizeofMutableArray# buckets#)
613 forM [0 .. bucketSize - 1] $ \(I# j#) ->
614 IO $ readArray# buckets# j#
615 where
616 !(FastStringTable _ segments#) = stringTable
617
618 -- -----------------------------------------------------------------------------
619 -- Outputting 'FastString's
620
621 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
622 -- get the actual bytes in the 'FastString' written to the 'Handle'.
623 hPutFS :: Handle -> FastString -> IO ()
624 hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs
625
626 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
627 -- in the current locale's encoding (for error messages and suchlike).
628
629 -- -----------------------------------------------------------------------------
630 -- PtrStrings, here for convenience only.
631
632 -- | A 'PtrString' is a pointer to some array of Latin-1 encoded chars.
633 data PtrString = PtrString !(Ptr Word8) !Int
634
635 -- | Wrap an unboxed address into a 'PtrString'.
636 mkPtrString# :: Addr# -> PtrString
637 mkPtrString# a# = PtrString (Ptr a#) (ptrStrLength (Ptr a#))
638
639 -- | Encode a 'String' into a newly allocated 'PtrString' using Latin-1
640 -- encoding. The original string must not contain non-Latin-1 characters
641 -- (above codepoint @0xff@).
642 {-# INLINE mkPtrString #-}
643 mkPtrString :: String -> PtrString
644 mkPtrString s =
645 -- we don't use `unsafeDupablePerformIO` here to avoid potential memory leaks
646 -- and because someone might be using `eqAddr#` to check for string equality.
647 unsafePerformIO (do
648 let len = length s
649 p <- mallocBytes len
650 let
651 loop :: Int -> String -> IO ()
652 loop !_ [] = return ()
653 loop n (c:cs) = do
654 pokeByteOff p n (fromIntegral (ord c) :: Word8)
655 loop (1+n) cs
656 loop 0 s
657 return (PtrString p len)
658 )
659
660 -- | Decode a 'PtrString' back into a 'String' using Latin-1 encoding.
661 -- This does not free the memory associated with 'PtrString'.
662 unpackPtrString :: PtrString -> String
663 unpackPtrString (PtrString (Ptr p#) (I# n#)) = unpackNBytes# p# n#
664
665 -- | Return the length of a 'PtrString'
666 lengthPS :: PtrString -> Int
667 lengthPS (PtrString _ n) = n
668
669 -- -----------------------------------------------------------------------------
670 -- under the carpet
671
672 foreign import ccall unsafe "strlen"
673 ptrStrLength :: Ptr Word8 -> Int
674
675 {-# NOINLINE sLit #-}
676 sLit :: String -> PtrString
677 sLit x = mkPtrString x
678
679 {-# NOINLINE fsLit #-}
680 fsLit :: String -> FastString
681 fsLit x = mkFastString x
682
683 {-# RULES "slit"
684 forall x . sLit (unpackCString# x) = mkPtrString# x #-}
685 {-# RULES "fslit"
686 forall x . fsLit (unpackCString# x) = mkFastString# x #-}