testsuite: Widen acceptance window of T1969
[ghc.git] / compiler / utils / FastString.hs
1 -- (c) The University of Glasgow, 1997-2006
2
3 {-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples,
4 GeneralizedNewtypeDeriving #-}
5 {-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
6 -- We always optimise this, otherwise performance of a non-optimised
7 -- compiler is severely affected
8
9 -- |
10 -- There are two principal string types used internally by GHC:
11 --
12 -- ['FastString']
13 --
14 -- * A compact, hash-consed, representation of character strings.
15 -- * Comparison is O(1), and you can get a 'Unique.Unique' from them.
16 -- * Generated by 'fsLit'.
17 -- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
18 --
19 -- ['PtrString']
20 --
21 -- * Pointer and size of a Latin-1 encoded string.
22 -- * Practically no operations.
23 -- * Outputting them is fast.
24 -- * Generated by 'sLit'.
25 -- * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
26 -- * Requires manual memory management.
27 -- Improper use may lead to memory leaks or dangling pointers.
28 -- * It assumes Latin-1 as the encoding, therefore it cannot represent
29 -- arbitrary Unicode strings.
30 --
31 -- Use 'PtrString' unless you want the facilities of 'FastString'.
32 module FastString
33 (
34 -- * ByteString
35 bytesFS, -- :: FastString -> ByteString
36 fastStringToByteString, -- = bytesFS (kept for haddock)
37 mkFastStringByteString,
38 fastZStringToByteString,
39 unsafeMkByteString,
40
41 -- * FastZString
42 FastZString,
43 hPutFZS,
44 zString,
45 lengthFZS,
46
47 -- * FastStrings
48 FastString(..), -- not abstract, for now.
49
50 -- ** Construction
51 fsLit,
52 mkFastString,
53 mkFastStringBytes,
54 mkFastStringByteList,
55 mkFastStringForeignPtr,
56 mkFastString#,
57
58 -- ** Deconstruction
59 unpackFS, -- :: FastString -> String
60
61 -- ** Encoding
62 zEncodeFS,
63
64 -- ** Operations
65 uniqueOfFS,
66 lengthFS,
67 nullFS,
68 appendFS,
69 headFS,
70 tailFS,
71 concatFS,
72 consFS,
73 nilFS,
74 isUnderscoreFS,
75
76 -- ** Outputting
77 hPutFS,
78
79 -- ** Internal
80 getFastStringTable,
81 getFastStringZEncCounter,
82
83 -- * PtrStrings
84 PtrString (..),
85
86 -- ** Construction
87 sLit,
88 mkPtrString#,
89 mkPtrString,
90
91 -- ** Deconstruction
92 unpackPtrString,
93
94 -- ** Operations
95 lengthPS
96 ) where
97
98 #include "HsVersions.h"
99
100 import GhcPrelude as Prelude
101
102 import Encoding
103 import FastFunctions
104 import PlainPanic
105 import Util
106
107 import Control.Concurrent.MVar
108 import Control.DeepSeq
109 import Control.Monad
110 import Data.ByteString (ByteString)
111 import qualified Data.ByteString as BS
112 import qualified Data.ByteString.Char8 as BSC
113 import qualified Data.ByteString.Internal as BS
114 import qualified Data.ByteString.Unsafe as BS
115 import Foreign.C
116 import GHC.Exts
117 import System.IO
118 import Data.Data
119 import Data.IORef
120 import Data.Char
121 import Data.Semigroup as Semi
122
123 import GHC.IO
124
125 import Foreign
126
127 #if GHC_STAGE >= 2
128 import GHC.Conc.Sync (sharedCAF)
129 #endif
130
131 import GHC.Base ( unpackCString#, unpackNBytes# )
132
133
134 -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
135 bytesFS :: FastString -> ByteString
136 bytesFS f = fs_bs f
137
138 {-# DEPRECATED fastStringToByteString "Use `bytesFS` instead" #-}
139 fastStringToByteString :: FastString -> ByteString
140 fastStringToByteString = bytesFS
141
142 fastZStringToByteString :: FastZString -> ByteString
143 fastZStringToByteString (FastZString bs) = bs
144
145 -- This will drop information if any character > '\xFF'
146 unsafeMkByteString :: String -> ByteString
147 unsafeMkByteString = BSC.pack
148
149 hashFastString :: FastString -> Int
150 hashFastString (FastString _ _ bs _)
151 = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
152 return $ hashStr (castPtr ptr) len
153
154 -- -----------------------------------------------------------------------------
155
156 newtype FastZString = FastZString ByteString
157 deriving NFData
158
159 hPutFZS :: Handle -> FastZString -> IO ()
160 hPutFZS handle (FastZString bs) = BS.hPut handle bs
161
162 zString :: FastZString -> String
163 zString (FastZString bs) =
164 inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen
165
166 lengthFZS :: FastZString -> Int
167 lengthFZS (FastZString bs) = BS.length bs
168
169 mkFastZStringString :: String -> FastZString
170 mkFastZStringString str = FastZString (BSC.pack str)
171
172 -- -----------------------------------------------------------------------------
173
174 {-| A 'FastString' is a UTF-8 encoded string together with a unique ID. All
175 'FastString's are stored in a global hashtable to support fast O(1)
176 comparison.
177
178 It is also associated with a lazy reference to the Z-encoding
179 of this string which is used by the compiler internally.
180 -}
181 data FastString = FastString {
182 uniq :: {-# UNPACK #-} !Int, -- unique id
183 n_chars :: {-# UNPACK #-} !Int, -- number of chars
184 fs_bs :: {-# UNPACK #-} !ByteString,
185 fs_zenc :: FastZString
186 -- ^ Lazily computed z-encoding of this string.
187 --
188 -- Since 'FastString's are globally memoized this is computed at most
189 -- once for any given string.
190 }
191
192 instance Eq FastString where
193 f1 == f2 = uniq f1 == uniq f2
194
195 instance Ord FastString where
196 -- Compares lexicographically, not by unique
197 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
198 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
199 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
200 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
201 max x y | x >= y = x
202 | otherwise = y
203 min x y | x <= y = x
204 | otherwise = y
205 compare a b = cmpFS a b
206
207 instance IsString FastString where
208 fromString = fsLit
209
210 instance Semi.Semigroup FastString where
211 (<>) = appendFS
212
213 instance Monoid FastString where
214 mempty = nilFS
215 mappend = (Semi.<>)
216 mconcat = concatFS
217
218 instance Show FastString where
219 show fs = show (unpackFS fs)
220
221 instance Data FastString where
222 -- don't traverse?
223 toConstr _ = abstractConstr "FastString"
224 gunfold _ _ = error "gunfold"
225 dataTypeOf _ = mkNoRepType "FastString"
226
227 instance NFData FastString where
228 rnf fs = seq fs ()
229
230 cmpFS :: FastString -> FastString -> Ordering
231 cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
232 if u1 == u2 then EQ else
233 compare (bytesFS f1) (bytesFS f2)
234
235 foreign import ccall unsafe "memcmp"
236 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
237
238 -- -----------------------------------------------------------------------------
239 -- Construction
240
241 {-
242 Internally, the compiler will maintain a fast string symbol table, providing
243 sharing and fast comparison. Creation of new @FastString@s then covertly does a
244 lookup, re-using the @FastString@ if there was a hit.
245
246 The design of the FastString hash table allows for lockless concurrent reads
247 and updates to multiple buckets with low synchronization overhead.
248
249 See Note [Updating the FastString table] on how it's updated.
250 -}
251 data FastStringTable = FastStringTable
252 {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets
253 {-# UNPACK #-} !(IORef Int) -- number of computed z-encodings for all buckets
254 (Array# (IORef FastStringTableSegment)) -- concurrent segments
255
256 data FastStringTableSegment = FastStringTableSegment
257 {-# UNPACK #-} !(MVar ()) -- the lock for write in each segment
258 {-# UNPACK #-} !(IORef Int) -- the number of elements
259 (MutableArray# RealWorld [FastString]) -- buckets in this segment
260
261 {-
262 Following parameters are determined based on:
263
264 * Benchmark based on testsuite/tests/utils/should_run/T14854.hs
265 * Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
266 on 2018-10-24, we have 13920 entries.
267 -}
268 segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
269 segmentBits = 8
270 numSegments = 256 -- bit segmentBits
271 segmentMask = 0xff -- bit segmentBits - 1
272 initialNumBuckets = 64
273
274 hashToSegment# :: Int# -> Int#
275 hashToSegment# hash# = hash# `andI#` segmentMask#
276 where
277 !(I# segmentMask#) = segmentMask
278
279 hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
280 hashToIndex# buckets# hash# =
281 (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
282 where
283 !(I# segmentBits#) = segmentBits
284 size# = sizeofMutableArray# buckets#
285
286 maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
287 maybeResizeSegment segmentRef = do
288 segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
289 let oldSize# = sizeofMutableArray# old#
290 newSize# = oldSize# *# 2#
291 (I# n#) <- readIORef counter
292 if isTrue# (n# <# newSize#) -- maximum load of 1
293 then return segment
294 else do
295 resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
296 case newArray# newSize# [] s1# of
297 (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
298 forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
299 fsList <- IO $ readArray# old# i#
300 forM_ fsList $ \fs -> do
301 let -- Shall we store in hash value in FastString instead?
302 !(I# hash#) = hashFastString fs
303 idx# = hashToIndex# new# hash#
304 IO $ \s1# ->
305 case readArray# new# idx# s1# of
306 (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
307 s3# -> (# s3#, () #)
308 writeIORef segmentRef resizedSegment
309 return resizedSegment
310
311 {-# NOINLINE stringTable #-}
312 stringTable :: FastStringTable
313 stringTable = unsafePerformIO $ do
314 let !(I# numSegments#) = numSegments
315 !(I# initialNumBuckets#) = initialNumBuckets
316 loop a# i# s1#
317 | isTrue# (i# ==# numSegments#) = s1#
318 | otherwise = case newMVar () `unIO` s1# of
319 (# s2#, lock #) -> case newIORef 0 `unIO` s2# of
320 (# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
321 (# s4#, buckets# #) -> case newIORef
322 (FastStringTableSegment lock counter buckets#) `unIO` s4# of
323 (# s5#, segment #) -> case writeArray# a# i# segment s5# of
324 s6# -> loop a# (i# +# 1#) s6#
325 uid <- newIORef 603979776 -- ord '$' * 0x01000000
326 n_zencs <- newIORef 0
327 tab <- IO $ \s1# ->
328 case newArray# numSegments# (panic "string_table") s1# of
329 (# s2#, arr# #) -> case loop arr# 0# s2# of
330 s3# -> case unsafeFreezeArray# arr# s3# of
331 (# s4#, segments# #) ->
332 (# s4#, FastStringTable uid n_zencs segments# #)
333
334 -- use the support wired into the RTS to share this CAF among all images of
335 -- libHSghc
336 #if GHC_STAGE < 2
337 return tab
338 #else
339 sharedCAF tab getOrSetLibHSghcFastStringTable
340
341 -- from the RTS; thus we cannot use this mechanism when GHC_STAGE<2; the previous
342 -- RTS might not have this symbol
343 foreign import ccall unsafe "getOrSetLibHSghcFastStringTable"
344 getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a)
345 #endif
346
347 {-
348
349 We include the FastString table in the `sharedCAF` mechanism because we'd like
350 FastStrings created by a Core plugin to have the same uniques as corresponding
351 strings created by the host compiler itself. For example, this allows plugins
352 to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or
353 even re-invoke the parser.
354
355 In particular, the following little sanity test was failing in a plugin
356 prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not
357 be looked up /by the plugin/.
358
359 let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT"
360 putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts
361
362 `mkTcOcc` involves the lookup (or creation) of a FastString. Since the
363 plugin's FastString.string_table is empty, constructing the RdrName also
364 allocates new uniques for the FastStrings "GHC.NT.Type" and "NT". These
365 uniques are almost certainly unequal to the ones that the host compiler
366 originally assigned to those FastStrings. Thus the lookup fails since the
367 domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's
368 unique.
369
370 Maintaining synchronization of the two instances of this global is rather
371 difficult because of the uses of `unsafePerformIO` in this module. Not
372 synchronizing them risks breaking the rather major invariant that two
373 FastStrings with the same unique have the same string. Thus we use the
374 lower-level `sharedCAF` mechanism that relies on Globals.c.
375
376 -}
377
378 mkFastString# :: Addr# -> FastString
379 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
380 where ptr = Ptr a#
381
382 {- Note [Updating the FastString table]
383
384 We use a concurrent hashtable which contains multiple segments, each hash value
385 always maps to the same segment. Read is lock-free, write to the a segment
386 should acquire a lock for that segment to avoid race condition, writes to
387 different segments are independent.
388
389 The procedure goes like this:
390
391 1. Find out which segment to operate on based on the hash value
392 2. Read the relevant bucket and perform a look up of the string.
393 3. If it exists, return it.
394 4. Otherwise grab a unique ID, create a new FastString and atomically attempt
395 to update the relevant segment with this FastString:
396
397 * Resize the segment by doubling the number of buckets when the number of
398 FastStrings in this segment grows beyond the threshold.
399 * Double check that the string is not in the bucket. Another thread may have
400 inserted it while we were creating our string.
401 * Return the existing FastString if it exists. The one we preemptively
402 created will get GCed.
403 * Otherwise, insert and return the string we created.
404 -}
405
406 mkFastStringWith
407 :: (Int -> IORef Int-> IO FastString) -> Ptr Word8 -> Int -> IO FastString
408 mkFastStringWith mk_fs !ptr !len = do
409 FastStringTableSegment lock _ buckets# <- readIORef segmentRef
410 let idx# = hashToIndex# buckets# hash#
411 bucket <- IO $ readArray# buckets# idx#
412 res <- bucket_match bucket len ptr
413 case res of
414 Just found -> return found
415 Nothing -> do
416 -- The withMVar below is not dupable. It can lead to deadlock if it is
417 -- only run partially and putMVar is not called after takeMVar.
418 noDuplicate
419 n <- get_uid
420 new_fs <- mk_fs n n_zencs
421 withMVar lock $ \_ -> insert new_fs
422 where
423 !(FastStringTable uid n_zencs segments#) = stringTable
424 get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
425
426 !(I# hash#) = hashStr ptr len
427 (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
428 insert fs = do
429 FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
430 let idx# = hashToIndex# buckets# hash#
431 bucket <- IO $ readArray# buckets# idx#
432 res <- bucket_match bucket len ptr
433 case res of
434 -- The FastString was added by another thread after previous read and
435 -- before we acquired the write lock.
436 Just found -> return found
437 Nothing -> do
438 IO $ \s1# ->
439 case writeArray# buckets# idx# (fs: bucket) s1# of
440 s2# -> (# s2#, () #)
441 modifyIORef' counter succ
442 return fs
443
444 bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
445 bucket_match [] _ _ = return Nothing
446 bucket_match (v@(FastString _ _ bs _):ls) len ptr
447 | len == BS.length bs = do
448 b <- BS.unsafeUseAsCString bs $ \buf ->
449 cmpStringPrefix ptr (castPtr buf) len
450 if b then return (Just v)
451 else bucket_match ls len ptr
452 | otherwise =
453 bucket_match ls len ptr
454
455 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
456 mkFastStringBytes !ptr !len =
457 -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
458 -- idempotent.
459 unsafeDupablePerformIO $
460 mkFastStringWith (copyNewFastString ptr len) ptr len
461
462 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
463 -- between this and 'mkFastStringBytes' is that we don't have to copy
464 -- the bytes if the string is new to the table.
465 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
466 mkFastStringForeignPtr ptr !fp len
467 = mkFastStringWith (mkNewFastString fp ptr len) ptr len
468
469 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
470 -- between this and 'mkFastStringBytes' is that we don't have to copy
471 -- the bytes if the string is new to the table.
472 mkFastStringByteString :: ByteString -> FastString
473 mkFastStringByteString bs =
474 inlinePerformIO $
475 BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
476 let ptr' = castPtr ptr
477 mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
478
479 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
480 mkFastString :: String -> FastString
481 mkFastString str =
482 inlinePerformIO $ do
483 let l = utf8EncodedLength str
484 buf <- mallocForeignPtrBytes l
485 withForeignPtr buf $ \ptr -> do
486 utf8EncodeString ptr str
487 mkFastStringForeignPtr ptr buf l
488
489 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
490 mkFastStringByteList :: [Word8] -> FastString
491 mkFastStringByteList str = mkFastStringByteString (BS.pack str)
492
493 -- | Creates a (lazy) Z-encoded 'FastString' from a 'String' and account
494 -- the number of forced z-strings into the passed 'IORef'.
495 mkZFastString :: IORef Int -> ByteString -> FastZString
496 mkZFastString n_zencs bs = unsafePerformIO $ do
497 atomicModifyIORef' n_zencs $ \n -> (n+1, ())
498 return $ mkFastZStringString (zEncodeString (utf8DecodeByteString bs))
499
500 mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
501 -> IORef Int -> IO FastString
502 mkNewFastString fp ptr len uid n_zencs = do
503 let bs = BS.fromForeignPtr fp 0 len
504 zstr = mkZFastString n_zencs bs
505 n_chars <- countUTF8Chars ptr len
506 return (FastString uid n_chars bs zstr)
507
508 mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
509 -> IORef Int -> IO FastString
510 mkNewFastStringByteString bs ptr len uid n_zencs = do
511 let zstr = mkZFastString n_zencs bs
512 n_chars <- countUTF8Chars ptr len
513 return (FastString uid n_chars bs zstr)
514
515 copyNewFastString :: Ptr Word8 -> Int -> Int -> IORef Int -> IO FastString
516 copyNewFastString ptr len uid n_zencs = do
517 fp <- copyBytesToForeignPtr ptr len
518 let bs = BS.fromForeignPtr fp 0 len
519 zstr = mkZFastString n_zencs bs
520 n_chars <- countUTF8Chars ptr len
521 return (FastString uid n_chars bs zstr)
522
523 copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
524 copyBytesToForeignPtr ptr len = do
525 fp <- mallocForeignPtrBytes len
526 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
527 return fp
528
529 cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool
530 cmpStringPrefix ptr1 ptr2 len =
531 do r <- memcmp ptr1 ptr2 len
532 return (r == 0)
533
534
535 hashStr :: Ptr Word8 -> Int -> Int
536 -- use the Addr to produce a hash value between 0 & m (inclusive)
537 hashStr (Ptr a#) (I# len#) = loop 0# 0#
538 where
539 loop h n | isTrue# (n ==# len#) = I# h
540 | otherwise = loop h2 (n +# 1#)
541 where
542 !c = ord# (indexCharOffAddr# a# n)
543 !h2 = (h *# 16777619#) `xorI#` c
544
545 -- -----------------------------------------------------------------------------
546 -- Operations
547
548 -- | Returns the length of the 'FastString' in characters
549 lengthFS :: FastString -> Int
550 lengthFS f = n_chars f
551
552 -- | Returns @True@ if the 'FastString' is empty
553 nullFS :: FastString -> Bool
554 nullFS f = BS.null (fs_bs f)
555
556 -- | Unpacks and decodes the FastString
557 unpackFS :: FastString -> String
558 unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
559
560 -- | Returns a Z-encoded version of a 'FastString'. This might be the
561 -- original, if it was already Z-encoded. The first time this
562 -- function is applied to a particular 'FastString', the results are
563 -- memoized.
564 --
565 zEncodeFS :: FastString -> FastZString
566 zEncodeFS (FastString _ _ _ ref) = ref
567
568 appendFS :: FastString -> FastString -> FastString
569 appendFS fs1 fs2 = mkFastStringByteString
570 $ BS.append (bytesFS fs1) (bytesFS fs2)
571
572 concatFS :: [FastString] -> FastString
573 concatFS = mkFastStringByteString . BS.concat . map fs_bs
574
575 headFS :: FastString -> Char
576 headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString"
577 headFS (FastString _ _ bs _) =
578 inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
579 return (fst (utf8DecodeChar (castPtr ptr)))
580
581 tailFS :: FastString -> FastString
582 tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
583 tailFS (FastString _ _ bs _) =
584 inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
585 do let (_, n) = utf8DecodeChar (castPtr ptr)
586 return $! mkFastStringByteString (BS.drop n bs)
587
588 consFS :: Char -> FastString -> FastString
589 consFS c fs = mkFastString (c : unpackFS fs)
590
591 uniqueOfFS :: FastString -> Int
592 uniqueOfFS (FastString u _ _ _) = u
593
594 nilFS :: FastString
595 nilFS = mkFastString ""
596
597 isUnderscoreFS :: FastString -> Bool
598 isUnderscoreFS fs = fs == fsLit "_"
599
600 -- -----------------------------------------------------------------------------
601 -- Stats
602
603 getFastStringTable :: IO [[[FastString]]]
604 getFastStringTable =
605 forM [0 .. numSegments - 1] $ \(I# i#) -> do
606 let (# segmentRef #) = indexArray# segments# i#
607 FastStringTableSegment _ _ buckets# <- readIORef segmentRef
608 let bucketSize = I# (sizeofMutableArray# buckets#)
609 forM [0 .. bucketSize - 1] $ \(I# j#) ->
610 IO $ readArray# buckets# j#
611 where
612 !(FastStringTable _ _ segments#) = stringTable
613
614 getFastStringZEncCounter :: IO Int
615 getFastStringZEncCounter = readIORef n_zencs
616 where
617 !(FastStringTable _ n_zencs _) = stringTable
618
619 -- -----------------------------------------------------------------------------
620 -- Outputting 'FastString's
621
622 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
623 -- get the actual bytes in the 'FastString' written to the 'Handle'.
624 hPutFS :: Handle -> FastString -> IO ()
625 hPutFS handle fs = BS.hPut handle $ bytesFS fs
626
627 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
628 -- in the current locale's encoding (for error messages and suchlike).
629
630 -- -----------------------------------------------------------------------------
631 -- PtrStrings, here for convenience only.
632
633 -- | A 'PtrString' is a pointer to some array of Latin-1 encoded chars.
634 data PtrString = PtrString !(Ptr Word8) !Int
635
636 -- | Wrap an unboxed address into a 'PtrString'.
637 mkPtrString# :: Addr# -> PtrString
638 mkPtrString# a# = PtrString (Ptr a#) (ptrStrLength (Ptr a#))
639
640 -- | Encode a 'String' into a newly allocated 'PtrString' using Latin-1
641 -- encoding. The original string must not contain non-Latin-1 characters
642 -- (above codepoint @0xff@).
643 {-# INLINE mkPtrString #-}
644 mkPtrString :: String -> PtrString
645 mkPtrString s =
646 -- we don't use `unsafeDupablePerformIO` here to avoid potential memory leaks
647 -- and because someone might be using `eqAddr#` to check for string equality.
648 unsafePerformIO (do
649 let len = length s
650 p <- mallocBytes len
651 let
652 loop :: Int -> String -> IO ()
653 loop !_ [] = return ()
654 loop n (c:cs) = do
655 pokeByteOff p n (fromIntegral (ord c) :: Word8)
656 loop (1+n) cs
657 loop 0 s
658 return (PtrString p len)
659 )
660
661 -- | Decode a 'PtrString' back into a 'String' using Latin-1 encoding.
662 -- This does not free the memory associated with 'PtrString'.
663 unpackPtrString :: PtrString -> String
664 unpackPtrString (PtrString (Ptr p#) (I# n#)) = unpackNBytes# p# n#
665
666 -- | Return the length of a 'PtrString'
667 lengthPS :: PtrString -> Int
668 lengthPS (PtrString _ n) = n
669
670 -- -----------------------------------------------------------------------------
671 -- under the carpet
672
673 foreign import ccall unsafe "strlen"
674 ptrStrLength :: Ptr Word8 -> Int
675
676 {-# NOINLINE sLit #-}
677 sLit :: String -> PtrString
678 sLit x = mkPtrString x
679
680 {-# NOINLINE fsLit #-}
681 fsLit :: String -> FastString
682 fsLit x = mkFastString x
683
684 {-# RULES "slit"
685 forall x . sLit (unpackCString# x) = mkPtrString# x #-}
686 {-# RULES "fslit"
687 forall x . fsLit (unpackCString# x) = mkFastString# x #-}