Revert "Batch merge"
[ghc.git] / compiler / utils / FastString.hs
1 -- (c) The University of Glasgow, 1997-2006
2
3 {-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples,
4 GeneralizedNewtypeDeriving #-}
5 {-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
6 -- We always optimise this, otherwise performance of a non-optimised
7 -- compiler is severely affected
8
9 -- |
10 -- There are two principal string types used internally by GHC:
11 --
12 -- ['FastString']
13 --
14 -- * A compact, hash-consed, representation of character strings.
15 -- * Comparison is O(1), and you can get a 'Unique.Unique' from them.
16 -- * Generated by 'fsLit'.
17 -- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
18 --
19 -- ['PtrString']
20 --
21 -- * Pointer and size of a Latin-1 encoded string.
22 -- * Practically no operations.
23 -- * Outputing them is fast.
24 -- * Generated by 'sLit'.
25 -- * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
26 -- * Requires manual memory management.
27 -- Improper use may lead to memory leaks or dangling pointers.
28 -- * It assumes Latin-1 as the encoding, therefore it cannot represent
29 -- arbitrary Unicode strings.
30 --
31 -- Use 'PtrString' unless you want the facilities of 'FastString'.
32 module FastString
33 (
34 -- * ByteString
35 fastStringToByteString,
36 mkFastStringByteString,
37 fastZStringToByteString,
38 unsafeMkByteString,
39
40 -- * FastZString
41 FastZString,
42 hPutFZS,
43 zString,
44 lengthFZS,
45
46 -- * FastStrings
47 FastString(..), -- not abstract, for now.
48
49 -- ** Construction
50 fsLit,
51 mkFastString,
52 mkFastStringBytes,
53 mkFastStringByteList,
54 mkFastStringForeignPtr,
55 mkFastString#,
56
57 -- ** Deconstruction
58 unpackFS, -- :: FastString -> String
59 bytesFS, -- :: FastString -> [Word8]
60
61 -- ** Encoding
62 zEncodeFS,
63
64 -- ** Operations
65 uniqueOfFS,
66 lengthFS,
67 nullFS,
68 appendFS,
69 headFS,
70 tailFS,
71 concatFS,
72 consFS,
73 nilFS,
74 isUnderscoreFS,
75
76 -- ** Outputing
77 hPutFS,
78
79 -- ** Internal
80 getFastStringTable,
81 hasZEncoding,
82
83 -- * PtrStrings
84 PtrString (..),
85
86 -- ** Construction
87 sLit,
88 mkPtrString#,
89 mkPtrString,
90
91 -- ** Deconstruction
92 unpackPtrString,
93
94 -- ** Operations
95 lengthPS
96 ) where
97
98 #include "HsVersions.h"
99
100 import GhcPrelude as Prelude
101
102 import Encoding
103 import FastFunctions
104 import Panic
105 import Util
106
107 import Control.Concurrent.MVar
108 import Control.DeepSeq
109 import Control.Monad
110 import Data.ByteString (ByteString)
111 import qualified Data.ByteString as BS
112 import qualified Data.ByteString.Char8 as BSC
113 import qualified Data.ByteString.Internal as BS
114 import qualified Data.ByteString.Unsafe as BS
115 import Foreign.C
116 import GHC.Exts
117 import System.IO
118 import Data.Data
119 import Data.IORef
120 import Data.Maybe ( isJust )
121 import Data.Char
122 import Data.Semigroup as Semi
123
124 import GHC.IO
125
126 import Foreign
127
128 #if STAGE >= 2
129 import GHC.Conc.Sync (sharedCAF)
130 #endif
131
132 import GHC.Base ( unpackCString#, unpackNBytes# )
133
134
135 fastStringToByteString :: FastString -> ByteString
136 fastStringToByteString f = fs_bs f
137
138 fastZStringToByteString :: FastZString -> ByteString
139 fastZStringToByteString (FastZString bs) = bs
140
141 -- This will drop information if any character > '\xFF'
142 unsafeMkByteString :: String -> ByteString
143 unsafeMkByteString = BSC.pack
144
145 hashFastString :: FastString -> Int
146 hashFastString (FastString _ _ bs _)
147 = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
148 return $ hashStr (castPtr ptr) len
149
150 -- -----------------------------------------------------------------------------
151
152 newtype FastZString = FastZString ByteString
153 deriving NFData
154
155 hPutFZS :: Handle -> FastZString -> IO ()
156 hPutFZS handle (FastZString bs) = BS.hPut handle bs
157
158 zString :: FastZString -> String
159 zString (FastZString bs) =
160 inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen
161
162 lengthFZS :: FastZString -> Int
163 lengthFZS (FastZString bs) = BS.length bs
164
165 mkFastZStringString :: String -> FastZString
166 mkFastZStringString str = FastZString (BSC.pack str)
167
168 -- -----------------------------------------------------------------------------
169
170 {-|
171 A 'FastString' is an array of bytes, hashed to support fast O(1)
172 comparison. It is also associated with a character encoding, so that
173 we know how to convert a 'FastString' to the local encoding, or to the
174 Z-encoding used by the compiler internally.
175
176 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
177 -}
178
179 data FastString = FastString {
180 uniq :: {-# UNPACK #-} !Int, -- unique id
181 n_chars :: {-# UNPACK #-} !Int, -- number of chars
182 fs_bs :: {-# UNPACK #-} !ByteString,
183 fs_ref :: {-# UNPACK #-} !(IORef (Maybe FastZString))
184 }
185
186 instance Eq FastString where
187 f1 == f2 = uniq f1 == uniq f2
188
189 instance Ord FastString where
190 -- Compares lexicographically, not by unique
191 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
192 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
193 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
194 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
195 max x y | x >= y = x
196 | otherwise = y
197 min x y | x <= y = x
198 | otherwise = y
199 compare a b = cmpFS a b
200
201 instance IsString FastString where
202 fromString = fsLit
203
204 instance Semi.Semigroup FastString where
205 (<>) = appendFS
206
207 instance Monoid FastString where
208 mempty = nilFS
209 mappend = (Semi.<>)
210 mconcat = concatFS
211
212 instance Show FastString where
213 show fs = show (unpackFS fs)
214
215 instance Data FastString where
216 -- don't traverse?
217 toConstr _ = abstractConstr "FastString"
218 gunfold _ _ = error "gunfold"
219 dataTypeOf _ = mkNoRepType "FastString"
220
221 cmpFS :: FastString -> FastString -> Ordering
222 cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
223 if u1 == u2 then EQ else
224 compare (fastStringToByteString f1) (fastStringToByteString f2)
225
226 foreign import ccall unsafe "memcmp"
227 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
228
229 -- -----------------------------------------------------------------------------
230 -- Construction
231
232 {-
233 Internally, the compiler will maintain a fast string symbol table, providing
234 sharing and fast comparison. Creation of new @FastString@s then covertly does a
235 lookup, re-using the @FastString@ if there was a hit.
236
237 The design of the FastString hash table allows for lockless concurrent reads
238 and updates to multiple buckets with low synchronization overhead.
239
240 See Note [Updating the FastString table] on how it's updated.
241 -}
242 data FastStringTable = FastStringTable
243 {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets
244 (Array# (IORef FastStringTableSegment)) -- concurrent segments
245
246 data FastStringTableSegment = FastStringTableSegment
247 {-# UNPACK #-} !(MVar ()) -- the lock for write in each segment
248 {-# UNPACK #-} !(IORef Int) -- the number of elements
249 (MutableArray# RealWorld [FastString]) -- buckets in this segment
250
251 {-
252 Following parameters are determined based on:
253
254 * Benchmark based on testsuite/tests/utils/should_run/T14854.hs
255 * Stats of @echo :browse | ghc --interactive -dfaststring-stats >/dev/null@:
256 on 2018-10-24, we have 13920 entries.
257 -}
258 segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
259 segmentBits = 8
260 numSegments = 256 -- bit segmentBits
261 segmentMask = 0xff -- bit segmentBits - 1
262 initialNumBuckets = 64
263
264 hashToSegment# :: Int# -> Int#
265 hashToSegment# hash# = hash# `andI#` segmentMask#
266 where
267 !(I# segmentMask#) = segmentMask
268
269 hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
270 hashToIndex# buckets# hash# =
271 (hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
272 where
273 !(I# segmentBits#) = segmentBits
274 size# = sizeofMutableArray# buckets#
275
276 maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
277 maybeResizeSegment segmentRef = do
278 segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
279 let oldSize# = sizeofMutableArray# old#
280 newSize# = oldSize# *# 2#
281 (I# n#) <- readIORef counter
282 if isTrue# (n# <# newSize#) -- maximum load of 1
283 then return segment
284 else do
285 resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
286 case newArray# newSize# [] s1# of
287 (# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
288 forM_ [0 .. (I# oldSize#) - 1] $ \(I# i#) -> do
289 fsList <- IO $ readArray# old# i#
290 forM_ fsList $ \fs -> do
291 let -- Shall we store in hash value in FastString instead?
292 !(I# hash#) = hashFastString fs
293 idx# = hashToIndex# new# hash#
294 IO $ \s1# ->
295 case readArray# new# idx# s1# of
296 (# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
297 s3# -> (# s3#, () #)
298 writeIORef segmentRef resizedSegment
299 return resizedSegment
300
301 {-# NOINLINE stringTable #-}
302 stringTable :: FastStringTable
303 stringTable = unsafePerformIO $ do
304 let !(I# numSegments#) = numSegments
305 !(I# initialNumBuckets#) = initialNumBuckets
306 loop a# i# s1#
307 | isTrue# (i# ==# numSegments#) = s1#
308 | otherwise = case newMVar () `unIO` s1# of
309 (# s2#, lock #) -> case newIORef 0 `unIO` s2# of
310 (# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
311 (# s4#, buckets# #) -> case newIORef
312 (FastStringTableSegment lock counter buckets#) `unIO` s4# of
313 (# s5#, segment #) -> case writeArray# a# i# segment s5# of
314 s6# -> loop a# (i# +# 1#) s6#
315 uid <- newIORef 603979776 -- ord '$' * 0x01000000
316 tab <- IO $ \s1# ->
317 case newArray# numSegments# (panic "string_table") s1# of
318 (# s2#, arr# #) -> case loop arr# 0# s2# of
319 s3# -> case unsafeFreezeArray# arr# s3# of
320 (# s4#, segments# #) -> (# s4#, FastStringTable uid segments# #)
321
322 -- use the support wired into the RTS to share this CAF among all images of
323 -- libHSghc
324 #if STAGE < 2
325 return tab
326 #else
327 sharedCAF tab getOrSetLibHSghcFastStringTable
328
329 -- from the RTS; thus we cannot use this mechanism when STAGE<2; the previous
330 -- RTS might not have this symbol
331 foreign import ccall unsafe "getOrSetLibHSghcFastStringTable"
332 getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a)
333 #endif
334
335 {-
336
337 We include the FastString table in the `sharedCAF` mechanism because we'd like
338 FastStrings created by a Core plugin to have the same uniques as corresponding
339 strings created by the host compiler itself. For example, this allows plugins
340 to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or
341 even re-invoke the parser.
342
343 In particular, the following little sanity test was failing in a plugin
344 prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not
345 be looked up /by the plugin/.
346
347 let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT"
348 putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts
349
350 `mkTcOcc` involves the lookup (or creation) of a FastString. Since the
351 plugin's FastString.string_table is empty, constructing the RdrName also
352 allocates new uniques for the FastStrings "GHC.NT.Type" and "NT". These
353 uniques are almost certainly unequal to the ones that the host compiler
354 originally assigned to those FastStrings. Thus the lookup fails since the
355 domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's
356 unique.
357
358 Maintaining synchronization of the two instances of this global is rather
359 difficult because of the uses of `unsafePerformIO` in this module. Not
360 synchronizing them risks breaking the rather major invariant that two
361 FastStrings with the same unique have the same string. Thus we use the
362 lower-level `sharedCAF` mechanism that relies on Globals.c.
363
364 -}
365
366 mkFastString# :: Addr# -> FastString
367 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
368 where ptr = Ptr a#
369
370 {- Note [Updating the FastString table]
371
372 We use a concurrent hashtable which contains multiple segments, each hash value
373 always maps to the same segment. Read is lock-free, write to the a segment
374 should acquire a lock for that segment to avoid race condition, writes to
375 different segments are independent.
376
377 The procedure goes like this:
378
379 1. Find out which segment to operate on based on the hash value
380 2. Read the relevant bucket and perform a look up of the string.
381 3. If it exists, return it.
382 4. Otherwise grab a unique ID, create a new FastString and atomically attempt
383 to update the relevant segment with this FastString:
384
385 * Resize the segment by doubling the number of buckets when the number of
386 FastStrings in this segment grows beyond the threshold.
387 * Double check that the string is not in the bucket. Another thread may have
388 inserted it while we were creating our string.
389 * Return the existing FastString if it exists. The one we preemptively
390 created will get GCed.
391 * Otherwise, insert and return the string we created.
392 -}
393
394 mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString
395 mkFastStringWith mk_fs !ptr !len = do
396 FastStringTableSegment lock _ buckets# <- readIORef segmentRef
397 let idx# = hashToIndex# buckets# hash#
398 bucket <- IO $ readArray# buckets# idx#
399 res <- bucket_match bucket len ptr
400 case res of
401 Just found -> return found
402 Nothing -> do
403 -- The withMVar below is not dupable. It can lead to deadlock if it is
404 -- only run partially and putMVar is not called after takeMVar.
405 noDuplicate
406 n <- get_uid
407 new_fs <- mk_fs n
408 withMVar lock $ \_ -> insert new_fs
409 where
410 !(FastStringTable uid segments#) = stringTable
411 get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
412
413 !(I# hash#) = hashStr ptr len
414 (# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
415 insert fs = do
416 FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
417 let idx# = hashToIndex# buckets# hash#
418 bucket <- IO $ readArray# buckets# idx#
419 res <- bucket_match bucket len ptr
420 case res of
421 -- The FastString was added by another thread after previous read and
422 -- before we acquired the write lock.
423 Just found -> return found
424 Nothing -> do
425 IO $ \s1# ->
426 case writeArray# buckets# idx# (fs: bucket) s1# of
427 s2# -> (# s2#, () #)
428 modifyIORef' counter succ
429 return fs
430
431 bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
432 bucket_match [] _ _ = return Nothing
433 bucket_match (v@(FastString _ _ bs _):ls) len ptr
434 | len == BS.length bs = do
435 b <- BS.unsafeUseAsCString bs $ \buf ->
436 cmpStringPrefix ptr (castPtr buf) len
437 if b then return (Just v)
438 else bucket_match ls len ptr
439 | otherwise =
440 bucket_match ls len ptr
441
442 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
443 mkFastStringBytes !ptr !len =
444 -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
445 -- idempotent.
446 unsafeDupablePerformIO $
447 mkFastStringWith (copyNewFastString ptr len) ptr len
448
449 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
450 -- between this and 'mkFastStringBytes' is that we don't have to copy
451 -- the bytes if the string is new to the table.
452 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
453 mkFastStringForeignPtr ptr !fp len
454 = mkFastStringWith (mkNewFastString fp ptr len) ptr len
455
456 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
457 -- between this and 'mkFastStringBytes' is that we don't have to copy
458 -- the bytes if the string is new to the table.
459 mkFastStringByteString :: ByteString -> FastString
460 mkFastStringByteString bs =
461 inlinePerformIO $
462 BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
463 let ptr' = castPtr ptr
464 mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
465
466 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
467 mkFastString :: String -> FastString
468 mkFastString str =
469 inlinePerformIO $ do
470 let l = utf8EncodedLength str
471 buf <- mallocForeignPtrBytes l
472 withForeignPtr buf $ \ptr -> do
473 utf8EncodeString ptr str
474 mkFastStringForeignPtr ptr buf l
475
476 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
477 mkFastStringByteList :: [Word8] -> FastString
478 mkFastStringByteList str =
479 inlinePerformIO $ do
480 let l = Prelude.length str
481 buf <- mallocForeignPtrBytes l
482 withForeignPtr buf $ \ptr -> do
483 pokeArray (castPtr ptr) str
484 mkFastStringForeignPtr ptr buf l
485
486 -- | Creates a Z-encoded 'FastString' from a 'String'
487 mkZFastString :: String -> FastZString
488 mkZFastString = mkFastZStringString
489
490 mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
491 -> IO FastString
492 mkNewFastString fp ptr len uid = do
493 ref <- newIORef Nothing
494 n_chars <- countUTF8Chars ptr len
495 return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
496
497 mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
498 -> IO FastString
499 mkNewFastStringByteString bs ptr len uid = do
500 ref <- newIORef Nothing
501 n_chars <- countUTF8Chars ptr len
502 return (FastString uid n_chars bs ref)
503
504 copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString
505 copyNewFastString ptr len uid = do
506 fp <- copyBytesToForeignPtr ptr len
507 ref <- newIORef Nothing
508 n_chars <- countUTF8Chars ptr len
509 return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
510
511 copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
512 copyBytesToForeignPtr ptr len = do
513 fp <- mallocForeignPtrBytes len
514 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
515 return fp
516
517 cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool
518 cmpStringPrefix ptr1 ptr2 len =
519 do r <- memcmp ptr1 ptr2 len
520 return (r == 0)
521
522
523 hashStr :: Ptr Word8 -> Int -> Int
524 -- use the Addr to produce a hash value between 0 & m (inclusive)
525 hashStr (Ptr a#) (I# len#) = loop 0# 0#
526 where
527 loop h n | isTrue# (n ==# len#) = I# h
528 | otherwise = loop h2 (n +# 1#)
529 where
530 !c = ord# (indexCharOffAddr# a# n)
531 !h2 = (h *# 16777619#) `xorI#` c
532
533 -- -----------------------------------------------------------------------------
534 -- Operations
535
536 -- | Returns the length of the 'FastString' in characters
537 lengthFS :: FastString -> Int
538 lengthFS f = n_chars f
539
540 -- | Returns @True@ if this 'FastString' is not Z-encoded but already has
541 -- a Z-encoding cached (used in producing stats).
542 hasZEncoding :: FastString -> Bool
543 hasZEncoding (FastString _ _ _ ref) =
544 inlinePerformIO $ do
545 m <- readIORef ref
546 return (isJust m)
547
548 -- | Returns @True@ if the 'FastString' is empty
549 nullFS :: FastString -> Bool
550 nullFS f = BS.null (fs_bs f)
551
552 -- | Unpacks and decodes the FastString
553 unpackFS :: FastString -> String
554 unpackFS (FastString _ _ bs _) = utf8DecodeByteString bs
555
556 -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
557 bytesFS :: FastString -> [Word8]
558 bytesFS fs = BS.unpack $ fastStringToByteString fs
559
560 -- | Returns a Z-encoded version of a 'FastString'. This might be the
561 -- original, if it was already Z-encoded. The first time this
562 -- function is applied to a particular 'FastString', the results are
563 -- memoized.
564 --
565 zEncodeFS :: FastString -> FastZString
566 zEncodeFS fs@(FastString _ _ _ ref) =
567 inlinePerformIO $ do
568 m <- readIORef ref
569 case m of
570 Just zfs -> return zfs
571 Nothing -> do
572 atomicModifyIORef' ref $ \m' -> case m' of
573 Nothing -> let zfs = mkZFastString (zEncodeString (unpackFS fs))
574 in (Just zfs, zfs)
575 Just zfs -> (m', zfs)
576
577 appendFS :: FastString -> FastString -> FastString
578 appendFS fs1 fs2 = mkFastStringByteString
579 $ BS.append (fastStringToByteString fs1)
580 (fastStringToByteString fs2)
581
582 concatFS :: [FastString] -> FastString
583 concatFS = mkFastStringByteString . BS.concat . map fs_bs
584
585 headFS :: FastString -> Char
586 headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString"
587 headFS (FastString _ _ bs _) =
588 inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
589 return (fst (utf8DecodeChar (castPtr ptr)))
590
591 tailFS :: FastString -> FastString
592 tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
593 tailFS (FastString _ _ bs _) =
594 inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
595 do let (_, n) = utf8DecodeChar (castPtr ptr)
596 return $! mkFastStringByteString (BS.drop n bs)
597
598 consFS :: Char -> FastString -> FastString
599 consFS c fs = mkFastString (c : unpackFS fs)
600
601 uniqueOfFS :: FastString -> Int
602 uniqueOfFS (FastString u _ _ _) = u
603
604 nilFS :: FastString
605 nilFS = mkFastString ""
606
607 isUnderscoreFS :: FastString -> Bool
608 isUnderscoreFS fs = fs == fsLit "_"
609
610 -- -----------------------------------------------------------------------------
611 -- Stats
612
613 getFastStringTable :: IO [[[FastString]]]
614 getFastStringTable =
615 forM [0 .. numSegments - 1] $ \(I# i#) -> do
616 let (# segmentRef #) = indexArray# segments# i#
617 FastStringTableSegment _ _ buckets# <- readIORef segmentRef
618 let bucketSize = I# (sizeofMutableArray# buckets#)
619 forM [0 .. bucketSize - 1] $ \(I# j#) ->
620 IO $ readArray# buckets# j#
621 where
622 !(FastStringTable _ segments#) = stringTable
623
624 -- -----------------------------------------------------------------------------
625 -- Outputting 'FastString's
626
627 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
628 -- get the actual bytes in the 'FastString' written to the 'Handle'.
629 hPutFS :: Handle -> FastString -> IO ()
630 hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs
631
632 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
633 -- in the current locale's encoding (for error messages and suchlike).
634
635 -- -----------------------------------------------------------------------------
636 -- PtrStrings, here for convenience only.
637
638 -- | A 'PtrString' is a pointer to some array of Latin-1 encoded chars.
639 data PtrString = PtrString !(Ptr Word8) !Int
640
641 -- | Wrap an unboxed address into a 'PtrString'.
642 mkPtrString# :: Addr# -> PtrString
643 mkPtrString# a# = PtrString (Ptr a#) (ptrStrLength (Ptr a#))
644
645 -- | Encode a 'String' into a newly allocated 'PtrString' using Latin-1
646 -- encoding. The original string must not contain non-Latin-1 characters
647 -- (above codepoint @0xff@).
648 {-# INLINE mkPtrString #-}
649 mkPtrString :: String -> PtrString
650 mkPtrString s =
651 -- we don't use `unsafeDupablePerformIO` here to avoid potential memory leaks
652 -- and because someone might be using `eqAddr#` to check for string equality.
653 unsafePerformIO (do
654 let len = length s
655 p <- mallocBytes len
656 let
657 loop :: Int -> String -> IO ()
658 loop !_ [] = return ()
659 loop n (c:cs) = do
660 pokeByteOff p n (fromIntegral (ord c) :: Word8)
661 loop (1+n) cs
662 loop 0 s
663 return (PtrString p len)
664 )
665
666 -- | Decode a 'PtrString' back into a 'String' using Latin-1 encoding.
667 -- This does not free the memory associated with 'PtrString'.
668 unpackPtrString :: PtrString -> String
669 unpackPtrString (PtrString (Ptr p#) (I# n#)) = unpackNBytes# p# n#
670
671 -- | Return the length of a 'PtrString'
672 lengthPS :: PtrString -> Int
673 lengthPS (PtrString _ n) = n
674
675 -- -----------------------------------------------------------------------------
676 -- under the carpet
677
678 foreign import ccall unsafe "strlen"
679 ptrStrLength :: Ptr Word8 -> Int
680
681 {-# NOINLINE sLit #-}
682 sLit :: String -> PtrString
683 sLit x = mkPtrString x
684
685 {-# NOINLINE fsLit #-}
686 fsLit :: String -> FastString
687 fsLit x = mkFastString x
688
689 {-# RULES "slit"
690 forall x . sLit (unpackCString# x) = mkPtrString# x #-}
691 {-# RULES "fslit"
692 forall x . fsLit (unpackCString# x) = mkFastString# x #-}