Revert "Make globals use sharedCAF"
[ghc.git] / compiler / utils / FastString.hs
1 -- (c) The University of Glasgow, 1997-2006
2
3 {-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples,
4 GeneralizedNewtypeDeriving #-}
5 {-# OPTIONS_GHC -O -funbox-strict-fields #-}
6 -- We always optimise this, otherwise performance of a non-optimised
7 -- compiler is severely affected
8
9 -- |
10 -- There are two principal string types used internally by GHC:
11 --
12 -- ['FastString']
13 --
14 -- * A compact, hash-consed, representation of character strings.
15 -- * Comparison is O(1), and you can get a 'Unique.Unique' from them.
16 -- * Generated by 'fsLit'.
17 -- * Turn into 'Outputable.SDoc' with 'Outputable.ftext'.
18 --
19 -- ['LitString']
20 --
21 -- * Just a wrapper for the @Addr#@ of a C string (@Ptr CChar@).
22 -- * Practically no operations.
23 -- * Outputing them is fast.
24 -- * Generated by 'sLit'.
25 -- * Turn into 'Outputable.SDoc' with 'Outputable.ptext'
26 --
27 -- Use 'LitString' unless you want the facilities of 'FastString'.
28 module FastString
29 (
30 -- * ByteString
31 fastStringToByteString,
32 mkFastStringByteString,
33 fastZStringToByteString,
34 unsafeMkByteString,
35 hashByteString,
36
37 -- * FastZString
38 FastZString,
39 hPutFZS,
40 zString,
41 lengthFZS,
42
43 -- * FastStrings
44 FastString(..), -- not abstract, for now.
45
46 -- ** Construction
47 fsLit,
48 mkFastString,
49 mkFastStringBytes,
50 mkFastStringByteList,
51 mkFastStringForeignPtr,
52 mkFastString#,
53
54 -- ** Deconstruction
55 unpackFS, -- :: FastString -> String
56 bytesFS, -- :: FastString -> [Word8]
57
58 -- ** Encoding
59 zEncodeFS,
60
61 -- ** Operations
62 uniqueOfFS,
63 lengthFS,
64 nullFS,
65 appendFS,
66 headFS,
67 tailFS,
68 concatFS,
69 consFS,
70 nilFS,
71
72 -- ** Outputing
73 hPutFS,
74
75 -- ** Internal
76 getFastStringTable,
77 hasZEncoding,
78
79 -- * LitStrings
80 LitString,
81
82 -- ** Construction
83 sLit,
84 mkLitString#,
85 mkLitString,
86
87 -- ** Deconstruction
88 unpackLitString,
89
90 -- ** Operations
91 lengthLS
92 ) where
93
94 #include "HsVersions.h"
95
96 import Encoding
97 import FastFunctions
98 import Panic
99 import Util
100
101 import Control.DeepSeq
102 import Control.Monad
103 import Data.ByteString (ByteString)
104 import qualified Data.ByteString as BS
105 import qualified Data.ByteString.Char8 as BSC
106 import qualified Data.ByteString.Internal as BS
107 import qualified Data.ByteString.Unsafe as BS
108 import Foreign.C
109 import GHC.Exts
110 import System.IO
111 import System.IO.Unsafe ( unsafePerformIO )
112 import Data.Data
113 import Data.IORef ( IORef, newIORef, readIORef, atomicModifyIORef' )
114 import Data.Maybe ( isJust )
115 import Data.Char
116 import Data.List ( elemIndex )
117
118 import GHC.IO ( IO(..), unsafeDupablePerformIO )
119
120 import Foreign
121
122 #if STAGE >= 2
123 import GHC.Conc.Sync (sharedCAF)
124 #endif
125
126 import GHC.Base ( unpackCString# )
127
128 #define hASH_TBL_SIZE 4091
129 #define hASH_TBL_SIZE_UNBOXED 4091#
130
131
132 fastStringToByteString :: FastString -> ByteString
133 fastStringToByteString f = fs_bs f
134
135 fastZStringToByteString :: FastZString -> ByteString
136 fastZStringToByteString (FastZString bs) = bs
137
138 -- This will drop information if any character > '\xFF'
139 unsafeMkByteString :: String -> ByteString
140 unsafeMkByteString = BSC.pack
141
142 hashByteString :: ByteString -> Int
143 hashByteString bs
144 = inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
145 return $ hashStr (castPtr ptr) len
146
147 -- -----------------------------------------------------------------------------
148
149 newtype FastZString = FastZString ByteString
150 deriving NFData
151
152 hPutFZS :: Handle -> FastZString -> IO ()
153 hPutFZS handle (FastZString bs) = BS.hPut handle bs
154
155 zString :: FastZString -> String
156 zString (FastZString bs) =
157 inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen
158
159 lengthFZS :: FastZString -> Int
160 lengthFZS (FastZString bs) = BS.length bs
161
162 mkFastZStringString :: String -> FastZString
163 mkFastZStringString str = FastZString (BSC.pack str)
164
165 -- -----------------------------------------------------------------------------
166
167 {-|
168 A 'FastString' is an array of bytes, hashed to support fast O(1)
169 comparison. It is also associated with a character encoding, so that
170 we know how to convert a 'FastString' to the local encoding, or to the
171 Z-encoding used by the compiler internally.
172
173 'FastString's support a memoized conversion to the Z-encoding via zEncodeFS.
174 -}
175
176 data FastString = FastString {
177 uniq :: {-# UNPACK #-} !Int, -- unique id
178 n_chars :: {-# UNPACK #-} !Int, -- number of chars
179 fs_bs :: {-# UNPACK #-} !ByteString,
180 fs_ref :: {-# UNPACK #-} !(IORef (Maybe FastZString))
181 }
182
183 instance Eq FastString where
184 f1 == f2 = uniq f1 == uniq f2
185
186 instance Ord FastString where
187 -- Compares lexicographically, not by unique
188 a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
189 a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
190 a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
191 a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
192 max x y | x >= y = x
193 | otherwise = y
194 min x y | x <= y = x
195 | otherwise = y
196 compare a b = cmpFS a b
197
198 instance IsString FastString where
199 fromString = fsLit
200
201 instance Monoid FastString where
202 mempty = nilFS
203 mappend = appendFS
204 mconcat = concatFS
205
206 instance Show FastString where
207 show fs = show (unpackFS fs)
208
209 instance Data FastString where
210 -- don't traverse?
211 toConstr _ = abstractConstr "FastString"
212 gunfold _ _ = error "gunfold"
213 dataTypeOf _ = mkNoRepType "FastString"
214
215 cmpFS :: FastString -> FastString -> Ordering
216 cmpFS f1@(FastString u1 _ _ _) f2@(FastString u2 _ _ _) =
217 if u1 == u2 then EQ else
218 compare (fastStringToByteString f1) (fastStringToByteString f2)
219
220 foreign import ccall unsafe "ghc_memcmp"
221 memcmp :: Ptr a -> Ptr b -> Int -> IO Int
222
223 -- -----------------------------------------------------------------------------
224 -- Construction
225
226 {-
227 Internally, the compiler will maintain a fast string symbol table, providing
228 sharing and fast comparison. Creation of new @FastString@s then covertly does a
229 lookup, re-using the @FastString@ if there was a hit.
230
231 The design of the FastString hash table allows for lockless concurrent reads
232 and updates to multiple buckets with low synchronization overhead.
233
234 See Note [Updating the FastString table] on how it's updated.
235 -}
236 data FastStringTable =
237 FastStringTable
238 {-# UNPACK #-} !(IORef Int) -- the unique ID counter shared with all buckets
239 (MutableArray# RealWorld (IORef [FastString])) -- the array of mutable buckets
240
241 string_table :: FastStringTable
242 {-# NOINLINE string_table #-}
243 string_table = unsafePerformIO $ do
244 uid <- newIORef 603979776 -- ord '$' * 0x01000000
245 tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED (panic "string_table") s1# of
246 (# s2#, arr# #) ->
247 (# s2#, FastStringTable uid arr# #)
248 forM_ [0.. hASH_TBL_SIZE-1] $ \i -> do
249 bucket <- newIORef []
250 updTbl tab i bucket
251
252 -- use the support wired into the RTS to share this CAF among all images of
253 -- libHSghc
254 #if STAGE < 2
255 return tab
256 #else
257 sharedCAF tab getOrSetLibHSghcFastStringTable
258
259 -- from the RTS; thus we cannot use this mechanism when STAGE<2; the previous
260 -- RTS might not have this symbol
261 foreign import ccall unsafe "getOrSetLibHSghcFastStringTable"
262 getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a)
263 #endif
264
265 {-
266
267 We include the FastString table in the `sharedCAF` mechanism because we'd like
268 FastStrings created by a Core plugin to have the same uniques as corresponding
269 strings created by the host compiler itself. For example, this allows plugins
270 to lookup known names (eg `mkTcOcc "MySpecialType"`) in the GlobalRdrEnv or
271 even re-invoke the parser.
272
273 In particular, the following little sanity test was failing in a plugin
274 prototyping safe newtype-coercions: GHC.NT.Type.NT was imported, but could not
275 be looked up /by the plugin/.
276
277 let rdrName = mkModuleName "GHC.NT.Type" `mkRdrQual` mkTcOcc "NT"
278 putMsgS $ showSDoc dflags $ ppr $ lookupGRE_RdrName rdrName $ mg_rdr_env guts
279
280 `mkTcOcc` involves the lookup (or creation) of a FastString. Since the
281 plugin's FastString.string_table is empty, constructing the RdrName also
282 allocates new uniques for the FastStrings "GHC.NT.Type" and "NT". These
283 uniques are almost certainly unequal to the ones that the host compiler
284 originally assigned to those FastStrings. Thus the lookup fails since the
285 domain of the GlobalRdrEnv is affected by the RdrName's OccName's FastString's
286 unique.
287
288 The old `reinitializeGlobals` mechanism is enough to provide the plugin with
289 read-access to the table, but it insufficient in the general case where the
290 plugin may allocate FastStrings. This mutates the supply for the FastStrings'
291 unique, and that needs to be propagated back to the compiler's instance of the
292 global variable. Such propagation is beyond the `reinitializeGlobals`
293 mechanism.
294
295 Maintaining synchronization of the two instances of this global is rather
296 difficult because of the uses of `unsafePerformIO` in this module. Not
297 synchronizing them risks breaking the rather major invariant that two
298 FastStrings with the same unique have the same string. Thus we use the
299 lower-level `sharedCAF` mechanism that relies on Globals.c.
300
301 -}
302
303 lookupTbl :: FastStringTable -> Int -> IO (IORef [FastString])
304 lookupTbl (FastStringTable _ arr#) (I# i#) =
305 IO $ \ s# -> readArray# arr# i# s#
306
307 updTbl :: FastStringTable -> Int -> IORef [FastString] -> IO ()
308 updTbl (FastStringTable _uid arr#) (I# i#) ls = do
309 (IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
310
311 mkFastString# :: Addr# -> FastString
312 mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
313 where ptr = Ptr a#
314
315 {- Note [Updating the FastString table]
316
317 The procedure goes like this:
318
319 1. Read the relevant bucket and perform a look up of the string.
320 2. If it exists, return it.
321 3. Otherwise grab a unique ID, create a new FastString and atomically attempt
322 to update the relevant bucket with this FastString:
323
324 * Double check that the string is not in the bucket. Another thread may have
325 inserted it while we were creating our string.
326 * Return the existing FastString if it exists. The one we preemptively
327 created will get GCed.
328 * Otherwise, insert and return the string we created.
329 -}
330
331 {- Note [Double-checking the bucket]
332
333 It is not necessary to check the entire bucket the second time. We only have to
334 check the strings that are new to the bucket since the last time we read it.
335 -}
336
337 mkFastStringWith :: (Int -> IO FastString) -> Ptr Word8 -> Int -> IO FastString
338 mkFastStringWith mk_fs !ptr !len = do
339 let hash = hashStr ptr len
340 bucket <- lookupTbl string_table hash
341 ls1 <- readIORef bucket
342 res <- bucket_match ls1 len ptr
343 case res of
344 Just v -> return v
345 Nothing -> do
346 n <- get_uid
347 new_fs <- mk_fs n
348
349 atomicModifyIORef' bucket $ \ls2 ->
350 -- Note [Double-checking the bucket]
351 let delta_ls = case ls1 of
352 [] -> ls2
353 l:_ -> case l `elemIndex` ls2 of
354 Nothing -> panic "mkFastStringWith"
355 Just idx -> take idx ls2
356
357 -- NB: Might as well use inlinePerformIO, since the call to
358 -- bucket_match doesn't perform any IO that could be floated
359 -- out of this closure or erroneously duplicated.
360 in case inlinePerformIO (bucket_match delta_ls len ptr) of
361 Nothing -> (new_fs:ls2, new_fs)
362 Just fs -> (ls2,fs)
363 where
364 !(FastStringTable uid _arr) = string_table
365
366 get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
367
368 mkFastStringBytes :: Ptr Word8 -> Int -> FastString
369 mkFastStringBytes !ptr !len =
370 -- NB: Might as well use unsafeDupablePerformIO, since mkFastStringWith is
371 -- idempotent.
372 unsafeDupablePerformIO $
373 mkFastStringWith (copyNewFastString ptr len) ptr len
374
375 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
376 -- between this and 'mkFastStringBytes' is that we don't have to copy
377 -- the bytes if the string is new to the table.
378 mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
379 mkFastStringForeignPtr ptr !fp len
380 = mkFastStringWith (mkNewFastString fp ptr len) ptr len
381
382 -- | Create a 'FastString' from an existing 'ForeignPtr'; the difference
383 -- between this and 'mkFastStringBytes' is that we don't have to copy
384 -- the bytes if the string is new to the table.
385 mkFastStringByteString :: ByteString -> FastString
386 mkFastStringByteString bs =
387 inlinePerformIO $
388 BS.unsafeUseAsCStringLen bs $ \(ptr, len) -> do
389 let ptr' = castPtr ptr
390 mkFastStringWith (mkNewFastStringByteString bs ptr' len) ptr' len
391
392 -- | Creates a UTF-8 encoded 'FastString' from a 'String'
393 mkFastString :: String -> FastString
394 mkFastString str =
395 inlinePerformIO $ do
396 let l = utf8EncodedLength str
397 buf <- mallocForeignPtrBytes l
398 withForeignPtr buf $ \ptr -> do
399 utf8EncodeString ptr str
400 mkFastStringForeignPtr ptr buf l
401
402 -- | Creates a 'FastString' from a UTF-8 encoded @[Word8]@
403 mkFastStringByteList :: [Word8] -> FastString
404 mkFastStringByteList str =
405 inlinePerformIO $ do
406 let l = Prelude.length str
407 buf <- mallocForeignPtrBytes l
408 withForeignPtr buf $ \ptr -> do
409 pokeArray (castPtr ptr) str
410 mkFastStringForeignPtr ptr buf l
411
412 -- | Creates a Z-encoded 'FastString' from a 'String'
413 mkZFastString :: String -> FastZString
414 mkZFastString = mkFastZStringString
415
416 bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
417 bucket_match [] _ _ = return Nothing
418 bucket_match (v@(FastString _ _ bs _):ls) len ptr
419 | len == BS.length bs = do
420 b <- BS.unsafeUseAsCString bs $ \buf ->
421 cmpStringPrefix ptr (castPtr buf) len
422 if b then return (Just v)
423 else bucket_match ls len ptr
424 | otherwise =
425 bucket_match ls len ptr
426
427 mkNewFastString :: ForeignPtr Word8 -> Ptr Word8 -> Int -> Int
428 -> IO FastString
429 mkNewFastString fp ptr len uid = do
430 ref <- newIORef Nothing
431 n_chars <- countUTF8Chars ptr len
432 return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
433
434 mkNewFastStringByteString :: ByteString -> Ptr Word8 -> Int -> Int
435 -> IO FastString
436 mkNewFastStringByteString bs ptr len uid = do
437 ref <- newIORef Nothing
438 n_chars <- countUTF8Chars ptr len
439 return (FastString uid n_chars bs ref)
440
441 copyNewFastString :: Ptr Word8 -> Int -> Int -> IO FastString
442 copyNewFastString ptr len uid = do
443 fp <- copyBytesToForeignPtr ptr len
444 ref <- newIORef Nothing
445 n_chars <- countUTF8Chars ptr len
446 return (FastString uid n_chars (BS.fromForeignPtr fp 0 len) ref)
447
448 copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
449 copyBytesToForeignPtr ptr len = do
450 fp <- mallocForeignPtrBytes len
451 withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
452 return fp
453
454 cmpStringPrefix :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool
455 cmpStringPrefix ptr1 ptr2 len =
456 do r <- memcmp ptr1 ptr2 len
457 return (r == 0)
458
459
460 hashStr :: Ptr Word8 -> Int -> Int
461 -- use the Addr to produce a hash value between 0 & m (inclusive)
462 hashStr (Ptr a#) (I# len#) = loop 0# 0#
463 where
464 loop h n | isTrue# (n ==# len#) = I# h
465 | otherwise = loop h2 (n +# 1#)
466 where !c = ord# (indexCharOffAddr# a# n)
467 !h2 = (c +# (h *# 128#)) `remInt#`
468 hASH_TBL_SIZE#
469
470 -- -----------------------------------------------------------------------------
471 -- Operations
472
473 -- | Returns the length of the 'FastString' in characters
474 lengthFS :: FastString -> Int
475 lengthFS f = n_chars f
476
477 -- | Returns @True@ if this 'FastString' is not Z-encoded but already has
478 -- a Z-encoding cached (used in producing stats).
479 hasZEncoding :: FastString -> Bool
480 hasZEncoding (FastString _ _ _ ref) =
481 inlinePerformIO $ do
482 m <- readIORef ref
483 return (isJust m)
484
485 -- | Returns @True@ if the 'FastString' is empty
486 nullFS :: FastString -> Bool
487 nullFS f = BS.null (fs_bs f)
488
489 -- | Unpacks and decodes the FastString
490 unpackFS :: FastString -> String
491 unpackFS (FastString _ _ bs _) =
492 inlinePerformIO $ BS.unsafeUseAsCStringLen bs $ \(ptr, len) ->
493 utf8DecodeString (castPtr ptr) len
494
495 -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
496 bytesFS :: FastString -> [Word8]
497 bytesFS fs = BS.unpack $ fastStringToByteString fs
498
499 -- | Returns a Z-encoded version of a 'FastString'. This might be the
500 -- original, if it was already Z-encoded. The first time this
501 -- function is applied to a particular 'FastString', the results are
502 -- memoized.
503 --
504 zEncodeFS :: FastString -> FastZString
505 zEncodeFS fs@(FastString _ _ _ ref) =
506 inlinePerformIO $ do
507 m <- readIORef ref
508 case m of
509 Just zfs -> return zfs
510 Nothing -> do
511 atomicModifyIORef' ref $ \m' -> case m' of
512 Nothing -> let zfs = mkZFastString (zEncodeString (unpackFS fs))
513 in (Just zfs, zfs)
514 Just zfs -> (m', zfs)
515
516 appendFS :: FastString -> FastString -> FastString
517 appendFS fs1 fs2 = mkFastStringByteString
518 $ BS.append (fastStringToByteString fs1)
519 (fastStringToByteString fs2)
520
521 concatFS :: [FastString] -> FastString
522 concatFS = mkFastStringByteString . BS.concat . map fs_bs
523
524 headFS :: FastString -> Char
525 headFS (FastString _ 0 _ _) = panic "headFS: Empty FastString"
526 headFS (FastString _ _ bs _) =
527 inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
528 return (fst (utf8DecodeChar (castPtr ptr)))
529
530 tailFS :: FastString -> FastString
531 tailFS (FastString _ 0 _ _) = panic "tailFS: Empty FastString"
532 tailFS (FastString _ _ bs _) =
533 inlinePerformIO $ BS.unsafeUseAsCString bs $ \ptr ->
534 do let (_, n) = utf8DecodeChar (castPtr ptr)
535 return $! mkFastStringByteString (BS.drop n bs)
536
537 consFS :: Char -> FastString -> FastString
538 consFS c fs = mkFastString (c : unpackFS fs)
539
540 uniqueOfFS :: FastString -> Int
541 uniqueOfFS (FastString u _ _ _) = u
542
543 nilFS :: FastString
544 nilFS = mkFastString ""
545
546 -- -----------------------------------------------------------------------------
547 -- Stats
548
549 getFastStringTable :: IO [[FastString]]
550 getFastStringTable = do
551 buckets <- forM [0.. hASH_TBL_SIZE-1] $ \idx -> do
552 bucket <- lookupTbl string_table idx
553 readIORef bucket
554 return buckets
555
556 -- -----------------------------------------------------------------------------
557 -- Outputting 'FastString's
558
559 -- |Outputs a 'FastString' with /no decoding at all/, that is, you
560 -- get the actual bytes in the 'FastString' written to the 'Handle'.
561 hPutFS :: Handle -> FastString -> IO ()
562 hPutFS handle fs = BS.hPut handle $ fastStringToByteString fs
563
564 -- ToDo: we'll probably want an hPutFSLocal, or something, to output
565 -- in the current locale's encoding (for error messages and suchlike).
566
567 -- -----------------------------------------------------------------------------
568 -- LitStrings, here for convenience only.
569
570 type LitString = Ptr Word8
571 --Why do we recalculate length every time it's requested?
572 --If it's commonly needed, we should perhaps have
573 --data LitString = LitString {-#UNPACK#-}!Addr# {-#UNPACK#-}!Int#
574
575 mkLitString# :: Addr# -> LitString
576 mkLitString# a# = Ptr a#
577
578 {-# INLINE mkLitString #-}
579 mkLitString :: String -> LitString
580 mkLitString s =
581 unsafePerformIO (do
582 p <- mallocBytes (length s + 1)
583 let
584 loop :: Int -> String -> IO ()
585 loop !n [] = pokeByteOff p n (0 :: Word8)
586 loop n (c:cs) = do
587 pokeByteOff p n (fromIntegral (ord c) :: Word8)
588 loop (1+n) cs
589 loop 0 s
590 return p
591 )
592
593 unpackLitString :: LitString -> String
594 unpackLitString (Ptr p) = unpackCString# p
595
596 lengthLS :: LitString -> Int
597 lengthLS = ptrStrLength
598
599 -- -----------------------------------------------------------------------------
600 -- under the carpet
601
602 foreign import ccall unsafe "ghc_strlen"
603 ptrStrLength :: Ptr Word8 -> Int
604
605 {-# NOINLINE sLit #-}
606 sLit :: String -> LitString
607 sLit x = mkLitString x
608
609 {-# NOINLINE fsLit #-}
610 fsLit :: String -> FastString
611 fsLit x = mkFastString x
612
613 {-# RULES "slit"
614 forall x . sLit (unpackCString# x) = mkLitString# x #-}
615 {-# RULES "fslit"
616 forall x . fsLit (unpackCString# x) = mkFastString# x #-}