Allow top-level string literals in Core (#8472)
[ghc.git] / compiler / ghci / ByteCodeAsm.hs
1 {-# LANGUAGE BangPatterns, CPP, MagicHash, RecordWildCards #-}
2 {-# OPTIONS_GHC -optc-DNON_POSIX_SOURCE #-}
3 --
4 -- (c) The University of Glasgow 2002-2006
5 --
6
7 -- | ByteCodeLink: Bytecode assembler and linker
8 module ByteCodeAsm (
9 assembleBCOs, assembleOneBCO,
10
11 bcoFreeNames,
12 SizedSeq, sizeSS, ssElts,
13 iNTERP_STACK_CHECK_THRESH
14 ) where
15
16 #include "HsVersions.h"
17
18 import ByteCodeInstr
19 import ByteCodeItbls
20 import ByteCodeTypes
21 import GHCi.RemoteTypes
22 import GHCi
23
24 import HscTypes
25 import Name
26 import NameSet
27 import Literal
28 import TyCon
29 import FastString
30 import StgCmmLayout ( ArgRep(..) )
31 import SMRep
32 import DynFlags
33 import Outputable
34 import Platform
35 import Util
36 import Unique
37 import UniqDSet
38
39 -- From iserv
40 import SizedSeq
41
42 import Control.Monad
43 import Control.Monad.ST ( runST )
44 import Control.Monad.Trans.Class
45 import Control.Monad.Trans.State.Strict
46
47 import Data.Array.MArray
48
49 import qualified Data.Array.Unboxed as Array
50 import Data.Array.Base ( UArray(..) )
51
52 import Data.Array.Unsafe( castSTUArray )
53
54 import Foreign
55 import Data.Char ( ord )
56 import Data.List
57 import Data.Map (Map)
58 import Data.Maybe (fromMaybe)
59 import qualified Data.Map as Map
60
61 -- -----------------------------------------------------------------------------
62 -- Unlinked BCOs
63
64 -- CompiledByteCode represents the result of byte-code
65 -- compiling a bunch of functions and data types
66
67 -- | Finds external references. Remember to remove the names
68 -- defined by this group of BCOs themselves
69 bcoFreeNames :: UnlinkedBCO -> UniqDSet Name
70 bcoFreeNames bco
71 = bco_refs bco `uniqDSetMinusUniqSet` mkNameSet [unlinkedBCOName bco]
72 where
73 bco_refs (UnlinkedBCO _ _ _ _ nonptrs ptrs)
74 = unionManyUniqDSets (
75 mkUniqDSet [ n | BCOPtrName n <- ssElts ptrs ] :
76 mkUniqDSet [ n | BCONPtrItbl n <- ssElts nonptrs ] :
77 map bco_refs [ bco | BCOPtrBCO bco <- ssElts ptrs ]
78 )
79
80 -- -----------------------------------------------------------------------------
81 -- The bytecode assembler
82
83 -- The object format for bytecodes is: 16 bits for the opcode, and 16
84 -- for each field -- so the code can be considered a sequence of
85 -- 16-bit ints. Each field denotes either a stack offset or number of
86 -- items on the stack (eg SLIDE), and index into the pointer table (eg
87 -- PUSH_G), an index into the literal table (eg PUSH_I/D/L), or a
88 -- bytecode address in this BCO.
89
90 -- Top level assembler fn.
91 assembleBCOs
92 :: HscEnv -> [ProtoBCO Name] -> [TyCon] -> [RemotePtr ()]
93 -> Maybe ModBreaks
94 -> IO CompiledByteCode
95 assembleBCOs hsc_env proto_bcos tycons top_strs modbreaks = do
96 itblenv <- mkITbls hsc_env tycons
97 bcos <- mapM (assembleBCO (hsc_dflags hsc_env)) proto_bcos
98 (bcos',ptrs) <- mallocStrings hsc_env bcos
99 return CompiledByteCode
100 { bc_bcos = bcos'
101 , bc_itbls = itblenv
102 , bc_ffis = concat (map protoBCOFFIs proto_bcos)
103 , bc_strs = top_strs ++ ptrs
104 , bc_breaks = modbreaks
105 }
106
107 -- Find all the literal strings and malloc them together. We want to
108 -- do this because:
109 --
110 -- a) It should be done when we compile the module, not each time we relink it
111 -- b) For -fexternal-interpreter It's more efficient to malloc the strings
112 -- as a single batch message, especially when compiling in parallel.
113 --
114 mallocStrings :: HscEnv -> [UnlinkedBCO] -> IO ([UnlinkedBCO], [RemotePtr ()])
115 mallocStrings hsc_env ulbcos = do
116 let bytestrings = reverse (execState (mapM_ collect ulbcos) [])
117 ptrs <- iservCmd hsc_env (MallocStrings bytestrings)
118 return (evalState (mapM splice ulbcos) ptrs, ptrs)
119 where
120 splice bco@UnlinkedBCO{..} = do
121 lits <- mapM spliceLit unlinkedBCOLits
122 ptrs <- mapM splicePtr unlinkedBCOPtrs
123 return bco { unlinkedBCOLits = lits, unlinkedBCOPtrs = ptrs }
124
125 spliceLit (BCONPtrStr _) = do
126 (RemotePtr p : rest) <- get
127 put rest
128 return (BCONPtrWord (fromIntegral p))
129 spliceLit other = return other
130
131 splicePtr (BCOPtrBCO bco) = BCOPtrBCO <$> splice bco
132 splicePtr other = return other
133
134 collect UnlinkedBCO{..} = do
135 mapM_ collectLit unlinkedBCOLits
136 mapM_ collectPtr unlinkedBCOPtrs
137
138 collectLit (BCONPtrStr bs) = do
139 strs <- get
140 put (bs:strs)
141 collectLit _ = return ()
142
143 collectPtr (BCOPtrBCO bco) = collect bco
144 collectPtr _ = return ()
145
146
147 assembleOneBCO :: HscEnv -> ProtoBCO Name -> IO UnlinkedBCO
148 assembleOneBCO hsc_env pbco = do
149 ubco <- assembleBCO (hsc_dflags hsc_env) pbco
150 ([ubco'], _ptrs) <- mallocStrings hsc_env [ubco]
151 return ubco'
152
153 assembleBCO :: DynFlags -> ProtoBCO Name -> IO UnlinkedBCO
154 assembleBCO dflags (ProtoBCO nm instrs bitmap bsize arity _origin _malloced) = do
155 -- pass 1: collect up the offsets of the local labels.
156 let asm = mapM_ (assembleI dflags) instrs
157
158 initial_offset = 0
159
160 -- Jump instructions are variable-sized, there are long and short variants
161 -- depending on the magnitude of the offset. However, we can't tell what
162 -- size instructions we will need until we have calculated the offsets of
163 -- the labels, which depends on the size of the instructions... So we
164 -- first create the label environment assuming that all jumps are short,
165 -- and if the final size is indeed small enough for short jumps, we are
166 -- done. Otherwise, we repeat the calculation, and we force all jumps in
167 -- this BCO to be long.
168 (n_insns0, lbl_map0) = inspectAsm dflags False initial_offset asm
169 ((n_insns, lbl_map), long_jumps)
170 | isLarge n_insns0 = (inspectAsm dflags True initial_offset asm, True)
171 | otherwise = ((n_insns0, lbl_map0), False)
172
173 env :: Word16 -> Word
174 env lbl = fromMaybe
175 (pprPanic "assembleBCO.findLabel" (ppr lbl))
176 (Map.lookup lbl lbl_map)
177
178 -- pass 2: run assembler and generate instructions, literals and pointers
179 let initial_state = (emptySS, emptySS, emptySS)
180 (final_insns, final_lits, final_ptrs) <- flip execStateT initial_state $ runAsm dflags long_jumps env asm
181
182 -- precomputed size should be equal to final size
183 ASSERT(n_insns == sizeSS final_insns) return ()
184
185 let asm_insns = ssElts final_insns
186 insns_arr = Array.listArray (0, fromIntegral n_insns - 1) asm_insns
187 bitmap_arr = mkBitmapArray bsize bitmap
188 ul_bco = UnlinkedBCO nm arity insns_arr bitmap_arr final_lits final_ptrs
189
190 -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
191 -- objects, since they might get run too early. Disable this until
192 -- we figure out what to do.
193 -- when (notNull malloced) (addFinalizer ul_bco (mapM_ zonk malloced))
194
195 return ul_bco
196
197 mkBitmapArray :: Word16 -> [StgWord] -> UArray Int Word
198 -- Here the return type must be an array of Words, not StgWords,
199 -- because the underlying ByteArray# will end up as a component
200 -- of a BCO object.
201 mkBitmapArray bsize bitmap
202 = Array.listArray (0, length bitmap) $
203 fromIntegral bsize : map (fromInteger . fromStgWord) bitmap
204
205 -- instrs nonptrs ptrs
206 type AsmState = (SizedSeq Word16,
207 SizedSeq BCONPtr,
208 SizedSeq BCOPtr)
209
210 data Operand
211 = Op Word
212 | SmallOp Word16
213 | LabelOp Word16
214 -- (unused) | LargeOp Word
215
216 data Assembler a
217 = AllocPtr (IO BCOPtr) (Word -> Assembler a)
218 | AllocLit [BCONPtr] (Word -> Assembler a)
219 | AllocLabel Word16 (Assembler a)
220 | Emit Word16 [Operand] (Assembler a)
221 | NullAsm a
222
223 instance Functor Assembler where
224 fmap = liftM
225
226 instance Applicative Assembler where
227 pure = NullAsm
228 (<*>) = ap
229
230 instance Monad Assembler where
231 NullAsm x >>= f = f x
232 AllocPtr p k >>= f = AllocPtr p (k >=> f)
233 AllocLit l k >>= f = AllocLit l (k >=> f)
234 AllocLabel lbl k >>= f = AllocLabel lbl (k >>= f)
235 Emit w ops k >>= f = Emit w ops (k >>= f)
236
237 ioptr :: IO BCOPtr -> Assembler Word
238 ioptr p = AllocPtr p return
239
240 ptr :: BCOPtr -> Assembler Word
241 ptr = ioptr . return
242
243 lit :: [BCONPtr] -> Assembler Word
244 lit l = AllocLit l return
245
246 label :: Word16 -> Assembler ()
247 label w = AllocLabel w (return ())
248
249 emit :: Word16 -> [Operand] -> Assembler ()
250 emit w ops = Emit w ops (return ())
251
252 type LabelEnv = Word16 -> Word
253
254 largeOp :: Bool -> Operand -> Bool
255 largeOp long_jumps op = case op of
256 SmallOp _ -> False
257 Op w -> isLarge w
258 LabelOp _ -> long_jumps
259 -- LargeOp _ -> True
260
261 runAsm :: DynFlags -> Bool -> LabelEnv -> Assembler a -> StateT AsmState IO a
262 runAsm dflags long_jumps e = go
263 where
264 go (NullAsm x) = return x
265 go (AllocPtr p_io k) = do
266 p <- lift p_io
267 w <- state $ \(st_i0,st_l0,st_p0) ->
268 let st_p1 = addToSS st_p0 p
269 in (sizeSS st_p0, (st_i0,st_l0,st_p1))
270 go $ k w
271 go (AllocLit lits k) = do
272 w <- state $ \(st_i0,st_l0,st_p0) ->
273 let st_l1 = addListToSS st_l0 lits
274 in (sizeSS st_l0, (st_i0,st_l1,st_p0))
275 go $ k w
276 go (AllocLabel _ k) = go k
277 go (Emit w ops k) = do
278 let largeOps = any (largeOp long_jumps) ops
279 opcode
280 | largeOps = largeArgInstr w
281 | otherwise = w
282 words = concatMap expand ops
283 expand (SmallOp w) = [w]
284 expand (LabelOp w) = expand (Op (e w))
285 expand (Op w) = if largeOps then largeArg dflags w else [fromIntegral w]
286 -- expand (LargeOp w) = largeArg dflags w
287 state $ \(st_i0,st_l0,st_p0) ->
288 let st_i1 = addListToSS st_i0 (opcode : words)
289 in ((), (st_i1,st_l0,st_p0))
290 go k
291
292 type LabelEnvMap = Map Word16 Word
293
294 data InspectState = InspectState
295 { instrCount :: !Word
296 , ptrCount :: !Word
297 , litCount :: !Word
298 , lblEnv :: LabelEnvMap
299 }
300
301 inspectAsm :: DynFlags -> Bool -> Word -> Assembler a -> (Word, LabelEnvMap)
302 inspectAsm dflags long_jumps initial_offset
303 = go (InspectState initial_offset 0 0 Map.empty)
304 where
305 go s (NullAsm _) = (instrCount s, lblEnv s)
306 go s (AllocPtr _ k) = go (s { ptrCount = n + 1 }) (k n)
307 where n = ptrCount s
308 go s (AllocLit ls k) = go (s { litCount = n + genericLength ls }) (k n)
309 where n = litCount s
310 go s (AllocLabel lbl k) = go s' k
311 where s' = s { lblEnv = Map.insert lbl (instrCount s) (lblEnv s) }
312 go s (Emit _ ops k) = go s' k
313 where
314 s' = s { instrCount = instrCount s + size }
315 size = sum (map count ops) + 1
316 largeOps = any (largeOp long_jumps) ops
317 count (SmallOp _) = 1
318 count (LabelOp _) = count (Op 0)
319 count (Op _) = if largeOps then largeArg16s dflags else 1
320 -- count (LargeOp _) = largeArg16s dflags
321
322 -- Bring in all the bci_ bytecode constants.
323 #include "rts/Bytecodes.h"
324
325 largeArgInstr :: Word16 -> Word16
326 largeArgInstr bci = bci_FLAG_LARGE_ARGS .|. bci
327
328 largeArg :: DynFlags -> Word -> [Word16]
329 largeArg dflags w
330 | wORD_SIZE_IN_BITS dflags == 64
331 = [fromIntegral (w `shiftR` 48),
332 fromIntegral (w `shiftR` 32),
333 fromIntegral (w `shiftR` 16),
334 fromIntegral w]
335 | wORD_SIZE_IN_BITS dflags == 32
336 = [fromIntegral (w `shiftR` 16),
337 fromIntegral w]
338 | otherwise = error "wORD_SIZE_IN_BITS not 32 or 64?"
339
340 largeArg16s :: DynFlags -> Word
341 largeArg16s dflags | wORD_SIZE_IN_BITS dflags == 64 = 4
342 | otherwise = 2
343
344 assembleI :: DynFlags
345 -> BCInstr
346 -> Assembler ()
347 assembleI dflags i = case i of
348 STKCHECK n -> emit bci_STKCHECK [Op n]
349 PUSH_L o1 -> emit bci_PUSH_L [SmallOp o1]
350 PUSH_LL o1 o2 -> emit bci_PUSH_LL [SmallOp o1, SmallOp o2]
351 PUSH_LLL o1 o2 o3 -> emit bci_PUSH_LLL [SmallOp o1, SmallOp o2, SmallOp o3]
352 PUSH_G nm -> do p <- ptr (BCOPtrName nm)
353 emit bci_PUSH_G [Op p]
354 PUSH_PRIMOP op -> do p <- ptr (BCOPtrPrimOp op)
355 emit bci_PUSH_G [Op p]
356 PUSH_BCO proto -> do let ul_bco = assembleBCO dflags proto
357 p <- ioptr (liftM BCOPtrBCO ul_bco)
358 emit bci_PUSH_G [Op p]
359 PUSH_ALTS proto -> do let ul_bco = assembleBCO dflags proto
360 p <- ioptr (liftM BCOPtrBCO ul_bco)
361 emit bci_PUSH_ALTS [Op p]
362 PUSH_ALTS_UNLIFTED proto pk
363 -> do let ul_bco = assembleBCO dflags proto
364 p <- ioptr (liftM BCOPtrBCO ul_bco)
365 emit (push_alts pk) [Op p]
366 PUSH_UBX lit nws -> do np <- literal lit
367 emit bci_PUSH_UBX [Op np, SmallOp nws]
368
369 PUSH_APPLY_N -> emit bci_PUSH_APPLY_N []
370 PUSH_APPLY_V -> emit bci_PUSH_APPLY_V []
371 PUSH_APPLY_F -> emit bci_PUSH_APPLY_F []
372 PUSH_APPLY_D -> emit bci_PUSH_APPLY_D []
373 PUSH_APPLY_L -> emit bci_PUSH_APPLY_L []
374 PUSH_APPLY_P -> emit bci_PUSH_APPLY_P []
375 PUSH_APPLY_PP -> emit bci_PUSH_APPLY_PP []
376 PUSH_APPLY_PPP -> emit bci_PUSH_APPLY_PPP []
377 PUSH_APPLY_PPPP -> emit bci_PUSH_APPLY_PPPP []
378 PUSH_APPLY_PPPPP -> emit bci_PUSH_APPLY_PPPPP []
379 PUSH_APPLY_PPPPPP -> emit bci_PUSH_APPLY_PPPPPP []
380
381 SLIDE n by -> emit bci_SLIDE [SmallOp n, SmallOp by]
382 ALLOC_AP n -> emit bci_ALLOC_AP [SmallOp n]
383 ALLOC_AP_NOUPD n -> emit bci_ALLOC_AP_NOUPD [SmallOp n]
384 ALLOC_PAP arity n -> emit bci_ALLOC_PAP [SmallOp arity, SmallOp n]
385 MKAP off sz -> emit bci_MKAP [SmallOp off, SmallOp sz]
386 MKPAP off sz -> emit bci_MKPAP [SmallOp off, SmallOp sz]
387 UNPACK n -> emit bci_UNPACK [SmallOp n]
388 PACK dcon sz -> do itbl_no <- lit [BCONPtrItbl (getName dcon)]
389 emit bci_PACK [Op itbl_no, SmallOp sz]
390 LABEL lbl -> label lbl
391 TESTLT_I i l -> do np <- int i
392 emit bci_TESTLT_I [Op np, LabelOp l]
393 TESTEQ_I i l -> do np <- int i
394 emit bci_TESTEQ_I [Op np, LabelOp l]
395 TESTLT_W w l -> do np <- word w
396 emit bci_TESTLT_W [Op np, LabelOp l]
397 TESTEQ_W w l -> do np <- word w
398 emit bci_TESTEQ_W [Op np, LabelOp l]
399 TESTLT_F f l -> do np <- float f
400 emit bci_TESTLT_F [Op np, LabelOp l]
401 TESTEQ_F f l -> do np <- float f
402 emit bci_TESTEQ_F [Op np, LabelOp l]
403 TESTLT_D d l -> do np <- double d
404 emit bci_TESTLT_D [Op np, LabelOp l]
405 TESTEQ_D d l -> do np <- double d
406 emit bci_TESTEQ_D [Op np, LabelOp l]
407 TESTLT_P i l -> emit bci_TESTLT_P [SmallOp i, LabelOp l]
408 TESTEQ_P i l -> emit bci_TESTEQ_P [SmallOp i, LabelOp l]
409 CASEFAIL -> emit bci_CASEFAIL []
410 SWIZZLE stkoff n -> emit bci_SWIZZLE [SmallOp stkoff, SmallOp n]
411 JMP l -> emit bci_JMP [LabelOp l]
412 ENTER -> emit bci_ENTER []
413 RETURN -> emit bci_RETURN []
414 RETURN_UBX rep -> emit (return_ubx rep) []
415 CCALL off m_addr i -> do np <- addr m_addr
416 emit bci_CCALL [SmallOp off, Op np, SmallOp i]
417 BRK_FUN index uniq cc -> do p1 <- ptr BCOPtrBreakArray
418 q <- int (getKey uniq)
419 np <- addr cc
420 emit bci_BRK_FUN [Op p1, SmallOp index,
421 Op q, Op np]
422
423 where
424 literal (MachLabel fs (Just sz) _)
425 | platformOS (targetPlatform dflags) == OSMinGW32
426 = litlabel (appendFS fs (mkFastString ('@':show sz)))
427 -- On Windows, stdcall labels have a suffix indicating the no. of
428 -- arg words, e.g. foo@8. testcase: ffi012(ghci)
429 literal (MachLabel fs _ _) = litlabel fs
430 literal (MachWord w) = int (fromIntegral w)
431 literal (MachInt j) = int (fromIntegral j)
432 literal MachNullAddr = int 0
433 literal (MachFloat r) = float (fromRational r)
434 literal (MachDouble r) = double (fromRational r)
435 literal (MachChar c) = int (ord c)
436 literal (MachInt64 ii) = int64 (fromIntegral ii)
437 literal (MachWord64 ii) = int64 (fromIntegral ii)
438 literal (MachStr bs) = lit [BCONPtrStr bs]
439 -- MachStr requires a zero-terminator when emitted
440 literal LitInteger{} = panic "ByteCodeAsm.literal: LitInteger"
441
442 litlabel fs = lit [BCONPtrLbl fs]
443 addr (RemotePtr a) = words [fromIntegral a]
444 float = words . mkLitF
445 double = words . mkLitD dflags
446 int = words . mkLitI
447 int64 = words . mkLitI64 dflags
448 words ws = lit (map BCONPtrWord ws)
449 word w = words [w]
450
451 isLarge :: Word -> Bool
452 isLarge n = n > 65535
453
454 push_alts :: ArgRep -> Word16
455 push_alts V = bci_PUSH_ALTS_V
456 push_alts P = bci_PUSH_ALTS_P
457 push_alts N = bci_PUSH_ALTS_N
458 push_alts L = bci_PUSH_ALTS_L
459 push_alts F = bci_PUSH_ALTS_F
460 push_alts D = bci_PUSH_ALTS_D
461 push_alts V16 = error "push_alts: vector"
462 push_alts V32 = error "push_alts: vector"
463 push_alts V64 = error "push_alts: vector"
464
465 return_ubx :: ArgRep -> Word16
466 return_ubx V = bci_RETURN_V
467 return_ubx P = bci_RETURN_P
468 return_ubx N = bci_RETURN_N
469 return_ubx L = bci_RETURN_L
470 return_ubx F = bci_RETURN_F
471 return_ubx D = bci_RETURN_D
472 return_ubx V16 = error "return_ubx: vector"
473 return_ubx V32 = error "return_ubx: vector"
474 return_ubx V64 = error "return_ubx: vector"
475
476 -- Make lists of host-sized words for literals, so that when the
477 -- words are placed in memory at increasing addresses, the
478 -- bit pattern is correct for the host's word size and endianness.
479 mkLitI :: Int -> [Word]
480 mkLitF :: Float -> [Word]
481 mkLitD :: DynFlags -> Double -> [Word]
482 mkLitI64 :: DynFlags -> Int64 -> [Word]
483
484 mkLitF f
485 = runST (do
486 arr <- newArray_ ((0::Int),0)
487 writeArray arr 0 f
488 f_arr <- castSTUArray arr
489 w0 <- readArray f_arr 0
490 return [w0 :: Word]
491 )
492
493 mkLitD dflags d
494 | wORD_SIZE dflags == 4
495 = runST (do
496 arr <- newArray_ ((0::Int),1)
497 writeArray arr 0 d
498 d_arr <- castSTUArray arr
499 w0 <- readArray d_arr 0
500 w1 <- readArray d_arr 1
501 return [w0 :: Word, w1]
502 )
503 | wORD_SIZE dflags == 8
504 = runST (do
505 arr <- newArray_ ((0::Int),0)
506 writeArray arr 0 d
507 d_arr <- castSTUArray arr
508 w0 <- readArray d_arr 0
509 return [w0 :: Word]
510 )
511 | otherwise
512 = panic "mkLitD: Bad wORD_SIZE"
513
514 mkLitI64 dflags ii
515 | wORD_SIZE dflags == 4
516 = runST (do
517 arr <- newArray_ ((0::Int),1)
518 writeArray arr 0 ii
519 d_arr <- castSTUArray arr
520 w0 <- readArray d_arr 0
521 w1 <- readArray d_arr 1
522 return [w0 :: Word,w1]
523 )
524 | wORD_SIZE dflags == 8
525 = runST (do
526 arr <- newArray_ ((0::Int),0)
527 writeArray arr 0 ii
528 d_arr <- castSTUArray arr
529 w0 <- readArray d_arr 0
530 return [w0 :: Word]
531 )
532 | otherwise
533 = panic "mkLitI64: Bad wORD_SIZE"
534
535 mkLitI i = [fromIntegral i :: Word]
536
537 iNTERP_STACK_CHECK_THRESH :: Int
538 iNTERP_STACK_CHECK_THRESH = INTERP_STACK_CHECK_THRESH