compiler/codeGen/StgCmmLayout.hs
{-# LANGUAGE CPP #-}

-----------------------------------------------------------------------------
--
-- Building info tables.
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmLayout (
        mkArgDescr,
        emitCall, emitReturn, adjustHpBackwards,

        emitClosureProcAndInfoTable,
        emitClosureAndInfoTable,

        slowCall, directCall,

        mkVirtHeapOffsets, mkVirtConstrOffsets, getHpRelOffset,

        ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep
  ) where


#include "HsVersions.h"

#if __GLASGOW_HASKELL__ >= 709
import Prelude hiding ((<*>))
#endif

import StgCmmClosure
import StgCmmEnv
import StgCmmArgRep -- notably: ( slowCallPattern )
import StgCmmTicky
import StgCmmMonad
import StgCmmUtils
import StgCmmProf (curCCS)

import MkGraph
import SMRep
import Cmm
import CmmUtils
import CmmInfo
import CLabel
import StgSyn
import Id
import TyCon ( PrimRep(..) )
import BasicTypes ( RepArity )
import DynFlags
import Module

import Util
import Data.List
import Outputable
import FastString
import Control.Monad

------------------------------------------------------------------------
--              Call and return sequences
------------------------------------------------------------------------

-- | Return multiple values to the sequel
--
-- If the sequel is @Return@
--
-- >     return (x,y)
--
-- If the sequel is @AssignTo [p,q]@
--
-- >     p=x; q=y;
--
emitReturn :: [CmmExpr] -> FCode ReturnKind
emitReturn results
  = do { dflags    <- getDynFlags
       ; sequel    <- getSequel
       ; updfr_off <- getUpdFrameOff
       ; case sequel of
           Return _ ->
             do { adjustHpBackwards
                ; let e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
                ; emit (mkReturn dflags (entryCode dflags e) results updfr_off)
                }
           AssignTo regs adjust ->
             do { when adjust adjustHpBackwards
                ; emitMultiAssign regs results }
       ; return AssignedDirectly
       }


-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
-- using the call/return convention @conv@, passing @args@, and
-- returning the results to the current sequel.
--
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
emitCall convs fun args
  = emitCallWithExtraStack convs fun args noExtraStack


-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
-- entry-code of @fun@, using the call/return convention @conv@,
-- passing @args@, pushing some extra stack frames described by
-- @stack@, and returning the results to the current sequel.
--
emitCallWithExtraStack
  :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
  -> [CmmExpr] -> FCode ReturnKind
emitCallWithExtraStack (callConv, retConv) fun args extra_stack
  = do { dflags <- getDynFlags
       ; adjustHpBackwards
       ; sequel <- getSequel
       ; updfr_off <- getUpdFrameOff
       ; case sequel of
           Return _ -> do
             emit $ mkJumpExtra dflags callConv fun args updfr_off extra_stack
             return AssignedDirectly
           AssignTo res_regs _ -> do
             k <- newLabelC
             let area = Young k
                 (off, _, copyin) = copyInOflow dflags retConv area res_regs []
                 copyout = mkCallReturnsTo dflags fun callConv args k off updfr_off
                              extra_stack
             tscope <- getTickScope
             emit (copyout <*> mkLabel k tscope <*> copyin)
             return (ReturnedTo k off)
       }

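-- For the AssignTo case the generated Cmm has roughly this shape (a
-- sketch; the exact frame layout comes from copyInOflow):
--
--      call fun(args) returns to k       // plus any extra_stack frames
--    k:
--      <copy the returned values into res_regs>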

adjustHpBackwards :: FCode ()
-- This function adjusts the heap pointer just before a tail call or
-- return.  At a call or return, the virtual heap pointer may be less
-- than the real Hp, because the latter was advanced to deal with
-- the worst-case branch of the code, and we may be in a better-case
-- branch.  In that case, move the real Hp *back* and retract some
-- ticky allocation count.
--
-- It *does not* deal with high-water-mark adjustment.  That's done by
-- functions which allocate heap.
adjustHpBackwards
  = do { hp_usg <- getHpUsage
       ; let rHp = realHp hp_usg
             vHp = virtHp hp_usg
             adjust_words = vHp - rHp
       ; new_hp <- getHpRelOffset vHp

       ; emit (if adjust_words == 0
               then mkNop
               else mkAssign hpReg new_hp) -- Generates nothing when vHp == rHp

       ; tickyAllocHeap False adjust_words -- ...ditto

       ; setRealHp vHp
       }

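-- For example (a sketch, assuming a 64-bit target): if the worst-case
-- branch advanced Hp by 6 words (realHp = 6) but we ended up in a branch
-- that allocated only 4 (virtHp = 4), then adjust_words is -2 and the
-- emitted assignment is
--
--      Hp = Hp - 16;
--
-- since getHpRelOffset virtHp is Hp plus 8*(virtHp - realHp) bytes.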

-------------------------------------------------------------------------
-- Making calls: directCall and slowCall
-------------------------------------------------------------------------

-- General plan is:
--   - we'll make *one* fast call, either to the function itself
--     (directCall) or to stg_ap_<pat>_fast (slowCall)
--     Any left-over arguments will be pushed on the stack,
--
--     e.g. Sp[old+8]  = arg1
--          Sp[old+16] = arg2
--          Sp[old+32] = stg_ap_pp_info
--          R2 = arg3
--          R3 = arg4
--          call f() return to Nothing updfr_off: 32

directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
-- (directCall f n args)
-- calls f(arg1, ..., argn), and applies the result to the remaining args
-- The function f has arity n, and there are guaranteed to be at least n args
-- Both arity and args include void args
directCall conv lbl arity stg_args
  = do { argreps <- getArgRepsAmodes stg_args
       ; direct_call "directCall" conv lbl arity argreps }

slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
  = do dflags <- getDynFlags
       argsreps <- getArgRepsAmodes stg_args
       let (rts_fun, arity) = slowCallPattern (map fst argsreps)

       (r, slow_code) <- getCodeR $ do
          r <- direct_call "slow_call" NativeNodeCall
                 (mkRtsApFastLabel rts_fun) arity ((P, Just fun) : argsreps)
          emitComment $ mkFastString ("slow_call for " ++
                                      showSDoc dflags (ppr fun) ++
                                      " with pat " ++ unpackFS rts_fun)
          return r

       -- Note [avoid intermediate PAPs]
       let n_args = length stg_args
       if n_args > arity && optLevel dflags >= 2
          then do
            funv <- (CmmReg . CmmLocal) `fmap` assignTemp fun
            fun_iptr <- (CmmReg . CmmLocal) `fmap`
                        assignTemp (closureInfoPtr dflags (cmmUntag dflags funv))

            -- ToDo: we could do slightly better here by reusing the
            -- continuation from the slow call, which we have in r.
            -- Also we'd like to push the continuation on the stack
            -- before the branch, so that we only get one copy of the
            -- code that saves all the live variables across the
            -- call, but that might need some improvements to the
            -- special case in the stack layout code to handle this
            -- (see Note [diamond proc point]).

            fast_code <- getCode $
               emitCall (NativeNodeCall, NativeReturn)
                 (entryCode dflags fun_iptr)
                 (nonVArgs ((P, Just funv) : argsreps))

            slow_lbl <- newLabelC
            fast_lbl <- newLabelC
            is_tagged_lbl <- newLabelC
            end_lbl <- newLabelC

            let correct_arity = cmmEqWord dflags (funInfoArity dflags fun_iptr)
                                                 (mkIntExpr dflags n_args)

            tscope <- getTickScope
            emit (mkCbranch (cmmIsTagged dflags funv) is_tagged_lbl slow_lbl
                  <*> mkLabel is_tagged_lbl tscope
                  <*> mkCbranch correct_arity fast_lbl slow_lbl
                  <*> mkLabel fast_lbl tscope
                  <*> fast_code
                  <*> mkBranch end_lbl
                  <*> mkLabel slow_lbl tscope
                  <*> slow_code
                  <*> mkLabel end_lbl tscope)
            return r

          else do
            emit slow_code
            return r


-- Note [avoid intermediate PAPs]
--
-- A slow call which needs multiple generic apply patterns will be
-- almost guaranteed to create one or more intermediate PAPs when
-- applied to a function that takes the correct number of arguments.
-- We try to avoid this situation by generating code to test whether
-- we are calling a function with the correct number of arguments
-- first, i.e.:
--
--    if (TAG(f) != 0) {   // f is not a thunk
--       if (f->info.arity == n) {
--          ... make a fast call to f ...
--       }
--    }
--    ... otherwise make the slow call ...
--
-- We *only* do this when the call requires multiple generic apply
-- functions, which requires pushing extra stack frames and probably
-- results in intermediate PAPs.  (I say probably, because it might be
-- that we're over-applying a function, but that seems even less
-- likely).
--
-- This very rarely applies, but if it does happen in an inner loop it
-- can have a severe impact on performance (#6084).

--------------
direct_call :: String
            -> Convention     -- e.g. NativeNodeCall or NativeDirectCall
            -> CLabel -> RepArity
            -> [(ArgRep, Maybe CmmExpr)] -> FCode ReturnKind
direct_call caller call_conv lbl arity args
  | debugIsOn && real_arity > length args  -- Too few args
  = do -- Caller should ensure that there are enough args!
       pprPanic "direct_call" $
            text caller <+> ppr arity <+>
            ppr lbl <+> ppr (length args) <+>
            ppr (map snd args) <+> ppr (map fst args)

  | null rest_args  -- Precisely the right number of arguments
  = emitCall (call_conv, NativeReturn) target (nonVArgs args)

  | otherwise       -- Note [over-saturated calls]
  = do dflags <- getDynFlags
       emitCallWithExtraStack (call_conv, NativeReturn)
                              target
                              (nonVArgs fast_args)
                              (nonVArgs (stack_args dflags))
  where
    target = CmmLit (CmmLabel lbl)
    (fast_args, rest_args) = splitAt real_arity args
    stack_args dflags = slowArgs dflags rest_args
    real_arity = case call_conv of
                   NativeNodeCall -> arity + 1
                   _              -> arity

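-- For instance (a sketch): a NativeNodeCall to a function of arity 2
-- with args [node, a, b, c] has real_arity = 3 (the node pointer counts
-- as an argument), so fast_args = [node, a, b] are passed in the usual
-- way and rest_args = [c] is pushed via slowArgs; see
-- Note [over-saturated calls].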

-- When constructing calls, it is easier to keep the ArgReps and the
-- CmmExprs zipped together.  However, a void argument has no
-- representation, so we need to use Maybe CmmExpr (the alternative of
-- using zeroCLit or even undefined would work, but would be ugly).
--
getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
getArgRepsAmodes = mapM getArgRepAmode
  where getArgRepAmode arg
           | V <- rep  = return (V, Nothing)
           | otherwise = do expr <- getArgAmode (NonVoid arg)
                            return (rep, Just expr)
           where rep = toArgRep (argPrimRep arg)

nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
nonVArgs [] = []
nonVArgs ((_, Nothing)  : args) = nonVArgs args
nonVArgs ((_, Just arg) : args) = arg : nonVArgs args
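
-- For example, from the definitions above:
--
--   nonVArgs [(V, Nothing), (P, Just x), (N, Just y)] == [x, y]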

{-
Note [over-saturated calls]

The natural thing to do for an over-saturated call would be to call
the function with the correct number of arguments, and then apply the
remaining arguments to the value returned, e.g.

  f a b c d   (where f has arity 2)
  -->
  r = call f(a,b)
  call r(c,d)

but this entails
 - saving c and d on the stack
 - making a continuation info table
 - at the continuation, loading c and d off the stack into regs
 - finally, call r

Note that since there are a fixed number of different r's
(e.g. stg_ap_pp_fast), we can also pre-compile continuations
that correspond to each of them, rather than generating a fresh
one for each over-saturated call.

Not only does this generate much less code, it is faster too.  We will
generate something like:

  Sp[old+16] = c
  Sp[old+24] = d
  Sp[old+32] = stg_ap_pp_info
  call f(a,b) -- usual calling convention

For the purposes of the CmmCall node, we count this extra stack as
just more arguments that we are passing on the stack (cml_args).
-}

-- | 'slowArgs' takes a list of function arguments and prepares them for
-- pushing on the stack for "extra" arguments to a function which requires
-- fewer arguments than we currently have.
slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
slowArgs _ [] = []
slowArgs dflags args -- careful: reps contains voids (V), but args does not
  | gopt Opt_SccProfilingOn dflags
              = save_cccs ++ this_pat ++ slowArgs dflags rest_args
  | otherwise =              this_pat ++ slowArgs dflags rest_args
  where
    (arg_pat, n)           = slowCallPattern (map fst args)
    (call_args, rest_args) = splitAt n args

    stg_ap_pat = mkCmmRetInfoLabel rtsPackageKey arg_pat
    this_pat   = (N, Just (mkLblExpr stg_ap_pat)) : call_args
    save_cccs  = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
    save_cccs_lbl = mkCmmRetInfoLabel rtsPackageKey (fsLit "stg_restore_cccs")
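
-- A worked example (a sketch, without profiling): given left-over
-- arguments [(P,a), (P,b), (N,c)], slowCallPattern first picks
-- stg_ap_pp (n = 2) and then stg_ap_n, so the pushed arguments become
--
--   [stg_ap_pp_info, a, b, stg_ap_n_info, c]
--
-- i.e. each chunk of arguments is preceded by the info pointer of the
-- generic apply frame that will consume it.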

-------------------------------------------------------------------------
----        Laying out objects on the heap and stack
-------------------------------------------------------------------------

-- The heap always grows upwards, so hpRel is easy to compute
hpRel :: VirtualHpOffset         -- virtual offset of Hp
      -> VirtualHpOffset         -- virtual offset of The Thing
      -> WordOff                 -- integer word offset
hpRel hp off = off - hp
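-- e.g. hpRel 5 7 == 2: an object at virtual offset 7 sits two words
-- above a (virtual) Hp of 5.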

getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
-- See Note [Virtual and real heap pointers] in StgCmmMonad
getHpRelOffset virtual_offset
  = do dflags <- getDynFlags
       hp_usg <- getHpUsage
       return (cmmRegOffW dflags hpReg (hpRel (realHp hp_usg) virtual_offset))

mkVirtHeapOffsets
  :: DynFlags
  -> Bool                     -- True <=> is a thunk
  -> [(PrimRep,a)]            -- Things to make offsets for
  -> (WordOff,                -- _Total_ number of words allocated
      WordOff,                -- Number of words allocated for *pointers*
      [(NonVoid a, ByteOff)])

-- Things with their offsets from start of object in order of
-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER
-- First in list gets lowest offset, which is initial offset + 1.
--
-- Void arguments are removed, so output list may be shorter than
-- input list
--
-- mkVirtHeapOffsets always returns boxed things with smaller offsets
-- than the unboxed things

mkVirtHeapOffsets dflags is_thunk things
  = ( bytesToWordsRoundUp dflags tot_bytes
    , bytesToWordsRoundUp dflags bytes_of_ptrs
    , ptrs_w_offsets ++ non_ptrs_w_offsets
    )
  where
    hdr_words | is_thunk  = thunkHdrSize dflags
              | otherwise = fixedHdrSizeW dflags
    hdr_bytes = wordsToBytes dflags hdr_words

    non_void_things  = filterOut (isVoidRep . fst) things
    (ptrs, non_ptrs) = partition (isGcPtrRep . fst) non_void_things

    (bytes_of_ptrs, ptrs_w_offsets) =
       mapAccumL computeOffset 0 ptrs
    (tot_bytes, non_ptrs_w_offsets) =
       mapAccumL computeOffset bytes_of_ptrs non_ptrs

    computeOffset bytes_so_far (rep, thing)
      = (bytes_so_far + wordsToBytes dflags (argRepSizeW dflags (toArgRep rep)),
         (NonVoid thing, hdr_bytes + bytes_so_far))

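-- A worked example (a sketch, assuming a 64-bit target without
-- profiling, i.e. a one-word / 8-byte header):
--
--   mkVirtHeapOffsets dflags False [(IntRep,x), (PtrRep,y), (VoidRep,z)]
--     == (2, 1, [(NonVoid y, 8), (NonVoid x, 16)])
--
-- The void field z disappears, the pointer y is laid out first at byte
-- offset 8 (just past the header), the non-pointer x follows at 16, and
-- the totals are 2 payload words, 1 of which holds a pointer.
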
-- | Just like mkVirtHeapOffsets, but for constructors
mkVirtConstrOffsets
  :: DynFlags -> [(PrimRep,a)]
  -> (WordOff, WordOff, [(NonVoid a, ByteOff)])
mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags False


-------------------------------------------------------------------------
--
--        Making argument descriptors
--
--  An argument descriptor describes the layout of args on the stack,
--  both for    * GC (stack-layout) purposes, and
--              * saving/restoring registers when a heap-check fails
--
-- Void arguments aren't important, therefore (contrast constructSlowCall)
--
-------------------------------------------------------------------------

-- bring in ARG_P, ARG_N, etc.
#include "../includes/rts/storage/FunTypes.h"

mkArgDescr :: DynFlags -> [Id] -> ArgDescr
mkArgDescr dflags args
  = let arg_bits = argBits dflags arg_reps
        arg_reps = filter isNonV (map idArgRep args)
           -- Getting rid of voids eases matching of standard patterns
    in case stdPattern arg_reps of
         Just spec_id -> ArgSpec spec_id
         Nothing      -> ArgGen  arg_bits

argBits :: DynFlags -> [ArgRep] -> [Bool]    -- True for non-ptr, False for ptr
argBits _      []           = []
argBits dflags (P   : args) = False : argBits dflags args
argBits dflags (arg : args) = take (argRepSizeW dflags arg) (repeat True)
                              ++ argBits dflags args

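-- For example, on a 64-bit target (where N and L each occupy one word):
--
--   argBits dflags [P, N, L] == [False, True, True]
--
-- On a 32-bit target L (a 64-bit non-pointer) occupies two words,
-- giving [False, True, True, True].
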
----------------------
stdPattern :: [ArgRep] -> Maybe Int
stdPattern reps
  = case reps of
      []    -> Just ARG_NONE        -- just void args, probably
      [N]   -> Just ARG_N
      [P]   -> Just ARG_P
      [F]   -> Just ARG_F
      [D]   -> Just ARG_D
      [L]   -> Just ARG_L
      [V16] -> Just ARG_V16
      [V32] -> Just ARG_V32
      [V64] -> Just ARG_V64

      [N,N] -> Just ARG_NN
      [N,P] -> Just ARG_NP
      [P,N] -> Just ARG_PN
      [P,P] -> Just ARG_PP

      [N,N,N] -> Just ARG_NNN
      [N,N,P] -> Just ARG_NNP
      [N,P,N] -> Just ARG_NPN
      [N,P,P] -> Just ARG_NPP
      [P,N,N] -> Just ARG_PNN
      [P,N,P] -> Just ARG_PNP
      [P,P,N] -> Just ARG_PPN
      [P,P,P] -> Just ARG_PPP

      [P,P,P,P]     -> Just ARG_PPPP
      [P,P,P,P,P]   -> Just ARG_PPPPP
      [P,P,P,P,P,P] -> Just ARG_PPPPPP

      _ -> Nothing

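-- So, for example, a function whose non-void argument reps are [N,P]
-- gets ArgSpec ARG_NP, while one with reps [F,N] matches no standard
-- pattern and gets ArgGen [True,True] (one bit per stack word, True
-- meaning non-pointer).
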
-------------------------------------------------------------------------
--
--        Generating the info table and code for a closure
--
-------------------------------------------------------------------------

-- Here we make an info table of type 'CmmInfo'.  The concrete
-- representation as a list of 'CmmAddr' is handled later
-- in the pipeline by 'cmmToRawCmm'.
-- When loading the free variables, a function closure pointer may be tagged,
-- so we must take it into account.

emitClosureProcAndInfoTable :: Bool                    -- top-level?
                            -> Id                      -- name of the closure
                            -> LambdaFormInfo
                            -> CmmInfoTable
                            -> [NonVoid Id]            -- incoming arguments
                            -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body
                            -> FCode ()
emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body
  = do { dflags <- getDynFlags
       -- Bind the binder itself, but only if it's not a top-level
       -- binding. We need non-top let-bindings to refer to the
       -- top-level binding, which this binding would incorrectly shadow.
       ; node <- if top_lvl then return $ idToReg dflags (NonVoid bndr)
                 else bindToReg (NonVoid bndr) lf_info
       ; let node_points = nodeMustPointToIt dflags lf_info
       ; arg_regs <- bindArgsToRegs args
       ; let args' = if node_points then (node : arg_regs) else arg_regs
             conv  = if nodeMustPointToIt dflags lf_info then NativeNodeCall
                                                         else NativeDirectCall
             (offset, _, _) = mkCallEntry dflags conv args' []
       ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs)
       }

-- Data constructors need closures, but not with all the argument handling
-- needed for functions.  The shared part goes here.
emitClosureAndInfoTable ::
  CmmInfoTable -> Convention -> [LocalReg] -> FCode () -> FCode ()
emitClosureAndInfoTable info_tbl conv args body
  = do { (_, blks) <- getCodeScoped body
       ; let entry_lbl = toEntryLbl (cit_lbl info_tbl)
       ; emitProcWithConvention conv (Just info_tbl) entry_lbl args blks
       }