{-# LANGUAGE CPP #-}

-----------------------------------------------------------------------------
--
-- Building info tables.
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmLayout (
        mkArgDescr,
        emitCall, emitReturn, adjustHpBackwards,

        emitClosureProcAndInfoTable,
        emitClosureAndInfoTable,

        slowCall, directCall,

        mkVirtHeapOffsets, mkVirtConstrOffsets, getHpRelOffset,

        ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep
  ) where


#include "HsVersions.h"

import Prelude hiding ((<*>))

import StgCmmClosure
import StgCmmEnv
import StgCmmArgRep -- notably: ( slowCallPattern )
import StgCmmTicky
import StgCmmMonad
import StgCmmUtils
import StgCmmProf (curCCS)

import MkGraph
import SMRep
import Cmm
import CmmUtils
import CmmInfo
import CLabel
import StgSyn
import Id
import TyCon         ( PrimRep(..) )
import BasicTypes    ( RepArity )
import DynFlags
import Module

import Util
import Data.List
import Outputable
import FastString
import Control.Monad

------------------------------------------------------------------------
--              Call and return sequences
------------------------------------------------------------------------

-- | Return multiple values to the sequel
--
-- If the sequel is @Return@
--
-- >     return (x,y)
--
-- If the sequel is @AssignTo [p,q]@
--
-- >    p=x; q=y;
--
emitReturn :: [CmmExpr] -> FCode ReturnKind
emitReturn results
  = do { dflags    <- getDynFlags
       ; sequel    <- getSequel
       ; updfr_off <- getUpdFrameOff
       ; case sequel of
           Return _ ->
             do { adjustHpBackwards
                ; let e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
                ; emit (mkReturn dflags (entryCode dflags e) results updfr_off)
                }
           AssignTo regs adjust ->
             do { when adjust adjustHpBackwards
                ; emitMultiAssign regs results }
       ; return AssignedDirectly
       }


-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
-- using the call/return convention @conv@, passing @args@, and
-- returning the results to the current sequel.
--
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
emitCall convs fun args
  = emitCallWithExtraStack convs fun args noExtraStack


-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
-- entry-code of @fun@, using the call/return convention @conv@,
-- passing @args@, pushing some extra stack frames described by
-- @stack@, and returning the results to the current sequel.
--
emitCallWithExtraStack
   :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
   -> [CmmExpr] -> FCode ReturnKind
emitCallWithExtraStack (callConv, retConv) fun args extra_stack
  = do { dflags <- getDynFlags
       ; adjustHpBackwards
       ; sequel <- getSequel
       ; updfr_off <- getUpdFrameOff
       ; case sequel of
           Return _ -> do
             emit $ mkJumpExtra dflags callConv fun args updfr_off extra_stack
             return AssignedDirectly
           AssignTo res_regs _ -> do
             k <- newLabelC
             let area = Young k
                 (off, _, copyin) = copyInOflow dflags retConv area res_regs []
                 copyout = mkCallReturnsTo dflags fun callConv args k off
                              updfr_off extra_stack
             tscope <- getTickScope
             emit (copyout <*> mkLabel k tscope <*> copyin)
             return (ReturnedTo k off)
       }


adjustHpBackwards :: FCode ()
-- This function adjusts the heap pointer just before a tail call or
-- return.  At a call or return, the virtual heap pointer may be less
-- than the real Hp, because the latter was advanced to deal with
-- the worst-case branch of the code, and we may be in a better-case
-- branch.  In that case, move the real Hp *back* and retract some
-- ticky allocation count.
--
-- It *does not* deal with high-water-mark adjustment.  That's done by
-- functions which allocate heap.
adjustHpBackwards
  = do { hp_usg <- getHpUsage
       ; let rHp = realHp hp_usg
             vHp = virtHp hp_usg
             adjust_words = vHp - rHp
       ; new_hp <- getHpRelOffset vHp

       ; emit (if adjust_words == 0
               then mkNop
               else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp

       ; tickyAllocHeap False adjust_words -- ...ditto

       ; setRealHp vHp
       }
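
-- A worked example (illustrative numbers, not taken from the code
-- above): suppose the heap check at the start of the block advanced
-- the real Hp to virtual offset 16 to cover the worst-case branch,
-- but the branch we actually took only allocated 10 words
-- (virtHp = 10, realHp = 16).  Then adjust_words = 10 - 16 = -6, so
-- we emit an assignment moving Hp back by 6 words and retract six
-- words from the ticky allocation count before the call or return.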


-------------------------------------------------------------------------
--              Making calls: directCall and slowCall
-------------------------------------------------------------------------

-- General plan is:
--   - we'll make *one* fast call, either to the function itself
--     (directCall) or to stg_ap_<pat>_fast (slowCall)
--     Any left-over arguments will be pushed on the stack,
--
--     e.g. Sp[old+8]  = arg1
--          Sp[old+16] = arg2
--          Sp[old+32] = stg_ap_pp_info
--          R2 = arg3
--          R3 = arg4
--          call f() return to Nothing updfr_off: 32


directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
-- (directCall f n args)
-- calls f(arg1, ..., argn), and applies the result to the remaining args
-- The function f has arity n, and there are guaranteed to be at least n args
-- Both arity and args include void args
directCall conv lbl arity stg_args
  = do { argreps <- getArgRepsAmodes stg_args
       ; direct_call "directCall" conv lbl arity argreps }


slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
  = do  dflags <- getDynFlags
        argsreps <- getArgRepsAmodes stg_args
        let (rts_fun, arity) = slowCallPattern (map fst argsreps)

        (r, slow_code) <- getCodeR $ do
           r <- direct_call "slow_call" NativeNodeCall
                 (mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps)
           emitComment $ mkFastString ("slow_call for " ++
                                        showSDoc dflags (ppr fun) ++
                                        " with pat " ++ unpackFS rts_fun)
           return r

        -- See Note [avoid intermediate PAPs]
        let n_args = length stg_args
        if n_args > arity && optLevel dflags >= 2
           then do
             funv <- (CmmReg . CmmLocal) `fmap` assignTemp fun
             fun_iptr <- (CmmReg . CmmLocal) `fmap`
                    assignTemp (closureInfoPtr dflags (cmmUntag dflags funv))

             -- ToDo: we could do slightly better here by reusing the
             -- continuation from the slow call, which we have in r.
             -- Also we'd like to push the continuation on the stack
             -- before the branch, so that we only get one copy of the
             -- code that saves all the live variables across the
             -- call, but that might need some improvements to the
             -- special case in the stack layout code to handle this
             -- (see Note [diamond proc point]).

             fast_code <- getCode $
                emitCall (NativeNodeCall, NativeReturn)
                  (entryCode dflags fun_iptr)
                  (nonVArgs ((P,Just funv):argsreps))

             slow_lbl <- newLabelC
             fast_lbl <- newLabelC
             is_tagged_lbl <- newLabelC
             end_lbl <- newLabelC

             let correct_arity = cmmEqWord dflags (funInfoArity dflags fun_iptr)
                                                  (mkIntExpr dflags n_args)

             tscope <- getTickScope
             emit (mkCbranch (cmmIsTagged dflags funv)
                             is_tagged_lbl slow_lbl (Just True)
                   <*> mkLabel is_tagged_lbl tscope
                   <*> mkCbranch correct_arity fast_lbl slow_lbl (Just True)
                   <*> mkLabel fast_lbl tscope
                   <*> fast_code
                   <*> mkBranch end_lbl
                   <*> mkLabel slow_lbl tscope
                   <*> slow_code
                   <*> mkLabel end_lbl tscope)
             return r

           else do
             emit slow_code
             return r


-- Note [avoid intermediate PAPs]
--
-- A slow call which needs multiple generic apply patterns will be
-- almost guaranteed to create one or more intermediate PAPs when
-- applied to a function that takes the correct number of arguments.
-- We try to avoid this situation by generating code to test whether
-- we are calling a function with the correct number of arguments
-- first, i.e.:
--
--     if (TAG(f) != 0) {   // f is not a thunk
--        if (f->info.arity == n) {
--           ... make a fast call to f ...
--        }
--     }
--     ... otherwise make the slow call ...
--
-- We *only* do this when the call requires multiple generic apply
-- functions, which requires pushing extra stack frames and probably
-- results in intermediate PAPs.  (I say probably, because it might be
-- that we're over-applying a function, but that seems even less
-- likely).
--
-- This very rarely applies, but if it does happen in an inner loop it
-- can have a severe impact on performance (#6084).
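--
-- A concrete (illustrative) case: an unknown call @f x p@ where x is
-- an Int# and p is a pointer.  No single generic apply pattern matches
-- the reps [N,P], so the slow path calls stg_ap_n and pushes an
-- stg_ap_p frame for p.  If f turns out to have arity 2, stg_ap_n has
-- too few arguments, so it builds a PAP for (f x); the stg_ap_p frame
-- then applies p to that PAP, which is immediately consumed -- exactly
-- the intermediate PAP the arity test above avoids.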


--------------
direct_call :: String
            -> Convention     -- e.g. NativeNodeCall or NativeDirectCall
            -> CLabel -> RepArity
            -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind
direct_call caller call_conv lbl arity args
  | debugIsOn && real_arity > length args  -- Too few args
  = do -- Caller should ensure that there are enough args!
       pprPanic "direct_call" $
            text caller <+> ppr arity <+>
            ppr lbl <+> ppr (length args) <+>
            ppr (map snd args) <+> ppr (map fst args)

  | null rest_args  -- Precisely the right number of arguments
  = emitCall (call_conv, NativeReturn) target (nonVArgs args)

  | otherwise       -- See Note [over-saturated calls]
  = do dflags <- getDynFlags
       emitCallWithExtraStack (call_conv, NativeReturn)
                              target
                              (nonVArgs fast_args)
                              (nonVArgs (stack_args dflags))
  where
    target = CmmLit (CmmLabel lbl)
    (fast_args, rest_args) = splitAt real_arity args
    stack_args dflags = slowArgs dflags rest_args
    real_arity = case call_conv of
                   NativeNodeCall -> arity+1
                   _              -> arity


-- When constructing calls, it is easier to keep the ArgReps and the
-- CmmExprs zipped together.  However, a void argument has no
-- representation, so we need to use Maybe CmmExpr (the alternative of
-- using zeroCLit or even undefined would work, but would be ugly).
--
getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
getArgRepsAmodes = mapM getArgRepAmode
  where getArgRepAmode arg
           | V <- rep  = return (V, Nothing)
           | otherwise = do expr <- getArgAmode (NonVoid arg)
                            return (rep, Just expr)
           where rep = toArgRep (argPrimRep arg)

nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
nonVArgs [] = []
nonVArgs ((_,Nothing)  : args) = nonVArgs args
nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
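
-- An illustrative example: for STG arguments (x :: Int#, v :: Void#,
-- p :: SomeBoxedType), getArgRepsAmodes yields roughly
--
--      [(N, Just x'), (V, Nothing), (P, Just p')]
--
-- where x' and p' are the Cmm amodes of x and p.  nonVArgs then drops
-- the void entry, giving [x', p']: the arguments actually passed,
-- while the full ArgRep list (including V) remains available for
-- picking a call pattern.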

{-
Note [over-saturated calls]

The natural thing to do for an over-saturated call would be to call
the function with the correct number of arguments, and then apply the
remaining arguments to the value returned, e.g.

  f a b c d   (where f has arity 2)
  -->
  r = call f(a,b)
  call r(c,d)

but this entails
 - saving c and d on the stack
 - making a continuation info table
 - at the continuation, loading c and d off the stack into regs
 - finally, call r

Note that since there are a fixed number of different r's
(e.g. stg_ap_pp_fast), we can also pre-compile continuations
that correspond to each of them, rather than generating a fresh
one for each over-saturated call.

Not only does this generate much less code, it is faster too.  We will
generate something like:

  Sp[old+16] = c
  Sp[old+24] = d
  Sp[old+32] = stg_ap_pp_info
  call f(a,b) -- usual calling convention

For the purposes of the CmmCall node, we count this extra stack as
just more arguments that we are passing on the stack (cml_args).
-}

-- | 'slowArgs' takes a list of function arguments and prepares them for
-- pushing on the stack for "extra" arguments to a function which requires
-- fewer arguments than we currently have.
slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
slowArgs _ [] = []
slowArgs dflags args -- careful: reps contains voids (V), but args does not
  | gopt Opt_SccProfilingOn dflags
              = save_cccs ++ this_pat ++ slowArgs dflags rest_args
  | otherwise =              this_pat ++ slowArgs dflags rest_args
  where
    (arg_pat, n)           = slowCallPattern (map fst args)
    (call_args, rest_args) = splitAt n args

    stg_ap_pat    = mkCmmRetInfoLabel rtsUnitId arg_pat
    this_pat      = (N, Just (mkLblExpr stg_ap_pat)) : call_args
    save_cccs     = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
    save_cccs_lbl = mkCmmRetInfoLabel rtsUnitId (fsLit "stg_restore_cccs")
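
-- A worked example (illustrative): if the left-over arguments have
-- reps [P,P,P], slowCallPattern picks ("stg_ap_ppp", 3), so without
-- profiling slowArgs produces, in order, the stg_ap_ppp return
-- address followed by the three pointer arguments.  With -prof
-- (Opt_SccProfilingOn), each such chunk is additionally preceded by a
-- stg_restore_cccs frame carrying the current cost-centre stack, so
-- the CCS is restored before the pending application runs.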

-------------------------------------------------------------------------
----    Laying out objects on the heap and stack
-------------------------------------------------------------------------

-- The heap always grows upwards, so hpRel is easy to compute
hpRel :: VirtualHpOffset        -- virtual offset of Hp
      -> VirtualHpOffset        -- virtual offset of The Thing
      -> WordOff                -- integer word offset
hpRel hp off = off - hp
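
-- For example, if the real Hp corresponds to virtual offset 6 and The
-- Thing lives at virtual offset 4, then hpRel 6 4 = -2: the thing is
-- addressed two words below the real Hp.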

getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
-- See Note [Virtual and real heap pointers] in StgCmmMonad
getHpRelOffset virtual_offset
  = do dflags <- getDynFlags
       hp_usg <- getHpUsage
       return (cmmRegOffW dflags hpReg (hpRel (realHp hp_usg) virtual_offset))

mkVirtHeapOffsets
  :: DynFlags
  -> Bool               -- True <=> is a thunk
  -> [(PrimRep,a)]      -- Things to make offsets for
  -> (WordOff,          -- _Total_ number of words allocated
      WordOff,          -- Number of words allocated for *pointers*
      [(NonVoid a, ByteOff)])

-- Things with their offsets from start of object in order of
-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER
-- First in list gets lowest offset, namely the offset just past
-- the object header.
--
-- Void arguments are removed, so output list may be shorter than
-- input list
--
-- mkVirtHeapOffsets always returns boxed things with smaller offsets
-- than the unboxed things

mkVirtHeapOffsets dflags is_thunk things
  = ( bytesToWordsRoundUp dflags tot_bytes
    , bytesToWordsRoundUp dflags bytes_of_ptrs
    , ptrs_w_offsets ++ non_ptrs_w_offsets
    )
  where
    hdr_words | is_thunk  = thunkHdrSize dflags
              | otherwise = fixedHdrSizeW dflags
    hdr_bytes = wordsToBytes dflags hdr_words

    non_void_things  = filterOut (isVoidRep . fst) things
    (ptrs, non_ptrs) = partition (isGcPtrRep . fst) non_void_things

    (bytes_of_ptrs, ptrs_w_offsets) =
       mapAccumL computeOffset 0 ptrs
    (tot_bytes, non_ptrs_w_offsets) =
       mapAccumL computeOffset bytes_of_ptrs non_ptrs

    computeOffset bytes_so_far (rep, thing)
      = (bytes_so_far + wordsToBytes dflags (argRepSizeW dflags (toArgRep rep)),
         (NonVoid thing, hdr_bytes + bytes_so_far))
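
-- A worked example (illustrative; assumes a 64-bit target and a
-- one-word object header, so hdr_bytes = 8):
--
--      mkVirtHeapOffsets dflags False [(IntRep,a), (PtrRep,b), (DoubleRep,c)]
--
-- lays out the pointer field first: b at byte offset 8 (just past the
-- header), then the non-pointers a at 16 and c at 24, returning
--
--      (3, 1, [(b,8), (a,16), (c,24)])
--
-- i.e. three payload words in total, one of which is a pointer.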

-- | Just like mkVirtHeapOffsets, but for constructors
mkVirtConstrOffsets
  :: DynFlags -> [(PrimRep,a)]
  -> (WordOff, WordOff, [(NonVoid a, ByteOff)])
mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags False


-------------------------------------------------------------------------
--
--      Making argument descriptors
--
--  An argument descriptor describes the layout of args on the stack,
--  both for      * GC (stack-layout) purposes, and
--                * saving/restoring registers when a heap-check fails
--
--  Void arguments therefore aren't important (contrast constructSlowCall)
--
-------------------------------------------------------------------------

-- bring in ARG_P, ARG_N, etc.
#include "../includes/rts/storage/FunTypes.h"

mkArgDescr :: DynFlags -> [Id] -> ArgDescr
mkArgDescr dflags args
  = let arg_bits = argBits dflags arg_reps
        arg_reps = filter isNonV (map idArgRep args)
           -- Getting rid of voids eases matching of standard patterns
    in case stdPattern arg_reps of
         Just spec_id -> ArgSpec spec_id
         Nothing      -> ArgGen arg_bits

argBits :: DynFlags -> [ArgRep] -> [Bool]   -- True for non-ptr, False for ptr
argBits _      []           = []
argBits dflags (P   : args) = False : argBits dflags args
argBits dflags (arg : args) = take (argRepSizeW dflags arg) (repeat True)
                              ++ argBits dflags args
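
-- For example, on a 64-bit target (where a D fits in one word):
--
--      argBits dflags [P, N, D]  ==  [False, True, True]
--
-- On a 32-bit target a D occupies two words, giving
-- [False, True, True, True].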

----------------------
stdPattern :: [ArgRep] -> Maybe Int
stdPattern reps
  = case reps of
      []    -> Just ARG_NONE  -- just void args, probably
      [N]   -> Just ARG_N
      [P]   -> Just ARG_P
      [F]   -> Just ARG_F
      [D]   -> Just ARG_D
      [L]   -> Just ARG_L
      [V16] -> Just ARG_V16
      [V32] -> Just ARG_V32
      [V64] -> Just ARG_V64

      [N,N] -> Just ARG_NN
      [N,P] -> Just ARG_NP
      [P,N] -> Just ARG_PN
      [P,P] -> Just ARG_PP

      [N,N,N] -> Just ARG_NNN
      [N,N,P] -> Just ARG_NNP
      [N,P,N] -> Just ARG_NPN
      [N,P,P] -> Just ARG_NPP
      [P,N,N] -> Just ARG_PNN
      [P,N,P] -> Just ARG_PNP
      [P,P,N] -> Just ARG_PPN
      [P,P,P] -> Just ARG_PPP

      [P,P,P,P]     -> Just ARG_PPPP
      [P,P,P,P,P]   -> Just ARG_PPPPP
      [P,P,P,P,P,P] -> Just ARG_PPPPPP

      _ -> Nothing
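
-- For example (illustrative): a function whose non-void argument reps
-- are [P,N] gets ArgSpec ARG_PN, so the RTS can use a precompiled
-- bitmap; one whose reps are [P,F] has no standard pattern and falls
-- back to ArgGen [False,True] (on a target where an F takes one word).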

-------------------------------------------------------------------------
--
--      Generating the info table and code for a closure
--
-------------------------------------------------------------------------

-- Here we make an info table of type 'CmmInfo'.  The concrete
-- representation as a list of 'CmmAddr' is handled later
-- in the pipeline by 'cmmToRawCmm'.
-- When loading the free variables, a function closure pointer may be tagged,
-- so we must take it into account.

emitClosureProcAndInfoTable :: Bool                    -- top-level?
                            -> Id                      -- name of the closure
                            -> LambdaFormInfo
                            -> CmmInfoTable
                            -> [NonVoid Id]            -- incoming arguments
                            -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body
                            -> FCode ()
emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body
  = do { dflags <- getDynFlags
       -- Bind the binder itself, but only if it's not a top-level
       -- binding.  We need non-top let-bindings to refer to the
       -- top-level binding, which this binding would incorrectly shadow.
       ; node <- if top_lvl then return $ idToReg dflags (NonVoid bndr)
                 else bindToReg (NonVoid bndr) lf_info
       ; let node_points = nodeMustPointToIt dflags lf_info
       ; arg_regs <- bindArgsToRegs args
       ; let args' = if node_points then (node : arg_regs) else arg_regs
             conv  = if nodeMustPointToIt dflags lf_info then NativeNodeCall
                     else NativeDirectCall
             (offset, _, _) = mkCallEntry dflags conv args' []
       ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs)
       }

-- Data constructors need closures, but not with all the argument handling
-- needed for functions.  The shared part goes here.
emitClosureAndInfoTable ::
  CmmInfoTable -> Convention -> [LocalReg] -> FCode () -> FCode ()
emitClosureAndInfoTable info_tbl conv args body
  = do { (_, blks) <- getCodeScoped body
       ; let entry_lbl = toEntryLbl (cit_lbl info_tbl)
       ; emitProcWithConvention conv (Just info_tbl) entry_lbl args blks
       }