c6e57d5041851cc8128fc4039f14e299912a39be
[ghc.git] / compiler / codeGen / StgCmmLayout.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Building info tables.
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmLayout (
10 mkArgDescr,
11 emitCall, emitReturn, adjustHpBackwards,
12
13 emitClosureProcAndInfoTable,
14 emitClosureAndInfoTable,
15
16 slowCall, directCall,
17
18 mkVirtHeapOffsets, mkVirtConstrOffsets, getHpRelOffset, hpRel,
19
20 ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep
21 ) where
22
23
24 #include "HsVersions.h"
25
26 import StgCmmClosure
27 import StgCmmEnv
28 import StgCmmArgRep -- notably: ( slowCallPattern )
29 import StgCmmTicky
30 import StgCmmMonad
31 import StgCmmUtils
32 import StgCmmProf
33
34 import MkGraph
35 import SMRep
36 import Cmm
37 import CmmUtils
38 import CmmInfo
39 import CLabel
40 import StgSyn
41 import Id
42 import Name
43 import TyCon ( PrimRep(..) )
44 import BasicTypes ( RepArity )
45 import DynFlags
46 import Module
47
import Util
import Data.List
import Data.Maybe (mapMaybe)
import Outputable
import FastString
import Control.Monad
53
54 ------------------------------------------------------------------------
55 -- Call and return sequences
56 ------------------------------------------------------------------------
57
-- | Return multiple values to the sequel
--
-- If the sequel is @Return@
--
-- > return (x,y)
--
-- If the sequel is @AssignTo [p,q]@
--
-- > p=x; q=y;
--
emitReturn :: [CmmExpr] -> FCode ReturnKind
emitReturn results = do
  dflags    <- getDynFlags
  sequel    <- getSequel
  updfr_off <- getUpdFrameOff
  case sequel of
    Return _ -> do
      adjustHpBackwards
      -- The return address lives in the update-frame slot of the old
      -- stack area; jump through its entry code.
      let frame = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
      emit (mkReturn dflags (entryCode dflags frame) results updfr_off)
    AssignTo regs adjust -> do
      when adjust adjustHpBackwards
      emitMultiAssign regs results
  return AssignedDirectly
84
85
-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
-- using the call/return convention @conv@, passing @args@, and
-- returning the results to the current sequel.
--
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
emitCall (callConv, retConv) fun args
  = emitCallWithExtraStack (callConv, retConv) fun args noExtraStack
93
94
-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
-- entry-code of @fun@, using the call/return convention @conv@,
-- passing @args@, pushing some extra stack frames described by
-- @stack@, and returning the results to the current sequel.
--
emitCallWithExtraStack
   :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
   -> [CmmExpr] -> FCode ReturnKind
emitCallWithExtraStack (callConv, retConv) fun args extra_stack
  = do  { dflags <- getDynFlags
        ; adjustHpBackwards
        ; sequel <- getSequel
        ; updfr_off <- getUpdFrameOff
        ; case sequel of
            Return _ -> do
              -- Tail position: jump straight to fun; results go to
              -- whatever return continuation is already on the stack.
              emit $ mkJumpExtra dflags callConv fun args updfr_off extra_stack
              return AssignedDirectly
            AssignTo res_regs _ -> do
              -- The results come back here: make a fresh return label k,
              -- emit the call returning to k, then at k copy the returned
              -- values (arriving via retConv in the Young k area) into
              -- res_regs.  The (k, off) pair is reported to the caller
              -- for stack-layout purposes.
              k <- newLabelC
              let area = Young k
                  (off, _, copyin) = copyInOflow dflags retConv area res_regs []
                  copyout = mkCallReturnsTo dflags fun callConv args k off updfr_off
                                   extra_stack
              emit (copyout <*> mkLabel k <*> copyin)
              return (ReturnedTo k off)
        }
121
122
-- | Adjust the real heap pointer just before a tail call or return.
--
-- At a call or return, the virtual heap pointer may be less than the
-- real Hp, because the latter was advanced to deal with the
-- worst-case branch of the code, and we may be in a better-case
-- branch.  In that case, move the real Hp *back* and retract some
-- ticky allocation count.
--
-- It /does not/ deal with high-water-mark adjustment.
-- That's done by functions which allocate heap.
adjustHpBackwards :: FCode ()
adjustHpBackwards = do
  hp_usg <- getHpUsage
  let real_hp = realHp hp_usg
      virt_hp = virtHp hp_usg
      retract = virt_hp - real_hp     -- words to give back
  new_hp <- getHpRelOffset virt_hp

  -- Generates nothing when virt_hp == real_hp
  emit $ if retract == 0
            then mkNop
            else mkAssign hpReg new_hp

  tickyAllocHeap False retract        -- ...ditto

  setRealHp virt_hp
148
149
150 -------------------------------------------------------------------------
151 -- Making calls: directCall and slowCall
152 -------------------------------------------------------------------------
153
154 -- General plan is:
155 -- - we'll make *one* fast call, either to the function itself
156 -- (directCall) or to stg_ap_<pat>_fast (slowCall)
157 -- Any left-over arguments will be pushed on the stack,
158 --
159 -- e.g. Sp[old+8] = arg1
160 -- Sp[old+16] = arg2
161 -- Sp[old+32] = stg_ap_pp_info
162 -- R2 = arg3
163 -- R3 = arg4
164 -- call f() return to Nothing updfr_off: 32
165
166
-- (directCall f n args)
-- calls f(arg1, ..., argn), and applies the result to the remaining args
-- The function f has arity n, and there are guaranteed at least n args
-- Both arity and args include void args
directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
directCall conv lbl arity stg_args = do
  reps_amodes <- getArgRepsAmodes stg_args
  direct_call "directCall" conv lbl arity reps_amodes
176
-- | @slowCall fun args@ applies @fun@ to @args@, returning the results
-- to the current sequel, going through a generic @stg_ap_<pat>_fast@
-- entry point chosen from the argument reps.
slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
slowCall fun stg_args = do
  dflags      <- getDynFlags
  reps_amodes <- getArgRepsAmodes stg_args
  let (rts_fun, arity) = slowCallPattern (map fst reps_amodes)
  -- NativeNodeCall: fun itself travels as the (untagged-rep) first arg.
  result <- direct_call "slow_call" NativeNodeCall
              (mkRtsApFastLabel rts_fun) arity ((P, Just fun) : reps_amodes)
  emitComment $ mkFastString $
       "slow_call for " ++ showSDoc dflags (ppr fun)
    ++ " with pat " ++ unpackFS rts_fun
  return result
190
191
--------------
-- | Shared worker for 'directCall' and 'slowCall': make one fast call
-- to @lbl@, passing the first @real_arity@ arguments by @call_conv@
-- and pushing any left-over arguments on the stack
-- (see Note [over-saturated calls]).
direct_call :: String       -- for error messages only
            -> Convention   -- e.g. NativeNodeCall or NativeDirectCall
            -> CLabel -> RepArity
            -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind
direct_call caller call_conv lbl arity args
  | debugIsOn && real_arity > length args  -- Too few args
  = do -- Caller should ensure that there are enough args!
       pprPanic "direct_call" $
            text caller <+> ppr arity <+>
            ppr lbl <+> ppr (length args) <+>
            ppr (map snd args) <+> ppr (map fst args)

  | null rest_args  -- Precisely the right number of arguments
  = emitCall (call_conv, NativeReturn) target (nonVArgs args)

  | otherwise       -- Note [over-saturated calls]
  = do dflags <- getDynFlags
       emitCallWithExtraStack (call_conv, NativeReturn)
                              target
                              (nonVArgs fast_args)
                              (nonVArgs (stack_args dflags))
  where
    target = CmmLit (CmmLabel lbl)
    (fast_args, rest_args) = splitAt real_arity args
    stack_args dflags = slowArgs dflags rest_args
    -- A NativeNodeCall also passes the closure pointer itself as an
    -- argument, hence the +1.
    real_arity = case call_conv of
                   NativeNodeCall -> arity+1
                   _              -> arity
222
-- When constructing calls, it is easier to keep the ArgReps and the
-- CmmExprs zipped together.  However, a void argument has no
-- representation, so we need to use Maybe CmmExpr (the alternative of
-- using zeroCLit or even undefined would work, but would be ugly).
--
getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
getArgRepsAmodes = mapM rep_amode
  where
    rep_amode arg = case toArgRep (argPrimRep arg) of
      V   -> return (V, Nothing)              -- voids carry no value
      rep -> do amode <- getArgAmode (NonVoid arg)
                return (rep, Just amode)
235
-- | Drop the void arguments, keeping just the real 'CmmExpr's.
-- (Hand-rolled recursion replaced by the standard 'mapMaybe' idiom.)
nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
nonVArgs = mapMaybe snd
240
241 {-
242 Note [over-saturated calls]
243
244 The natural thing to do for an over-saturated call would be to call
245 the function with the correct number of arguments, and then apply the
246 remaining arguments to the value returned, e.g.
247
248 f a b c d (where f has arity 2)
249 -->
250 r = call f(a,b)
251 call r(c,d)
252
253 but this entails
254 - saving c and d on the stack
255 - making a continuation info table
256 - at the continuation, loading c and d off the stack into regs
257 - finally, call r
258
259 Note that since there are a fixed number of different r's
260 (e.g. stg_ap_pp_fast), we can also pre-compile continuations
261 that correspond to each of them, rather than generating a fresh
262 one for each over-saturated call.
263
264 Not only does this generate much less code, it is faster too. We will
265 generate something like:
266
267 Sp[old+16] = c
268 Sp[old+24] = d
269 Sp[old+32] = stg_ap_pp_info
270 call f(a,b) -- usual calling convention
271
272 For the purposes of the CmmCall node, we count this extra stack as
273 just more arguments that we are passing on the stack (cml_args).
274 -}
275
-- | 'slowArgs' takes a list of function arguments and prepares them for
-- pushing on the stack for "extra" arguments to a function which requires
-- fewer arguments than we currently have.
--
-- The argument list is chopped into chunks, each chunk prefixed by the
-- matching @stg_ap_<pat>@ return-info label so the RTS knows how to
-- apply that chunk when the function eventually returns.  When
-- cost-centre profiling is on, each chunk is additionally preceded by
-- a @stg_restore_cccs@ frame saving the current cost-centre stack.
slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
slowArgs _ [] = []
slowArgs dflags args -- careful: reps contains voids (V), but args does not
  | gopt Opt_SccProfilingOn dflags
              = save_cccs ++ this_pat ++ slowArgs dflags rest_args
  | otherwise =              this_pat ++ slowArgs dflags rest_args
  where
    -- Longest standard apply pattern matching a prefix of the reps;
    -- n is how many arguments that pattern consumes.
    (arg_pat, n) = slowCallPattern (map fst args)
    (call_args, rest_args) = splitAt n args

    stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
    this_pat   = (N, Just (mkLblExpr stg_ap_pat)) : call_args
    save_cccs  = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
    save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
293
294 -------------------------------------------------------------------------
295 ---- Laying out objects on the heap and stack
296 -------------------------------------------------------------------------
297
-- The heap always grows upwards, so hpRel is easy:
-- just how far The Thing sits beyond the given Hp position.
hpRel :: VirtualHpOffset    -- virtual offset of Hp
      -> VirtualHpOffset    -- virtual offset of The Thing
      -> WordOff            -- integer word offset
hpRel hpOff thingOff = thingOff - hpOff
303
-- | An expression for the address @virtual_offset@ words into the
-- heap, computed relative to the current value of the real Hp register.
getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
getHpRelOffset virtual_offset = do
  dflags <- getDynFlags
  hp_usg <- getHpUsage
  let rel_words = hpRel (realHp hp_usg) virtual_offset
  return (cmmRegOffW dflags hpReg rel_words)
309
mkVirtHeapOffsets
  :: DynFlags
  -> Bool                       -- True <=> is a thunk
  -> [(PrimRep,a)]              -- Things to make offsets for
  -> (WordOff,                  -- _Total_ number of words allocated
      WordOff,                  -- Number of words allocated for *pointers*
      [(NonVoid a, VirtualHpOffset)])

-- Things with their offsets from start of object in order of
-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER
-- First in list gets lowest offset, which is initial offset + 1.
--
-- Void arguments are removed, so output list may be shorter than
-- input list
--
-- mkVirtHeapOffsets always returns boxed things with smaller offsets
-- than the unboxed things

mkVirtHeapOffsets dflags is_thunk things =
    (total_wds, ptr_wds, ptr_offs ++ nonptr_offs)
  where
    hdr_size | is_thunk  = thunkHdrSize dflags
             | otherwise = fixedHdrSize dflags

    -- Drop the voids, then lay out all GC pointers first.
    real_things = filterOut (isVoidRep . fst) things
    (ptr_things, nonptr_things) = partition (isGcPtrRep . fst) real_things

    (ptr_wds,   ptr_offs)    = mapAccumL assign_off 0       ptr_things
    (total_wds, nonptr_offs) = mapAccumL assign_off ptr_wds nonptr_things

    -- Accumulator is words allocated so far; each offset is relative
    -- to the start of the object, just past the header.
    assign_off wds_so_far (rep, thing)
      = ( wds_so_far + argRepSizeW dflags (toArgRep rep)
        , (NonVoid thing, hdr_size + wds_so_far) )
342
-- | Just like 'mkVirtHeapOffsets', but for constructors (never thunks).
mkVirtConstrOffsets
  :: DynFlags -> [(PrimRep,a)]
  -> (WordOff, WordOff, [(NonVoid a, VirtualHpOffset)])
mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags False
346
347
348 -------------------------------------------------------------------------
349 --
350 -- Making argument descriptors
351 --
352 -- An argument descriptor describes the layout of args on the stack,
353 -- both for * GC (stack-layout) purposes, and
354 -- * saving/restoring registers when a heap-check fails
355 --
356 -- Void arguments aren't important, therefore (contrast constructSlowCall)
357 --
358 -------------------------------------------------------------------------
359
360 -- bring in ARG_P, ARG_N, etc.
361 #include "../includes/rts/storage/FunTypes.h"
362
mkArgDescr :: Name -> [Id] -> FCode ArgDescr
mkArgDescr _nm args = do
  dflags <- getDynFlags
  -- Getting rid of voids eases matching of standard patterns
  let arg_reps = filter isNonV (map idArgRep args)
  case stdPattern arg_reps of
    Just spec_id -> return (ArgSpec spec_id)
    Nothing      -> return (ArgGen (argBits dflags arg_reps))
372
-- | One 'Bool' per stack word the arguments occupy:
-- True for a non-pointer word, False for a pointer word.
-- (Idiom fix: @take n (repeat True)@ replaced by @replicate n True@.)
argBits :: DynFlags -> [ArgRep] -> [Bool]    -- True for non-ptr, False for ptr
argBits _      []           = []
argBits dflags (P   : args) = False : argBits dflags args
argBits dflags (arg : args) = replicate (argRepSizeW dflags arg) True
                              ++ argBits dflags args
378
----------------------
-- | Match a (void-free) list of argument reps against the standard
-- argument patterns known to the RTS (the ARG_* constants come from
-- the included FunTypes.h).  Returns Nothing when no standard pattern
-- fits, in which case the caller must build a generic 'ArgGen' bitmap.
stdPattern :: [ArgRep] -> Maybe Int
stdPattern reps
  = case reps of
      []    -> Just ARG_NONE  -- just void args, probably
      [N]   -> Just ARG_N
      [P]   -> Just ARG_P
      [F]   -> Just ARG_F
      [D]   -> Just ARG_D
      [L]   -> Just ARG_L
      [V16] -> Just ARG_V16

      [N,N] -> Just ARG_NN
      [N,P] -> Just ARG_NP
      [P,N] -> Just ARG_PN
      [P,P] -> Just ARG_PP

      [N,N,N] -> Just ARG_NNN
      [N,N,P] -> Just ARG_NNP
      [N,P,N] -> Just ARG_NPN
      [N,P,P] -> Just ARG_NPP
      [P,N,N] -> Just ARG_PNN
      [P,N,P] -> Just ARG_PNP
      [P,P,N] -> Just ARG_PPN
      [P,P,P] -> Just ARG_PPP

      [P,P,P,P]     -> Just ARG_PPPP
      [P,P,P,P,P]   -> Just ARG_PPPPP
      [P,P,P,P,P,P] -> Just ARG_PPPPPP

      _ -> Nothing
410
411 -------------------------------------------------------------------------
412 --
413 -- Generating the info table and code for a closure
414 --
415 -------------------------------------------------------------------------
416
-- Here we make an info table of type 'CmmInfo'.  The concrete
-- representation as a list of 'CmmAddr' is handled later
-- in the pipeline by 'cmmToRawCmm'.
-- When loading the free variables, a function closure pointer may be tagged,
-- so we must take it into account.

-- | Emit the info table and entry code for a closure.  The body
-- continuation receives the incoming-argument stack offset, the
-- register bound to the closure itself (node), and the argument
-- registers.
emitClosureProcAndInfoTable :: Bool                    -- top-level?
                            -> Id                      -- name of the closure
                            -> LambdaFormInfo
                            -> CmmInfoTable
                            -> [NonVoid Id]            -- incoming arguments
                            -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body
                            -> FCode ()
emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body
 = do   { dflags <- getDynFlags
        -- Bind the binder itself, but only if it's not a top-level
        -- binding. We need non-top let-bindings to refer to the
        -- top-level binding, which this binding would incorrectly shadow.
        ; node <- if top_lvl then return $ idToReg dflags (NonVoid bndr)
                  else bindToReg (NonVoid bndr) lf_info
        ; let node_points = nodeMustPointToIt dflags lf_info
        ; arg_regs <- bindArgsToRegs args
        -- Fix: reuse node_points rather than recomputing
        -- nodeMustPointToIt; args' and conv must agree on whether the
        -- closure pointer is passed, so derive both from one value.
        ; let args' = if node_points then (node : arg_regs) else arg_regs
              conv  = if node_points then NativeNodeCall
                                     else NativeDirectCall
              (offset, _, _) = mkCallEntry dflags conv args' []
        ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs)
        }
445
-- Data constructors need closures, but not with all the argument handling
-- needed for functions. The shared part goes here.
emitClosureAndInfoTable ::
  CmmInfoTable -> Convention -> [LocalReg] -> FCode () -> FCode ()
emitClosureAndInfoTable info_tbl conv args body = do
  code <- getCode body
  let lbl = toEntryLbl (cit_lbl info_tbl)
  emitProcWithConvention conv (Just info_tbl) lbl args code