Add support for 512-bit-wide vectors.
[ghc.git] / compiler / codeGen / StgCmmLayout.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Building info tables.
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmLayout (
10 mkArgDescr,
11 emitCall, emitReturn, adjustHpBackwards,
12
13 emitClosureProcAndInfoTable,
14 emitClosureAndInfoTable,
15
16 slowCall, directCall,
17
18 mkVirtHeapOffsets, mkVirtConstrOffsets, getHpRelOffset, hpRel,
19
20 ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep
21 ) where
22
23
24 #include "HsVersions.h"
25
26 import StgCmmClosure
27 import StgCmmEnv
28 import StgCmmArgRep -- notably: ( slowCallPattern )
29 import StgCmmTicky
30 import StgCmmMonad
31 import StgCmmUtils
32 import StgCmmProf (curCCS)
33
34 import MkGraph
35 import SMRep
36 import Cmm
37 import CmmUtils
38 import CmmInfo
39 import CLabel
40 import StgSyn
41 import Id
42 import TyCon ( PrimRep(..) )
43 import BasicTypes ( RepArity )
44 import DynFlags
45 import Module
46
47 import Util
48 import Data.List
49 import Outputable
50 import FastString
51 import Control.Monad
52
53 ------------------------------------------------------------------------
54 -- Call and return sequences
55 ------------------------------------------------------------------------
56
-- | Hand a list of result values to the current sequel.
--
-- If the sequel is @Return@
--
-- > return (x,y)
--
-- If the sequel is @AssignTo [p,q]@
--
-- > p=x; q=y;
--
emitReturn :: [CmmExpr] -> FCode ReturnKind
emitReturn results = do
  dflags    <- getDynFlags
  sequel    <- getSequel
  updfr_off <- getUpdFrameOff
  case sequel of
    Return _ -> do
      adjustHpBackwards
      -- The return address lives in the update-frame slot of the Old area.
      let ret_addr = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
      emit (mkReturn dflags (entryCode dflags ret_addr) results updfr_off)
    AssignTo regs adjust -> do
      when adjust adjustHpBackwards
      emitMultiAssign regs results
  return AssignedDirectly
83
84
-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
-- using the call/return convention @conv@, passing @args@, and
-- returning the results to the current sequel.  It is simply
-- 'emitCallWithExtraStack' with no extra stack frames.
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
emitCall convs fun args = emitCallWithExtraStack convs fun args noExtraStack
92
93
-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
-- entry-code of @fun@, using the call/return convention @conv@,
-- passing @args@, pushing some extra stack frames described by
-- @stack@, and returning the results to the current sequel.
emitCallWithExtraStack
  :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
  -> [CmmExpr] -> FCode ReturnKind
emitCallWithExtraStack (callConv, retConv) fun args extra_stack = do
  dflags <- getDynFlags
  adjustHpBackwards
  sequel    <- getSequel
  updfr_off <- getUpdFrameOff
  case sequel of
    Return _ -> do
      -- Tail position: just jump to the function.
      emit (mkJumpExtra dflags callConv fun args updfr_off extra_stack)
      return AssignedDirectly
    AssignTo res_regs _ -> do
      -- Proper call: allocate a return point and copy the results
      -- out of the return convention into res_regs at that label.
      ret_lbl <- newLabelC
      let (off, _, copyin) =
            copyInOflow dflags retConv (Young ret_lbl) res_regs []
          copyout =
            mkCallReturnsTo dflags fun callConv args ret_lbl off
                            updfr_off extra_stack
      emit (copyout <*> mkLabel ret_lbl <*> copyin)
      return (ReturnedTo ret_lbl off)
120
121
adjustHpBackwards :: FCode ()
-- Adjust the real heap pointer just before a tail call or return.  At
-- that point the virtual heap pointer may be less than the real Hp,
-- because the latter was advanced to cover the worst-case branch of
-- the code and we may be in a better-case branch.  In that case, move
-- the real Hp *back* and retract the corresponding ticky allocation
-- count.
--
-- It *does not* deal with high-water-mark adjustment.
-- That's done by functions which allocate heap.
adjustHpBackwards = do
  hp_usg <- getHpUsage
  let real_hp      = realHp hp_usg
      virt_hp      = virtHp hp_usg
      adjust_words = virt_hp - real_hp
  new_hp <- getHpRelOffset virt_hp
  -- Generates nothing at all when the two pointers already agree.
  emit (if adjust_words == 0 then mkNop else mkAssign hpReg new_hp)
  tickyAllocHeap False adjust_words -- ...ditto
  setRealHp virt_hp
147
148
149 -------------------------------------------------------------------------
150 -- Making calls: directCall and slowCall
151 -------------------------------------------------------------------------
152
153 -- General plan is:
154 -- - we'll make *one* fast call, either to the function itself
155 -- (directCall) or to stg_ap_<pat>_fast (slowCall)
156 -- Any left-over arguments will be pushed on the stack,
157 --
158 -- e.g. Sp[old+8] = arg1
159 -- Sp[old+16] = arg2
160 -- Sp[old+32] = stg_ap_pp_info
161 -- R2 = arg3
162 -- R3 = arg4
163 -- call f() return to Nothing updfr_off: 32
164
165
directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
-- @directCall conv f n args@ calls @f(arg1, ..., argn)@ and applies
-- the result to the remaining arguments.  The function @f@ has arity
-- @n@, and there are guaranteed to be at least @n@ arguments.  Both
-- the arity and the argument list include void arguments.
directCall conv lbl arity stg_args = do
  argreps <- getArgRepsAmodes stg_args
  direct_call "directCall" conv lbl arity argreps
174
175
slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
-- @slowCall fun args@ applies @fun@ to @args@, returning the results
-- to the current sequel, going via the generic stg_ap_<pat>_fast RTS
-- entry point chosen by 'slowCallPattern'.
slowCall fun stg_args = do
  dflags   <- getDynFlags
  argsreps <- getArgRepsAmodes stg_args
  let (rts_fun, arity) = slowCallPattern (map fst argsreps)
  -- The function closure itself goes first, as a pointer argument.
  r <- direct_call "slow_call" NativeNodeCall
         (mkRtsApFastLabel rts_fun) arity ((P, Just fun) : argsreps)
  emitComment $ mkFastString $
    "slow_call for " ++ showSDoc dflags (ppr fun) ++
    " with pat " ++ unpackFS rts_fun
  return r
189
190
--------------
direct_call :: String
            -> Convention     -- e.g. NativeNodeCall or NativeDirectCall
            -> CLabel -> RepArity
            -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind
-- Shared worker for directCall and slowCall: make exactly one fast
-- call to 'lbl', passing the first 'real_arity' args with it and
-- pushing any left-over args (see Note [over-saturated calls]).
direct_call caller call_conv lbl arity args
  | debugIsOn && real_arity > length args  -- Too few args
  = do -- Caller should ensure that there are enough args!
       pprPanic "direct_call" $
            text caller <+> ppr arity <+>
            ppr lbl <+> ppr (length args) <+>
            ppr (map snd args) <+> ppr (map fst args)

  | null rest_args  -- Precisely the right number of arguments
  = emitCall (call_conv, NativeReturn) target (nonVArgs args)

  | otherwise       -- Note [over-saturated calls]
  = do dflags <- getDynFlags
       emitCallWithExtraStack (call_conv, NativeReturn)
                              target
                              (nonVArgs fast_args)
                              (nonVArgs (stack_args dflags))
  where
    target = CmmLit (CmmLabel lbl)
    (fast_args, rest_args) = splitAt real_arity args
    stack_args dflags = slowArgs dflags rest_args
    -- With NativeNodeCall the node pointer counts as one more argument.
    real_arity = case call_conv of
                   NativeNodeCall -> arity+1
                   _              -> arity
220
221
-- When constructing calls, it is easier to keep the ArgReps and the
-- CmmExprs zipped together.  However, a void argument has no
-- representation, so we need to use Maybe CmmExpr (the alternative of
-- using zeroCLit or even undefined would work, but would be ugly).
--
getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
getArgRepsAmodes args = mapM rep_amode args
  where
    rep_amode arg = case toArgRep (argPrimRep arg) of
      V   -> return (V, Nothing)
      rep -> do amode <- getArgAmode (NonVoid arg)
                return (rep, Just amode)
234
-- Drop the void arguments, keeping only the expressions that actually
-- have a representation.
nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
nonVArgs args = [arg | (_, Just arg) <- args]
239
240 {-
241 Note [over-saturated calls]
242
243 The natural thing to do for an over-saturated call would be to call
244 the function with the correct number of arguments, and then apply the
245 remaining arguments to the value returned, e.g.
246
247 f a b c d (where f has arity 2)
248 -->
249 r = call f(a,b)
250 call r(c,d)
251
252 but this entails
253 - saving c and d on the stack
254 - making a continuation info table
255 - at the continuation, loading c and d off the stack into regs
256 - finally, call r
257
258 Note that since there are a fixed number of different r's
259 (e.g. stg_ap_pp_fast), we can also pre-compile continuations
260 that correspond to each of them, rather than generating a fresh
261 one for each over-saturated call.
262
263 Not only does this generate much less code, it is faster too. We will
264 generate something like:
265
266 Sp[old+16] = c
267 Sp[old+24] = d
268 Sp[old+32] = stg_ap_pp_info
269 call f(a,b) -- usual calling convention
270
271 For the purposes of the CmmCall node, we count this extra stack as
272 just more arguments that we are passing on the stack (cml_args).
273 -}
274
-- | 'slowArgs' takes a list of function arguments and prepares them for
-- pushing on the stack as "extra" arguments to a function which requires
-- fewer arguments than we currently have.  Each chunk is preceded by the
-- matching stg_ap_<pat> return info table; with cost-centre profiling a
-- stg_restore_cccs frame is interleaved as well.
slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
slowArgs _ [] = []
slowArgs dflags args  -- careful: reps contains voids (V), but args does not
  | profiling = save_cccs ++ chunk ++ rest
  | otherwise = chunk ++ rest
  where
    profiling = gopt Opt_SccProfilingOn dflags

    (arg_pat, n)       = slowCallPattern (map fst args)
    (these, remaining) = splitAt n args
    rest               = slowArgs dflags remaining

    chunk = (N, Just (mkLblExpr stg_ap_pat)) : these
    stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat

    save_cccs     = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
    save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
292
293 -------------------------------------------------------------------------
294 ---- Laying out objects on the heap and stack
295 -------------------------------------------------------------------------
296
-- The heap always grows upwards, so hpRel is easy:
-- the word offset of The Thing, counted from (virtual) Hp.
hpRel :: VirtualHpOffset -- virtual offset of Hp
      -> VirtualHpOffset -- virtual offset of The Thing
      -> WordOff         -- integer word offset
hpRel hp off = off - hp
302
-- | Turn a virtual heap offset into an addressing expression relative
-- to the current *real* Hp register.
getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
getHpRelOffset virt_off = do
  dflags <- getDynFlags
  hp_usg <- getHpUsage
  let wds = hpRel (realHp hp_usg) virt_off
  return (cmmRegOffW dflags hpReg wds)
308
mkVirtHeapOffsets
  :: DynFlags
  -> Bool          -- True <=> is a thunk
  -> [(PrimRep,a)] -- Things to make offsets for
  -> (WordOff,     -- _Total_ number of words allocated
      WordOff,     -- Number of words allocated for *pointers*
      [(NonVoid a, VirtualHpOffset)])

-- Things with their offsets from start of object in order of
-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER
-- First in list gets lowest offset, which is initial offset + 1.
--
-- Void arguments are removed, so output list may be shorter than
-- input list
--
-- mkVirtHeapOffsets always returns boxed things with smaller offsets
-- than the unboxed things

mkVirtHeapOffsets dflags is_thunk things =
    (tot_wds, ptr_wds, ptrs_w_offs ++ non_ptrs_w_offs)
  where
    hdr_size | is_thunk  = thunkHdrSize dflags
             | otherwise = fixedHdrSize dflags

    -- Voids disappear entirely; GC pointers are laid out first.
    (ptrs, non_ptrs) =
        partition (isGcPtrRep . fst) (filterOut (isVoidRep . fst) things)

    (ptr_wds, ptrs_w_offs)     = mapAccumL assign_offset 0       ptrs
    (tot_wds, non_ptrs_w_offs) = mapAccumL assign_offset ptr_wds non_ptrs

    -- Accumulate words allocated so far; the offset of each thing is
    -- its position after the (fixed or thunk) header.
    assign_offset wds_so_far (rep, thing) =
        ( wds_so_far + argRepSizeW dflags (toArgRep rep)
        , (NonVoid thing, hdr_size + wds_so_far) )
341
mkVirtConstrOffsets :: DynFlags -> [(PrimRep,a)] -> (WordOff, WordOff, [(NonVoid a, VirtualHpOffset)])
-- Just like mkVirtHeapOffsets, but for constructors, which are never
-- thunks and hence always use the fixed header size.
mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags False
345
346
347 -------------------------------------------------------------------------
348 --
349 -- Making argument descriptors
350 --
351 -- An argument descriptor describes the layout of args on the stack,
352 -- both for * GC (stack-layout) purposes, and
353 -- * saving/restoring registers when a heap-check fails
354 --
355 -- Void arguments aren't important, therefore (contrast constructSlowCall)
356 --
357 -------------------------------------------------------------------------
358
359 -- bring in ARG_P, ARG_N, etc.
360 #include "../includes/rts/storage/FunTypes.h"
361
mkArgDescr :: DynFlags -> [Id] -> ArgDescr
-- Build the argument descriptor for a function's stack arguments:
-- a precompiled standard pattern where one matches, otherwise a
-- general bitmap.
mkArgDescr dflags args =
    case stdPattern arg_reps of
      Just spec_id -> ArgSpec spec_id
      Nothing      -> ArgGen (argBits dflags arg_reps)
  where
    -- Getting rid of voids eases matching of standard patterns
    arg_reps = filter isNonV (map idArgRep args)
370
argBits :: DynFlags -> [ArgRep] -> [Bool]  -- True for non-ptr, False for ptr
-- One entry per stack word of the argument area: False marks a word
-- holding a GC pointer, True marks a non-pointer word.  A multi-word
-- non-pointer argument contributes one True per word it occupies.
argBits _      []           = []
argBits dflags (P : args)   = False : argBits dflags args
argBits dflags (arg : args) = replicate (argRepSizeW dflags arg) True
                              ++ argBits dflags args
376
----------------------
stdPattern :: [ArgRep] -> Maybe Int
-- Match a (void-free) argument-rep sequence against the standard
-- layouts the RTS precompiles; the Int is one of the ARG_* constants
-- from includes/rts/storage/FunTypes.h (CPP-included above).
stdPattern reps
  = case reps of
      []    -> Just ARG_NONE -- just void args, probably
      [N]   -> Just ARG_N
      [P]   -> Just ARG_P
      [F]   -> Just ARG_F
      [D]   -> Just ARG_D
      [L]   -> Just ARG_L
      [V16] -> Just ARG_V16
      [V32] -> Just ARG_V32
      [V64] -> Just ARG_V64

      [N,N] -> Just ARG_NN
      [N,P] -> Just ARG_NP
      [P,N] -> Just ARG_PN
      [P,P] -> Just ARG_PP

      [N,N,N] -> Just ARG_NNN
      [N,N,P] -> Just ARG_NNP
      [N,P,N] -> Just ARG_NPN
      [N,P,P] -> Just ARG_NPP
      [P,N,N] -> Just ARG_PNN
      [P,N,P] -> Just ARG_PNP
      [P,P,N] -> Just ARG_PPN
      [P,P,P] -> Just ARG_PPP

      [P,P,P,P]     -> Just ARG_PPPP
      [P,P,P,P,P]   -> Just ARG_PPPPP
      [P,P,P,P,P,P] -> Just ARG_PPPPPP

      -- Anything else needs a general (ArgGen) bitmap.
      _ -> Nothing
410
411 -------------------------------------------------------------------------
412 --
413 -- Generating the info table and code for a closure
414 --
415 -------------------------------------------------------------------------
416
417 -- Here we make an info table of type 'CmmInfo'. The concrete
418 -- representation as a list of 'CmmAddr' is handled later
419 -- in the pipeline by 'cmmToRawCmm'.
420 -- When loading the free variables, a function closure pointer may be tagged,
421 -- so we must take it into account.
422
emitClosureProcAndInfoTable :: Bool  -- top-level?
                            -> Id    -- name of the closure
                            -> LambdaFormInfo
                            -> CmmInfoTable
                            -> [NonVoid Id]  -- incoming arguments
                            -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body
                            -> FCode ()
-- Emit the proc (and its info table) for a closure.  The body callback
-- receives the stack offset computed by mkCallEntry, the register
-- bound to the closure itself (node), and the argument registers.
emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body
 = do { dflags <- getDynFlags
        -- Bind the binder itself, but only if it's not a top-level
        -- binding. We need non-top let-bindings to refer to the
        -- top-level binding, which this binding would incorrectly shadow.
      ; node <- if top_lvl then return $ idToReg dflags (NonVoid bndr)
                else bindToReg (NonVoid bndr) lf_info
      ; let node_points = nodeMustPointToIt dflags lf_info
      ; arg_regs <- bindArgsToRegs args
        -- The argument list and the calling convention must agree on
        -- whether node is passed, so both are derived from the single
        -- node_points computation (previously nodeMustPointToIt was
        -- recomputed for conv, risking divergence).
      ; let args' = if node_points then (node : arg_regs) else arg_regs
            conv  = if node_points then NativeNodeCall
                                   else NativeDirectCall
            (offset, _, _) = mkCallEntry dflags conv args' []
      ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs)
      }
445
-- Data constructors need closures, but not with all the argument handling
-- needed for functions. The shared part goes here.
emitClosureAndInfoTable ::
  CmmInfoTable -> Convention -> [LocalReg] -> FCode () -> FCode ()
emitClosureAndInfoTable info_tbl conv args body = do
  blocks <- getCode body
  let lbl = toEntryLbl (cit_lbl info_tbl)
  emitProcWithConvention conv (Just info_tbl) lbl args blocks