Simplify and tidy up the handling of tuple names
[ghc.git] / compiler / codeGen / StgCmmLayout.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Building info tables.
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmLayout (
10 mkArgDescr,
11 emitCall, emitReturn, adjustHpBackwards,
12
13 emitClosureProcAndInfoTable,
14 emitClosureAndInfoTable,
15
16 slowCall, directCall,
17
18 mkVirtHeapOffsets, mkVirtConstrOffsets, getHpRelOffset,
19
20 ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep
21 ) where
22
23
24 #include "HsVersions.h"
25
26 import StgCmmClosure
27 import StgCmmEnv
28 import StgCmmArgRep -- notably: ( slowCallPattern )
29 import StgCmmTicky
30 import StgCmmMonad
31 import StgCmmUtils
32 import StgCmmProf (curCCS)
33
34 import MkGraph
35 import SMRep
36 import Cmm
37 import CmmUtils
38 import CmmInfo
39 import CLabel
40 import StgSyn
41 import Id
42 import TyCon ( PrimRep(..) )
43 import BasicTypes ( RepArity )
44 import DynFlags
45 import Module
46
47 import Util
48 import Data.List
49 import Outputable
50 import FastString
51 import Control.Monad
52
53 ------------------------------------------------------------------------
54 -- Call and return sequences
55 ------------------------------------------------------------------------
56
-- | Return multiple values to the sequel.
--
-- If the sequel is @Return@
--
-- > return (x,y)
--
-- If the sequel is @AssignTo [p,q]@
--
-- > p=x; q=y;
--
emitReturn :: [CmmExpr] -> FCode ReturnKind
emitReturn results
  = do { dflags    <- getDynFlags
       ; sequel    <- getSequel
       ; updfr_off <- getUpdFrameOff
       ; case sequel of
           Return _ ->
             -- Really returning: load the return address out of the
             -- update frame and jump to its entry code, passing the
             -- results in the return convention.
             do { adjustHpBackwards
                ; let e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
                ; emit (mkReturn dflags (entryCode dflags e) results updfr_off)
                }
           AssignTo regs adjust ->
             -- Local continuation: just copy the results into the
             -- destination registers.  'adjust' records whether the
             -- context requires the heap pointer to be retracted first.
             do { when adjust adjustHpBackwards
                ; emitMultiAssign regs results }
       ; return AssignedDirectly
       }
83
84
-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
-- using the call\/return convention @conv@, passing @args@, and
-- returning the results to the current sequel.  It is simply
-- 'emitCallWithExtraStack' with no extra stack frames.
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
emitCall conv_pair fn actuals =
    emitCallWithExtraStack conv_pair fn actuals noExtraStack
92
93
-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
-- entry-code of @fun@, using the call/return convention @conv@,
-- passing @args@, pushing some extra stack frames described by
-- @stack@, and returning the results to the current sequel.
--
emitCallWithExtraStack
   :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
   -> [CmmExpr] -> FCode ReturnKind
emitCallWithExtraStack (callConv, retConv) fun args extra_stack
  = do { dflags <- getDynFlags
       ; adjustHpBackwards
       ; sequel <- getSequel
       ; updfr_off <- getUpdFrameOff
       ; case sequel of
           Return _ -> do
             -- Tail position: a plain jump, pushing the extra stack
             -- underneath the jump target's arguments.
             emit $ mkJumpExtra dflags callConv fun args updfr_off extra_stack
             return AssignedDirectly
           AssignTo res_regs _ -> do
             -- Non-tail call: make a fresh return point 'k', copy the
             -- results into 'res_regs' on return, and record the
             -- continuation so the caller can see where control lands.
             k <- newLabelC
             let area = Young k
                 (off, _, copyin) = copyInOflow dflags retConv area res_regs []
                 copyout = mkCallReturnsTo dflags fun callConv args k off updfr_off
                             extra_stack
             emit (copyout <*> mkLabel k <*> copyin)
             return (ReturnedTo k off)
       }
120
121
adjustHpBackwards :: FCode ()
-- This function adjusts the heap pointer just before a tail call or
-- return.  At a call or return, the virtual heap pointer may be less
-- than the real Hp, because the latter was advanced to deal with
-- the worst-case branch of the code, and we may be in a better-case
-- branch.  In that case, move the real Hp *back* and retract some
-- ticky allocation count.
--
-- It *does not* deal with high-water-mark adjustment.  That's done by
-- functions which allocate heap.
adjustHpBackwards
  = do { hp_usg <- getHpUsage
       ; let rHp = realHp hp_usg
             vHp = virtHp hp_usg
             adjust_words = vHp - rHp   -- <= 0 when we over-allocated
       ; new_hp <- getHpRelOffset vHp

       ; emit (if adjust_words == 0
               then mkNop
               else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp

       ; tickyAllocHeap False adjust_words -- ...ditto

       -- From here on the real Hp agrees with the virtual one again.
       ; setRealHp vHp
       }
147
148
149 -------------------------------------------------------------------------
150 -- Making calls: directCall and slowCall
151 -------------------------------------------------------------------------
152
153 -- General plan is:
154 -- - we'll make *one* fast call, either to the function itself
155 -- (directCall) or to stg_ap_<pat>_fast (slowCall)
156 -- Any left-over arguments will be pushed on the stack,
157 --
158 -- e.g. Sp[old+8] = arg1
159 -- Sp[old+16] = arg2
160 -- Sp[old+32] = stg_ap_pp_info
161 -- R2 = arg3
162 -- R3 = arg4
163 -- call f() return to Nothing updfr_off: 32
164
165
-- | @directCall conv lbl n args@ calls @lbl(arg1, ..., argn)@ and
-- applies the result to the remaining args.  The function has arity
-- @n@, and there are guaranteed to be at least @n@ args.  Both the
-- arity and the argument list include void args.
directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
directCall conv lbl arity stg_args =
    getArgRepsAmodes stg_args >>= direct_call "directCall" conv lbl arity
174
175
slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
  = do dflags <- getDynFlags
       argsreps <- getArgRepsAmodes stg_args
       -- Pick the generic-apply RTS entry point (e.g. stg_ap_pp_fast)
       -- matching the leading argument reps, plus how many args it eats.
       let (rts_fun, arity) = slowCallPattern (map fst argsreps)

       -- Generate the generic slow-call code once; we may either emit
       -- it unconditionally or use it as the fallback branch below.
       (r, slow_code) <- getCodeR $ do
          r <- direct_call "slow_call" NativeNodeCall
                 (mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps)
          emitComment $ mkFastString ("slow_call for " ++
                                      showSDoc dflags (ppr fun) ++
                                      " with pat " ++ unpackFS rts_fun)
          return r

       -- Note [avoid intermediate PAPs]
       let n_args = length stg_args
       if n_args > arity && optLevel dflags >= 2
          then do
            -- Cache the (possibly tagged) function closure and its
            -- info pointer in temporaries so each is computed once.
            funv <- (CmmReg . CmmLocal) `fmap` assignTemp fun
            fun_iptr <- (CmmReg . CmmLocal) `fmap`
                   assignTemp (closureInfoPtr dflags (cmmUntag dflags funv))

            -- ToDo: we could do slightly better here by reusing the
            -- continuation from the slow call, which we have in r.
            -- Also we'd like to push the continuation on the stack
            -- before the branch, so that we only get one copy of the
            -- code that saves all the live variables across the
            -- call, but that might need some improvements to the
            -- special case in the stack layout code to handle this
            -- (see Note [diamond proc point]).

            -- The fast path: call the function's own entry code
            -- directly with all the arguments.
            fast_code <- getCode $
               emitCall (NativeNodeCall, NativeReturn)
                 (entryCode dflags fun_iptr)
                 (nonVArgs ((P,Just funv):argsreps))

            slow_lbl <- newLabelC
            fast_lbl <- newLabelC
            is_tagged_lbl <- newLabelC
            end_lbl <- newLabelC

            -- The fast path is only valid if the function's recorded
            -- arity equals the number of args we are passing.
            let correct_arity = cmmEqWord dflags (funInfoArity dflags fun_iptr)
                                                 (mkIntExpr dflags n_args)

            -- if (evaluated f) then (if arity matches then fast else slow)
            --                  else slow
            emit (mkCbranch (cmmIsTagged dflags funv) is_tagged_lbl slow_lbl
                  <*> mkLabel is_tagged_lbl
                  <*> mkCbranch correct_arity fast_lbl slow_lbl
                  <*> mkLabel fast_lbl
                  <*> fast_code
                  <*> mkBranch end_lbl
                  <*> mkLabel slow_lbl
                  <*> slow_code
                  <*> mkLabel end_lbl)
            return r

          else do
            -- Common case: just the generic slow call.
            emit slow_code
            return r
235
236
237 -- Note [avoid intermediate PAPs]
238 --
239 -- A slow call which needs multiple generic apply patterns will be
240 -- almost guaranteed to create one or more intermediate PAPs when
241 -- applied to a function that takes the correct number of arguments.
242 -- We try to avoid this situation by generating code to test whether
243 -- we are calling a function with the correct number of arguments
244 -- first, i.e.:
245 --
-- if (TAG(f) != 0) { // f is not a thunk
247 -- if (f->info.arity == n) {
248 -- ... make a fast call to f ...
249 -- }
250 -- }
251 -- ... otherwise make the slow call ...
252 --
253 -- We *only* do this when the call requires multiple generic apply
254 -- functions, which requires pushing extra stack frames and probably
255 -- results in intermediate PAPs. (I say probably, because it might be
256 -- that we're over-applying a function, but that seems even less
257 -- likely).
258 --
259 -- This very rarely applies, but if it does happen in an inner loop it
260 -- can have a severe impact on performance (#6084).
261
262
263 --------------
-- | Shared worker for 'directCall' and 'slowCall': make one fast call
-- to @lbl@ with the first @arity@ args in registers, pushing any
-- left-over args on the stack (see Note [over-saturated calls]).
direct_call :: String
            -> Convention     -- e.g. NativeNodeCall or NativeDirectCall
            -> CLabel -> RepArity
            -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind
direct_call caller call_conv lbl arity args
  | debugIsOn && real_arity > length args  -- Too few args
  = do -- Caller should ensure that there are enough args!
       pprPanic "direct_call" $
            text caller <+> ppr arity <+>
            ppr lbl <+> ppr (length args) <+>
            ppr (map snd args) <+> ppr (map fst args)

  | null rest_args  -- Precisely the right number of arguments
  = emitCall (call_conv, NativeReturn) target (nonVArgs args)

  | otherwise       -- Note [over-saturated calls]
  = do dflags <- getDynFlags
       emitCallWithExtraStack (call_conv, NativeReturn)
                              target
                              (nonVArgs fast_args)
                              (nonVArgs (stack_args dflags))
  where
    target = CmmLit (CmmLabel lbl)
    (fast_args, rest_args) = splitAt real_arity args
    stack_args dflags = slowArgs dflags rest_args
    -- NativeNodeCall passes the closure itself in R1, so it consumes
    -- one extra "argument" beyond the declared arity.
    real_arity = case call_conv of
                   NativeNodeCall -> arity+1
                   _              -> arity
292
293
294 -- When constructing calls, it is easier to keep the ArgReps and the
295 -- CmmExprs zipped together. However, a void argument has no
296 -- representation, so we need to use Maybe CmmExpr (the alternative of
297 -- using zeroCLit or even undefined would work, but would be ugly).
298 --
getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
getArgRepsAmodes = mapM rep_amode
  where
    -- A void argument has a rep but no code-level value.
    rep_amode arg =
        case toArgRep (argPrimRep arg) of
            V   -> return (V, Nothing)
            rep -> do amode <- getArgAmode (NonVoid arg)
                      return (rep, Just amode)
306
-- | Drop the void arguments, keeping only the actual Cmm expressions.
nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
nonVArgs args = [ arg | (_, Just arg) <- args ]
311
312 {-
313 Note [over-saturated calls]
314
315 The natural thing to do for an over-saturated call would be to call
316 the function with the correct number of arguments, and then apply the
317 remaining arguments to the value returned, e.g.
318
319 f a b c d (where f has arity 2)
320 -->
321 r = call f(a,b)
322 call r(c,d)
323
324 but this entails
325 - saving c and d on the stack
326 - making a continuation info table
327 - at the continuation, loading c and d off the stack into regs
328 - finally, call r
329
330 Note that since there are a fixed number of different r's
331 (e.g. stg_ap_pp_fast), we can also pre-compile continuations
332 that correspond to each of them, rather than generating a fresh
333 one for each over-saturated call.
334
335 Not only does this generate much less code, it is faster too. We will
336 generate something like:
337
338 Sp[old+16] = c
339 Sp[old+24] = d
340 Sp[old+32] = stg_ap_pp_info
341 call f(a,b) -- usual calling convention
342
343 For the purposes of the CmmCall node, we count this extra stack as
344 just more arguments that we are passing on the stack (cml_args).
345 -}
346
-- | 'slowArgs' takes a list of function arguments and prepares them for
-- pushing on the stack for "extra" arguments to a function which requires
-- fewer arguments than we currently have.
slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
slowArgs _ [] = []
slowArgs dflags args -- careful: reps contains voids (V), but args does not
  | gopt Opt_SccProfilingOn dflags
  -- When profiling, interleave a stg_restore_cccs frame (label plus
  -- the current cost-centre stack) before each generic-apply chunk.
  = save_cccs ++ this_pat ++ slowArgs dflags rest_args
  | otherwise = this_pat ++ slowArgs dflags rest_args
  where
    -- Consume the longest prefix matching a precompiled stg_ap_<pat>
    -- pattern, then recurse on whatever is left.
    (arg_pat, n)            = slowCallPattern (map fst args)
    (call_args, rest_args)  = splitAt n args

    -- Each chunk is pushed as: stg_ap_<pat> return-info-table, then
    -- the chunk's arguments.
    stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
    this_pat   = (N, Just (mkLblExpr stg_ap_pat)) : call_args
    save_cccs  = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
    save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
364
365 -------------------------------------------------------------------------
366 ---- Laying out objects on the heap and stack
367 -------------------------------------------------------------------------
368
-- The heap always grows upwards, so the word offset of a thing
-- relative to Hp is just the difference of the two virtual offsets.
hpRel :: VirtualHpOffset  -- virtual offset of Hp
      -> VirtualHpOffset  -- virtual offset of The Thing
      -> WordOff          -- integer word offset
hpRel hp = subtract hp
374
getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
-- See Note [Virtual and real heap pointers] in StgCmmMonad
getHpRelOffset voff = do
    dflags <- getDynFlags
    usage  <- getHpUsage
    -- Address the slot relative to the *real* Hp register.
    let words_off = hpRel (realHp usage) voff
    return (cmmRegOffW dflags hpReg words_off)
381
mkVirtHeapOffsets
  :: DynFlags
  -> Bool          -- True <=> is a thunk
  -> [(PrimRep,a)] -- Things to make offsets for
  -> (WordOff,     -- _Total_ number of words allocated
      WordOff,     -- Number of words allocated for *pointers*
      [(NonVoid a, ByteOff)])

-- Things with their offsets from start of object in order of
-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER
-- First in list gets lowest offset, which is initial offset + 1.
--
-- Void arguments are removed, so output list may be shorter than
-- input list
--
-- mkVirtHeapOffsets always returns boxed things with smaller offsets
-- than the unboxed things

mkVirtHeapOffsets dflags is_thunk things
  = ( bytesToWordsRoundUp dflags tot_bytes
    , bytesToWordsRoundUp dflags bytes_of_ptrs
    , ptrs_w_offsets ++ non_ptrs_w_offsets
    )
  where
    -- Thunks have a different header size from other closures.
    hdr_words | is_thunk  = thunkHdrSize dflags
              | otherwise = fixedHdrSizeW dflags
    hdr_bytes = wordsToBytes dflags hdr_words

    non_void_things  = filterOut (isVoidRep . fst) things
    -- Pointers first, so the GC can scan a contiguous prefix.
    (ptrs, non_ptrs) = partition (isGcPtrRep . fst) non_void_things

    -- Lay out the pointers starting at byte 0, then continue with the
    -- non-pointers from wherever the pointers ended.
    (bytes_of_ptrs, ptrs_w_offsets) =
       mapAccumL computeOffset 0 ptrs
    (tot_bytes, non_ptrs_w_offsets) =
       mapAccumL computeOffset bytes_of_ptrs non_ptrs

    -- Accumulates bytes-so-far (excluding header); the emitted offset
    -- includes the header size.
    computeOffset bytes_so_far (rep, thing)
      = (bytes_so_far + wordsToBytes dflags (argRepSizeW dflags (toArgRep rep)),
         (NonVoid thing, hdr_bytes + bytes_so_far))
421
-- | Just like 'mkVirtHeapOffsets', but for constructors (which are
-- never thunks).
mkVirtConstrOffsets
  :: DynFlags -> [(PrimRep,a)]
  -> (WordOff, WordOff, [(NonVoid a, ByteOff)])
mkVirtConstrOffsets dflags fields = mkVirtHeapOffsets dflags False fields
427
428
429 -------------------------------------------------------------------------
430 --
431 -- Making argument descriptors
432 --
433 -- An argument descriptor describes the layout of args on the stack,
434 -- both for * GC (stack-layout) purposes, and
435 -- * saving/restoring registers when a heap-check fails
436 --
437 -- Void arguments aren't important, therefore (contrast constructSlowCall)
438 --
439 -------------------------------------------------------------------------
440
441 -- bring in ARG_P, ARG_N, etc.
442 #include "../includes/rts/storage/FunTypes.h"
443
-- | Build the argument descriptor for a function's stack arguments:
-- a precompiled standard pattern if one matches, otherwise a generic
-- bitmap of pointerhood.
mkArgDescr :: DynFlags -> [Id] -> ArgDescr
mkArgDescr dflags args =
    case stdPattern arg_reps of
        Just spec_id -> ArgSpec spec_id
        Nothing      -> ArgGen (argBits dflags arg_reps)
  where
    -- Getting rid of voids eases matching of standard patterns
    arg_reps = filter isNonV (map idArgRep args)
452
-- | One 'Bool' per word of stack that the arguments occupy:
-- 'False' for a (single-word) pointer, 'True' for every word of a
-- non-pointer (which may occupy several words, e.g. Double on a
-- 32-bit platform).
argBits :: DynFlags -> [ArgRep] -> [Bool]    -- True for non-ptr, False for ptr
argBits _      []           = []
argBits dflags (P : args)   = False : argBits dflags args
argBits dflags (arg : args) = replicate (argRepSizeW dflags arg) True
                              ++ argBits dflags args
458
459 ----------------------
-- | Match a (void-free) argument-rep sequence against the standard
-- patterns the RTS has precompiled descriptors for.  The ARG_* values
-- come from the CPP-included "FunTypes.h" header above.
stdPattern :: [ArgRep] -> Maybe Int
stdPattern reps
  = case reps of
      []    -> Just ARG_NONE -- just void args, probably
      [N]   -> Just ARG_N
      [P]   -> Just ARG_P
      [F]   -> Just ARG_F
      [D]   -> Just ARG_D
      [L]   -> Just ARG_L
      [V16] -> Just ARG_V16
      [V32] -> Just ARG_V32
      [V64] -> Just ARG_V64

      [N,N] -> Just ARG_NN
      [N,P] -> Just ARG_NP
      [P,N] -> Just ARG_PN
      [P,P] -> Just ARG_PP

      [N,N,N] -> Just ARG_NNN
      [N,N,P] -> Just ARG_NNP
      [N,P,N] -> Just ARG_NPN
      [N,P,P] -> Just ARG_NPP
      [P,N,N] -> Just ARG_PNN
      [P,N,P] -> Just ARG_PNP
      [P,P,N] -> Just ARG_PPN
      [P,P,P] -> Just ARG_PPP

      [P,P,P,P]     -> Just ARG_PPPP
      [P,P,P,P,P]   -> Just ARG_PPPPP
      [P,P,P,P,P,P] -> Just ARG_PPPPPP

      -- Anything else needs a generic bitmap descriptor.
      _ -> Nothing
492
493 -------------------------------------------------------------------------
494 --
495 -- Generating the info table and code for a closure
496 --
497 -------------------------------------------------------------------------
498
499 -- Here we make an info table of type 'CmmInfo'. The concrete
500 -- representation as a list of 'CmmAddr' is handled later
501 -- in the pipeline by 'cmmToRawCmm'.
502 -- When loading the free variables, a function closure pointer may be tagged,
503 -- so we must take it into account.
504
505 emitClosureProcAndInfoTable :: Bool -- top-level?
506 -> Id -- name of the closure
507 -> LambdaFormInfo
508 -> CmmInfoTable
509 -> [NonVoid Id] -- incoming arguments
510 -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body
511 -> FCode ()
512 emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body
513 = do { dflags <- getDynFlags
514 -- Bind the binder itself, but only if it's not a top-level
515 -- binding. We need non-top let-bindings to refer to the
516 -- top-level binding, which this binding would incorrectly shadow.
517 ; node <- if top_lvl then return $ idToReg dflags (NonVoid bndr)
518 else bindToReg (NonVoid bndr) lf_info
519 ; let node_points = nodeMustPointToIt dflags lf_info
520 ; arg_regs <- bindArgsToRegs args
521 ; let args' = if node_points then (node : arg_regs) else arg_regs
522 conv = if nodeMustPointToIt dflags lf_info then NativeNodeCall
523 else NativeDirectCall
524 (offset, _, _) = mkCallEntry dflags conv args' []
525 ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs)
526 }
527
-- Data constructors need closures, but not with all the argument
-- handling needed for functions.  The shared part goes here: generate
-- the body's code and emit it as a proc with the given info table.
emitClosureAndInfoTable ::
  CmmInfoTable -> Convention -> [LocalReg] -> FCode () -> FCode ()
emitClosureAndInfoTable info_tbl conv args body = do
    code <- getCode body
    emitProcWithConvention conv (Just info_tbl) entry_lbl args code
  where
    entry_lbl = toEntryLbl (cit_lbl info_tbl)