{-# LANGUAGE CPP #-}

-----------------------------------------------------------------------------
--
-- Building info tables.
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module StgCmmLayout (
        mkArgDescr,
        emitCall, emitReturn, adjustHpBackwards,

        emitClosureProcAndInfoTable,
        emitClosureAndInfoTable,

        slowCall, directCall,

        mkVirtHeapOffsets, mkVirtConstrOffsets, mkVirtConstrSizes, getHpRelOffset,

        ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep
  ) where


#include "HsVersions.h"

import Prelude hiding ((<*>))

import StgCmmClosure
import StgCmmEnv
import StgCmmArgRep -- notably: ( slowCallPattern )
import StgCmmTicky
import StgCmmMonad
import StgCmmUtils
import StgCmmProf (curCCS)

import MkGraph
import SMRep
import BlockId
import Cmm
import CmmUtils
import CmmInfo
import CLabel
import StgSyn
import Id
import TyCon             ( PrimRep(..) )
import BasicTypes        ( RepArity )
import DynFlags
import Module

import Util
import Data.List
import Outputable
import FastString
import Control.Monad

------------------------------------------------------------------------
--              Call and return sequences
------------------------------------------------------------------------

-- | Return multiple values to the sequel
--
-- If the sequel is @Return@
--
-- >     return (x,y)
--
-- If the sequel is @AssignTo [p,q]@
--
-- >     p=x; q=y;
--
emitReturn :: [CmmExpr] -> FCode ReturnKind
emitReturn results
  = do { dflags    <- getDynFlags
       ; sequel    <- getSequel
       ; updfr_off <- getUpdFrameOff
       ; case sequel of
           Return ->
             do { adjustHpBackwards
                ; let e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
                ; emit (mkReturn dflags (entryCode dflags e) results updfr_off)
                }
           AssignTo regs adjust ->
             do { when adjust adjustHpBackwards
                ; emitMultiAssign regs results }
       ; return AssignedDirectly
       }


-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
-- using the call/return convention @conv@, passing @args@, and
-- returning the results to the current sequel.
--
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
emitCall convs fun args
  = emitCallWithExtraStack convs fun args noExtraStack


-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
-- entry-code of @fun@, using the call/return convention @conv@,
-- passing @args@, pushing some extra stack frames described by
-- @stack@, and returning the results to the current sequel.
--
emitCallWithExtraStack
  :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
  -> [CmmExpr] -> FCode ReturnKind
emitCallWithExtraStack (callConv, retConv) fun args extra_stack
  = do { dflags <- getDynFlags
       ; adjustHpBackwards
       ; sequel <- getSequel
       ; updfr_off <- getUpdFrameOff
       ; case sequel of
           Return -> do
             emit $ mkJumpExtra dflags callConv fun args updfr_off extra_stack
             return AssignedDirectly
           AssignTo res_regs _ -> do
             k <- newBlockId
             let area = Young k
                 (off, _, copyin) = copyInOflow dflags retConv area res_regs []
                 copyout = mkCallReturnsTo dflags fun callConv args k off updfr_off
                                           extra_stack
             tscope <- getTickScope
             emit (copyout <*> mkLabel k tscope <*> copyin)
             return (ReturnedTo k off)
       }

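-- A sketch of what the @AssignTo@ case above emits (illustrative only;
-- the names are schematic, not real Cmm output):
--
-- >      call fun(args) returns to k   // with extra_stack pushed
-- >   k: res_regs = <results arriving per retConv>
--
-- 'ReturnedTo k off' then tells our caller that control resumes at
-- block k, with off recording the size of the young return area.
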

adjustHpBackwards :: FCode ()
-- This function adjusts the heap pointer just before a tail call or
-- return.  At a call or return, the virtual heap pointer may be less
-- than the real Hp, because the latter was advanced to deal with
-- the worst-case branch of the code, and we may be in a better-case
-- branch.  In that case, move the real Hp *back* and retract some
-- ticky allocation count.
--
-- It *does not* deal with high-water-mark adjustment.  That's done by
-- functions which allocate heap.
adjustHpBackwards
  = do { hp_usg <- getHpUsage
       ; let rHp = realHp hp_usg
             vHp = virtHp hp_usg
             adjust_words = vHp - rHp
       ; new_hp <- getHpRelOffset vHp

       ; emit (if adjust_words == 0
               then mkNop
               else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp

       ; tickyAllocHeap False adjust_words -- ...ditto

       ; setRealHp vHp
       }
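
-- A worked example (a sketch, with made-up numbers): if the worst-case
-- branch allocated 6 words (realHp = 6) but we took a branch that only
-- allocated 4 (virtHp = 4), then adjust_words is -2 and we emit, in
-- effect,
--
-- >    Hp = Hp - 2*SIZEOF_W   // move the real Hp back two words
--
-- and retract two words of ticky allocation count.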


-------------------------------------------------------------------------
--      Making calls: directCall and slowCall
-------------------------------------------------------------------------

-- General plan is:
--   - we'll make *one* fast call, either to the function itself
--     (directCall) or to stg_ap_<pat>_fast (slowCall)
--     Any left-over arguments will be pushed on the stack,
--
--       e.g. Sp[old+8]  = arg1
--            Sp[old+16] = arg2
--            Sp[old+32] = stg_ap_pp_info
--            R2 = arg3
--            R3 = arg4
--            call f() return to Nothing updfr_off: 32


directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
-- (directCall f n args)
-- calls f(arg1, ..., argn), and applies the result to the remaining args
-- The function f has arity n, and there are guaranteed at least n args
-- Both arity and args include void args
directCall conv lbl arity stg_args
  = do { argreps <- getArgRepsAmodes stg_args
       ; direct_call "directCall" conv lbl arity argreps }


slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
  = do dflags <- getDynFlags
       argsreps <- getArgRepsAmodes stg_args
       let (rts_fun, arity) = slowCallPattern (map fst argsreps)

       (r, slow_code) <- getCodeR $ do
          r <- direct_call "slow_call" NativeNodeCall
                 (mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps)
          emitComment $ mkFastString ("slow_call for " ++
                                      showSDoc dflags (ppr fun) ++
                                      " with pat " ++ unpackFS rts_fun)
          return r

       -- Note [avoid intermediate PAPs]
       let n_args = length stg_args
       if n_args > arity && optLevel dflags >= 2
          then do
            funv <- (CmmReg . CmmLocal) `fmap` assignTemp fun
            fun_iptr <- (CmmReg . CmmLocal) `fmap`
                        assignTemp (closureInfoPtr dflags (cmmUntag dflags funv))

            -- ToDo: we could do slightly better here by reusing the
            -- continuation from the slow call, which we have in r.
            -- Also we'd like to push the continuation on the stack
            -- before the branch, so that we only get one copy of the
            -- code that saves all the live variables across the
            -- call, but that might need some improvements to the
            -- special case in the stack layout code to handle this
            -- (see Note [diamond proc point]).

            fast_code <- getCode $
               emitCall (NativeNodeCall, NativeReturn)
                 (entryCode dflags fun_iptr)
                 (nonVArgs ((P,Just funv):argsreps))

            slow_lbl <- newBlockId
            fast_lbl <- newBlockId
            is_tagged_lbl <- newBlockId
            end_lbl <- newBlockId

            let correct_arity = cmmEqWord dflags (funInfoArity dflags fun_iptr)
                                                 (mkIntExpr dflags n_args)

            tscope <- getTickScope
            emit (mkCbranch (cmmIsTagged dflags funv)
                            is_tagged_lbl slow_lbl (Just True)
                  <*> mkLabel is_tagged_lbl tscope
                  <*> mkCbranch correct_arity fast_lbl slow_lbl (Just True)
                  <*> mkLabel fast_lbl tscope
                  <*> fast_code
                  <*> mkBranch end_lbl
                  <*> mkLabel slow_lbl tscope
                  <*> slow_code
                  <*> mkLabel end_lbl tscope)
            return r

          else do
            emit slow_code
            return r


-- Note [avoid intermediate PAPs]
--
-- A slow call which needs multiple generic apply patterns will be
-- almost guaranteed to create one or more intermediate PAPs when
-- applied to a function that takes the correct number of arguments.
-- We try to avoid this situation by generating code to test whether
-- we are calling a function with the correct number of arguments
-- first, i.e.:
--
--    if (TAG(f) != 0) {   // f is not a thunk
--        if (f->info.arity == n) {
--           ... make a fast call to f ...
--        }
--    }
--    ... otherwise make the slow call ...
--
-- We *only* do this when the call requires multiple generic apply
-- functions, which requires pushing extra stack frames and probably
-- results in intermediate PAPs.  (I say probably, because it might be
-- that we're over-applying a function, but that seems even less
-- likely).
--
-- This very rarely applies, but if it does happen in an inner loop it
-- can have a severe impact on performance (#6084).


--------------
direct_call :: String
            -> Convention     -- e.g. NativeNodeCall or NativeDirectCall
            -> CLabel -> RepArity
            -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind
direct_call caller call_conv lbl arity args
  | debugIsOn && args `lengthLessThan` real_arity  -- Too few args
  = do -- Caller should ensure that there are enough args!
       pprPanic "direct_call" $
            text caller <+> ppr arity <+>
            ppr lbl <+> ppr (length args) <+>
            ppr (map snd args) <+> ppr (map fst args)

  | null rest_args  -- Precisely the right number of arguments
  = emitCall (call_conv, NativeReturn) target (nonVArgs args)

  | otherwise       -- Note [over-saturated calls]
  = do dflags <- getDynFlags
       emitCallWithExtraStack (call_conv, NativeReturn)
                              target
                              (nonVArgs fast_args)
                              (nonVArgs (stack_args dflags))
  where
    target = CmmLit (CmmLabel lbl)
    (fast_args, rest_args) = splitAt real_arity args
    stack_args dflags = slowArgs dflags rest_args
    real_arity = case call_conv of
                   NativeNodeCall -> arity+1
                   _              -> arity
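
-- For example (a sketch): a NativeNodeCall to a function of arity 2,
-- with args = [fun, a, b, c] (the node pointer is included in args),
-- has real_arity = 3, so fast_args = [fun, a, b] travel in the usual
-- argument locations and rest_args = [c] is pushed via slowArgs.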


-- When constructing calls, it is easier to keep the ArgReps and the
-- CmmExprs zipped together.  However, a void argument has no
-- representation, so we need to use Maybe CmmExpr (the alternative of
-- using zeroCLit or even undefined would work, but would be ugly).
--
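-- For example (an illustrative sketch, with schematic names):
--
-- >    getArgRepsAmodes [x :: Int#, v :: Void#, p]
-- >      ==> [(N, Just x'), (V, Nothing), (P, Just p')]
-- >    nonVArgs [(N, Just x'), (V, Nothing), (P, Just p')]
-- >      ==> [x', p']
--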
getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
getArgRepsAmodes = mapM getArgRepAmode
  where getArgRepAmode arg
           | V <- rep  = return (V, Nothing)
           | otherwise = do expr <- getArgAmode (NonVoid arg)
                            return (rep, Just expr)
           where rep = toArgRep (argPrimRep arg)

nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
nonVArgs [] = []
nonVArgs ((_,Nothing)  : args) = nonVArgs args
nonVArgs ((_,Just arg) : args) = arg : nonVArgs args

{-
Note [over-saturated calls]

The natural thing to do for an over-saturated call would be to call
the function with the correct number of arguments, and then apply the
remaining arguments to the value returned, e.g.

  f a b c d   (where f has arity 2)
  -->
  r = call f(a,b)
  call r(c,d)

but this entails
 - saving c and d on the stack
 - making a continuation info table
 - at the continuation, loading c and d off the stack into regs
 - finally, call r

Note that since there are a fixed number of different r's
(e.g. stg_ap_pp_fast), we can also pre-compile continuations
that correspond to each of them, rather than generating a fresh
one for each over-saturated call.

Not only does this generate much less code, it is faster too.  We will
generate something like:

  Sp[old+16] = c
  Sp[old+24] = d
  Sp[old+32] = stg_ap_pp_info
  call f(a,b) -- usual calling convention

For the purposes of the CmmCall node, we count this extra stack as
just more arguments that we are passing on the stack (cml_args).
-}

-- | 'slowArgs' takes a list of function arguments and prepares them for
-- pushing on the stack for "extra" arguments to a function which requires
-- fewer arguments than we currently have.
slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
slowArgs _ [] = []
slowArgs dflags args -- careful: reps contains voids (V), but args does not
  | gopt Opt_SccProfilingOn dflags
              = save_cccs ++ this_pat ++ slowArgs dflags rest_args
  | otherwise =              this_pat ++ slowArgs dflags rest_args
  where
    (arg_pat, n)           = slowCallPattern (map fst args)
    (call_args, rest_args) = splitAt n args

    stg_ap_pat = mkCmmRetInfoLabel rtsUnitId arg_pat
    this_pat   = (N, Just (mkLblExpr stg_ap_pat)) : call_args
    save_cccs  = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
    save_cccs_lbl = mkCmmRetInfoLabel rtsUnitId (fsLit "stg_restore_cccs")
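
-- For example (a sketch, assuming no cost-centre profiling and the
-- stg_ap patterns recognised by slowCallPattern): left-over arguments
-- [(P,a), (P,b), (N,c)] are chunked as "stg_ap_pp" plus "stg_ap_n":
--
-- >    slowArgs dflags [(P,a), (P,b), (N,c)]
-- >      ==> (N, stg_ap_pp_info) : (P,a) : (P,b)
-- >        : (N, stg_ap_n_info)  : (N,c) : []
--
-- Each chunk of arguments is preceded by the info pointer of the
-- generic-apply frame that will consume it.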

-------------------------------------------------------------------------
----    Laying out objects on the heap and stack
-------------------------------------------------------------------------

-- The heap always grows upwards, so hpRel is easy to compute
hpRel :: VirtualHpOffset         -- virtual offset of Hp
      -> VirtualHpOffset         -- virtual offset of The Thing
      -> WordOff                 -- integer word offset
hpRel hp off = off - hp
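
-- For example: with the real Hp at virtual offset 6, a thing at virtual
-- offset 8 lives at word offset @hpRel 6 8 == 2@ above the real Hp.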

getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
-- See Note [Virtual and real heap pointers] in StgCmmMonad
getHpRelOffset virtual_offset
  = do dflags <- getDynFlags
       hp_usg <- getHpUsage
       return (cmmRegOffW dflags hpReg (hpRel (realHp hp_usg) virtual_offset))

mkVirtHeapOffsets
  :: DynFlags
  -> Bool                     -- True <=> is a thunk
  -> [NonVoid (PrimRep,a)]    -- Things to make offsets for
  -> (WordOff,                -- _Total_ number of words allocated
      WordOff,                -- Number of words allocated for *pointers*
      [(NonVoid a, ByteOff)])

-- Things with their offsets from start of object in order of
-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER
-- First in list gets lowest offset, which is initial offset + 1.
--
-- mkVirtHeapOffsets always returns boxed things with smaller offsets
-- than the unboxed things

mkVirtHeapOffsets dflags is_thunk things
  = ASSERT(not (any (isVoidRep . fst . fromNonVoid) things))
    ( bytesToWordsRoundUp dflags tot_bytes
    , bytesToWordsRoundUp dflags bytes_of_ptrs
    , ptrs_w_offsets ++ non_ptrs_w_offsets
    )
  where
    hdr_words | is_thunk  = thunkHdrSize dflags
              | otherwise = fixedHdrSizeW dflags
    hdr_bytes = wordsToBytes dflags hdr_words

    (ptrs, non_ptrs) = partition (isGcPtrRep . fst . fromNonVoid) things

    (bytes_of_ptrs, ptrs_w_offsets) =
       mapAccumL computeOffset 0 ptrs
    (tot_bytes, non_ptrs_w_offsets) =
       mapAccumL computeOffset bytes_of_ptrs non_ptrs

    computeOffset bytes_so_far nv_thing
      = (bytes_so_far + wordsToBytes dflags (argRepSizeW dflags (toArgRep rep)),
         (NonVoid thing, hdr_bytes + bytes_so_far))
      where (rep,thing) = fromNonVoid nv_thing
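
-- A worked example (a sketch, assuming a 64-bit word and a one-word
-- header, i.e. no profiling): for a constructor with fields
-- [(IntRep, x), (LiftedRep, y)], the GC-pointer field y is laid out
-- first:
--
-- >    mkVirtHeapOffsets dflags False [(IntRep,x), (LiftedRep,y)]
-- >      ==> (2, 1, [(y, 8), (x, 16)])
--
-- i.e. two payload words, one of them a pointer, with y just after the
-- header (byte offset 8) and x after it (byte offset 16).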

-- | Just like mkVirtHeapOffsets, but for constructors
mkVirtConstrOffsets
  :: DynFlags -> [NonVoid (PrimRep, a)]
  -> (WordOff, WordOff, [(NonVoid a, ByteOff)])
mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags False

-- | Just like mkVirtConstrOffsets, but used when we don't have the actual
-- arguments.  Useful when e.g. generating info tables; we just need to know
-- sizes of pointer and non-pointer fields.
mkVirtConstrSizes :: DynFlags -> [NonVoid PrimRep] -> (WordOff, WordOff)
mkVirtConstrSizes dflags field_reps
  = (tot_wds, ptr_wds)
  where
    (tot_wds, ptr_wds, _) =
       mkVirtConstrOffsets dflags
         (map (\nv_rep -> NonVoid (fromNonVoid nv_rep, ())) field_reps)

-------------------------------------------------------------------------
--
--      Making argument descriptors
--
--      An argument descriptor describes the layout of args on the stack,
--      both for    * GC (stack-layout) purposes, and
--                  * saving/restoring registers when a heap-check fails
--
--      Void arguments aren't important, therefore (contrast constructSlowCall)
--
-------------------------------------------------------------------------

-- bring in ARG_P, ARG_N, etc.
#include "../includes/rts/storage/FunTypes.h"

mkArgDescr :: DynFlags -> [Id] -> ArgDescr
mkArgDescr dflags args
  = let arg_bits = argBits dflags arg_reps
        arg_reps = filter isNonV (map idArgRep args)
           -- Getting rid of voids eases matching of standard patterns
    in case stdPattern arg_reps of
         Just spec_id -> ArgSpec spec_id
         Nothing      -> ArgGen arg_bits

argBits :: DynFlags -> [ArgRep] -> [Bool]   -- True for non-ptr, False for ptr
argBits _      []           = []
argBits dflags (P   : args) = False : argBits dflags args
argBits dflags (arg : args) = take (argRepSizeW dflags arg) (repeat True)
                              ++ argBits dflags args
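
-- For example (a sketch, assuming a 64-bit target where N and D are
-- each one word; on a 32-bit target a D would contribute two Trues):
--
-- >    argBits dflags [P, N, D]  ==>  [False, True, True]
--
-- i.e. one pointer word followed by two non-pointer words.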

----------------------
stdPattern :: [ArgRep] -> Maybe Int
stdPattern reps
  = case reps of
      []    -> Just ARG_NONE  -- just void args, probably
      [N]   -> Just ARG_N
      [P]   -> Just ARG_P
      [F]   -> Just ARG_F
      [D]   -> Just ARG_D
      [L]   -> Just ARG_L
      [V16] -> Just ARG_V16
      [V32] -> Just ARG_V32
      [V64] -> Just ARG_V64

      [N,N] -> Just ARG_NN
      [N,P] -> Just ARG_NP
      [P,N] -> Just ARG_PN
      [P,P] -> Just ARG_PP

      [N,N,N] -> Just ARG_NNN
      [N,N,P] -> Just ARG_NNP
      [N,P,N] -> Just ARG_NPN
      [N,P,P] -> Just ARG_NPP
      [P,N,N] -> Just ARG_PNN
      [P,N,P] -> Just ARG_PNP
      [P,P,N] -> Just ARG_PPN
      [P,P,P] -> Just ARG_PPP

      [P,P,P,P]     -> Just ARG_PPPP
      [P,P,P,P,P]   -> Just ARG_PPPPP
      [P,P,P,P,P,P] -> Just ARG_PPPPPP

      _ -> Nothing

-------------------------------------------------------------------------
--
--      Generating the info table and code for a closure
--
-------------------------------------------------------------------------

-- Here we make an info table of type 'CmmInfo'.  The concrete
-- representation as a list of 'CmmAddr' is handled later
-- in the pipeline by 'cmmToRawCmm'.
-- When loading the free variables, a function closure pointer may be tagged,
-- so we must take it into account.

emitClosureProcAndInfoTable :: Bool                    -- top-level?
                            -> Id                      -- name of the closure
                            -> LambdaFormInfo
                            -> CmmInfoTable
                            -> [NonVoid Id]            -- incoming arguments
                            -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body
                            -> FCode ()
emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body
  = do { dflags <- getDynFlags
       -- Bind the binder itself, but only if it's not a top-level
       -- binding. We need non-top let-bindings to refer to the
       -- top-level binding, which this binding would incorrectly shadow.
       ; node <- if top_lvl then return $ idToReg dflags (NonVoid bndr)
                 else bindToReg (NonVoid bndr) lf_info
       ; let node_points = nodeMustPointToIt dflags lf_info
       ; arg_regs <- bindArgsToRegs args
       ; let args' = if node_points then (node : arg_regs) else arg_regs
             conv  = if nodeMustPointToIt dflags lf_info then NativeNodeCall
                     else NativeDirectCall
             (offset, _, _) = mkCallEntry dflags conv args' []
       ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs)
       }

-- Data constructors need closures, but not with all the argument handling
-- needed for functions.  The shared part goes here.
emitClosureAndInfoTable ::
  CmmInfoTable -> Convention -> [LocalReg] -> FCode () -> FCode ()
emitClosureAndInfoTable info_tbl conv args body
  = do { (_, blks) <- getCodeScoped body
       ; let entry_lbl = toEntryLbl (cit_lbl info_tbl)
       ; emitProcWithConvention conv (Just info_tbl) entry_lbl args blks
       }