Snapshot of codegen refactoring to share with simonpj
[ghc.git] / compiler / codeGen / StgCmmProf.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generation for profiling
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmProf (
10 initCostCentres, ccType, ccsType,
11 mkCCostCentre, mkCCostCentreStack,
12
13 -- Cost-centre Profiling
14 dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
15 enterCostCentre, enterCostCentrePAP, enterCostCentreThunk,
16 chooseDynCostCentres,
17 costCentreFrom,
18 curCCS, curCCSAddr,
19 emitSetCCC, emitCCS,
20
21 saveCurrentCostCentre, restoreCurrentCostCentre,
22
23 -- Lag/drag/void stuff
24 ldvEnter, ldvEnterClosure, ldvRecordCreate
25 ) where
26
27 #include "HsVersions.h"
28 #include "../includes/MachDeps.h"
29 -- For WORD_SIZE_IN_BITS only.
30 #include "../includes/rts/Constants.h"
31 -- For LDV_CREATE_MASK, LDV_STATE_USE
32 -- which are StgWords
33 #include "../includes/DerivedConstants.h"
34 -- For REP_xxx constants, which are MachReps
35
36 import StgCmmClosure
37 import StgCmmUtils
38 import StgCmmMonad
39 import SMRep
40
41 import MkGraph
42 import Cmm
43 import CmmUtils
44 import CLabel
45
46 import Id
47 import qualified Module
48 import CostCentre
49 import StgSyn
50 import StaticFlags
51 import FastString
52 import Module
53 import Constants -- Lots of field offsets
54 import Outputable
55
56 import Data.Char
57 import Control.Monad
58
59 -----------------------------------------------------------------------------
60 --
61 -- Cost-centre-stack Profiling
62 --
63 -----------------------------------------------------------------------------
64
65 -- Expression representing the current cost centre stack
66 ccsType :: CmmType -- Type of a cost-centre stack
67 ccsType = bWord
68
69 ccType :: CmmType -- Type of a cost centre
70 ccType = bWord
71
72 curCCS :: CmmExpr
73 curCCS = CmmLoad curCCSAddr ccsType
74
75 -- Address of current CCS variable, for storing into
76 curCCSAddr :: CmmExpr
77 curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS")))
78
79 mkCCostCentre :: CostCentre -> CmmLit
80 mkCCostCentre cc = CmmLabel (mkCCLabel cc)
81
82 mkCCostCentreStack :: CostCentreStack -> CmmLit
83 mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs)
84
85 costCentreFrom :: CmmExpr -- A closure pointer
86 -> CmmExpr -- The cost centre from that closure
87 costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) ccsType
88
89 staticProfHdr :: CostCentreStack -> [CmmLit]
90 -- The profiling header words in a static closure
91 -- Was SET_STATIC_PROF_HDR
92 staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs,
93 staticLdvInit]
94
95 dynProfHdr :: CmmExpr -> [CmmExpr]
96 -- Profiling header words in a dynamic closure
97 dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit]
98
99 initUpdFrameProf :: CmmExpr -> FCode ()
100 -- Initialise the profiling field of an update frame
101 initUpdFrameProf frame_amode
102 = ifProfiling $ -- frame->header.prof.ccs = CCCS
103 emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS)
104 -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0)
105 -- is unnecessary because it is not used anyhow.
106
107 ---------------------------------------------------------------------------
108 -- Saving and restoring the current cost centre
109 ---------------------------------------------------------------------------
110
111 {- Note [Saving the current cost centre]
112 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
113 The current cost centre is like a global register. Like other
114 global registers, it's a caller-saves one. But consider
115 case (f x) of (p,q) -> rhs
116 Since 'f' may set the cost centre, we must restore it
117 before resuming rhs. So we want code like this:
118 local_cc = CCC -- save
119 r = f( x )
120 CCC = local_cc -- restore
121 That is, we explicitly "save" the current cost centre in
122 a LocalReg, local_cc; and restore it after the call. The
123 C-- infrastructure will arrange to save local_cc across the
124 call.
125
126 The same goes for join points;
127 let j x = join-stuff
128 in blah-blah
129 We want this kind of code:
130 local_cc = CCC -- save
131 blah-blah
132 J:
133 CCC = local_cc -- restore
134 -}
135
136 saveCurrentCostCentre :: FCode (Maybe LocalReg)
137 -- Returns Nothing if profiling is off
138 saveCurrentCostCentre
139 | not opt_SccProfilingOn
140 = return Nothing
141 | otherwise
142 = do { local_cc <- newTemp ccType
143 ; emit (mkAssign (CmmLocal local_cc) curCCS)
144 ; return (Just local_cc) }
145
146 restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
147 restoreCurrentCostCentre Nothing
148 = return ()
149 restoreCurrentCostCentre (Just local_cc)
150 = emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc)))
151
152
153 -------------------------------------------------------------------------------
154 -- Recording allocation in a cost centre
155 -------------------------------------------------------------------------------
156
157 -- | Record the allocation of a closure. The CmmExpr is the cost
158 -- centre stack to which to attribute the allocation.
159 profDynAlloc :: ClosureInfo -> CmmExpr -> FCode ()
160 profDynAlloc cl_info ccs
161 = ifProfiling $
162 profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs
163
164 -- | Record the allocation of a closure (size is given by a CmmExpr)
165 -- The size must be in words, because the allocation counter in a CCS counts
166 -- in words.
167 profAlloc :: CmmExpr -> CmmExpr -> FCode ()
168 profAlloc words ccs
169 = ifProfiling $
170 emit (addToMemE alloc_rep
171 (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc)
172 (CmmMachOp (MO_UU_Conv wordWidth (typeWidth alloc_rep)) $
173 [CmmMachOp mo_wordSub [words,
174 CmmLit (mkIntCLit profHdrSize)]]))
175 -- subtract the "profiling overhead", which is the
176 -- profiling header in a closure.
177 where
178 alloc_rep = REP_CostCentreStack_mem_alloc
179
180 -- ----------------------------------------------------------------------
181 -- Setting the cost centre in a new closure
182
183 chooseDynCostCentres :: CostCentreStack
184 -> [Id] -- Args
185 -> StgExpr -- Body
186 -> FCode (CmmExpr, CmmExpr)
187 -- Called when allocating a closure
188 -- Tells which cost centre to put in the object, and which
189 -- to blame the cost of allocation on
190 chooseDynCostCentres ccs args body = do
191 -- Cost-centre we record in the object
192 use_ccs <- emitCCS ccs
193
194 -- Cost-centre on whom we blame the allocation
195 let blame_ccs
196 | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS)
197 | otherwise = use_ccs
198
199 return (use_ccs, blame_ccs)
200
201
202 -- Some CostCentreStacks are a sequence of pushes on top of CCCS.
203 -- These pushes must be performed before we can refer to the stack in
204 -- an expression.
205 emitCCS :: CostCentreStack -> FCode CmmExpr
206 emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's)
207 where
208 (cc's, ccs') = decomposeCCS ccs
209
210 push_em ccs [] = return ccs
211 push_em ccs (cc:rest) = do
212 tmp <- newTemp ccsType
213 pushCostCentre tmp ccs cc
214 push_em (CmmReg (CmmLocal tmp)) rest
215
216 ccsExpr :: CostCentreStack -> CmmExpr
217 ccsExpr ccs
218 | isCurrentCCS ccs = curCCS
219 | otherwise = CmmLit (mkCCostCentreStack ccs)
220
221
222 isBox :: StgExpr -> Bool
223 -- If it's an utterly trivial RHS, then it must be
224 -- one introduced by boxHigherOrderArgs for profiling,
225 -- so we charge it to "OVERHEAD".
226 -- This looks like a GROSS HACK to me --SDM
227 isBox (StgApp _ []) = True
228 isBox _ = False
229
230
231 -- -----------------------------------------------------------------------
232 -- Setting the current cost centre on entry to a closure
233
234 -- For lexically scoped profiling we have to load the cost centre from
235 -- the closure entered, if the costs are not supposed to be inherited.
236 -- This is done immediately on entering the fast entry point.
237
238 -- Load current cost centre from closure, if not inherited.
239 -- Node is guaranteed to point to it, if profiling and not inherited.
240
241 enterCostCentre
242 :: ClosureInfo
243 -> CostCentreStack
244 -> StgExpr -- The RHS of the closure
245 -> FCode ()
246
247 -- We used to have a special case for bindings of form
248 -- f = g True
249 -- where g has arity 2. The RHS is a thunk, but we don't
250 -- need to update it; and we want to subsume costs.
251 -- We don't have these sort of PAPs any more, so the special
252 -- case has gone away.
253
254 enterCostCentre closure_info ccs body
255 = ifProfiling $
256 ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs)
257 enter_cost_centre closure_info ccs body
258
259 enter_cost_centre :: ClosureInfo -> CostCentreStack -> StgExpr -> FCode ()
260 enter_cost_centre closure_info ccs body
261 | isSubsumedCCS ccs
262 = ASSERT(isToplevClosure closure_info)
263 ASSERT(re_entrant)
264 enter_ccs_fsub
265
266 | isDerivedFromCurrentCCS ccs
267 = do {
268 if re_entrant && not is_box
269 then
270 enter_ccs_fun node_ccs
271 else
272 emit (mkStore curCCSAddr node_ccs)
273
274 -- don't forget to bump the scc count. This closure might have been
275 -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal
276 -- pass has turned into simply let x = e in ...x... and attached
277 -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that
278 -- we don't lose the scc counter, bump it in the entry code for x.
279 -- ToDo: for a multi-push we should really bump the counter for
280 -- each of the intervening CCSs, not just the top one.
281 ; when (not (isCurrentCCS ccs)) $
282 emit (bumpSccCount curCCS)
283 }
284
285 | isCafCCS ccs
286 = ASSERT(isToplevClosure closure_info)
287 ASSERT(not re_entrant)
288 do { -- This is just a special case of the isDerivedFromCurrentCCS
289 -- case above. We could delete this, but it's a micro
290 -- optimisation and saves a bit of code.
291 emit (mkStore curCCSAddr enc_ccs)
292 ; emit (bumpSccCount node_ccs)
293 }
294
295 | otherwise
296 = panic "enterCostCentre"
297 where
298 enc_ccs = CmmLit (mkCCostCentreStack ccs)
299 re_entrant = closureReEntrant closure_info
300 node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag))
301 is_box = isBox body
302
303 -- if this is a function, then node will be tagged; we must subract the tag
304 node_tag = funTag closure_info
305
306 -- set the current CCS when entering a PAP
307 enterCostCentrePAP :: CmmExpr -> FCode ()
308 enterCostCentrePAP closure =
309 ifProfiling $ do
310 enter_ccs_fun (costCentreFrom closure)
311 enteringPAP 1
312
313 enterCostCentreThunk :: CmmExpr -> FCode ()
314 enterCostCentreThunk closure =
315 ifProfiling $ do
316 emit $ mkStore curCCSAddr (costCentreFrom closure)
317
318 enter_ccs_fun :: CmmExpr -> FCode ()
319 enter_ccs_fun stack = emitRtsCall rtsPackageId (fsLit "EnterFunCCS") [(stack,AddrHint)] False
320 -- ToDo: vols
321
322 enter_ccs_fsub :: FCode ()
323 enter_ccs_fsub = enteringPAP 0
324
325 -- When entering a PAP, EnterFunCCS is called by both the PAP entry
326 -- code and the function entry code; we don't want the function's
327 -- entry code to also update CCCS in the event that it was called via
328 -- a PAP, so we set the flag entering_PAP to indicate that we are
329 -- entering via a PAP.
330 enteringPAP :: Integer -> FCode ()
331 enteringPAP n
332 = emit (mkStore (CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "entering_PAP"))))
333 (CmmLit (CmmInt n cIntWidth)))
334
335 ifProfiling :: FCode () -> FCode ()
336 ifProfiling code
337 | opt_SccProfilingOn = code
338 | otherwise = nopC
339
340 ifProfilingL :: [a] -> [a]
341 ifProfilingL xs
342 | opt_SccProfilingOn = xs
343 | otherwise = []
344
345
346 ---------------------------------------------------------------
347 -- Initialising Cost Centres & CCSs
348 ---------------------------------------------------------------
349
350 initCostCentres :: CollectedCCs -> FCode ()
351 -- Emit the declarations
352 initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs)
353 = whenC opt_SccProfilingOn $
354 do { mapM_ emitCostCentreDecl local_CCs
355 ; mapM_ emitCostCentreStackDecl singleton_CCSs }
356
357
358 emitCostCentreDecl :: CostCentre -> FCode ()
359 emitCostCentreDecl cc = do
360 { label <- newStringCLit (costCentreUserName cc)
361 ; modl <- newStringCLit (Module.moduleNameString
362 (Module.moduleName (cc_mod cc)))
363 -- All cost centres will be in the main package, since we
364 -- don't normally use -auto-all or add SCCs to other packages.
365 -- Hence don't emit the package name in the module here.
366 ; let lits = [ zero, -- StgInt ccID,
367 label, -- char *label,
368 modl, -- char *module,
369 zero, -- StgWord time_ticks
370 zero64, -- StgWord64 mem_alloc
371 subsumed, -- StgInt is_caf
372 zero -- struct _CostCentre *link
373 ]
374 ; emitDataLits (mkCCLabel cc) lits
375 }
376 where
377 subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF
378 | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring
379
380 emitCostCentreStackDecl :: CostCentreStack -> FCode ()
381 emitCostCentreStackDecl ccs
382 = case maybeSingletonCCS ccs of
383 Just cc -> emitDataLits (mkCCSLabel ccs) (mk_lits cc)
384 Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs)
385 where
386 mk_lits cc = zero :
387 mkCCostCentre cc :
388 replicate (sizeof_ccs_words - 2) zero
389 -- Note: to avoid making any assumptions about how the
390 -- C compiler (that compiles the RTS, in particular) does
391 -- layouts of structs containing long-longs, simply
392 -- pad out the struct with zero words until we hit the
393 -- size of the overall struct (which we get via DerivedConstants.h)
394
395 zero :: CmmLit
396 zero = mkIntCLit 0
397 zero64 :: CmmLit
398 zero64 = CmmInt 0 W64
399
400 sizeof_ccs_words :: Int
401 sizeof_ccs_words
402 -- round up to the next word.
403 | ms == 0 = ws
404 | otherwise = ws + 1
405 where
406 (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE
407
408 -- ---------------------------------------------------------------------------
409 -- Set the current cost centre stack
410
411 emitSetCCC :: CostCentre -> FCode ()
412 emitSetCCC cc
413 | not opt_SccProfilingOn = nopC
414 | otherwise = do
415 tmp <- newTemp ccsType -- TODO FIXME NOW
416 ASSERT( sccAbleCostCentre cc )
417 pushCostCentre tmp curCCS cc
418 emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp)))
419 when (isSccCountCostCentre cc) $
420 emit (bumpSccCount curCCS)
421
422 pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
423 pushCostCentre result ccs cc
424 = emitRtsCallWithResult result AddrHint
425 rtsPackageId
426 (fsLit "PushCostCentre") [(ccs,AddrHint),
427 (CmmLit (mkCCostCentre cc), AddrHint)]
428 False
429
430 bumpSccCount :: CmmExpr -> CmmAGraph
431 bumpSccCount ccs
432 = addToMem REP_CostCentreStack_scc_count
433 (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1
434
435 -----------------------------------------------------------------------------
436 --
437 -- Lag/drag/void stuff
438 --
439 -----------------------------------------------------------------------------
440
441 --
442 -- Initial value for the LDV field in a static closure
443 --
444 staticLdvInit :: CmmLit
445 staticLdvInit = zeroCLit
446
447 --
448 -- Initial value of the LDV field in a dynamic closure
449 --
450 dynLdvInit :: CmmExpr
451 dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE
452 CmmMachOp mo_wordOr [
453 CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ],
454 CmmLit (mkWordCLit lDV_STATE_CREATE)
455 ]
456
457 --
458 -- Initialise the LDV word of a new closure
459 --
460 ldvRecordCreate :: CmmExpr -> FCode ()
461 ldvRecordCreate closure = emit $ mkStore (ldvWord closure) dynLdvInit
462
463 --
464 -- Called when a closure is entered, marks the closure as having been "used".
465 -- The closure is not an 'inherently used' one.
466 -- The closure is not IND or IND_OLDGEN because neither is considered for LDV
467 -- profiling.
468 --
469 ldvEnterClosure :: ClosureInfo -> FCode ()
470 ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag))
471 where tag = funTag closure_info
472 -- don't forget to substract node's tag
473
474 ldvEnter :: CmmExpr -> FCode ()
475 -- Argument is a closure pointer
476 ldvEnter cl_ptr
477 = ifProfiling $
478 -- if (era > 0) {
479 -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
480 -- era | LDV_STATE_USE }
481 emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])
482 (mkStore ldv_wd new_ldv_wd)
483 mkNop)
484 where
485 -- don't forget to substract node's tag
486 ldv_wd = ldvWord cl_ptr
487 new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord)
488 (CmmLit (mkWordCLit lDV_CREATE_MASK)))
489 (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE)))
490
491 loadEra :: CmmExpr
492 loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth)
493 [CmmLoad (mkLblExpr (mkCmmDataLabel rtsPackageId (fsLit "era"))) cInt]
494
495 ldvWord :: CmmExpr -> CmmExpr
496 -- Takes the address of a closure, and returns
497 -- the address of the LDV word in the closure
498 ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw
499
500 -- LDV constants, from ghc/includes/Constants.h
501 lDV_SHIFT :: Int
502 lDV_SHIFT = LDV_SHIFT
503 --lDV_STATE_MASK :: StgWord
504 --lDV_STATE_MASK = LDV_STATE_MASK
505 lDV_CREATE_MASK :: StgWord
506 lDV_CREATE_MASK = LDV_CREATE_MASK
507 --lDV_LAST_MASK :: StgWord
508 --lDV_LAST_MASK = LDV_LAST_MASK
509 lDV_STATE_CREATE :: StgWord
510 lDV_STATE_CREATE = LDV_STATE_CREATE
511 lDV_STATE_USE :: StgWord
512 lDV_STATE_USE = LDV_STATE_USE
513