Merge in new code generator branch.
[ghc.git] / compiler / codeGen / StgCmmProf.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generation for profiling
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmProf (
10 initCostCentres, ccType, ccsType,
11 mkCCostCentre, mkCCostCentreStack,
12
13 -- Cost-centre Profiling
14 dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
15 enterCostCentre, enterCostCentrePAP, enterCostCentreThunk,
16 chooseDynCostCentres,
17 costCentreFrom,
18 curCCS, curCCSAddr,
19 emitSetCCC, emitCCS,
20
21 saveCurrentCostCentre, restoreCurrentCostCentre,
22
23 -- Lag/drag/void stuff
24 ldvEnter, ldvEnterClosure, ldvRecordCreate
25 ) where
26
27 #include "HsVersions.h"
28 #include "../includes/MachDeps.h"
29 -- For WORD_SIZE_IN_BITS only.
30 #include "../includes/rts/Constants.h"
31 -- For LDV_CREATE_MASK, LDV_STATE_USE
32 -- which are StgWords
33 #include "../includes/DerivedConstants.h"
34 -- For REP_xxx constants, which are MachReps
35
36 import StgCmmClosure
37 import StgCmmUtils
38 import StgCmmMonad
39 import SMRep
40
41 import MkGraph
42 import CmmExpr
43 import CmmDecl
44 import CmmUtils
45 import CLabel
46
47 import Id
48 import qualified Module
49 import CostCentre
50 import StgSyn
51 import StaticFlags
52 import FastString
53 import Module
54 import Constants -- Lots of field offsets
55 import Outputable
56
57 import Data.Char
58 import Control.Monad
59
60 -----------------------------------------------------------------------------
61 --
62 -- Cost-centre-stack Profiling
63 --
64 -----------------------------------------------------------------------------
65
66 -- Expression representing the current cost centre stack
67 ccsType :: CmmType -- Type of a cost-centre stack
68 ccsType = bWord
69
70 ccType :: CmmType -- Type of a cost centre
71 ccType = bWord
72
73 curCCS :: CmmExpr
74 curCCS = CmmLoad curCCSAddr ccsType
75
76 -- Address of current CCS variable, for storing into
77 curCCSAddr :: CmmExpr
78 curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS")))
79
80 mkCCostCentre :: CostCentre -> CmmLit
81 mkCCostCentre cc = CmmLabel (mkCCLabel cc)
82
83 mkCCostCentreStack :: CostCentreStack -> CmmLit
84 mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs)
85
86 costCentreFrom :: CmmExpr -- A closure pointer
87 -> CmmExpr -- The cost centre from that closure
88 costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) ccsType
89
90 staticProfHdr :: CostCentreStack -> [CmmLit]
91 -- The profiling header words in a static closure
92 -- Was SET_STATIC_PROF_HDR
93 staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs,
94 staticLdvInit]
95
96 dynProfHdr :: CmmExpr -> [CmmExpr]
97 -- Profiling header words in a dynamic closure
98 dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit]
99
100 initUpdFrameProf :: CmmExpr -> FCode ()
101 -- Initialise the profiling field of an update frame
102 initUpdFrameProf frame_amode
103 = ifProfiling $ -- frame->header.prof.ccs = CCCS
104 emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS)
105 -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0)
106 -- is unnecessary because it is not used anyhow.
107
108 ---------------------------------------------------------------------------
109 -- Saving and restoring the current cost centre
110 ---------------------------------------------------------------------------
111
112 {- Note [Saving the current cost centre]
113 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
114 The current cost centre is like a global register. Like other
115 global registers, it's a caller-saves one. But consider
116 case (f x) of (p,q) -> rhs
117 Since 'f' may set the cost centre, we must restore it
118 before resuming rhs. So we want code like this:
119 local_cc = CCC -- save
120 r = f( x )
121 CCC = local_cc -- restore
122 That is, we explicitly "save" the current cost centre in
123 a LocalReg, local_cc; and restore it after the call. The
124 C-- infrastructure will arrange to save local_cc across the
125 call.
126
127 The same goes for join points;
128 let j x = join-stuff
129 in blah-blah
130 We want this kind of code:
131 local_cc = CCC -- save
132 blah-blah
133 J:
134 CCC = local_cc -- restore
135 -}
136
137 saveCurrentCostCentre :: FCode (Maybe LocalReg)
138 -- Returns Nothing if profiling is off
139 saveCurrentCostCentre
140 | not opt_SccProfilingOn
141 = return Nothing
142 | otherwise
143 = do { local_cc <- newTemp ccType
144 ; emit (mkAssign (CmmLocal local_cc) curCCS)
145 ; return (Just local_cc) }
146
147 restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
148 restoreCurrentCostCentre Nothing
149 = return ()
150 restoreCurrentCostCentre (Just local_cc)
151 = emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc)))
152
153
154 -------------------------------------------------------------------------------
155 -- Recording allocation in a cost centre
156 -------------------------------------------------------------------------------
157
158 -- | Record the allocation of a closure. The CmmExpr is the cost
159 -- centre stack to which to attribute the allocation.
160 profDynAlloc :: ClosureInfo -> CmmExpr -> FCode ()
161 profDynAlloc cl_info ccs
162 = ifProfiling $
163 profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs
164
165 -- | Record the allocation of a closure (size is given by a CmmExpr)
166 -- The size must be in words, because the allocation counter in a CCS counts
167 -- in words.
168 profAlloc :: CmmExpr -> CmmExpr -> FCode ()
169 profAlloc words ccs
170 = ifProfiling $
171 emit (addToMemE alloc_rep
172 (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc)
173 (CmmMachOp (MO_UU_Conv wordWidth (typeWidth alloc_rep)) $
174 [CmmMachOp mo_wordSub [words,
175 CmmLit (mkIntCLit profHdrSize)]]))
176 -- subtract the "profiling overhead", which is the
177 -- profiling header in a closure.
178 where
179 alloc_rep = REP_CostCentreStack_mem_alloc
180
181 -- ----------------------------------------------------------------------
182 -- Setting the cost centre in a new closure
183
184 chooseDynCostCentres :: CostCentreStack
185 -> [Id] -- Args
186 -> StgExpr -- Body
187 -> FCode (CmmExpr, CmmExpr)
188 -- Called when allocating a closure
189 -- Tells which cost centre to put in the object, and which
190 -- to blame the cost of allocation on
191 chooseDynCostCentres ccs args body = do
192 -- Cost-centre we record in the object
193 use_ccs <- emitCCS ccs
194
195 -- Cost-centre on whom we blame the allocation
196 let blame_ccs
197 | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS)
198 | otherwise = use_ccs
199
200 return (use_ccs, blame_ccs)
201
202
203 -- Some CostCentreStacks are a sequence of pushes on top of CCCS.
204 -- These pushes must be performed before we can refer to the stack in
205 -- an expression.
206 emitCCS :: CostCentreStack -> FCode CmmExpr
207 emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's)
208 where
209 (cc's, ccs') = decomposeCCS ccs
210
211 push_em ccs [] = return ccs
212 push_em ccs (cc:rest) = do
213 tmp <- newTemp ccsType
214 pushCostCentre tmp ccs cc
215 push_em (CmmReg (CmmLocal tmp)) rest
216
217 ccsExpr :: CostCentreStack -> CmmExpr
218 ccsExpr ccs
219 | isCurrentCCS ccs = curCCS
220 | otherwise = CmmLit (mkCCostCentreStack ccs)
221
222
223 isBox :: StgExpr -> Bool
224 -- If it's an utterly trivial RHS, then it must be
225 -- one introduced by boxHigherOrderArgs for profiling,
226 -- so we charge it to "OVERHEAD".
227 -- This looks like a GROSS HACK to me --SDM
228 isBox (StgApp _ []) = True
229 isBox _ = False
230
231
232 -- -----------------------------------------------------------------------
233 -- Setting the current cost centre on entry to a closure
234
235 -- For lexically scoped profiling we have to load the cost centre from
236 -- the closure entered, if the costs are not supposed to be inherited.
237 -- This is done immediately on entering the fast entry point.
238
239 -- Load current cost centre from closure, if not inherited.
240 -- Node is guaranteed to point to it, if profiling and not inherited.
241
242 enterCostCentre
243 :: ClosureInfo
244 -> CostCentreStack
245 -> StgExpr -- The RHS of the closure
246 -> FCode ()
247
248 -- We used to have a special case for bindings of form
249 -- f = g True
250 -- where g has arity 2. The RHS is a thunk, but we don't
251 -- need to update it; and we want to subsume costs.
252 -- We don't have these sort of PAPs any more, so the special
253 -- case has gone away.
254
255 enterCostCentre closure_info ccs body
256 = ifProfiling $
257 ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs)
258 enter_cost_centre closure_info ccs body
259
260 enter_cost_centre :: ClosureInfo -> CostCentreStack -> StgExpr -> FCode ()
261 enter_cost_centre closure_info ccs body
262 | isSubsumedCCS ccs
263 = ASSERT(isToplevClosure closure_info)
264 ASSERT(re_entrant)
265 enter_ccs_fsub
266
267 | isDerivedFromCurrentCCS ccs
268 = do {
269 if re_entrant && not is_box
270 then
271 enter_ccs_fun node_ccs
272 else
273 emit (mkStore curCCSAddr node_ccs)
274
275 -- don't forget to bump the scc count. This closure might have been
276 -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal
277 -- pass has turned into simply let x = e in ...x... and attached
278 -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that
279 -- we don't lose the scc counter, bump it in the entry code for x.
280 -- ToDo: for a multi-push we should really bump the counter for
281 -- each of the intervening CCSs, not just the top one.
282 ; when (not (isCurrentCCS ccs)) $
283 emit (bumpSccCount curCCS)
284 }
285
286 | isCafCCS ccs
287 = ASSERT(isToplevClosure closure_info)
288 ASSERT(not re_entrant)
289 do { -- This is just a special case of the isDerivedFromCurrentCCS
290 -- case above. We could delete this, but it's a micro
291 -- optimisation and saves a bit of code.
292 emit (mkStore curCCSAddr enc_ccs)
293 ; emit (bumpSccCount node_ccs)
294 }
295
296 | otherwise
297 = panic "enterCostCentre"
298 where
299 enc_ccs = CmmLit (mkCCostCentreStack ccs)
300 re_entrant = closureReEntrant closure_info
301 node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag))
302 is_box = isBox body
303
304 -- if this is a function, then node will be tagged; we must subract the tag
305 node_tag = funTag closure_info
306
307 -- set the current CCS when entering a PAP
308 enterCostCentrePAP :: CmmExpr -> FCode ()
309 enterCostCentrePAP closure =
310 ifProfiling $ do
311 enter_ccs_fun (costCentreFrom closure)
312 enteringPAP 1
313
314 enterCostCentreThunk :: CmmExpr -> FCode ()
315 enterCostCentreThunk closure =
316 ifProfiling $ do
317 emit $ mkStore curCCSAddr (costCentreFrom closure)
318
319 enter_ccs_fun :: CmmExpr -> FCode ()
320 enter_ccs_fun stack = emitRtsCall rtsPackageId (fsLit "EnterFunCCS") [(stack,AddrHint)] False
321 -- ToDo: vols
322
323 enter_ccs_fsub :: FCode ()
324 enter_ccs_fsub = enteringPAP 0
325
326 -- When entering a PAP, EnterFunCCS is called by both the PAP entry
327 -- code and the function entry code; we don't want the function's
328 -- entry code to also update CCCS in the event that it was called via
329 -- a PAP, so we set the flag entering_PAP to indicate that we are
330 -- entering via a PAP.
331 enteringPAP :: Integer -> FCode ()
332 enteringPAP n
333 = emit (mkStore (CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "entering_PAP"))))
334 (CmmLit (CmmInt n cIntWidth)))
335
336 ifProfiling :: FCode () -> FCode ()
337 ifProfiling code
338 | opt_SccProfilingOn = code
339 | otherwise = nopC
340
341 ifProfilingL :: [a] -> [a]
342 ifProfilingL xs
343 | opt_SccProfilingOn = xs
344 | otherwise = []
345
346
347 ---------------------------------------------------------------
348 -- Initialising Cost Centres & CCSs
349 ---------------------------------------------------------------
350
351 initCostCentres :: CollectedCCs -> FCode CmmAGraph
352 -- Emit the declarations, and return code to register them
353 initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs)
354 = getCode $ whenC opt_SccProfilingOn $
355 do { mapM_ emitCostCentreDecl local_CCs
356 ; mapM_ emitCostCentreStackDecl singleton_CCSs
357 ; emit $ catAGraphs $ map mkRegisterCC local_CCs
358 ; emit $ catAGraphs $ map mkRegisterCCS singleton_CCSs }
359
360
361 emitCostCentreDecl :: CostCentre -> FCode ()
362 emitCostCentreDecl cc = do
363 { label <- mkStringCLit (costCentreUserName cc)
364 ; modl <- mkStringCLit (Module.moduleNameString
365 (Module.moduleName (cc_mod cc)))
366 -- All cost centres will be in the main package, since we
367 -- don't normally use -auto-all or add SCCs to other packages.
368 -- Hence don't emit the package name in the module here.
369 ; let lits = [ zero, -- StgInt ccID,
370 label, -- char *label,
371 modl, -- char *module,
372 zero, -- StgWord time_ticks
373 zero64, -- StgWord64 mem_alloc
374 subsumed, -- StgInt is_caf
375 zero -- struct _CostCentre *link
376 ]
377 ; emitDataLits (mkCCLabel cc) lits
378 }
379 where
380 subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF
381 | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring
382
383 emitCostCentreStackDecl :: CostCentreStack -> FCode ()
384 emitCostCentreStackDecl ccs
385 = case maybeSingletonCCS ccs of
386 Just cc -> emitDataLits (mkCCSLabel ccs) (mk_lits cc)
387 Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs)
388 where
389 mk_lits cc = zero :
390 mkCCostCentre cc :
391 replicate (sizeof_ccs_words - 2) zero
392 -- Note: to avoid making any assumptions about how the
393 -- C compiler (that compiles the RTS, in particular) does
394 -- layouts of structs containing long-longs, simply
395 -- pad out the struct with zero words until we hit the
396 -- size of the overall struct (which we get via DerivedConstants.h)
397
398 zero :: CmmLit
399 zero = mkIntCLit 0
400 zero64 :: CmmLit
401 zero64 = CmmInt 0 W64
402
403 sizeof_ccs_words :: Int
404 sizeof_ccs_words
405 -- round up to the next word.
406 | ms == 0 = ws
407 | otherwise = ws + 1
408 where
409 (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE
410
411 -- ---------------------------------------------------------------------------
412 -- Registering CCs and CCSs
413
414 -- (cc)->link = CC_LIST;
415 -- CC_LIST = (cc);
416 -- (cc)->ccID = CC_ID++;
417
418 mkRegisterCC :: CostCentre -> CmmAGraph
419 mkRegisterCC cc
420 = withTemp cInt $ \tmp ->
421 catAGraphs [
422 mkStore (cmmOffsetB cc_lit oFFSET_CostCentre_link)
423 (CmmLoad cC_LIST bWord),
424 mkStore cC_LIST cc_lit,
425 mkAssign (CmmLocal tmp) (CmmLoad cC_ID cInt),
426 mkStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg (CmmLocal tmp)),
427 mkStore cC_ID (cmmRegOffB (CmmLocal tmp) 1)
428 ]
429 where
430 cc_lit = CmmLit (CmmLabel (mkCCLabel cc))
431
432 -- (ccs)->prevStack = CCS_LIST;
433 -- CCS_LIST = (ccs);
434 -- (ccs)->ccsID = CCS_ID++;
435
436 mkRegisterCCS :: CostCentreStack -> CmmAGraph
437 mkRegisterCCS ccs
438 = withTemp cInt $ \ tmp ->
439 catAGraphs [
440 mkStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack)
441 (CmmLoad cCS_LIST bWord),
442 mkStore cCS_LIST ccs_lit,
443 mkAssign (CmmLocal tmp) (CmmLoad cCS_ID cInt),
444 mkStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg (CmmLocal tmp)),
445 mkStore cCS_ID (cmmRegOffB (CmmLocal tmp) 1)
446 ]
447 where
448 ccs_lit = CmmLit (CmmLabel (mkCCSLabel ccs))
449
450
451 cC_LIST, cC_ID :: CmmExpr
452 cC_LIST = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CC_LIST")))
453 cC_ID = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CC_ID")))
454
455 cCS_LIST, cCS_ID :: CmmExpr
456 cCS_LIST = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCS_LIST")))
457 cCS_ID = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCS_ID")))
458
459 -- ---------------------------------------------------------------------------
460 -- Set the current cost centre stack
461
462 emitSetCCC :: CostCentre -> FCode ()
463 emitSetCCC cc
464 | not opt_SccProfilingOn = nopC
465 | otherwise = do
466 tmp <- newTemp ccsType -- TODO FIXME NOW
467 ASSERT( sccAbleCostCentre cc )
468 pushCostCentre tmp curCCS cc
469 emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp)))
470 when (isSccCountCostCentre cc) $
471 emit (bumpSccCount curCCS)
472
473 pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
474 pushCostCentre result ccs cc
475 = emitRtsCallWithResult result AddrHint
476 rtsPackageId
477 (fsLit "PushCostCentre") [(ccs,AddrHint),
478 (CmmLit (mkCCostCentre cc), AddrHint)]
479 False
480
481 bumpSccCount :: CmmExpr -> CmmAGraph
482 bumpSccCount ccs
483 = addToMem REP_CostCentreStack_scc_count
484 (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1
485
486 -----------------------------------------------------------------------------
487 --
488 -- Lag/drag/void stuff
489 --
490 -----------------------------------------------------------------------------
491
492 --
493 -- Initial value for the LDV field in a static closure
494 --
495 staticLdvInit :: CmmLit
496 staticLdvInit = zeroCLit
497
498 --
499 -- Initial value of the LDV field in a dynamic closure
500 --
501 dynLdvInit :: CmmExpr
502 dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE
503 CmmMachOp mo_wordOr [
504 CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ],
505 CmmLit (mkWordCLit lDV_STATE_CREATE)
506 ]
507
508 --
509 -- Initialise the LDV word of a new closure
510 --
511 ldvRecordCreate :: CmmExpr -> FCode ()
512 ldvRecordCreate closure = emit $ mkStore (ldvWord closure) dynLdvInit
513
514 --
515 -- Called when a closure is entered, marks the closure as having been "used".
516 -- The closure is not an 'inherently used' one.
517 -- The closure is not IND or IND_OLDGEN because neither is considered for LDV
518 -- profiling.
519 --
520 ldvEnterClosure :: ClosureInfo -> FCode ()
521 ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag))
522 where tag = funTag closure_info
523 -- don't forget to substract node's tag
524
525 ldvEnter :: CmmExpr -> FCode ()
526 -- Argument is a closure pointer
527 ldvEnter cl_ptr
528 = ifProfiling $
529 -- if (era > 0) {
530 -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
531 -- era | LDV_STATE_USE }
532 emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])
533 (mkStore ldv_wd new_ldv_wd)
534 mkNop)
535 where
536 -- don't forget to substract node's tag
537 ldv_wd = ldvWord cl_ptr
538 new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord)
539 (CmmLit (mkWordCLit lDV_CREATE_MASK)))
540 (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE)))
541
542 loadEra :: CmmExpr
543 loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth)
544 [CmmLoad (mkLblExpr (mkCmmDataLabel rtsPackageId (fsLit "era"))) cInt]
545
546 ldvWord :: CmmExpr -> CmmExpr
547 -- Takes the address of a closure, and returns
548 -- the address of the LDV word in the closure
549 ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw
550
551 -- LDV constants, from ghc/includes/Constants.h
552 lDV_SHIFT :: Int
553 lDV_SHIFT = LDV_SHIFT
554 --lDV_STATE_MASK :: StgWord
555 --lDV_STATE_MASK = LDV_STATE_MASK
556 lDV_CREATE_MASK :: StgWord
557 lDV_CREATE_MASK = LDV_CREATE_MASK
558 --lDV_LAST_MASK :: StgWord
559 --lDV_LAST_MASK = LDV_LAST_MASK
560 lDV_STATE_CREATE :: StgWord
561 lDV_STATE_CREATE = LDV_STATE_CREATE
562 lDV_STATE_USE :: StgWord
563 lDV_STATE_USE = LDV_STATE_USE
564