Make -fscc-profiling a dynamic flag
[ghc.git] / compiler / codeGen / StgCmmProf.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generation for profiling
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 {-# OPTIONS -fno-warn-tabs #-}
10 -- The above warning supression flag is a temporary kludge.
11 -- While working on this module you are encouraged to remove it and
12 -- detab the module (please do the detabbing in a separate patch). See
13 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
14 -- for details
15
16 module StgCmmProf (
17 initCostCentres, ccType, ccsType,
18 mkCCostCentre, mkCCostCentreStack,
19
20 -- Cost-centre Profiling
21 dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
22 enterCostCentreThunk,
23 costCentreFrom,
24 curCCS, storeCurCCS,
25 emitSetCCC,
26
27 saveCurrentCostCentre, restoreCurrentCostCentre,
28
29 -- Lag/drag/void stuff
30 ldvEnter, ldvEnterClosure, ldvRecordCreate
31 ) where
32
33 #include "HsVersions.h"
34 #include "../includes/MachDeps.h"
35 -- For WORD_SIZE_IN_BITS only.
36 #include "../includes/rts/Constants.h"
37 -- For LDV_CREATE_MASK, LDV_STATE_USE
38 -- which are StgWords
39 #include "../includes/dist-derivedconstants/header/DerivedConstants.h"
40 -- For REP_xxx constants, which are MachReps
41
42 import StgCmmClosure
43 import StgCmmUtils
44 import StgCmmMonad
45 import SMRep
46
47 import MkGraph
48 import Cmm
49 import CmmUtils
50 import CLabel
51
52 import qualified Module
53 import CostCentre
54 import DynFlags
55 import FastString
56 import Module
57 import Constants -- Lots of field offsets
58 import Outputable
59
60 import Control.Monad
61 import Data.Char (ord)
62
63 -----------------------------------------------------------------------------
64 --
65 -- Cost-centre-stack Profiling
66 --
67 -----------------------------------------------------------------------------
68
69 -- Expression representing the current cost centre stack
70 ccsType :: CmmType -- Type of a cost-centre stack
71 ccsType = bWord
72
73 ccType :: CmmType -- Type of a cost centre
74 ccType = bWord
75
76 curCCS :: CmmExpr
77 curCCS = CmmReg (CmmGlobal CCCS)
78
79 storeCurCCS :: CmmExpr -> CmmAGraph
80 storeCurCCS e = mkAssign (CmmGlobal CCCS) e
81
82 mkCCostCentre :: CostCentre -> CmmLit
83 mkCCostCentre cc = CmmLabel (mkCCLabel cc)
84
85 mkCCostCentreStack :: CostCentreStack -> CmmLit
86 mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs)
87
88 costCentreFrom :: CmmExpr -- A closure pointer
89 -> CmmExpr -- The cost centre from that closure
90 costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) ccsType
91
92 staticProfHdr :: DynFlags -> CostCentreStack -> [CmmLit]
93 -- The profiling header words in a static closure
94 -- Was SET_STATIC_PROF_HDR
95 staticProfHdr dflags ccs
96 = ifProfilingL dflags [mkCCostCentreStack ccs, staticLdvInit]
97
98 dynProfHdr :: DynFlags -> CmmExpr -> [CmmExpr]
99 -- Profiling header words in a dynamic closure
100 dynProfHdr dflags ccs = ifProfilingL dflags [ccs, dynLdvInit]
101
102 initUpdFrameProf :: CmmExpr -> FCode ()
103 -- Initialise the profiling field of an update frame
104 initUpdFrameProf frame_amode
105 = ifProfiling $ -- frame->header.prof.ccs = CCCS
106 emitStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS
107 -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0)
108 -- is unnecessary because it is not used anyhow.
109
110 ---------------------------------------------------------------------------
111 -- Saving and restoring the current cost centre
112 ---------------------------------------------------------------------------
113
114 {- Note [Saving the current cost centre]
115 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
116 The current cost centre is like a global register. Like other
117 global registers, it's a caller-saves one. But consider
118 case (f x) of (p,q) -> rhs
119 Since 'f' may set the cost centre, we must restore it
120 before resuming rhs. So we want code like this:
121 local_cc = CCC -- save
122 r = f( x )
123 CCC = local_cc -- restore
124 That is, we explicitly "save" the current cost centre in
125 a LocalReg, local_cc; and restore it after the call. The
126 C-- infrastructure will arrange to save local_cc across the
127 call.
128
129 The same goes for join points;
130 let j x = join-stuff
131 in blah-blah
132 We want this kind of code:
133 local_cc = CCC -- save
134 blah-blah
135 J:
136 CCC = local_cc -- restore
137 -}
138
139 saveCurrentCostCentre :: FCode (Maybe LocalReg)
140 -- Returns Nothing if profiling is off
141 saveCurrentCostCentre
142 = do dflags <- getDynFlags
143 if not (dopt Opt_SccProfilingOn dflags)
144 then return Nothing
145 else do local_cc <- newTemp ccType
146 emitAssign (CmmLocal local_cc) curCCS
147 return (Just local_cc)
148
149 restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
150 restoreCurrentCostCentre Nothing
151 = return ()
152 restoreCurrentCostCentre (Just local_cc)
153 = emit (storeCurCCS (CmmReg (CmmLocal local_cc)))
154
155
156 -------------------------------------------------------------------------------
157 -- Recording allocation in a cost centre
158 -------------------------------------------------------------------------------
159
160 -- | Record the allocation of a closure. The CmmExpr is the cost
161 -- centre stack to which to attribute the allocation.
162 profDynAlloc :: SMRep -> CmmExpr -> FCode ()
163 profDynAlloc rep ccs
164 = ifProfiling $
165 do dflags <- getDynFlags
166 profAlloc (CmmLit (mkIntCLit (heapClosureSize dflags rep))) ccs
167
168 -- | Record the allocation of a closure (size is given by a CmmExpr)
169 -- The size must be in words, because the allocation counter in a CCS counts
170 -- in words.
171 profAlloc :: CmmExpr -> CmmExpr -> FCode ()
172 profAlloc words ccs
173 = ifProfiling $
174 do dflags <- getDynFlags
175 emit (addToMemE alloc_rep
176 (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc)
177 (CmmMachOp (MO_UU_Conv wordWidth (typeWidth alloc_rep)) $
178 [CmmMachOp mo_wordSub [words,
179 CmmLit (mkIntCLit (profHdrSize dflags))]]))
180 -- subtract the "profiling overhead", which is the
181 -- profiling header in a closure.
182 where
183 alloc_rep = REP_CostCentreStack_mem_alloc
184
185 -- -----------------------------------------------------------------------
186 -- Setting the current cost centre on entry to a closure
187
188 enterCostCentreThunk :: CmmExpr -> FCode ()
189 enterCostCentreThunk closure =
190 ifProfiling $ do
191 emit $ storeCurCCS (costCentreFrom closure)
192
193 ifProfiling :: FCode () -> FCode ()
194 ifProfiling code
195 = do dflags <- getDynFlags
196 if dopt Opt_SccProfilingOn dflags
197 then code
198 else nopC
199
200 ifProfilingL :: DynFlags -> [a] -> [a]
201 ifProfilingL dflags xs
202 | dopt Opt_SccProfilingOn dflags = xs
203 | otherwise = []
204
205
206 ---------------------------------------------------------------
207 -- Initialising Cost Centres & CCSs
208 ---------------------------------------------------------------
209
210 initCostCentres :: CollectedCCs -> FCode ()
211 -- Emit the declarations
212 initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs)
213 = do dflags <- getDynFlags
214 whenC (dopt Opt_SccProfilingOn dflags) $
215 do mapM_ emitCostCentreDecl local_CCs
216 mapM_ emitCostCentreStackDecl singleton_CCSs
217
218
219 emitCostCentreDecl :: CostCentre -> FCode ()
220 emitCostCentreDecl cc = do
221 -- NB. bytesFS: we want the UTF-8 bytes here (#5559)
222 { label <- newByteStringCLit (bytesFS $ costCentreUserNameFS cc)
223 ; modl <- newByteStringCLit (bytesFS $ Module.moduleNameFS
224 $ Module.moduleName
225 $ cc_mod cc)
226 ; dflags <- getDynFlags
227 ; loc <- newStringCLit (showPpr dflags (costCentreSrcSpan cc))
228 -- XXX should UTF-8 encode
229 -- All cost centres will be in the main package, since we
230 -- don't normally use -auto-all or add SCCs to other packages.
231 -- Hence don't emit the package name in the module here.
232 ; let lits = [ zero, -- StgInt ccID,
233 label, -- char *label,
234 modl, -- char *module,
235 loc, -- char *srcloc,
236 zero64, -- StgWord64 mem_alloc
237 zero, -- StgWord time_ticks
238 is_caf, -- StgInt is_caf
239 zero -- struct _CostCentre *link
240 ]
241 ; emitDataLits (mkCCLabel cc) lits
242 }
243 where
244 is_caf | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF
245 | otherwise = zero
246
247 emitCostCentreStackDecl :: CostCentreStack -> FCode ()
248 emitCostCentreStackDecl ccs
249 = case maybeSingletonCCS ccs of
250 Just cc -> emitDataLits (mkCCSLabel ccs) (mk_lits cc)
251 Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs)
252 where
253 mk_lits cc = zero :
254 mkCCostCentre cc :
255 replicate (sizeof_ccs_words - 2) zero
256 -- Note: to avoid making any assumptions about how the
257 -- C compiler (that compiles the RTS, in particular) does
258 -- layouts of structs containing long-longs, simply
259 -- pad out the struct with zero words until we hit the
260 -- size of the overall struct (which we get via DerivedConstants.h)
261
262 zero :: CmmLit
263 zero = mkIntCLit 0
264 zero64 :: CmmLit
265 zero64 = CmmInt 0 W64
266
267 sizeof_ccs_words :: Int
268 sizeof_ccs_words
269 -- round up to the next word.
270 | ms == 0 = ws
271 | otherwise = ws + 1
272 where
273 (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE
274
275 -- ---------------------------------------------------------------------------
276 -- Set the current cost centre stack
277
278 emitSetCCC :: CostCentre -> Bool -> Bool -> FCode ()
279 emitSetCCC cc tick push
280 = do dflags <- getDynFlags
281 if not (dopt Opt_SccProfilingOn dflags)
282 then nopC
283 else do tmp <- newTemp ccsType -- TODO FIXME NOW
284 pushCostCentre tmp curCCS cc
285 when tick $ emit (bumpSccCount (CmmReg (CmmLocal tmp)))
286 when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp)))
287
288 pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
289 pushCostCentre result ccs cc
290 = emitRtsCallWithResult result AddrHint
291 rtsPackageId
292 (fsLit "PushCostCentre") [(ccs,AddrHint),
293 (CmmLit (mkCCostCentre cc), AddrHint)]
294 False
295
296 bumpSccCount :: CmmExpr -> CmmAGraph
297 bumpSccCount ccs
298 = addToMem REP_CostCentreStack_scc_count
299 (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1
300
301 -----------------------------------------------------------------------------
302 --
303 -- Lag/drag/void stuff
304 --
305 -----------------------------------------------------------------------------
306
307 --
308 -- Initial value for the LDV field in a static closure
309 --
310 staticLdvInit :: CmmLit
311 staticLdvInit = zeroCLit
312
313 --
314 -- Initial value of the LDV field in a dynamic closure
315 --
316 dynLdvInit :: CmmExpr
317 dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE
318 CmmMachOp mo_wordOr [
319 CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ],
320 CmmLit (mkWordCLit lDV_STATE_CREATE)
321 ]
322
323 --
324 -- Initialise the LDV word of a new closure
325 --
326 ldvRecordCreate :: CmmExpr -> FCode ()
327 ldvRecordCreate closure = emit $ mkStore (ldvWord closure) dynLdvInit
328
329 --
330 -- Called when a closure is entered, marks the closure as having been "used".
331 -- The closure is not an 'inherently used' one.
332 -- The closure is not IND or IND_OLDGEN because neither is considered for LDV
333 -- profiling.
334 --
335 ldvEnterClosure :: ClosureInfo -> FCode ()
336 ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag))
337 where tag = funTag closure_info
338 -- don't forget to substract node's tag
339
340 ldvEnter :: CmmExpr -> FCode ()
341 -- Argument is a closure pointer
342 ldvEnter cl_ptr
343 = ifProfiling $
344 -- if (era > 0) {
345 -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
346 -- era | LDV_STATE_USE }
347 emit =<< mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])
348 (mkStore ldv_wd new_ldv_wd)
349 mkNop
350 where
351 -- don't forget to substract node's tag
352 ldv_wd = ldvWord cl_ptr
353 new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord)
354 (CmmLit (mkWordCLit lDV_CREATE_MASK)))
355 (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE)))
356
357 loadEra :: CmmExpr
358 loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth)
359 [CmmLoad (mkLblExpr (mkCmmDataLabel rtsPackageId (fsLit "era"))) cInt]
360
361 ldvWord :: CmmExpr -> CmmExpr
362 -- Takes the address of a closure, and returns
363 -- the address of the LDV word in the closure
364 ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw
365
366 -- LDV constants, from ghc/includes/Constants.h
367 lDV_SHIFT :: Int
368 lDV_SHIFT = LDV_SHIFT
369 --lDV_STATE_MASK :: StgWord
370 --lDV_STATE_MASK = LDV_STATE_MASK
371 lDV_CREATE_MASK :: StgWord
372 lDV_CREATE_MASK = LDV_CREATE_MASK
373 --lDV_LAST_MASK :: StgWord
374 --lDV_LAST_MASK = LDV_LAST_MASK
375 lDV_STATE_CREATE :: StgWord
376 lDV_STATE_CREATE = LDV_STATE_CREATE
377 lDV_STATE_USE :: StgWord
378 lDV_STATE_USE = LDV_STATE_USE
379