Make profiling work with multiple capabilities (+RTS -N)
[ghc.git] / compiler / codeGen / StgCmmProf.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generation for profiling
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 {-# OPTIONS -fno-warn-tabs #-}
10 -- The above warning supression flag is a temporary kludge.
11 -- While working on this module you are encouraged to remove it and
12 -- detab the module (please do the detabbing in a separate patch). See
13 -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
14 -- for details
15
16 module StgCmmProf (
17 initCostCentres, ccType, ccsType,
18 mkCCostCentre, mkCCostCentreStack,
19
20 -- Cost-centre Profiling
21 dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
22 enterCostCentreThunk,
23 costCentreFrom,
24 curCCS, storeCurCCS,
25 emitSetCCC,
26
27 saveCurrentCostCentre, restoreCurrentCostCentre,
28
29 -- Lag/drag/void stuff
30 ldvEnter, ldvEnterClosure, ldvRecordCreate
31 ) where
32
33 #include "HsVersions.h"
34 #include "../includes/MachDeps.h"
35 -- For WORD_SIZE_IN_BITS only.
36 #include "../includes/rts/Constants.h"
37 -- For LDV_CREATE_MASK, LDV_STATE_USE
38 -- which are StgWords
39 #include "../includes/DerivedConstants.h"
40 -- For REP_xxx constants, which are MachReps
41
42 import StgCmmClosure
43 import StgCmmUtils
44 import StgCmmMonad
45 import SMRep
46
47 import MkGraph
48 import Cmm
49 import CmmUtils
50 import CLabel
51
52 import qualified Module
53 import CostCentre
54 import StaticFlags
55 import FastString
56 import Module
57 import Constants -- Lots of field offsets
58 import Outputable
59
60 import Control.Monad
61
62 -----------------------------------------------------------------------------
63 --
64 -- Cost-centre-stack Profiling
65 --
66 -----------------------------------------------------------------------------
67
68 -- Expression representing the current cost centre stack
69 ccsType :: CmmType -- Type of a cost-centre stack
70 ccsType = bWord
71
72 ccType :: CmmType -- Type of a cost centre
73 ccType = bWord
74
75 curCCS :: CmmExpr
76 curCCS = CmmReg (CmmGlobal CCCS)
77
78 storeCurCCS :: CmmExpr -> CmmAGraph
79 storeCurCCS e = mkAssign (CmmGlobal CCCS) e
80
81 mkCCostCentre :: CostCentre -> CmmLit
82 mkCCostCentre cc = CmmLabel (mkCCLabel cc)
83
84 mkCCostCentreStack :: CostCentreStack -> CmmLit
85 mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs)
86
87 costCentreFrom :: CmmExpr -- A closure pointer
88 -> CmmExpr -- The cost centre from that closure
89 costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) ccsType
90
91 staticProfHdr :: CostCentreStack -> [CmmLit]
92 -- The profiling header words in a static closure
93 -- Was SET_STATIC_PROF_HDR
94 staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs,
95 staticLdvInit]
96
97 dynProfHdr :: CmmExpr -> [CmmExpr]
98 -- Profiling header words in a dynamic closure
99 dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit]
100
101 initUpdFrameProf :: CmmExpr -> FCode ()
102 -- Initialise the profiling field of an update frame
103 initUpdFrameProf frame_amode
104 = ifProfiling $ -- frame->header.prof.ccs = CCCS
105 emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS)
106 -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0)
107 -- is unnecessary because it is not used anyhow.
108
109 ---------------------------------------------------------------------------
110 -- Saving and restoring the current cost centre
111 ---------------------------------------------------------------------------
112
113 {- Note [Saving the current cost centre]
114 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
115 The current cost centre is like a global register. Like other
116 global registers, it's a caller-saves one. But consider
117 case (f x) of (p,q) -> rhs
118 Since 'f' may set the cost centre, we must restore it
119 before resuming rhs. So we want code like this:
120 local_cc = CCC -- save
121 r = f( x )
122 CCC = local_cc -- restore
123 That is, we explicitly "save" the current cost centre in
124 a LocalReg, local_cc; and restore it after the call. The
125 C-- infrastructure will arrange to save local_cc across the
126 call.
127
128 The same goes for join points;
129 let j x = join-stuff
130 in blah-blah
131 We want this kind of code:
132 local_cc = CCC -- save
133 blah-blah
134 J:
135 CCC = local_cc -- restore
136 -}
137
138 saveCurrentCostCentre :: FCode (Maybe LocalReg)
139 -- Returns Nothing if profiling is off
140 saveCurrentCostCentre
141 | not opt_SccProfilingOn
142 = return Nothing
143 | otherwise
144 = do { local_cc <- newTemp ccType
145 ; emit (mkAssign (CmmLocal local_cc) curCCS)
146 ; return (Just local_cc) }
147
148 restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
149 restoreCurrentCostCentre Nothing
150 = return ()
151 restoreCurrentCostCentre (Just local_cc)
152 = emit (storeCurCCS (CmmReg (CmmLocal local_cc)))
153
154
155 -------------------------------------------------------------------------------
156 -- Recording allocation in a cost centre
157 -------------------------------------------------------------------------------
158
159 -- | Record the allocation of a closure. The CmmExpr is the cost
160 -- centre stack to which to attribute the allocation.
161 profDynAlloc :: SMRep -> CmmExpr -> FCode ()
162 profDynAlloc rep ccs
163 = ifProfiling $
164 profAlloc (CmmLit (mkIntCLit (heapClosureSize rep))) ccs
165
166 -- | Record the allocation of a closure (size is given by a CmmExpr)
167 -- The size must be in words, because the allocation counter in a CCS counts
168 -- in words.
169 profAlloc :: CmmExpr -> CmmExpr -> FCode ()
170 profAlloc words ccs
171 = ifProfiling $
172 emit (addToMemE alloc_rep
173 (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc)
174 (CmmMachOp (MO_UU_Conv wordWidth (typeWidth alloc_rep)) $
175 [CmmMachOp mo_wordSub [words,
176 CmmLit (mkIntCLit profHdrSize)]]))
177 -- subtract the "profiling overhead", which is the
178 -- profiling header in a closure.
179 where
180 alloc_rep = REP_CostCentreStack_mem_alloc
181
182 -- -----------------------------------------------------------------------
183 -- Setting the current cost centre on entry to a closure
184
185 enterCostCentreThunk :: CmmExpr -> FCode ()
186 enterCostCentreThunk closure =
187 ifProfiling $ do
188 emit $ storeCurCCS (costCentreFrom closure)
189
190 ifProfiling :: FCode () -> FCode ()
191 ifProfiling code
192 | opt_SccProfilingOn = code
193 | otherwise = nopC
194
195 ifProfilingL :: [a] -> [a]
196 ifProfilingL xs
197 | opt_SccProfilingOn = xs
198 | otherwise = []
199
200
201 ---------------------------------------------------------------
202 -- Initialising Cost Centres & CCSs
203 ---------------------------------------------------------------
204
205 initCostCentres :: CollectedCCs -> FCode ()
206 -- Emit the declarations
207 initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs)
208 = whenC opt_SccProfilingOn $
209 do { mapM_ emitCostCentreDecl local_CCs
210 ; mapM_ emitCostCentreStackDecl singleton_CCSs }
211
212
213 emitCostCentreDecl :: CostCentre -> FCode ()
214 emitCostCentreDecl cc = do
215 -- NB. bytesFS: we want the UTF-8 bytes here (#5559)
216 { label <- newByteStringCLit (bytesFS $ costCentreUserNameFS cc)
217 ; modl <- newByteStringCLit (bytesFS $ Module.moduleNameFS
218 $ Module.moduleName
219 $ cc_mod cc)
220 -- All cost centres will be in the main package, since we
221 -- don't normally use -auto-all or add SCCs to other packages.
222 -- Hence don't emit the package name in the module here.
223 ; let lits = [ zero, -- StgInt ccID,
224 label, -- char *label,
225 modl, -- char *module,
226 zero, -- StgWord time_ticks
227 zero64, -- StgWord64 mem_alloc
228 zero -- struct _CostCentre *link
229 ]
230 ; emitDataLits (mkCCLabel cc) lits
231 }
232
233 emitCostCentreStackDecl :: CostCentreStack -> FCode ()
234 emitCostCentreStackDecl ccs
235 = case maybeSingletonCCS ccs of
236 Just cc -> emitDataLits (mkCCSLabel ccs) (mk_lits cc)
237 Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs)
238 where
239 mk_lits cc = zero :
240 mkCCostCentre cc :
241 replicate (sizeof_ccs_words - 2) zero
242 -- Note: to avoid making any assumptions about how the
243 -- C compiler (that compiles the RTS, in particular) does
244 -- layouts of structs containing long-longs, simply
245 -- pad out the struct with zero words until we hit the
246 -- size of the overall struct (which we get via DerivedConstants.h)
247
248 zero :: CmmLit
249 zero = mkIntCLit 0
250 zero64 :: CmmLit
251 zero64 = CmmInt 0 W64
252
253 sizeof_ccs_words :: Int
254 sizeof_ccs_words
255 -- round up to the next word.
256 | ms == 0 = ws
257 | otherwise = ws + 1
258 where
259 (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE
260
261 -- ---------------------------------------------------------------------------
262 -- Set the current cost centre stack
263
264 emitSetCCC :: CostCentre -> Bool -> Bool -> FCode ()
265 emitSetCCC cc tick push
266 | not opt_SccProfilingOn = nopC
267 | otherwise = do
268 tmp <- newTemp ccsType -- TODO FIXME NOW
269 pushCostCentre tmp curCCS cc
270 when tick $ emit (bumpSccCount (CmmReg (CmmLocal tmp)))
271 when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp)))
272
273 pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
274 pushCostCentre result ccs cc
275 = emitRtsCallWithResult result AddrHint
276 rtsPackageId
277 (fsLit "PushCostCentre") [(ccs,AddrHint),
278 (CmmLit (mkCCostCentre cc), AddrHint)]
279 False
280
281 bumpSccCount :: CmmExpr -> CmmAGraph
282 bumpSccCount ccs
283 = addToMem REP_CostCentreStack_scc_count
284 (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1
285
286 -----------------------------------------------------------------------------
287 --
288 -- Lag/drag/void stuff
289 --
290 -----------------------------------------------------------------------------
291
292 --
293 -- Initial value for the LDV field in a static closure
294 --
295 staticLdvInit :: CmmLit
296 staticLdvInit = zeroCLit
297
298 --
299 -- Initial value of the LDV field in a dynamic closure
300 --
301 dynLdvInit :: CmmExpr
302 dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE
303 CmmMachOp mo_wordOr [
304 CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ],
305 CmmLit (mkWordCLit lDV_STATE_CREATE)
306 ]
307
308 --
309 -- Initialise the LDV word of a new closure
310 --
311 ldvRecordCreate :: CmmExpr -> FCode ()
312 ldvRecordCreate closure = emit $ mkStore (ldvWord closure) dynLdvInit
313
314 --
315 -- Called when a closure is entered, marks the closure as having been "used".
316 -- The closure is not an 'inherently used' one.
317 -- The closure is not IND or IND_OLDGEN because neither is considered for LDV
318 -- profiling.
319 --
320 ldvEnterClosure :: ClosureInfo -> FCode ()
321 ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag))
322 where tag = funTag closure_info
323 -- don't forget to substract node's tag
324
325 ldvEnter :: CmmExpr -> FCode ()
326 -- Argument is a closure pointer
327 ldvEnter cl_ptr
328 = ifProfiling $
329 -- if (era > 0) {
330 -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
331 -- era | LDV_STATE_USE }
332 emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])
333 (mkStore ldv_wd new_ldv_wd)
334 mkNop)
335 where
336 -- don't forget to substract node's tag
337 ldv_wd = ldvWord cl_ptr
338 new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord)
339 (CmmLit (mkWordCLit lDV_CREATE_MASK)))
340 (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE)))
341
342 loadEra :: CmmExpr
343 loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth)
344 [CmmLoad (mkLblExpr (mkCmmDataLabel rtsPackageId (fsLit "era"))) cInt]
345
346 ldvWord :: CmmExpr -> CmmExpr
347 -- Takes the address of a closure, and returns
348 -- the address of the LDV word in the closure
349 ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw
350
351 -- LDV constants, from ghc/includes/Constants.h
352 lDV_SHIFT :: Int
353 lDV_SHIFT = LDV_SHIFT
354 --lDV_STATE_MASK :: StgWord
355 --lDV_STATE_MASK = LDV_STATE_MASK
356 lDV_CREATE_MASK :: StgWord
357 lDV_CREATE_MASK = LDV_CREATE_MASK
358 --lDV_LAST_MASK :: StgWord
359 --lDV_LAST_MASK = LDV_LAST_MASK
360 lDV_STATE_CREATE :: StgWord
361 lDV_STATE_CREATE = LDV_STATE_CREATE
362 lDV_STATE_USE :: StgWord
363 lDV_STATE_USE = LDV_STATE_USE
364