Change the way module initialisation is done (#3252, #4417)
[ghc.git] / compiler / codeGen / StgCmmProf.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generation for profiling
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module StgCmmProf (
10 initCostCentres, ccType, ccsType,
11 mkCCostCentre, mkCCostCentreStack,
12
13 -- Cost-centre Profiling
14 dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
15 enterCostCentre, enterCostCentrePAP, enterCostCentreThunk,
16 chooseDynCostCentres,
17 costCentreFrom,
18 curCCS, curCCSAddr,
19 emitSetCCC, emitCCS,
20
21 saveCurrentCostCentre, restoreCurrentCostCentre,
22
23 -- Lag/drag/void stuff
24 ldvEnter, ldvEnterClosure, ldvRecordCreate
25 ) where
26
27 #include "HsVersions.h"
28 #include "../includes/MachDeps.h"
29 -- For WORD_SIZE_IN_BITS only.
30 #include "../includes/rts/Constants.h"
31 -- For LDV_CREATE_MASK, LDV_STATE_USE
32 -- which are StgWords
33 #include "../includes/DerivedConstants.h"
34 -- For REP_xxx constants, which are MachReps
35
36 import StgCmmClosure
37 import StgCmmUtils
38 import StgCmmMonad
39 import SMRep
40
41 import MkGraph
42 import CmmExpr
43 import CmmDecl
44 import CmmUtils
45 import CLabel
46
47 import Id
48 import qualified Module
49 import CostCentre
50 import StgSyn
51 import StaticFlags
52 import FastString
53 import Module
54 import Constants -- Lots of field offsets
55 import Outputable
56
57 import Data.Char
58 import Control.Monad
59
60 -----------------------------------------------------------------------------
61 --
62 -- Cost-centre-stack Profiling
63 --
64 -----------------------------------------------------------------------------
65
66 -- Expression representing the current cost centre stack
67 ccsType :: CmmType -- Type of a cost-centre stack
68 ccsType = bWord
69
70 ccType :: CmmType -- Type of a cost centre
71 ccType = bWord
72
73 curCCS :: CmmExpr
74 curCCS = CmmLoad curCCSAddr ccsType
75
76 -- Address of current CCS variable, for storing into
77 curCCSAddr :: CmmExpr
78 curCCSAddr = CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "CCCS")))
79
80 mkCCostCentre :: CostCentre -> CmmLit
81 mkCCostCentre cc = CmmLabel (mkCCLabel cc)
82
83 mkCCostCentreStack :: CostCentreStack -> CmmLit
84 mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs)
85
86 costCentreFrom :: CmmExpr -- A closure pointer
87 -> CmmExpr -- The cost centre from that closure
88 costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) ccsType
89
90 staticProfHdr :: CostCentreStack -> [CmmLit]
91 -- The profiling header words in a static closure
92 -- Was SET_STATIC_PROF_HDR
93 staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs,
94 staticLdvInit]
95
96 dynProfHdr :: CmmExpr -> [CmmExpr]
97 -- Profiling header words in a dynamic closure
98 dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit]
99
100 initUpdFrameProf :: CmmExpr -> FCode ()
101 -- Initialise the profiling field of an update frame
102 initUpdFrameProf frame_amode
103 = ifProfiling $ -- frame->header.prof.ccs = CCCS
104 emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS)
105 -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0)
106 -- is unnecessary because it is not used anyhow.
107
108 ---------------------------------------------------------------------------
109 -- Saving and restoring the current cost centre
110 ---------------------------------------------------------------------------
111
112 {- Note [Saving the current cost centre]
113 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
114 The current cost centre is like a global register. Like other
115 global registers, it's a caller-saves one. But consider
116 case (f x) of (p,q) -> rhs
117 Since 'f' may set the cost centre, we must restore it
118 before resuming rhs. So we want code like this:
119 local_cc = CCC -- save
120 r = f( x )
121 CCC = local_cc -- restore
122 That is, we explicitly "save" the current cost centre in
123 a LocalReg, local_cc; and restore it after the call. The
124 C-- infrastructure will arrange to save local_cc across the
125 call.
126
127 The same goes for join points;
128 let j x = join-stuff
129 in blah-blah
130 We want this kind of code:
131 local_cc = CCC -- save
132 blah-blah
133 J:
134 CCC = local_cc -- restore
135 -}
136
137 saveCurrentCostCentre :: FCode (Maybe LocalReg)
138 -- Returns Nothing if profiling is off
139 saveCurrentCostCentre
140 | not opt_SccProfilingOn
141 = return Nothing
142 | otherwise
143 = do { local_cc <- newTemp ccType
144 ; emit (mkAssign (CmmLocal local_cc) curCCS)
145 ; return (Just local_cc) }
146
147 restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
148 restoreCurrentCostCentre Nothing
149 = return ()
150 restoreCurrentCostCentre (Just local_cc)
151 = emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc)))
152
153
154 -------------------------------------------------------------------------------
155 -- Recording allocation in a cost centre
156 -------------------------------------------------------------------------------
157
158 -- | Record the allocation of a closure. The CmmExpr is the cost
159 -- centre stack to which to attribute the allocation.
160 profDynAlloc :: ClosureInfo -> CmmExpr -> FCode ()
161 profDynAlloc cl_info ccs
162 = ifProfiling $
163 profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs
164
165 -- | Record the allocation of a closure (size is given by a CmmExpr)
166 -- The size must be in words, because the allocation counter in a CCS counts
167 -- in words.
168 profAlloc :: CmmExpr -> CmmExpr -> FCode ()
169 profAlloc words ccs
170 = ifProfiling $
171 emit (addToMemE alloc_rep
172 (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc)
173 (CmmMachOp (MO_UU_Conv wordWidth (typeWidth alloc_rep)) $
174 [CmmMachOp mo_wordSub [words,
175 CmmLit (mkIntCLit profHdrSize)]]))
176 -- subtract the "profiling overhead", which is the
177 -- profiling header in a closure.
178 where
179 alloc_rep = REP_CostCentreStack_mem_alloc
180
181 -- ----------------------------------------------------------------------
182 -- Setting the cost centre in a new closure
183
184 chooseDynCostCentres :: CostCentreStack
185 -> [Id] -- Args
186 -> StgExpr -- Body
187 -> FCode (CmmExpr, CmmExpr)
188 -- Called when allocating a closure
189 -- Tells which cost centre to put in the object, and which
190 -- to blame the cost of allocation on
191 chooseDynCostCentres ccs args body = do
192 -- Cost-centre we record in the object
193 use_ccs <- emitCCS ccs
194
195 -- Cost-centre on whom we blame the allocation
196 let blame_ccs
197 | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS)
198 | otherwise = use_ccs
199
200 return (use_ccs, blame_ccs)
201
202
203 -- Some CostCentreStacks are a sequence of pushes on top of CCCS.
204 -- These pushes must be performed before we can refer to the stack in
205 -- an expression.
206 emitCCS :: CostCentreStack -> FCode CmmExpr
207 emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's)
208 where
209 (cc's, ccs') = decomposeCCS ccs
210
211 push_em ccs [] = return ccs
212 push_em ccs (cc:rest) = do
213 tmp <- newTemp ccsType
214 pushCostCentre tmp ccs cc
215 push_em (CmmReg (CmmLocal tmp)) rest
216
217 ccsExpr :: CostCentreStack -> CmmExpr
218 ccsExpr ccs
219 | isCurrentCCS ccs = curCCS
220 | otherwise = CmmLit (mkCCostCentreStack ccs)
221
222
223 isBox :: StgExpr -> Bool
224 -- If it's an utterly trivial RHS, then it must be
225 -- one introduced by boxHigherOrderArgs for profiling,
226 -- so we charge it to "OVERHEAD".
227 -- This looks like a GROSS HACK to me --SDM
228 isBox (StgApp _ []) = True
229 isBox _ = False
230
231
232 -- -----------------------------------------------------------------------
233 -- Setting the current cost centre on entry to a closure
234
235 -- For lexically scoped profiling we have to load the cost centre from
236 -- the closure entered, if the costs are not supposed to be inherited.
237 -- This is done immediately on entering the fast entry point.
238
239 -- Load current cost centre from closure, if not inherited.
240 -- Node is guaranteed to point to it, if profiling and not inherited.
241
242 enterCostCentre
243 :: ClosureInfo
244 -> CostCentreStack
245 -> StgExpr -- The RHS of the closure
246 -> FCode ()
247
248 -- We used to have a special case for bindings of form
249 -- f = g True
250 -- where g has arity 2. The RHS is a thunk, but we don't
251 -- need to update it; and we want to subsume costs.
252 -- We don't have these sort of PAPs any more, so the special
253 -- case has gone away.
254
255 enterCostCentre closure_info ccs body
256 = ifProfiling $
257 ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs)
258 enter_cost_centre closure_info ccs body
259
260 enter_cost_centre :: ClosureInfo -> CostCentreStack -> StgExpr -> FCode ()
261 enter_cost_centre closure_info ccs body
262 | isSubsumedCCS ccs
263 = ASSERT(isToplevClosure closure_info)
264 ASSERT(re_entrant)
265 enter_ccs_fsub
266
267 | isDerivedFromCurrentCCS ccs
268 = do {
269 if re_entrant && not is_box
270 then
271 enter_ccs_fun node_ccs
272 else
273 emit (mkStore curCCSAddr node_ccs)
274
275 -- don't forget to bump the scc count. This closure might have been
276 -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal
277 -- pass has turned into simply let x = e in ...x... and attached
278 -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that
279 -- we don't lose the scc counter, bump it in the entry code for x.
280 -- ToDo: for a multi-push we should really bump the counter for
281 -- each of the intervening CCSs, not just the top one.
282 ; when (not (isCurrentCCS ccs)) $
283 emit (bumpSccCount curCCS)
284 }
285
286 | isCafCCS ccs
287 = ASSERT(isToplevClosure closure_info)
288 ASSERT(not re_entrant)
289 do { -- This is just a special case of the isDerivedFromCurrentCCS
290 -- case above. We could delete this, but it's a micro
291 -- optimisation and saves a bit of code.
292 emit (mkStore curCCSAddr enc_ccs)
293 ; emit (bumpSccCount node_ccs)
294 }
295
296 | otherwise
297 = panic "enterCostCentre"
298 where
299 enc_ccs = CmmLit (mkCCostCentreStack ccs)
300 re_entrant = closureReEntrant closure_info
301 node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag))
302 is_box = isBox body
303
304 -- if this is a function, then node will be tagged; we must subract the tag
305 node_tag = funTag closure_info
306
307 -- set the current CCS when entering a PAP
308 enterCostCentrePAP :: CmmExpr -> FCode ()
309 enterCostCentrePAP closure =
310 ifProfiling $ do
311 enter_ccs_fun (costCentreFrom closure)
312 enteringPAP 1
313
314 enterCostCentreThunk :: CmmExpr -> FCode ()
315 enterCostCentreThunk closure =
316 ifProfiling $ do
317 emit $ mkStore curCCSAddr (costCentreFrom closure)
318
319 enter_ccs_fun :: CmmExpr -> FCode ()
320 enter_ccs_fun stack = emitRtsCall rtsPackageId (fsLit "EnterFunCCS") [(stack,AddrHint)] False
321 -- ToDo: vols
322
323 enter_ccs_fsub :: FCode ()
324 enter_ccs_fsub = enteringPAP 0
325
326 -- When entering a PAP, EnterFunCCS is called by both the PAP entry
327 -- code and the function entry code; we don't want the function's
328 -- entry code to also update CCCS in the event that it was called via
329 -- a PAP, so we set the flag entering_PAP to indicate that we are
330 -- entering via a PAP.
331 enteringPAP :: Integer -> FCode ()
332 enteringPAP n
333 = emit (mkStore (CmmLit (CmmLabel (mkCmmDataLabel rtsPackageId (fsLit "entering_PAP"))))
334 (CmmLit (CmmInt n cIntWidth)))
335
336 ifProfiling :: FCode () -> FCode ()
337 ifProfiling code
338 | opt_SccProfilingOn = code
339 | otherwise = nopC
340
341 ifProfilingL :: [a] -> [a]
342 ifProfilingL xs
343 | opt_SccProfilingOn = xs
344 | otherwise = []
345
346
347 ---------------------------------------------------------------
348 -- Initialising Cost Centres & CCSs
349 ---------------------------------------------------------------
350
351 initCostCentres :: CollectedCCs -> FCode ()
352 -- Emit the declarations
353 initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs)
354 = whenC opt_SccProfilingOn $
355 do { mapM_ emitCostCentreDecl local_CCs
356 ; mapM_ emitCostCentreStackDecl singleton_CCSs }
357
358
359 emitCostCentreDecl :: CostCentre -> FCode ()
360 emitCostCentreDecl cc = do
361 { label <- mkStringCLit (costCentreUserName cc)
362 ; modl <- mkStringCLit (Module.moduleNameString
363 (Module.moduleName (cc_mod cc)))
364 -- All cost centres will be in the main package, since we
365 -- don't normally use -auto-all or add SCCs to other packages.
366 -- Hence don't emit the package name in the module here.
367 ; let lits = [ zero, -- StgInt ccID,
368 label, -- char *label,
369 modl, -- char *module,
370 zero, -- StgWord time_ticks
371 zero64, -- StgWord64 mem_alloc
372 subsumed, -- StgInt is_caf
373 zero -- struct _CostCentre *link
374 ]
375 ; emitDataLits (mkCCLabel cc) lits
376 }
377 where
378 subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF
379 | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring
380
381 emitCostCentreStackDecl :: CostCentreStack -> FCode ()
382 emitCostCentreStackDecl ccs
383 = case maybeSingletonCCS ccs of
384 Just cc -> emitDataLits (mkCCSLabel ccs) (mk_lits cc)
385 Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs)
386 where
387 mk_lits cc = zero :
388 mkCCostCentre cc :
389 replicate (sizeof_ccs_words - 2) zero
390 -- Note: to avoid making any assumptions about how the
391 -- C compiler (that compiles the RTS, in particular) does
392 -- layouts of structs containing long-longs, simply
393 -- pad out the struct with zero words until we hit the
394 -- size of the overall struct (which we get via DerivedConstants.h)
395
396 zero :: CmmLit
397 zero = mkIntCLit 0
398 zero64 :: CmmLit
399 zero64 = CmmInt 0 W64
400
401 sizeof_ccs_words :: Int
402 sizeof_ccs_words
403 -- round up to the next word.
404 | ms == 0 = ws
405 | otherwise = ws + 1
406 where
407 (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE
408
409 -- ---------------------------------------------------------------------------
410 -- Set the current cost centre stack
411
412 emitSetCCC :: CostCentre -> FCode ()
413 emitSetCCC cc
414 | not opt_SccProfilingOn = nopC
415 | otherwise = do
416 tmp <- newTemp ccsType -- TODO FIXME NOW
417 ASSERT( sccAbleCostCentre cc )
418 pushCostCentre tmp curCCS cc
419 emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp)))
420 when (isSccCountCostCentre cc) $
421 emit (bumpSccCount curCCS)
422
423 pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
424 pushCostCentre result ccs cc
425 = emitRtsCallWithResult result AddrHint
426 rtsPackageId
427 (fsLit "PushCostCentre") [(ccs,AddrHint),
428 (CmmLit (mkCCostCentre cc), AddrHint)]
429 False
430
431 bumpSccCount :: CmmExpr -> CmmAGraph
432 bumpSccCount ccs
433 = addToMem REP_CostCentreStack_scc_count
434 (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1
435
436 -----------------------------------------------------------------------------
437 --
438 -- Lag/drag/void stuff
439 --
440 -----------------------------------------------------------------------------
441
442 --
443 -- Initial value for the LDV field in a static closure
444 --
445 staticLdvInit :: CmmLit
446 staticLdvInit = zeroCLit
447
448 --
449 -- Initial value of the LDV field in a dynamic closure
450 --
451 dynLdvInit :: CmmExpr
452 dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE
453 CmmMachOp mo_wordOr [
454 CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ],
455 CmmLit (mkWordCLit lDV_STATE_CREATE)
456 ]
457
458 --
459 -- Initialise the LDV word of a new closure
460 --
461 ldvRecordCreate :: CmmExpr -> FCode ()
462 ldvRecordCreate closure = emit $ mkStore (ldvWord closure) dynLdvInit
463
464 --
465 -- Called when a closure is entered, marks the closure as having been "used".
466 -- The closure is not an 'inherently used' one.
467 -- The closure is not IND or IND_OLDGEN because neither is considered for LDV
468 -- profiling.
469 --
470 ldvEnterClosure :: ClosureInfo -> FCode ()
471 ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag))
472 where tag = funTag closure_info
473 -- don't forget to substract node's tag
474
475 ldvEnter :: CmmExpr -> FCode ()
476 -- Argument is a closure pointer
477 ldvEnter cl_ptr
478 = ifProfiling $
479 -- if (era > 0) {
480 -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
481 -- era | LDV_STATE_USE }
482 emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])
483 (mkStore ldv_wd new_ldv_wd)
484 mkNop)
485 where
486 -- don't forget to substract node's tag
487 ldv_wd = ldvWord cl_ptr
488 new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord)
489 (CmmLit (mkWordCLit lDV_CREATE_MASK)))
490 (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE)))
491
492 loadEra :: CmmExpr
493 loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth)
494 [CmmLoad (mkLblExpr (mkCmmDataLabel rtsPackageId (fsLit "era"))) cInt]
495
496 ldvWord :: CmmExpr -> CmmExpr
497 -- Takes the address of a closure, and returns
498 -- the address of the LDV word in the closure
499 ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw
500
501 -- LDV constants, from ghc/includes/Constants.h
502 lDV_SHIFT :: Int
503 lDV_SHIFT = LDV_SHIFT
504 --lDV_STATE_MASK :: StgWord
505 --lDV_STATE_MASK = LDV_STATE_MASK
506 lDV_CREATE_MASK :: StgWord
507 lDV_CREATE_MASK = LDV_CREATE_MASK
508 --lDV_LAST_MASK :: StgWord
509 --lDV_LAST_MASK = LDV_LAST_MASK
510 lDV_STATE_CREATE :: StgWord
511 lDV_STATE_CREATE = LDV_STATE_CREATE
512 lDV_STATE_USE :: StgWord
513 lDV_STATE_USE = LDV_STATE_USE
514