CLabel: Refactor pprDynamicLinkerAsmLabel
[ghc.git] / compiler / cmm / Debug.hs
1 {-# LANGUAGE GADTs #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Debugging data
6 --
7 -- Association of debug data on the Cmm level, with methods to encode it in
8 -- event log format for later inclusion in profiling event logs.
9 --
10 -----------------------------------------------------------------------------
11
12 module Debug (
13
14 DebugBlock(..), dblIsEntry,
15 cmmDebugGen,
16 cmmDebugLabels,
17 cmmDebugLink,
18 debugToMap,
19
20 -- * Unwinding information
21 UnwindTable, UnwindPoint(..),
22 UnwindExpr(..), toUnwindExpr
23 ) where
24
25 import GhcPrelude
26
27 import BlockId
28 import CLabel
29 import Cmm
30 import CmmUtils
31 import CoreSyn
32 import FastString ( nilFS, mkFastString )
33 import Module
34 import Outputable
35 import PprCore ()
36 import PprCmmExpr ( pprExpr )
37 import SrcLoc
38 import Util
39
40 import Hoopl.Block
41 import Hoopl.Collections
42 import Hoopl.Graph
43 import Hoopl.Label
44
45 import Data.Maybe
46 import Data.List ( minimumBy, nubBy )
47 import Data.Ord ( comparing )
48 import qualified Data.Map as Map
49
50 -- | Debug information about a block of code. Ticks scope over nested
51 -- blocks.
52 data DebugBlock =
53 DebugBlock
54 { dblProcedure :: !Label -- ^ Entry label of containing proc
55 , dblLabel :: !Label -- ^ Hoopl label
56 , dblCLabel :: !CLabel -- ^ Output label
57 , dblHasInfoTbl :: !Bool -- ^ Has an info table?
58 , dblParent :: !(Maybe DebugBlock)
59 -- ^ The parent of this proc. See Note [Splitting DebugBlocks]
60 , dblTicks :: ![CmmTickish] -- ^ Ticks defined in this block
61 , dblSourceTick
62 :: !(Maybe CmmTickish) -- ^ Best source tick covering block
63 , dblPosition :: !(Maybe Int) -- ^ Output position relative to
64 -- other blocks. @Nothing@ means
65 -- the block was optimized out
66 , dblUnwind :: [UnwindPoint]
67 , dblBlocks :: ![DebugBlock] -- ^ Nested blocks
68 }
69
70 -- | Is this the entry block?
71 dblIsEntry :: DebugBlock -> Bool
72 dblIsEntry blk = dblProcedure blk == dblLabel blk
73
74 instance Outputable DebugBlock where
75 ppr blk = (if dblProcedure blk == dblLabel blk
76 then text "proc "
77 else if dblHasInfoTbl blk
78 then text "pp-blk "
79 else text "blk ") <>
80 ppr (dblLabel blk) <+> parens (ppr (dblCLabel blk)) <+>
81 (maybe empty ppr (dblSourceTick blk)) <+>
82 (maybe (text "removed") ((text "pos " <>) . ppr)
83 (dblPosition blk)) <+>
84 (ppr (dblUnwind blk)) <+>
85 (if null (dblBlocks blk) then empty else ppr (dblBlocks blk))
86
87 -- | Intermediate data structure holding debug-relevant context information
88 -- about a block.
89 type BlockContext = (CmmBlock, RawCmmDecl)
90
91 -- | Extract debug data from a group of procedures. We will prefer
92 -- source notes that come from the given module (presumably the module
93 -- that we are currently compiling).
94 cmmDebugGen :: ModLocation -> RawCmmGroup -> [DebugBlock]
95 cmmDebugGen modLoc decls = map (blocksForScope Nothing) topScopes
96 where
97 blockCtxs :: Map.Map CmmTickScope [BlockContext]
98 blockCtxs = blockContexts decls
99
100 -- Analyse tick scope structure: Each one is either a top-level
101 -- tick scope, or the child of another.
102 (topScopes, childScopes)
103 = splitEithers $ map (\a -> findP a a) $ Map.keys blockCtxs
104 findP tsc GlobalScope = Left tsc -- top scope
105 findP tsc scp | scp' `Map.member` blockCtxs = Right (scp', tsc)
106 | otherwise = findP tsc scp'
107 where -- Note that we only following the left parent of
108 -- combined scopes. This loses us ticks, which we will
109 -- recover by copying ticks below.
110 scp' | SubScope _ scp' <- scp = scp'
111 | CombinedScope scp' _ <- scp = scp'
112 | otherwise = panic "findP impossible"
113
114 scopeMap = foldr (uncurry insertMulti) Map.empty childScopes
115
116 -- This allows us to recover ticks that we lost by flattening
117 -- the graph. Basically, if the parent is A but the child is
118 -- CBA, we know that there is no BA, because it would have taken
119 -- priority - but there might be a B scope, with ticks that
120 -- would not be associated with our child anymore. Note however
121 -- that there might be other childs (DB), which we have to
122 -- filter out.
123 --
124 -- We expect this to be called rarely, which is why we are not
125 -- trying too hard to be efficient here. In many cases we won't
126 -- have to construct blockCtxsU in the first place.
127 ticksToCopy :: CmmTickScope -> [CmmTickish]
128 ticksToCopy (CombinedScope scp s) = go s
129 where go s | scp `isTickSubScope` s = [] -- done
130 | SubScope _ s' <- s = ticks ++ go s'
131 | CombinedScope s1 s2 <- s = ticks ++ go s1 ++ go s2
132 | otherwise = panic "ticksToCopy impossible"
133 where ticks = bCtxsTicks $ fromMaybe [] $ Map.lookup s blockCtxs
134 ticksToCopy _ = []
135 bCtxsTicks = concatMap (blockTicks . fst)
136
137 -- Finding the "best" source tick is somewhat arbitrary -- we
138 -- select the first source span, while preferring source ticks
139 -- from the same source file. Furthermore, dumps take priority
140 -- (if we generated one, we probably want debug information to
141 -- refer to it).
142 bestSrcTick = minimumBy (comparing rangeRating)
143 rangeRating (SourceNote span _)
144 | srcSpanFile span == thisFile = 1
145 | otherwise = 2 :: Int
146 rangeRating note = pprPanic "rangeRating" (ppr note)
147 thisFile = maybe nilFS mkFastString $ ml_hs_file modLoc
148
149 -- Returns block tree for this scope as well as all nested
150 -- scopes. Note that if there are multiple blocks in the (exact)
151 -- same scope we elect one as the "branch" node and add the rest
152 -- as children.
153 blocksForScope :: Maybe CmmTickish -> CmmTickScope -> DebugBlock
154 blocksForScope cstick scope = mkBlock True (head bctxs)
155 where bctxs = fromJust $ Map.lookup scope blockCtxs
156 nested = fromMaybe [] $ Map.lookup scope scopeMap
157 childs = map (mkBlock False) (tail bctxs) ++
158 map (blocksForScope stick) nested
159
160 mkBlock :: Bool -> BlockContext -> DebugBlock
161 mkBlock top (block, prc)
162 = DebugBlock { dblProcedure = g_entry graph
163 , dblLabel = label
164 , dblCLabel = case info of
165 Just (Statics infoLbl _) -> infoLbl
166 Nothing
167 | g_entry graph == label -> entryLbl
168 | otherwise -> blockLbl label
169 , dblHasInfoTbl = isJust info
170 , dblParent = Nothing
171 , dblTicks = ticks
172 , dblPosition = Nothing -- see cmmDebugLink
173 , dblSourceTick = stick
174 , dblBlocks = blocks
175 , dblUnwind = []
176 }
177 where (CmmProc infos entryLbl _ graph) = prc
178 label = entryLabel block
179 info = mapLookup label infos
180 blocks | top = seqList childs childs
181 | otherwise = []
182
183 -- A source tick scopes over all nested blocks. However
184 -- their source ticks might take priority.
185 isSourceTick SourceNote {} = True
186 isSourceTick _ = False
187 -- Collect ticks from all blocks inside the tick scope.
188 -- We attempt to filter out duplicates while we're at it.
189 ticks = nubBy (flip tickishContains) $
190 bCtxsTicks bctxs ++ ticksToCopy scope
191 stick = case filter isSourceTick ticks of
192 [] -> cstick
193 sticks -> Just $! bestSrcTick (sticks ++ maybeToList cstick)
194
195 -- | Build a map of blocks sorted by their tick scopes
196 --
197 -- This involves a pre-order traversal, as we want blocks in rough
198 -- control flow order (so ticks have a chance to be sorted in the
199 -- right order).
200 blockContexts :: RawCmmGroup -> Map.Map CmmTickScope [BlockContext]
201 blockContexts decls = Map.map reverse $ foldr walkProc Map.empty decls
202 where walkProc :: RawCmmDecl
203 -> Map.Map CmmTickScope [BlockContext]
204 -> Map.Map CmmTickScope [BlockContext]
205 walkProc CmmData{} m = m
206 walkProc prc@(CmmProc _ _ _ graph) m
207 | mapNull blocks = m
208 | otherwise = snd $ walkBlock prc entry (emptyLbls, m)
209 where blocks = toBlockMap graph
210 entry = [mapFind (g_entry graph) blocks]
211 emptyLbls = setEmpty :: LabelSet
212
213 walkBlock :: RawCmmDecl -> [Block CmmNode C C]
214 -> (LabelSet, Map.Map CmmTickScope [BlockContext])
215 -> (LabelSet, Map.Map CmmTickScope [BlockContext])
216 walkBlock _ [] c = c
217 walkBlock prc (block:blocks) (visited, m)
218 | lbl `setMember` visited
219 = walkBlock prc blocks (visited, m)
220 | otherwise
221 = walkBlock prc blocks $
222 walkBlock prc succs
223 (lbl `setInsert` visited,
224 insertMulti scope (block, prc) m)
225 where CmmEntry lbl scope = firstNode block
226 (CmmProc _ _ _ graph) = prc
227 succs = map (flip mapFind (toBlockMap graph))
228 (successors (lastNode block))
229 mapFind = mapFindWithDefault (error "contextTree: block not found!")
230
231 insertMulti :: Ord k => k -> a -> Map.Map k [a] -> Map.Map k [a]
232 insertMulti k v = Map.insertWith (const (v:)) k [v]
233
234 cmmDebugLabels :: (i -> Bool) -> GenCmmGroup d g (ListGraph i) -> [Label]
235 cmmDebugLabels isMeta nats = seqList lbls lbls
236 where -- Find order in which procedures will be generated by the
237 -- back-end (that actually matters for DWARF generation).
238 --
239 -- Note that we might encounter blocks that are missing or only
240 -- consist of meta instructions -- we will declare them missing,
241 -- which will skip debug data generation without messing up the
242 -- block hierarchy.
243 lbls = map blockId $ filter (not . allMeta) $ concatMap getBlocks nats
244 getBlocks (CmmProc _ _ _ (ListGraph bs)) = bs
245 getBlocks _other = []
246 allMeta (BasicBlock _ instrs) = all isMeta instrs
247
248 -- | Sets position and unwind table fields in the debug block tree according to
249 -- native generated code.
250 cmmDebugLink :: [Label] -> LabelMap [UnwindPoint]
251 -> [DebugBlock] -> [DebugBlock]
252 cmmDebugLink labels unwindPts blocks = map link blocks
253 where blockPos :: LabelMap Int
254 blockPos = mapFromList $ flip zip [0..] labels
255 link block = block { dblPosition = mapLookup (dblLabel block) blockPos
256 , dblBlocks = map link (dblBlocks block)
257 , dblUnwind = fromMaybe mempty
258 $ mapLookup (dblLabel block) unwindPts
259 }
260
261 -- | Converts debug blocks into a label map for easier lookups
262 debugToMap :: [DebugBlock] -> LabelMap DebugBlock
263 debugToMap = mapUnions . map go
264 where go b = mapInsert (dblLabel b) b $ mapUnions $ map go (dblBlocks b)
265
266 {-
267 Note [What is this unwinding business?]
268 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
269
270 Unwinding tables are a variety of debugging information used by debugging tools
271 to reconstruct the execution history of a program at runtime. These tables
272 consist of sets of "instructions", one set for every instruction in the program,
273 which describe how to reconstruct the state of the machine at the point where
274 the current procedure was called. For instance, consider the following annotated
275 pseudo-code,
276
277 a_fun:
278 add rsp, 8 -- unwind: rsp = rsp - 8
279 mov rax, 1 -- unwind: rax = unknown
280 call another_block
281 sub rsp, 8 -- unwind: rsp = rsp
282
283 We see that attached to each instruction there is an "unwind" annotation, which
284 provides a relationship between each updated register and its value at the
285 time of entry to a_fun. This is the sort of information that allows gdb to give
286 you a stack backtrace given the execution state of your program. This
287 unwinding information is captured in various ways by various debug information
288 formats; in the case of DWARF (the only format supported by GHC) it is known as
289 Call Frame Information (CFI) and can be found in the .debug.frames section of
290 your object files.
291
292 Currently we only bother to produce unwinding information for registers which
293 are necessary to reconstruct flow-of-execution. On x86_64 this includes $rbp
294 (which is the STG stack pointer) and $rsp (the C stack pointer).
295
296 Let's consider how GHC would annotate a C-- program with unwinding information
297 with a typical C-- procedure as would come from the STG-to-Cmm code generator,
298
299 entry()
300 { c2fe:
301 v :: P64 = R2;
302 if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg;
303 c2ff:
304 R2 = v :: P64;
305 R1 = test_closure;
306 call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8;
307 c2fg:
308 I64[Sp - 8] = c2dD;
309 R1 = v :: P64;
310 Sp = Sp - 8; // Sp updated here
311 if (R1 & 7 != 0) goto c2dD; else goto c2dE;
312 c2dE:
313 call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8;
314 c2dD:
315 w :: P64 = R1;
316 Hp = Hp + 48;
317 if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi;
318 ...
319 },
320
321 Let's consider how this procedure will be decorated with unwind information
322 (largely by CmmLayoutStack). Naturally, when we enter the procedure `entry` the
323 value of Sp is no different from what it was at its call site. Therefore we will
324 add an `unwind` statement saying this at the beginning of its unwind-annotated
325 code,
326
327 entry()
328 { c2fe:
329 unwind Sp = Just Sp + 0;
330 v :: P64 = R2;
331 if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg;
332
333 After c2fe we we may pass to either c2ff or c2fg; let's first consider the
334 former. In this case there is nothing in particular that we need to do other
335 than reiterate what we already know about Sp,
336
337 c2ff:
338 unwind Sp = Just Sp + 0;
339 R2 = v :: P64;
340 R1 = test_closure;
341 call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8;
342
343 In contrast, c2fg updates Sp midway through its body. To ensure that unwinding
344 can happen correctly after this point we must include an unwind statement there,
345 in addition to the usual beginning-of-block statement,
346
347 c2fg:
348 unwind Sp = Just Sp + 0;
349 I64[Sp - 8] = c2dD;
350 R1 = v :: P64;
351 unwind Sp = Just Sp + 8;
352 Sp = Sp - 8;
353 if (R1 & 7 != 0) goto c2dD; else goto c2dE;
354
355 The remaining blocks are simple,
356
357 c2dE:
358 unwind Sp = Just Sp + 8;
359 call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8;
360 c2dD:
361 unwind Sp = Just Sp + 8;
362 w :: P64 = R1;
363 Hp = Hp + 48;
364 if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi;
365 ...
366 },
367
368
369 The flow of unwinding information through the compiler is a bit convoluted:
370
371 * C-- begins life in StgCmm without any unwind information. This is because we
372 haven't actually done any register assignment or stack layout yet, so there
373 is no need for unwind information.
374
375 * CmmLayoutStack figures out how to layout each procedure's stack, and produces
376 appropriate unwinding nodes for each adjustment of the STG Sp register.
377
378 * The unwind nodes are carried through the sinking pass. Currently this is
379 guaranteed not to invalidate unwind information since it won't touch stores
380 to Sp, but this will need revisiting if CmmSink gets smarter in the future.
381
382 * Eventually we make it to the native code generator backend which can then
383 preserve the unwind nodes in its machine-specific instructions. In so doing
384 the backend can also modify or add unwinding information; this is necessary,
385 for instance, in the case of x86-64, where adjustment of $rsp may be
386 necessary during calls to native foreign code due to the native calling
387 convention.
388
389 * The NCG then retrieves the final unwinding table for each block from the
390 backend with extractUnwindPoints.
391
392 * This unwind information is converted to DebugBlocks by Debug.cmmDebugGen
393
394 * These DebugBlcosk are then converted to, e.g., DWARF unwinding tables
395 (by the Dwarf module) and emitted in the final object.
396
397 See also: Note [Unwinding information in the NCG] in AsmCodeGen.
398 -}
399
400 -- | A label associated with an 'UnwindTable'
401 data UnwindPoint = UnwindPoint !CLabel !UnwindTable
402
403 instance Outputable UnwindPoint where
404 ppr (UnwindPoint lbl uws) =
405 braces $ ppr lbl<>colon
406 <+> hsep (punctuate comma $ map pprUw $ Map.toList uws)
407 where
408 pprUw (g, expr) = ppr g <> char '=' <> ppr expr
409
410 -- | Maps registers to expressions that yield their "old" values
411 -- further up the stack. Most interesting for the stack pointer @Sp@,
412 -- but might be useful to document saved registers, too. Note that a
413 -- register's value will be 'Nothing' when the register's previous
414 -- value cannot be reconstructed.
415 type UnwindTable = Map.Map GlobalReg (Maybe UnwindExpr)
416
417 -- | Expressions, used for unwind information
418 data UnwindExpr = UwConst !Int -- ^ literal value
419 | UwReg !GlobalReg !Int -- ^ register plus offset
420 | UwDeref UnwindExpr -- ^ pointer dereferencing
421 | UwLabel CLabel
422 | UwPlus UnwindExpr UnwindExpr
423 | UwMinus UnwindExpr UnwindExpr
424 | UwTimes UnwindExpr UnwindExpr
425 deriving (Eq)
426
427 instance Outputable UnwindExpr where
428 pprPrec _ (UwConst i) = ppr i
429 pprPrec _ (UwReg g 0) = ppr g
430 pprPrec p (UwReg g x) = pprPrec p (UwPlus (UwReg g 0) (UwConst x))
431 pprPrec _ (UwDeref e) = char '*' <> pprPrec 3 e
432 pprPrec _ (UwLabel l) = pprPrec 3 l
433 pprPrec p (UwPlus e0 e1) | p <= 0
434 = pprPrec 0 e0 <> char '+' <> pprPrec 0 e1
435 pprPrec p (UwMinus e0 e1) | p <= 0
436 = pprPrec 1 e0 <> char '-' <> pprPrec 1 e1
437 pprPrec p (UwTimes e0 e1) | p <= 1
438 = pprPrec 2 e0 <> char '*' <> pprPrec 2 e1
439 pprPrec _ other = parens (pprPrec 0 other)
440
441 -- | Conversion of Cmm expressions to unwind expressions. We check for
442 -- unsupported operator usages and simplify the expression as far as
443 -- possible.
444 toUnwindExpr :: CmmExpr -> UnwindExpr
445 toUnwindExpr (CmmLit (CmmInt i _)) = UwConst (fromIntegral i)
446 toUnwindExpr (CmmLit (CmmLabel l)) = UwLabel l
447 toUnwindExpr (CmmRegOff (CmmGlobal g) i) = UwReg g i
448 toUnwindExpr (CmmReg (CmmGlobal g)) = UwReg g 0
449 toUnwindExpr (CmmLoad e _) = UwDeref (toUnwindExpr e)
450 toUnwindExpr e@(CmmMachOp op [e1, e2]) =
451 case (op, toUnwindExpr e1, toUnwindExpr e2) of
452 (MO_Add{}, UwReg r x, UwConst y) -> UwReg r (x + y)
453 (MO_Sub{}, UwReg r x, UwConst y) -> UwReg r (x - y)
454 (MO_Add{}, UwConst x, UwReg r y) -> UwReg r (x + y)
455 (MO_Add{}, UwConst x, UwConst y) -> UwConst (x + y)
456 (MO_Sub{}, UwConst x, UwConst y) -> UwConst (x - y)
457 (MO_Mul{}, UwConst x, UwConst y) -> UwConst (x * y)
458 (MO_Add{}, u1, u2 ) -> UwPlus u1 u2
459 (MO_Sub{}, u1, u2 ) -> UwMinus u1 u2
460 (MO_Mul{}, u1, u2 ) -> UwTimes u1 u2
461 _otherwise -> pprPanic "Unsupported operator in unwind expression!"
462 (pprExpr e)
463 toUnwindExpr e
464 = pprPanic "Unsupported unwind expression!" (ppr e)