Dwarf: Rename binding to avoid shadowing ppr
[ghc.git] / compiler / nativeGen / Dwarf / Types.hs
1 module Dwarf.Types
2 ( -- * Dwarf information
3 DwarfInfo(..)
4 , pprDwarfInfo
5 , pprAbbrevDecls
6 -- * Dwarf address range table
7 , DwarfARange(..)
8 , pprDwarfARange
9 -- * Dwarf frame
10 , DwarfFrame(..), DwarfFrameProc(..), DwarfFrameBlock(..)
11 , pprDwarfFrame
12 -- * Utilities
13 , pprByte
14 , pprHalf
15 , pprData4'
16 , pprDwWord
17 , pprWord
18 , pprLEBWord
19 , pprLEBInt
20 , wordAlign
21 , sectionOffset
22 )
23 where
24
25 import Debug
26 import CLabel
27 import CmmExpr ( GlobalReg(..) )
28 import Encoding
29 import FastString
30 import Outputable
31 import Platform
32 import Unique
33 import Reg
34
35 import Dwarf.Constants
36
37 import Data.Bits
38 import Data.List ( mapAccumL )
39 import qualified Data.Map as Map
40 import Data.Word
41 import Data.Char
42
43 import CodeGen.Platform
44
-- | A single DWARF debugging-information record.  Each value is
-- rendered as one entry of the @.debug_info@ section (see
-- 'pprDwarfInfo'), followed by the entries of its 'dwChildren'.
data DwarfInfo
  = DwarfCompileUnit
      { dwChildren  :: [DwarfInfo]  -- ^ nested records, printed after this one
      , dwName      :: String       -- ^ printed as the name string
      , dwProducer  :: String       -- ^ printed as the producer string
      , dwCompDir   :: String       -- ^ printed as the compilation directory
      , dwLowLabel  :: CLabel       -- ^ printed as the low address word
      , dwHighLabel :: CLabel       -- ^ printed as the high address word
      , dwLineLabel :: LitString    -- ^ line-table label; only referenced
                                    --   when source line info is emitted
      }
  | DwarfSubprogram
      { dwChildren :: [DwarfInfo]
      , dwName     :: String
      , dwLabel    :: CLabel
      }
  | DwarfBlock
      { dwChildren :: [DwarfInfo]
      , dwLabel    :: CLabel
      , dwMarker   :: CLabel
      }
61
-- | Abbreviation codes identifying how each 'DwarfInfo' record is
-- encoded in the @.debug_info@ section.  The numeric code written to
-- the assembly is the constructor's 'Enum' position (see 'pprAbbrev'),
-- so the constructor order is significant.
data DwarfAbbrev
  = DwAbbrNull          -- ^ Pseudo, used for marking the end of lists
  | DwAbbrCompileUnit
  | DwAbbrSubprogram
  | DwAbbrBlock
  deriving (Eq, Enum)
70
-- | Emit the given abbreviation code as an unsigned LEB128 number.
-- The code is the constructor's 'Enum' index.
pprAbbrev :: DwarfAbbrev -> SDoc
pprAbbrev abbrev = pprLEBWord (fromIntegral (fromEnum abbrev))
74
-- | Abbreviation declarations.  These describe the binary layout used
-- when printing 'DwarfInfo' records, so any change here must be
-- mirrored in 'pprDwarfInfo' (and vice versa).
--
-- The argument says whether a @.debug_line@ reference
-- (@DW_AT_stmt_list@) is part of the compile-unit record.
pprAbbrevDecls :: Bool -> SDoc
pprAbbrevDecls haveDebugLine =
  vcat
    [ dwarfAbbrevSection
    , ptext dwarfAbbrevLabel <> colon
    , declare DwAbbrCompileUnit dW_TAG_compile_unit dW_CHILDREN_yes
        compileUnitAttrs
    , declare DwAbbrSubprogram dW_TAG_subprogram dW_CHILDREN_yes
        [ (dW_AT_name, dW_FORM_string)
        , (dW_AT_MIPS_linkage_name, dW_FORM_string)
        , (dW_AT_external, dW_FORM_flag)
        , (dW_AT_low_pc, dW_FORM_addr)
        , (dW_AT_high_pc, dW_FORM_addr)
        , (dW_AT_frame_base, dW_FORM_block1)
        ]
    , declare DwAbbrBlock dW_TAG_lexical_block dW_CHILDREN_yes
        [ (dW_AT_name, dW_FORM_string)
        , (dW_AT_low_pc, dW_FORM_addr)
        , (dW_AT_high_pc, dW_FORM_addr)
        ]
    , pprByte 0     -- terminates the list of declarations
    ]
  where
    -- One declaration: code, tag, children flag, (attribute, form)
    -- pairs, and a closing pair of zero bytes.
    declare code tag hasChildren attrs =
      vcat
        [ pprAbbrev code
        , pprLEBWord tag
        , pprByte hasChildren
        , vcat [ pprLEBWord attr $$ pprLEBWord form | (attr, form) <- attrs ]
        , pprByte 0
        , pprByte 0
        ]

    compileUnitAttrs =
      [ (dW_AT_name, dW_FORM_string)
      , (dW_AT_producer, dW_FORM_string)
      , (dW_AT_language, dW_FORM_data4)
      , (dW_AT_comp_dir, dW_FORM_string)
      , (dW_AT_use_UTF8, dW_FORM_flag_present) -- not represented in body
      , (dW_AT_low_pc, dW_FORM_addr)
      , (dW_AT_high_pc, dW_FORM_addr)
      ] ++ stmtListAttr

    stmtListAttr
      | haveDebugLine = [ (dW_AT_stmt_list, dW_FORM_data4) ]
      | otherwise     = []
112
-- | Generate assembly for a DWARF info record: the record itself,
-- then all of its children (recursively), then the closing marker.
pprDwarfInfo :: Bool -> DwarfInfo -> SDoc
pprDwarfInfo haveSrc info =
  vcat
    [ pprDwarfInfoOpen haveSrc info
    , vcat [ pprDwarfInfo haveSrc child | child <- dwChildren info ]
    , pprDwarfInfoClose
    ]
119
-- | Prints the assembler data for the opening part of a DWARF info
-- record (everything except its children and the closing marker).
-- The binary format is parameterized by 'pprAbbrevDecls' and has to be
-- kept in sync with it.
--
-- The 'Bool' says whether the compile unit should reference the line
-- table; it is ignored for the other record kinds.
pprDwarfInfoOpen :: Bool -> DwarfInfo -> SDoc
pprDwarfInfoOpen haveSrc DwarfCompileUnit{ dwName      = unitName
                                         , dwProducer  = producer
                                         , dwCompDir   = compDir
                                         , dwLowLabel  = lowLabel
                                         , dwHighLabel = highLabel
                                         , dwLineLabel = lineLbl } =
  vcat
    [ pprAbbrev DwAbbrCompileUnit
    , pprString unitName
    , pprString producer
    , pprData4 dW_LANG_Haskell
    , pprString compDir
    , pprWord (ppr lowLabel)
    , pprWord (ppr highLabel)
    , stmtList
    ]
  where
    stmtList
      | haveSrc   = sectionOffset (ptext lineLbl) (ptext dwarfLineLabel)
      | otherwise = empty
pprDwarfInfoOpen _ DwarfSubprogram{ dwName = procName, dwLabel = label } =
  sdocWithDynFlags $ \df ->
    let linkageName = renderWithStyle df (ppr label) (mkCodeStyle CStyle)
    in vcat
         [ pprAbbrev DwAbbrSubprogram
         , pprString procName
         , pprString linkageName
         , pprFlag (externallyVisibleCLabel label)
         , pprWord (ppr label)
         , pprWord (ppr (mkAsmTempEndLabel label))
         , pprByte 1                      -- length of the frame-base block
         , pprByte dW_OP_call_frame_cfa
         ]
pprDwarfInfoOpen _ DwarfBlock{ dwLabel = label, dwMarker = marker } =
  sdocWithDynFlags $ \df ->
    let blockName = renderWithStyle df (ppr label) (mkCodeStyle CStyle)
    in vcat
         [ pprAbbrev DwAbbrBlock
         , pprString blockName
         , pprWord (ppr marker)
         , pprWord (ppr (mkAsmTempEndLabel marker))
         ]
150
-- | Terminates the list of children of a DWARF info record by
-- emitting the null abbreviation code.
pprDwarfInfoClose :: SDoc
pprDwarfInfoClose = pprAbbrev endOfChildren
  where endOfChildren = DwAbbrNull
154
-- | A DWARF address range.  Debuggers use this to quickly find the
-- compilation unit that a given address belongs to.  A non-segmented
-- address space is assumed.
data DwarfARange
  = DwarfARange
      { dwArngStartLabel :: CLabel
        -- ^ label at the start of the range
      , dwArngEndLabel   :: CLabel
        -- ^ label at the end of the range
      , dwArngUnitUnique :: Unique
        -- ^ from which the corresponding label in @.debug_info@ is derived
      }
165
-- | Print the assembler directives for one DWARF @.debug_aranges@
-- address table: a header, a single (start, length) entry and the
-- all-zero terminating entry.
pprDwarfARange :: DwarfARange -> SDoc
pprDwarfARange arng = sdocWithPlatform table
  where
    startLbl = ppr (dwArngStartLabel arng)
    endLbl   = ppr (dwArngEndLabel arng)
    unitLbl  = ppr (mkAsmTempLabel (dwArngUnitUnique arng))

    -- header is 12 bytes long.
    -- entry is 8 bytes (32-bit platform) or 16 bytes (64-bit platform).
    -- pad such that first entry begins at multiple of entry size.
    paddingSize = 4 :: Int

    table plat =
      let wordSize      = platformWordSize plat
          initialLength = 8 + paddingSize + 2*2*wordSize
          rangeLength   = endLbl <> char '-' <> startLbl
      in vcat
           [ pprDwWord (ppr initialLength)
           , pprHalf 2
           , sectionOffset unitLbl (ptext dwarfInfoLabel)
           , pprByte (fromIntegral wordSize)
           , pprByte 0
           , vcat (replicate paddingSize (pprByte 0))
             -- beginning of body
           , pprWord startLbl
           , pprWord rangeLength
             -- terminus
           , pprWord (char '0')
           , pprWord (char '0')
           ]
192
-- | Unwind information shared by a group of procedures.  This
-- corresponds to a "Common Information Entry" (CIE) in DWARF.
data DwarfFrame
  = DwarfFrame
      { dwCieLabel :: CLabel
        -- ^ label placed at the start of the CIE
      , dwCieInit  :: UnwindTable
        -- ^ initial unwind table, emitted as part of the CIE
      , dwCieProcs :: [DwarfFrameProc]
        -- ^ procedures whose unwind tables follow the CIE
      }
201
-- | Unwind instructions for a single procedure.  This corresponds to
-- a "Frame Description Entry" (FDE) in DWARF.
data DwarfFrameProc
  = DwarfFrameProc
      { dwFdeProc    :: CLabel
        -- ^ label of the procedure
      , dwFdeHasInfo :: Bool
        -- ^ when set, the code pointer and length are adjusted by one
        --   byte (see [Note: Info Offset])
      , dwFdeBlocks  :: [DwarfFrameBlock]
        -- ^ List of blocks. Order must match asm!
      }
211
-- | Unwind instructions for a single block.  These become part of the
-- FDE of the containing procedure.
data DwarfFrameBlock
  = DwarfFrameBlock
      { dwFdeBlock      :: CLabel
        -- ^ label of the block
      , dwFdeBlkHasInfo :: Bool
        -- ^ when set, the block location is adjusted by one byte
        --   (see [Note: Info Offset])
      , dwFdeUnwind     :: UnwindTable
        -- ^ unwind table that is valid from this block onwards
      }
220
-- | Header for the @.debug_frame@ section.  Emits the "Common
-- Information Entry" record that establishes the general call frame
-- parameters and the default stack layout, followed by the unwind
-- tables of all procedures that refer to it.
pprDwarfFrame :: DwarfFrame -> SDoc
pprDwarfFrame (DwarfFrame cieLabel cieInit procs)
  = sdocWithPlatform $ \plat ->
    let cieStartLabel = mkAsmTempDerivedLabel cieLabel (fsLit "_start")
        cieEndLabel   = mkAsmTempEndLabel cieLabel
        cieLength     = ppr cieEndLabel <> char '-' <> ppr cieStartLabel
        spReg         = dwarfGlobalRegNo plat Sp
        retReg        = dwarfReturnRegNo plat
        wordSize      = platformWordSize plat
    in vcat
         [ ppr cieLabel <> colon
         , pprData4' cieLength -- Length of CIE
         , ppr cieStartLabel <> colon
         , pprData4' (ptext (sLit "-1"))
                            -- Common Information Entry marker (-1 = 0xf..f)
         , pprByte 3        -- CIE version (we require DWARF 3)
         , pprByte 0        -- Augmentation (none)
         , pprByte 1        -- Code offset multiplicator
         , pprByte (128-fromIntegral wordSize)
                            -- Data offset multiplicator
                            -- (stacks grow down => "-w" in signed LEB128)
         , pprByte retReg   -- virtual register holding return address

           -- Initial unwind table
         , vcat [ pprSetUnwind plat reg (Nothing, uw)
                | (reg, uw) <- Map.toList cieInit ]

           -- RET = *CFA
         , pprByte (dW_CFA_offset+retReg)
         , pprByte 0

           -- Sp' = CFA
           -- (we need to set this manually as our Sp register is
           -- often not the architecture's default stack register)
         , pprByte dW_CFA_val_offset
         , pprLEBWord (fromIntegral spReg)
         , pprLEBWord 0

         , wordAlign
         , ppr cieEndLabel <> colon

           -- Procedure unwind tables
         , vcat (map (pprFrameProc cieLabel cieInit) procs)
         ]
264
-- | Writes the "Frame Description Entry" of a procedure.  It mostly
-- consists of a reference to the CIE plus the state machine
-- instructions describing how the frame base (CFA) changes from block
-- to block.
pprFrameProc :: CLabel -> UnwindTable -> DwarfFrameProc -> SDoc
pprFrameProc frameLbl initUw (DwarfFrameProc procLbl hasInfo blocks)
  = vcat
      [ pprData4' (ppr fdeEndLabel <> char '-' <> ppr fdeLabel)
      , ppr fdeLabel <> colon
      , pprData4' (ppr frameLbl <> char '-' <>
                   ptext dwarfFrameLabel)          -- Reference to CIE
      , pprWord (ppr procLbl <> ifInfo "-1")       -- Code pointer
      , pprWord (ppr procEnd <> char '-' <>
                 ppr procLbl <> ifInfo "+1")       -- Block byte length
      , vcat blockDocs
      , wordAlign
      , ppr fdeEndLabel <> colon
      ]
  where
    fdeLabel    = mkAsmTempDerivedLabel procLbl (fsLit "_fde")
    fdeEndLabel = mkAsmTempDerivedLabel procLbl (fsLit "_fde_end")
    procEnd     = mkAsmTempEndLabel procLbl

    -- see [Note: Info Offset]
    ifInfo str
      | hasInfo   = text str
      | otherwise = empty

    -- The unwind table is threaded through the blocks so that each
    -- block only emits what changed relative to its predecessor.
    (_finalUw, blockDocs) = mapAccumL pprFrameBlock initUw blocks
286
-- | Generates the unwind information of a block, given the unwind
-- table that was in effect before it.  Instructions are only emitted
-- for entries that actually changed; this small optimisation saves a
-- lot of space because consecutive blocks often share the same unwind
-- information.  Returns the table in effect after the block together
-- with the generated assembly.
pprFrameBlock :: UnwindTable -> DwarfFrameBlock -> (UnwindTable, SDoc)
pprFrameBlock oldUws (DwarfFrameBlock blockLbl hasInfo uws)
  | uws == oldUws = (oldUws, empty)
  | otherwise     = (uws, sdocWithPlatform blockDoc)
  where
    -- see [Note: Info Offset]
    infoAdjust
      | hasInfo   = text "-1"
      | otherwise = empty
    location = ppr blockLbl <> infoAdjust

    -- Keep an entry (paired with its previous value) only if it
    -- differs from what the old table said.
    changedEntry reg new =
      let previous = Map.lookup reg oldUws
      in if previous == Just new then Nothing else Just (previous, new)

    changed  = Map.toList (Map.mapMaybeWithKey changedEntry uws)
    diedRegs = Map.keys (Map.difference oldUws uws)

    blockDoc plat =
      vcat
        [ pprByte dW_CFA_set_loc
        , pprWord location
        , vcat [ pprSetUnwind plat reg update | (reg, update) <- changed ]
        , vcat [ pprUndefUnwind plat reg | reg <- diedRegs ]
        ]
307
308 -- [Note: Info Offset]
309 --
310 -- GDB was pretty much written with C-like programs in mind, and as a
311 -- result they assume that once you have a return address, it is a
312 -- good idea to look at (PC-1) to unwind further - as that's where the
313 -- "call" instruction is supposed to be.
314 --
315 -- Now on one hand, code generated by GHC looks nothing like what GDB
316 -- expects, and in fact going up from a return pointer is guaranteed
317 -- to land us inside an info table! On the other hand, that actually
318 -- gives us some wiggle room, as we expect IP to never *actually* end
319 -- up inside the info table, so we can "cheat" by putting whatever GDB
320 -- expects to see there. This is probably pretty safe, as GDB cannot
321 -- assume (PC-1) to be a valid code pointer in the first place - and I
322 -- have seen no code trying to correct this.
323 --
324 -- Note that this will not prevent GDB from failing to look-up the
325 -- correct function name for the frame, as that uses the symbol table,
326 -- which we can not manipulate as easily.
327
-- | Get the DWARF register ID for a given 'GlobalReg'.  Registers
-- that 'globalRegMaybe' does not map to a real register yield 0.
dwarfGlobalRegNo :: Platform -> GlobalReg -> Word8
dwarfGlobalRegNo plat greg =
  case globalRegMaybe plat greg of
    Nothing      -> 0
    Just realReg -> dwarfRegNo plat (RegReal realReg)
331
-- | Generate code for setting the unwind information of a register.
-- The pair holds the register's previously known expression (if any)
-- and the new one; the old value is used to pick a shorter encoding
-- where possible.  Note that "Sp" is special: we see it as a synonym
-- for the CFA.
pprSetUnwind :: Platform -> GlobalReg -> (Maybe UnwindExpr, UnwindExpr) -> SDoc
-- CFA stays relative to the same register: only the offset changes.
pprSetUnwind _ Sp (Just (UwReg s _), UwReg s' o')
  | s == s' && o' >= 0
  = vcat [ pprByte dW_CFA_def_cfa_offset, pprLEBWord (fromIntegral o') ]
  | s == s'
  = vcat [ pprByte dW_CFA_def_cfa_offset_sf, pprLEBInt o' ]
-- CFA defined as register plus offset.
pprSetUnwind plat Sp (_, UwReg s' o')
  | o' >= 0
  = vcat [ pprByte dW_CFA_def_cfa, baseReg, pprLEBWord (fromIntegral o') ]
  | otherwise
  = vcat [ pprByte dW_CFA_def_cfa_sf, baseReg, pprLEBInt o' ]
  where
    baseReg = pprLEBWord (fromIntegral (dwarfGlobalRegNo plat s'))
-- CFA defined by an arbitrary expression.
pprSetUnwind _ Sp (_, uw)
  = vcat [ pprByte dW_CFA_def_cfa_expression, pprUnwindExpr False uw ]
-- Register saved at an offset from Sp (i.e. from the CFA).
pprSetUnwind plat g (_, UwDeref (UwReg Sp o))
  | o < 0 && (negate o `mod` wordSize) == 0 -- expected case
  = vcat [ pprByte (dW_CFA_offset + regNo)
         , pprLEBWord (fromIntegral (negate o `div` wordSize)) ]
  | otherwise
    -- NOTE(review): the DWARF spec describes the operand of
    -- DW_CFA_offset_extended_sf as a factored offset, while @o@ is
    -- emitted unscaled here - confirm this is intended.
  = vcat [ pprByte dW_CFA_offset_extended_sf
         , pprLEBWord (fromIntegral regNo)
         , pprLEBInt o ]
  where
    wordSize = platformWordSize plat
    regNo    = dwarfGlobalRegNo plat g
-- Register saved at the address computed by an expression.
pprSetUnwind plat g (_, UwDeref uw)
  = vcat [ pprByte dW_CFA_expression
         , pprLEBWord (fromIntegral (dwarfGlobalRegNo plat g))
         , pprUnwindExpr True uw ]
-- Register value is computed by an expression.
pprSetUnwind plat g (_, uw)
  = vcat [ pprByte dW_CFA_val_expression
         , pprLEBWord (fromIntegral (dwarfGlobalRegNo plat g))
         , pprUnwindExpr True uw ]
366
-- | Generates a DWARF expression for the given unwind expression.  If
-- @spIsCFA@ is true, we see @Sp@ as the frame base CFA wherever it
-- gets mentioned.
--
-- The expression is emitted as a block: a leading length byte
-- (computed by the assembler from the local label @1:@) followed by
-- the encoded operations.
pprUnwindExpr :: Bool -> UnwindExpr -> SDoc
pprUnwindExpr spIsCFA expr
  = sdocWithPlatform $ \plat ->
    let pprE (UwConst i)
          | i >= 0 && i < 32 = pprByte (dW_OP_lit0 + fromIntegral i)
          | otherwise        = pprByte dW_OP_consts $$ pprLEBInt i -- lazy...
        pprE (UwReg Sp i) | spIsCFA
                             = if i == 0
                               then pprByte dW_OP_call_frame_cfa
                               -- Must recurse through 'pprE' here: the
                               -- class method 'ppr' from Outputable
                               -- would not produce DWARF opcodes.
                               else pprE (UwPlus (UwReg Sp 0) (UwConst i))
        pprE (UwReg g i)      = pprByte (dW_OP_breg0+dwarfGlobalRegNo plat g) $$
                                pprLEBInt i
        pprE (UwDeref u)      = pprE u $$ pprByte dW_OP_deref
        pprE (UwPlus u1 u2)   = pprE u1 $$ pprE u2 $$ pprByte dW_OP_plus
        pprE (UwMinus u1 u2)  = pprE u1 $$ pprE u2 $$ pprByte dW_OP_minus
        pprE (UwTimes u1 u2)  = pprE u1 $$ pprE u2 $$ pprByte dW_OP_mul
    in ptext (sLit "\t.byte 1f-.-1") $$
       pprE expr $$
       ptext (sLit "1:")
389
-- | Generate code that resets the unwind information of a register to
-- "undefined".  Panics when asked to do so for @Sp@.
pprUndefUnwind :: Platform -> GlobalReg -> SDoc
pprUndefUnwind _    Sp = panic "pprUndefUnwind Sp" -- should never happen
pprUndefUnwind plat g  =
  vcat [ pprByte dW_CFA_undefined
       , pprLEBWord (fromIntegral (dwarfGlobalRegNo plat g))
       ]
396
397
-- | Align assembly at (machine) word boundary.  On Darwin the
-- argument of @.align@ is written as 3 (8-byte words) or 2 (4-byte
-- words); on every other OS the word size itself is written.
wordAlign :: SDoc
wordAlign = sdocWithPlatform $ \plat ->
  ptext (sLit "\t.align ") <> alignArg (platformOS plat) (platformWordSize plat)
  where
    alignArg OSDarwin 8    = text "3"
    alignArg OSDarwin 4    = text "2"
    alignArg OSDarwin _    = error "wordAlign: Unsupported word size!"
    alignArg _        size = ppr size
407
-- | Assembly directive for one byte of constant DWARF data.
pprByte :: Word8 -> SDoc
pprByte x = ptext (sLit "\t.byte ") <> ppr value
  where
    value :: Word
    value = fromIntegral x
411
-- | Assembly directive for a constant two-byte integer.
pprHalf :: Word16 -> SDoc
pprHalf x = ptext (sLit "\t.hword ") <> ppr value
  where
    value :: Word
    value = fromIntegral x
415
-- | Assembly for a constant DWARF flag: a single byte that is @0xff@
-- for 'True' and @0x00@ for 'False'.
pprFlag :: Bool -> SDoc
pprFlag True  = pprByte 0xff
pprFlag False = pprByte 0x00
419
-- | Assembly for 4 bytes of dynamic DWARF data: the given document is
-- used as the operand of a @.long@ directive.
pprData4' :: SDoc -> SDoc
pprData4' operand = hcat [ ptext (sLit "\t.long "), operand ]
423
-- | Assembly for 4 bytes of constant DWARF data.
pprData4 :: Word -> SDoc
pprData4 value = pprData4' (ppr value)
427
-- | Assembly for a DWARF word of dynamic data.  As we generate 32 bit
-- DWARF, this is always 4 bytes.
pprDwWord :: SDoc -> SDoc
pprDwWord operand = pprData4' operand
432
-- | Assembly for a machine word of dynamic data.  The directive
-- depends on the word size of the platform we are generating code
-- for; word sizes other than 4 or 8 bytes cause a panic.
pprWord :: SDoc -> SDoc
pprWord s = sdocWithPlatform directive <> s
  where
    directive plat = case platformWordSize plat of
      4 -> ptext (sLit "\t.long ")
      8 -> ptext (sLit "\t.quad ")
      n -> panic $ "pprWord: Unsupported target platform word length " ++
                   show n ++ "!"
442
-- | Prints a number in "little endian base 128" format.  Small
-- numbers are optimized for by stopping as soon as all remaining
-- bytes would be 0.  The highest bit of every byte signals whether
-- more bytes follow.
pprLEBWord :: Word -> SDoc
pprLEBWord x
  | rest == 0 = pprByte (fromIntegral low)
  | otherwise = pprByte (fromIntegral (low .|. 0x80)) $$ pprLEBWord rest
  where
    -- low: the 7 least significant bits; rest: everything above them
    (rest, low) = x `quotRem` 128
451
-- | Same as 'pprLEBWord', but for a signed number.  Output stops once
-- the remaining value fits into the 7 bits of the final byte
-- (-64 to 63).
pprLEBInt :: Int -> SDoc
pprLEBInt x
  | (-64) <= x && x < 64 = pprByte (fromIntegral low)
  | otherwise            = pprByte (fromIntegral (low .|. 0x80)) $$
                           pprLEBInt rest
  where
    -- 'divMod' rounds towards negative infinity, which gives the 7
    -- low bits and the sign-preserving remainder of the number.
    (rest, low) = x `divMod` 128
458
-- | Generates a dynamic null-terminated string (@.asciz@).  No
-- escaping is done here: the caller has to make sure the contents are
-- escaped properly where required.
pprString' :: SDoc -> SDoc
pprString' str = hcat [ ptext (sLit "\t.asciz \""), str, char '"' ]
463
-- | Generate a string constant, taking care of escaping.  If the
-- UTF-8 encoding of the string is longer than the string itself, the
-- encoded bytes are escaped instead of the original characters.
pprString :: String -> SDoc
pprString str = pprString' (hcat (map escapeChar payload))
  where
    payload
      | utf8EncodedLength str == length str = str
      | otherwise = map (chr . fromIntegral) (bytesFS (mkFastString str))
471
-- | Escape a single non-unicode character.  Backslash, double quote
-- and newline get symbolic escapes, other printable ASCII characters
-- are passed through, and everything else becomes a three-digit octal
-- escape.
escapeChar :: Char -> SDoc
escapeChar c = case c of
  '\\' -> ptext (sLit "\\\\")
  '\"' -> ptext (sLit "\\\"")
  '\n' -> ptext (sLit "\\n")
  _ | isAscii c && isPrint c && c /= '?' -- prevents trigraph warnings
      -> char c
    | otherwise
      -> hcat (char '\\' : map (char . intToDigit) [d2, d1, d0])
  where
    -- the three octal digits of the character code, most significant first
    (d2, low6) = ord c `divMod` 64
    (d1, d0)   = low6 `divMod` 8
485
-- | Generate an offset into another section.  This is tricky because
-- each platform wants it spelled differently: Mac OS expects us to
-- calculate the offset with assembler arithmetic, Windows has a
-- special directive for section-relative offsets, and everywhere else
-- (e.g. Linux) we just reference the target and the toolchain works
-- out that an offset is needed.
sectionOffset :: SDoc -> SDoc -> SDoc
sectionOffset target section = sdocWithPlatform (forOS . platformOS)
  where
    forOS OSDarwin  = pprDwWord (target <> char '-' <> section)
    forOS OSMinGW32 = text "\t.secrel32 " <> target
    forOS _         = pprDwWord target