Fix a couple of bugs in the way info tables are generated for 64-bit platforms
[ghc.git] / compiler / ghci / DebuggerUtils.hs
1 {-# LANGUAGE CPP #-}
2
3 module DebuggerUtils (
4 dataConInfoPtrToName,
5 ) where
6
7 import CmmInfo ( stdInfoTableSizeB )
8 import ByteCodeItbls
9 import DynFlags
10 import FastString
11 import TcRnTypes
12 import TcRnMonad
13 import IfaceEnv
14 import Module
15 import OccName
16 import Name
17 import Outputable
18 import Platform
19 import Util
20
21 import Data.Char
22 import Foreign
23 import Data.List
24
25 #include "HsVersions.h"
26
27 -- | Given a data constructor in the heap, find its Name.
28 -- The info tables for data constructors have a field which records
29 -- the source name of the constructor as a Ptr Word8 (UTF-8 encoded
30 -- string). The format is:
31 --
32 -- > Package:Module.Name
33 --
34 -- We use this string to lookup the interpreter's internal representation of the name
35 -- using the lookupOrig.
36 --
37 dataConInfoPtrToName :: Ptr () -> TcM (Either String Name)
38 dataConInfoPtrToName x = do
39 dflags <- getDynFlags
40 theString <- liftIO $ do
41 let ptr = castPtr x :: Ptr StgInfoTable
42 conDescAddress <- getConDescAddress dflags ptr
43 peekArray0 0 conDescAddress
44 let (pkg, mod, occ) = parse theString
45 pkgFS = mkFastStringByteList pkg
46 modFS = mkFastStringByteList mod
47 occFS = mkFastStringByteList occ
48 occName = mkOccNameFS OccName.dataName occFS
49 modName = mkModule (fsToPackageKey pkgFS) (mkModuleNameFS modFS)
50 return (Left $ showSDoc dflags $ ppr modName <> dot <> ppr occName)
51 `recoverM` (Right `fmap` lookupOrig modName occName)
52
53 where
54
55 {- To find the string in the constructor's info table we need to consider
56 the layout of info tables relative to the entry code for a closure.
57
58 An info table can be next to the entry code for the closure, or it can
59 be separate. The former (faster) is used in registerised versions of ghc,
60 and the latter (portable) is for non-registerised versions.
61
62 The diagrams below show where the string is to be found relative to
63 the normal info table of the closure.
64
65 1) Code next to table:
66
67 --------------
68 | | <- pointer to the start of the string
69 --------------
70 | | <- the (start of the) info table structure
71 | |
72 | |
73 --------------
74 | entry code |
75 | .... |
76
77 In this case the pointer to the start of the string can be found in
78 the memory location _one word before_ the first entry in the normal info
79 table.
80
81 2) Code NOT next to table:
82
83 --------------
84 info table structure -> | *------------------> --------------
85 | | | entry code |
86 | | | .... |
87 --------------
88 ptr to start of str -> | |
89 --------------
90
91 In this case the pointer to the start of the string can be found
92 in the memory location: info_table_ptr + info_table_size
93 -}
94
95 getConDescAddress :: DynFlags -> Ptr StgInfoTable -> IO (Ptr Word8)
96 getConDescAddress dflags ptr
97 | ghciTablesNextToCode = do
98 let ptr' = ptr `plusPtr` (- wORD_SIZE dflags)
99 -- offsetToString is really an StgWord, but we have to jump
100 -- through some hoops due to the way that our StgWord Haskell
101 -- type is the same on 32 and 64bit platforms
102 offsetToString <- case platformWordSize (targetPlatform dflags) of
103 4 -> do w <- peek ptr'
104 return (fromIntegral (w :: Word32))
105 8 -> do w <- peek ptr'
106 return (fromIntegral (w :: Word32))
107 w -> panic ("getConDescAddress: Unknown platformWordSize: " ++ show w)
108 return $ (ptr `plusPtr` stdInfoTableSizeB dflags) `plusPtr` offsetToString
109 | otherwise =
110 peek $ intPtrToPtr $ ptrToIntPtr ptr + fromIntegral (stdInfoTableSizeB dflags)
111 -- parsing names is a little bit fiddly because we have a string in the form:
112 -- pkg:A.B.C.foo, and we want to split it into three parts: ("pkg", "A.B.C", "foo").
113 -- Thus we split at the leftmost colon and the rightmost occurrence of the dot.
114 -- It would be easier if the string was in the form pkg:A.B.C:foo, but alas
115 -- this is not the conventional way of writing Haskell names. We stick with
116 -- convention, even though it makes the parsing code more troublesome.
117 -- Warning: this code assumes that the string is well formed.
118 parse :: [Word8] -> ([Word8], [Word8], [Word8])
119 parse input
120 = ASSERT(all (>0) (map length [pkg, mod, occ])) (pkg, mod, occ)
121 where
122 dot = fromIntegral (ord '.')
123 (pkg, rest1) = break (== fromIntegral (ord ':')) input
124 (mod, occ)
125 = (concat $ intersperse [dot] $ reverse modWords, occWord)
126 where
127 (modWords, occWord) = ASSERT(length rest1 > 0) (parseModOcc [] (tail rest1))
128 parseModOcc :: [[Word8]] -> [Word8] -> ([[Word8]], [Word8])
129 -- We only look for dots if str could start with a module name,
130 -- i.e. if it starts with an upper case character.
131 -- Otherwise we might think that "X.:->" is the module name in
132 -- "X.:->.+", whereas actually "X" is the module name and
133 -- ":->.+" is a constructor name.
134 parseModOcc acc str@(c : _)
135 | isUpper $ chr $ fromIntegral c
136 = case break (== dot) str of
137 (top, []) -> (acc, top)
138 (top, _ : bot) -> parseModOcc (top : acc) bot
139 parseModOcc acc str = (acc, str)