Allow top-level string literals in Core (#8472)
[ghc.git] / compiler / codeGen / StgCmm.hs
1 {-# LANGUAGE CPP #-}
2
3 -----------------------------------------------------------------------------
4 --
5 -- Stg to C-- code generation
6 --
7 -- (c) The University of Glasgow 2004-2006
8 --
9 -----------------------------------------------------------------------------
10
11 module StgCmm ( codeGen ) where
12
13 #include "HsVersions.h"
14
15 import StgCmmProf (initCostCentres, ldvEnter)
16 import StgCmmMonad
17 import StgCmmEnv
18 import StgCmmBind
19 import StgCmmCon
20 import StgCmmLayout
21 import StgCmmUtils
22 import StgCmmClosure
23 import StgCmmHpc
24 import StgCmmTicky
25
26 import Cmm
27 import CmmUtils
28 import CLabel
29
30 import StgSyn
31 import DynFlags
32
33 import HscTypes
34 import CostCentre
35 import Id
36 import IdInfo
37 import RepType
38 import DataCon
39 import Name
40 import TyCon
41 import Module
42 import Outputable
43 import Stream
44 import BasicTypes
45
46 import OrdList
47 import MkGraph
48
49 import qualified Data.ByteString as BS
50 import Data.IORef
51 import Control.Monad (when,void)
52 import Util
53
54 codeGen :: DynFlags
55 -> Module
56 -> [TyCon]
57 -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering.
58 -> [StgTopBinding] -- Bindings to convert
59 -> HpcInfo
60 -> Stream IO CmmGroup () -- Output as a stream, so codegen can
61 -- be interleaved with output
62
63 codeGen dflags this_mod data_tycons
64 cost_centre_info stg_binds hpc_info
65 = do { -- cg: run the code generator, and yield the resulting CmmGroup
66 -- Using an IORef to store the state is a bit crude, but otherwise
67 -- we would need to add a state monad layer.
68 ; cgref <- liftIO $ newIORef =<< initC
69 ; let cg :: FCode () -> Stream IO CmmGroup ()
70 cg fcode = do
71 cmm <- liftIO $ do
72 st <- readIORef cgref
73 let (a,st') = runC dflags this_mod st (getCmm fcode)
74
75 -- NB. stub-out cgs_tops and cgs_stmts. This fixes
76 -- a big space leak. DO NOT REMOVE!
77 writeIORef cgref $! st'{ cgs_tops = nilOL,
78 cgs_stmts = mkNop }
79 return a
80 yield cmm
81
82 -- Note [codegen-split-init] the cmm_init block must come
83 -- FIRST. This is because when -split-objs is on we need to
84 -- combine this block with its initialisation routines; see
85 -- Note [pipeline-split-init].
86 ; cg (mkModuleInit cost_centre_info this_mod hpc_info)
87
88 ; mapM_ (cg . cgTopBinding dflags) stg_binds
89
90 -- Put datatype_stuff after code_stuff, because the
91 -- datatype closure table (for enumeration types) to
92 -- (say) PrelBase_True_closure, which is defined in
93 -- code_stuff
94 ; let do_tycon tycon = do
95 -- Generate a table of static closures for an
96 -- enumeration type Note that the closure pointers are
97 -- tagged.
98 when (isEnumerationTyCon tycon) $ cg (cgEnumerationTyCon tycon)
99 mapM_ (cg . cgDataCon) (tyConDataCons tycon)
100
101 ; mapM_ do_tycon data_tycons
102 }
103
104 ---------------------------------------------------------------
105 -- Top-level bindings
106 ---------------------------------------------------------------
107
108 {- 'cgTopBinding' is only used for top-level bindings, since they need
109 to be allocated statically (not in the heap) and need to be labelled.
110 No unboxed bindings can happen at top level.
111
112 In the code below, the static bindings are accumulated in the
113 @MkCgState@, and transferred into the ``statics'' slot by @forkStatics@.
114 This is so that we can write the top level processing in a compositional
115 style, with the increasing static environment being plumbed as a state
116 variable. -}
117
118 cgTopBinding :: DynFlags -> StgTopBinding -> FCode ()
119 cgTopBinding dflags (StgTopLifted (StgNonRec id rhs))
120 = do { id' <- maybeExternaliseId dflags id
121 ; let (info, fcode) = cgTopRhs dflags NonRecursive id' rhs
122 ; fcode
123 ; addBindC info -- Add the *un-externalised* Id to the envt,
124 -- so we find it when we look up occurrences
125 }
126
127 cgTopBinding dflags (StgTopLifted (StgRec pairs))
128 = do { let (bndrs, rhss) = unzip pairs
129 ; bndrs' <- Prelude.mapM (maybeExternaliseId dflags) bndrs
130 ; let pairs' = zip bndrs' rhss
131 r = unzipWith (cgTopRhs dflags Recursive) pairs'
132 (infos, fcodes) = unzip r
133 ; addBindsC infos
134 ; sequence_ fcodes
135 }
136
137 cgTopBinding dflags (StgTopStringLit id str)
138 = do { id' <- maybeExternaliseId dflags id
139 ; let label = mkBytesLabel (idName id')
140 ; let (lit, decl) = mkByteStringCLit label (BS.unpack str)
141 ; emitDecl decl
142 ; addBindC (litIdInfo dflags id' mkLFStringLit lit)
143 }
144
145 cgTopRhs :: DynFlags -> RecFlag -> Id -> StgRhs -> (CgIdInfo, FCode ())
146 -- The Id is passed along for setting up a binding...
147 -- It's already been externalised if necessary
148
149 cgTopRhs dflags _rec bndr (StgRhsCon _cc con args)
150 = cgTopRhsCon dflags bndr con (assertNonVoidStgArgs args)
151 -- con args are always non-void,
152 -- see Note [Post-unarisation invariants] in UnariseStg
153
154 cgTopRhs dflags rec bndr (StgRhsClosure cc bi fvs upd_flag args body)
155 = ASSERT(null fvs) -- There should be no free variables
156 cgTopRhsClosure dflags rec bndr cc bi upd_flag args body
157
158
159 ---------------------------------------------------------------
160 -- Module initialisation code
161 ---------------------------------------------------------------
162
163 {- The module initialisation code looks like this, roughly:
164
165 FN(__stginit_Foo) {
166 JMP_(__stginit_Foo_1_p)
167 }
168
169 FN(__stginit_Foo_1_p) {
170 ...
171 }
172
173 We have one version of the init code with a module version and the
174 'way' attached to it. The version number helps to catch cases
175 where modules are not compiled in dependency order before being
176 linked: if a module has been compiled since any modules which depend on
177 it, then the latter modules will refer to a different version in their
178 init blocks and a link error will ensue.
179
180 The 'way' suffix helps to catch cases where modules compiled in different
181 ways are linked together (eg. profiled and non-profiled).
182
183 We provide a plain, unadorned, version of the module init code
184 which just jumps to the version with the label and way attached. The
185 reason for this is that when using foreign exports, the caller of
186 startupHaskell() must supply the name of the init function for the "top"
187 module in the program, and we don't want to require that this name
188 has the version and way info appended to it.
189
190 We initialise the module tree by keeping a work-stack,
191 * pointed to by Sp
192 * that grows downward
193 * Sp points to the last occupied slot
194 -}
195
196 mkModuleInit
197 :: CollectedCCs -- cost centre info
198 -> Module
199 -> HpcInfo
200 -> FCode ()
201
202 mkModuleInit cost_centre_info this_mod hpc_info
203 = do { initHpc this_mod hpc_info
204 ; initCostCentres cost_centre_info
205 -- For backwards compatibility: user code may refer to this
206 -- label for calling hs_add_root().
207 ; let lbl = mkPlainModuleInitLabel this_mod
208 ; emitDecl (CmmData (Section Data lbl) (Statics lbl []))
209 }
210
211
212 ---------------------------------------------------------------
213 -- Generating static stuff for algebraic data types
214 ---------------------------------------------------------------
215
216
217 cgEnumerationTyCon :: TyCon -> FCode ()
218 cgEnumerationTyCon tycon
219 = do dflags <- getDynFlags
220 emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs)
221 [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs)
222 (tagForCon dflags con)
223 | con <- tyConDataCons tycon]
224
225
226 cgDataCon :: DataCon -> FCode ()
227 -- Generate the entry code, info tables, and (for niladic constructor)
228 -- the static closure, for a constructor.
229 cgDataCon data_con
230 = do { dflags <- getDynFlags
231 ; let
232 (tot_wds, -- #ptr_wds + #nonptr_wds
233 ptr_wds) -- #ptr_wds
234 = mkVirtConstrSizes dflags arg_reps
235
236 nonptr_wds = tot_wds - ptr_wds
237
238 dyn_info_tbl =
239 mkDataConInfoTable dflags data_con False ptr_wds nonptr_wds
240
241 -- We're generating info tables, so we don't know and care about
242 -- what the actual arguments are. Using () here as the place holder.
243 arg_reps :: [NonVoid PrimRep]
244 arg_reps = [ NonVoid rep_ty
245 | ty <- dataConRepArgTys data_con
246 , rep_ty <- typePrimRep ty
247 , not (isVoidRep rep_ty) ]
248
249 ; emitClosureAndInfoTable dyn_info_tbl NativeDirectCall [] $
250 -- NB: the closure pointer is assumed *untagged* on
251 -- entry to a constructor. If the pointer is tagged,
252 -- then we should not be entering it. This assumption
253 -- is used in ldvEnter and when tagging the pointer to
254 -- return it.
255 -- NB 2: We don't set CC when entering data (WDP 94/06)
256 do { tickyEnterDynCon
257 ; ldvEnter (CmmReg nodeReg)
258 ; tickyReturnOldCon (length arg_reps)
259 ; void $ emitReturn [cmmOffsetB dflags (CmmReg nodeReg) (tagForCon dflags data_con)]
260 }
261 -- The case continuation code expects a tagged pointer
262 }
263
264 ---------------------------------------------------------------
265 -- Stuff to support splitting
266 ---------------------------------------------------------------
267
268 maybeExternaliseId :: DynFlags -> Id -> FCode Id
269 maybeExternaliseId dflags id
270 | gopt Opt_SplitObjs dflags, -- See Note [Externalise when splitting]
271 -- in StgCmmMonad
272 isInternalName name = do { mod <- getModuleName
273 ; returnFC (setIdName id (externalise mod)) }
274 | otherwise = returnFC id
275 where
276 externalise mod = mkExternalName uniq mod new_occ loc
277 name = idName id
278 uniq = nameUnique name
279 new_occ = mkLocalOcc uniq (nameOccName name)
280 loc = nameSrcSpan name
281 -- We want to conjure up a name that can't clash with any
282 -- existing name. So we generate
283 -- Mod_$L243foo
284 -- where 243 is the unique.