Fix the generation of CallerSaves; fixes #7163
[ghc.git] / compiler / codeGen / CgUtils.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generator utilities; mostly monadic
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module CgUtils (
10 addIdReps,
11 cgLit,
12 emitDataLits, mkDataLits,
13 emitRODataLits, mkRODataLits,
14 emitIf, emitIfThenElse,
15 emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult,
16 emitRtsCallGen,
17 assignTemp, assignTemp_, newTemp,
18 emitSimultaneously,
19 emitSwitch, emitLitSwitch,
20 tagToClosure,
21
22 callerSaves, callerSaveVolatileRegs, get_GlobalReg_addr,
23 activeStgRegs, fixStgRegisters,
24
25 cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
26 cmmUGtWord, cmmSubWord, cmmMulWord, cmmAddWord, cmmUShrWord,
27 cmmOffsetExprW, cmmOffsetExprB,
28 cmmRegOffW, cmmRegOffB,
29 cmmLabelOffW, cmmLabelOffB,
30 cmmOffsetW, cmmOffsetB,
31 cmmOffsetLitW, cmmOffsetLitB,
32 cmmLoadIndexW,
33 cmmConstrTag, cmmConstrTag1,
34
35 tagForCon, tagCons, isSmallFamily,
36 cmmUntag, cmmIsTagged, cmmGetTag,
37
38 addToMem, addToMemE,
39 mkWordCLit,
40 newStringCLit, newByteStringCLit,
41 packHalfWordsCLit,
42 blankWord,
43
44 getSRTInfo
45 ) where
46
47 #include "HsVersions.h"
48 #include "../includes/stg/HaskellMachRegs.h"
49
50 import BlockId
51 import CodeGen.CallerSaves
52 import CgMonad
53 import TyCon
54 import DataCon
55 import Id
56 import IdInfo
57 import Constants
58 import SMRep
59 import OldCmm
60 import OldCmmUtils
61 import CLabel
62 import ForeignCall
63 import ClosureInfo
64 import StgSyn (SRT(..))
65 import Module
66 import Literal
67 import Digraph
68 import ListSetOps
69 import Util
70 import DynFlags
71 import FastString
72 import Outputable
73
74 import Data.Char
75 import Data.Word
76 import Data.List
77 import Data.Maybe
78 import Data.Ord
79
80 -------------------------------------------------------------------------
81 --
82 -- Random small functions
83 --
84 -------------------------------------------------------------------------
85
86 addIdReps :: [Id] -> [(CgRep, Id)]
87 addIdReps ids = [(idCgRep id, id) | id <- ids]
88
89 -------------------------------------------------------------------------
90 --
91 -- Literals
92 --
93 -------------------------------------------------------------------------
94
95 cgLit :: Literal -> FCode CmmLit
96 cgLit (MachStr s) = newByteStringCLit (bytesFB s)
97 cgLit other_lit = return (mkSimpleLit other_lit)
98
99 mkSimpleLit :: Literal -> CmmLit
100 mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth
101 mkSimpleLit MachNullAddr = zeroCLit
102 mkSimpleLit (MachInt i) = CmmInt i wordWidth
103 mkSimpleLit (MachInt64 i) = CmmInt i W64
104 mkSimpleLit (MachWord i) = CmmInt i wordWidth
105 mkSimpleLit (MachWord64 i) = CmmInt i W64
106 mkSimpleLit (MachFloat r) = CmmFloat r W32
107 mkSimpleLit (MachDouble r) = CmmFloat r W64
108 mkSimpleLit (MachLabel fs ms fod)
109 = CmmLabel (mkForeignLabel fs ms labelSrc fod)
110 where
111 -- TODO: Literal labels might not actually be in the current package...
112 labelSrc = ForeignLabelInThisPackage
113 mkSimpleLit (MachStr _) = panic "mkSimpleLit: MachStr"
114 -- No LitInteger's should be left by the time this is called. CorePrep
115 -- should have converted them all to a real core representation.
116 mkSimpleLit (LitInteger {}) = panic "mkSimpleLit: LitInteger"
117
118 mkLtOp :: Literal -> MachOp
119 -- On signed literals we must do a signed comparison
120 mkLtOp (MachInt _) = MO_S_Lt wordWidth
121 mkLtOp (MachFloat _) = MO_F_Lt W32
122 mkLtOp (MachDouble _) = MO_F_Lt W64
123 mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit)))
124
125
126 ---------------------------------------------------
127 --
128 -- Cmm data type functions
129 --
130 ---------------------------------------------------
131
132
133
134 {-
135 The family size of a data type (the number of constructors)
136 can be either:
137 * small, if the family size < 2**tag_bits
138 * big, otherwise.
139
140 Small families can have the constructor tag in the tag
141 bits.
142 Big families only use the tag value 1 to represent
143 evaluatedness.
144 -}
145 isSmallFamily :: Int -> Bool
146 isSmallFamily fam_size = fam_size <= mAX_PTR_TAG
147
148 tagForCon :: DataCon -> ConTagZ
149 tagForCon con = tag
150 where
151 con_tag = dataConTagZ con
152 fam_size = tyConFamilySize (dataConTyCon con)
153 tag | isSmallFamily fam_size = con_tag + 1
154 | otherwise = 1
155
156 --Tag an expression, to do: refactor, this appears in some other module.
157 tagCons :: DataCon -> CmmExpr -> CmmExpr
158 tagCons con expr = cmmOffsetB expr (tagForCon con)
159
160 --------------------------------------------------------------------------
161 --
162 -- Incrementing a memory location
163 --
164 --------------------------------------------------------------------------
165
166 addToMem :: Width -- rep of the counter
167 -> CmmExpr -- Address
168 -> Int -- What to add (a word)
169 -> CmmStmt
170 addToMem width ptr n = addToMemE width ptr (CmmLit (CmmInt (toInteger n) width))
171
172 addToMemE :: Width -- rep of the counter
173 -> CmmExpr -- Address
174 -> CmmExpr -- What to add (a word-typed expression)
175 -> CmmStmt
176 addToMemE width ptr n
177 = CmmStore ptr (CmmMachOp (MO_Add width) [CmmLoad ptr (cmmBits width), n])
178
179 -------------------------------------------------------------------------
180 --
181 -- Converting a closure tag to a closure for enumeration types
182 -- (this is the implementation of tagToEnum#).
183 --
184 -------------------------------------------------------------------------
185
186 tagToClosure :: TyCon -> CmmExpr -> CmmExpr
187 tagToClosure tycon tag
188 = CmmLoad (cmmOffsetExprW closure_tbl tag) gcWord
189 where closure_tbl = CmmLit (CmmLabel lbl)
190 lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs
191
192 -------------------------------------------------------------------------
193 --
194 -- Conditionals and rts calls
195 --
196 -------------------------------------------------------------------------
197
198 emitIf :: CmmExpr -- Boolean
199 -> Code -- Then part
200 -> Code
201 -- Emit (if e then x)
202 -- ToDo: reverse the condition to avoid the extra branch instruction if possible
203 -- (some conditionals aren't reversible. eg. floating point comparisons cannot
204 -- be inverted because there exist some values for which both comparisons
205 -- return False, such as NaN.)
206 emitIf cond then_part
207 = do { then_id <- newLabelC
208 ; join_id <- newLabelC
209 ; stmtC (CmmCondBranch cond then_id)
210 ; stmtC (CmmBranch join_id)
211 ; labelC then_id
212 ; then_part
213 ; labelC join_id
214 }
215
216 emitIfThenElse :: CmmExpr -- Boolean
217 -> Code -- Then part
218 -> Code -- Else part
219 -> Code
220 -- Emit (if e then x else y)
221 emitIfThenElse cond then_part else_part
222 = do { then_id <- newLabelC
223 ; join_id <- newLabelC
224 ; stmtC (CmmCondBranch cond then_id)
225 ; else_part
226 ; stmtC (CmmBranch join_id)
227 ; labelC then_id
228 ; then_part
229 ; labelC join_id
230 }
231
232
233 -- | Emit code to call a Cmm function.
234 emitRtsCall
235 :: PackageId -- ^ package the function is in
236 -> FastString -- ^ name of function
237 -> [CmmHinted CmmExpr] -- ^ function args
238 -> Code -- ^ cmm code
239
240 emitRtsCall pkg fun args = emitRtsCallGen [] pkg fun args Nothing
241 -- The 'Nothing' says "save all global registers"
242
243 emitRtsCallWithVols :: PackageId -> FastString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Code
244 emitRtsCallWithVols pkg fun args vols
245 = emitRtsCallGen [] pkg fun args (Just vols)
246
247 emitRtsCallWithResult
248 :: LocalReg -> ForeignHint
249 -> PackageId -> FastString
250 -> [CmmHinted CmmExpr] -> Code
251
252 emitRtsCallWithResult res hint pkg fun args
253 = emitRtsCallGen [CmmHinted res hint] pkg fun args Nothing
254
255 -- Make a call to an RTS C procedure
256 emitRtsCallGen
257 :: [CmmHinted LocalReg]
258 -> PackageId
259 -> FastString
260 -> [CmmHinted CmmExpr]
261 -> Maybe [GlobalReg]
262 -> Code
263 emitRtsCallGen res pkg fun args vols = do
264 dflags <- getDynFlags
265 let (caller_save, caller_load) = callerSaveVolatileRegs dflags vols
266 stmtsC caller_save
267 stmtC (CmmCall target res args CmmMayReturn)
268 stmtsC caller_load
269 where
270 target = CmmCallee fun_expr CCallConv
271 fun_expr = mkLblExpr (mkCmmCodeLabel pkg fun)
272
273 -----------------------------------------------------------------------------
274 --
275 -- Caller-Save Registers
276 --
277 -----------------------------------------------------------------------------
278
279 -- Here we generate the sequence of saves/restores required around a
280 -- foreign call instruction.
281
282 -- TODO: reconcile with includes/Regs.h
283 -- * Regs.h claims that BaseReg should be saved last and loaded first
284 -- * This might not have been tickled before since BaseReg is callee save
285 -- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim
286 callerSaveVolatileRegs :: DynFlags -> Maybe [GlobalReg]
287 -> ([CmmStmt], [CmmStmt])
288 callerSaveVolatileRegs dflags vols = (caller_save, caller_load)
289 where
290 platform = targetPlatform dflags
291
292 caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save)
293 caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save)
294
295 system_regs = [Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery,
296 {-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ]
297
298 regs_to_save = system_regs ++ vol_list
299
300 vol_list = case vols of Nothing -> all_of_em; Just regs -> regs
301
302 all_of_em = [ VanillaReg n VNonGcPtr | n <- [0..mAX_Vanilla_REG] ]
303 -- The VNonGcPtr is a lie, but I don't think it matters
304 ++ [ FloatReg n | n <- [0..mAX_Float_REG] ]
305 ++ [ DoubleReg n | n <- [0..mAX_Double_REG] ]
306 ++ [ LongReg n | n <- [0..mAX_Long_REG] ]
307
308 callerSaveGlobalReg reg next
309 | callerSaves platform reg =
310 CmmStore (get_GlobalReg_addr reg)
311 (CmmReg (CmmGlobal reg)) : next
312 | otherwise = next
313
314 callerRestoreGlobalReg reg next
315 | callerSaves platform reg =
316 CmmAssign (CmmGlobal reg)
317 (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg))
318 : next
319 | otherwise = next
320
321
322 -- -----------------------------------------------------------------------------
323 -- Information about global registers
324
325 baseRegOffset :: GlobalReg -> Int
326
327 baseRegOffset (VanillaReg 1 _) = oFFSET_StgRegTable_rR1
328 baseRegOffset (VanillaReg 2 _) = oFFSET_StgRegTable_rR2
329 baseRegOffset (VanillaReg 3 _) = oFFSET_StgRegTable_rR3
330 baseRegOffset (VanillaReg 4 _) = oFFSET_StgRegTable_rR4
331 baseRegOffset (VanillaReg 5 _) = oFFSET_StgRegTable_rR5
332 baseRegOffset (VanillaReg 6 _) = oFFSET_StgRegTable_rR6
333 baseRegOffset (VanillaReg 7 _) = oFFSET_StgRegTable_rR7
334 baseRegOffset (VanillaReg 8 _) = oFFSET_StgRegTable_rR8
335 baseRegOffset (VanillaReg 9 _) = oFFSET_StgRegTable_rR9
336 baseRegOffset (VanillaReg 10 _) = oFFSET_StgRegTable_rR10
337 baseRegOffset (VanillaReg n _) = panic ("Registers above R10 are not supported (tried to use R" ++ show n ++ ")")
338 baseRegOffset (FloatReg 1) = oFFSET_StgRegTable_rF1
339 baseRegOffset (FloatReg 2) = oFFSET_StgRegTable_rF2
340 baseRegOffset (FloatReg 3) = oFFSET_StgRegTable_rF3
341 baseRegOffset (FloatReg 4) = oFFSET_StgRegTable_rF4
342 baseRegOffset (FloatReg n) = panic ("Registers above F4 are not supported (tried to use F" ++ show n ++ ")")
343 baseRegOffset (DoubleReg 1) = oFFSET_StgRegTable_rD1
344 baseRegOffset (DoubleReg 2) = oFFSET_StgRegTable_rD2
345 baseRegOffset (DoubleReg n) = panic ("Registers above D2 are not supported (tried to use D" ++ show n ++ ")")
346 baseRegOffset Sp = oFFSET_StgRegTable_rSp
347 baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim
348 baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
349 baseRegOffset (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")")
350 baseRegOffset Hp = oFFSET_StgRegTable_rHp
351 baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
352 baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS
353 baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
354 baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
355 baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
356 baseRegOffset EagerBlackholeInfo = oFFSET_stgEagerBlackholeInfo
357 baseRegOffset GCEnter1 = oFFSET_stgGCEnter1
358 baseRegOffset GCFun = oFFSET_stgGCFun
359 baseRegOffset BaseReg = panic "baseRegOffset:BaseReg"
360 baseRegOffset PicBaseReg = panic "baseRegOffset:PicBaseReg"
361
362
363 -------------------------------------------------------------------------
364 --
365 -- Strings generate a top-level data block
366 --
367 -------------------------------------------------------------------------
368
369 emitDataLits :: CLabel -> [CmmLit] -> Code
370 -- Emit a data-segment data block
371 emitDataLits lbl lits = emitDecl (mkDataLits Data lbl lits)
372
373 emitRODataLits :: String -> CLabel -> [CmmLit] -> Code
374 -- Emit a read-only data block
375 emitRODataLits _caller lbl lits
376 = emitDecl (mkRODataLits lbl lits)
377
378 newStringCLit :: String -> FCode CmmLit
379 -- Make a global definition for the string,
380 -- and return its label
381 newStringCLit str = newByteStringCLit (map (fromIntegral.ord) str)
382
383 newByteStringCLit :: [Word8] -> FCode CmmLit
384 newByteStringCLit bytes
385 = do { uniq <- newUnique
386 ; let (lit, decl) = mkByteStringCLit uniq bytes
387 ; emitDecl decl
388 ; return lit }
389
390 -------------------------------------------------------------------------
391 --
392 -- Assigning expressions to temporaries
393 --
394 -------------------------------------------------------------------------
395
396 -- | If the expression is trivial, return it. Otherwise, assign the
397 -- expression to a temporary register and return an expression
398 -- referring to this register.
399 assignTemp :: CmmExpr -> FCode CmmExpr
400 -- For a non-trivial expression, e, create a local
401 -- variable and assign the expression to it
402 assignTemp e
403 | isTrivialCmmExpr e = return e
404 | otherwise = do { reg <- newTemp (cmmExprType e)
405 ; stmtC (CmmAssign (CmmLocal reg) e)
406 ; return (CmmReg (CmmLocal reg)) }
407
408 -- | If the expression is trivial and doesn't refer to a global
409 -- register, return it. Otherwise, assign the expression to a
410 -- temporary register and return an expression referring to this
411 -- register.
412 assignTemp_ :: CmmExpr -> FCode CmmExpr
413 assignTemp_ e
414 | isTrivialCmmExpr e && hasNoGlobalRegs e = return e
415 | otherwise = do
416 reg <- newTemp (cmmExprType e)
417 stmtC (CmmAssign (CmmLocal reg) e)
418 return (CmmReg (CmmLocal reg))
419
420 newTemp :: CmmType -> FCode LocalReg
421 newTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep) }
422
423 -------------------------------------------------------------------------
424 --
425 -- Building case analysis
426 --
427 -------------------------------------------------------------------------
428
429 emitSwitch
430 :: CmmExpr -- Tag to switch on
431 -> [(ConTagZ, CgStmts)] -- Tagged branches
432 -> Maybe CgStmts -- Default branch (if any)
433 -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour
434 -- outside this range is undefined
435 -> Code
436
437 -- ONLY A DEFAULT BRANCH: no case analysis to do
438 emitSwitch _ [] (Just stmts) _ _
439 = emitCgStmts stmts
440
441 -- Right, off we go
442 emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
443 = -- Just sort the branches before calling mk_sritch
444 do { mb_deflt_id <-
445 case mb_deflt of
446 Nothing -> return Nothing
447 Just stmts -> do id <- forkCgStmts stmts; return (Just id)
448
449 ; dflags <- getDynFlags
450 ; let via_C | HscC <- hscTarget dflags = True
451 | otherwise = False
452
453 ; stmts <- mk_switch tag_expr (sortBy (comparing fst) branches)
454 mb_deflt_id lo_tag hi_tag via_C
455 ; emitCgStmts stmts
456 }
457
458
459 mk_switch :: CmmExpr -> [(ConTagZ, CgStmts)]
460 -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool
461 -> FCode CgStmts
462
463 -- SINGLETON TAG RANGE: no case analysis to do
464 mk_switch _tag_expr [(tag,stmts)] _ lo_tag hi_tag _via_C
465 | lo_tag == hi_tag
466 = ASSERT( tag == lo_tag )
467 return stmts
468
469 -- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
470 mk_switch _tag_expr [(_tag,stmts)] Nothing _lo_tag _hi_tag _via_C
471 = return stmts
472 -- The simplifier might have eliminated a case
473 -- so we may have e.g. case xs of
474 -- [] -> e
475 -- In that situation we can be sure the (:) case
476 -- can't happen, so no need to test
477
478 -- SINGLETON BRANCH: one equality check to do
479 mk_switch tag_expr [(tag,stmts)] (Just deflt) _lo_tag _hi_tag _via_C
480 = return (CmmCondBranch cond deflt `consCgStmt` stmts)
481 where
482 cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
483 -- We have lo_tag < hi_tag, but there's only one branch,
484 -- so there must be a default
485
486 -- ToDo: we might want to check for the two branch case, where one of
487 -- the branches is the tag 0, because comparing '== 0' is likely to be
488 -- more efficient than other kinds of comparison.
489
490 -- DENSE TAG RANGE: use a switch statment.
491 --
492 -- We also use a switch uncoditionally when compiling via C, because
493 -- this will get emitted as a C switch statement and the C compiler
494 -- should do a good job of optimising it. Also, older GCC versions
495 -- (2.95 in particular) have problems compiling the complicated
496 -- if-trees generated by this code, so compiling to a switch every
497 -- time works around that problem.
498 --
499 mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
500 | use_switch -- Use a switch
501 = do { branch_ids <- mapM forkCgStmts (map snd branches)
502 ; let
503 tagged_blk_ids = zip (map fst branches) (map Just branch_ids)
504
505 find_branch :: ConTagZ -> Maybe BlockId
506 find_branch i = assocDefault mb_deflt tagged_blk_ids i
507
508 -- NB. we have eliminated impossible branches at
509 -- either end of the range (see below), so the first
510 -- tag of a real branch is real_lo_tag (not lo_tag).
511 arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]]
512
513 switch_stmt = CmmSwitch (cmmOffset tag_expr (- real_lo_tag)) arms
514
515 ; ASSERT(not (all isNothing arms))
516 return (oneCgStmt switch_stmt)
517 }
518
519 -- if we can knock off a bunch of default cases with one if, then do so
520 | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches
521 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
522 ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch))
523 branch = CmmCondBranch cond deflt
524 ; stmts <- mk_switch tag_expr' branches mb_deflt
525 lowest_branch hi_tag via_C
526 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
527 }
528
529 | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches
530 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
531 ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch))
532 branch = CmmCondBranch cond deflt
533 ; stmts <- mk_switch tag_expr' branches mb_deflt
534 lo_tag highest_branch via_C
535 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
536 }
537
538 | otherwise -- Use an if-tree
539 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
540 -- To avoid duplication
541 ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt
542 lo_tag (mid_tag-1) via_C
543 ; hi_stmts <- mk_switch tag_expr' hi_branches mb_deflt
544 mid_tag hi_tag via_C
545 ; hi_id <- forkCgStmts hi_stmts
546 ; let cond = cmmUGeWord tag_expr' (CmmLit (mkIntCLit mid_tag))
547 branch_stmt = CmmCondBranch cond hi_id
548 ; return (assign_tag `consCgStmt` (branch_stmt `consCgStmt` lo_stmts))
549 }
550 -- we test (e >= mid_tag) rather than (e < mid_tag), because
551 -- the former works better when e is a comparison, and there
552 -- are two tags 0 & 1 (mid_tag == 1). In this case, the code
553 -- generator can reduce the condition to e itself without
554 -- having to reverse the sense of the comparison: comparisons
555 -- can't always be easily reversed (eg. floating
556 -- pt. comparisons).
557 where
558 use_switch = {- pprTrace "mk_switch" (
559 ppr tag_expr <+> text "n_tags:" <+> int n_tags <+>
560 text "branches:" <+> ppr (map fst branches) <+>
561 text "n_branches:" <+> int n_branches <+>
562 text "lo_tag:" <+> int lo_tag <+>
563 text "hi_tag:" <+> int hi_tag <+>
564 text "real_lo_tag:" <+> int real_lo_tag <+>
565 text "real_hi_tag:" <+> int real_hi_tag) $ -}
566 ASSERT( n_branches > 1 && n_tags > 1 )
567 n_tags > 2 && (via_C || (dense && big_enough))
568 -- up to 4 branches we use a decision tree, otherwise
569 -- a switch (== jump table in the NCG). This seems to be
570 -- optimal, and corresponds with what gcc does.
571 big_enough = n_branches > 4
572 dense = n_branches > (n_tags `div` 2)
573 n_branches = length branches
574
575 -- ignore default slots at each end of the range if there's
576 -- no default branch defined.
577 lowest_branch = fst (head branches)
578 highest_branch = fst (last branches)
579
580 real_lo_tag
581 | isNothing mb_deflt = lowest_branch
582 | otherwise = lo_tag
583
584 real_hi_tag
585 | isNothing mb_deflt = highest_branch
586 | otherwise = hi_tag
587
588 n_tags = real_hi_tag - real_lo_tag + 1
589
590 -- INVARIANT: Provided hi_tag > lo_tag (which is true)
591 -- lo_tag <= mid_tag < hi_tag
592 -- lo_branches have tags < mid_tag
593 -- hi_branches have tags >= mid_tag
594
595 (mid_tag,_) = branches !! (n_branches `div` 2)
596 -- 2 branches => n_branches `div` 2 = 1
597 -- => branches !! 1 give the *second* tag
598 -- There are always at least 2 branches here
599
600 (lo_branches, hi_branches) = span is_lo branches
601 is_lo (t,_) = t < mid_tag
602
603 assignTemp' :: CmmExpr -> FCode (CmmStmt, CmmExpr)
604 assignTemp' e
605 | isTrivialCmmExpr e = return (CmmNop, e)
606 | otherwise = do { reg <- newTemp (cmmExprType e)
607 ; return (CmmAssign (CmmLocal reg) e, CmmReg (CmmLocal reg)) }
608
609 emitLitSwitch :: CmmExpr -- Tag to switch on
610 -> [(Literal, CgStmts)] -- Tagged branches
611 -> CgStmts -- Default branch (always)
612 -> Code -- Emit the code
613 -- Used for general literals, whose size might not be a word,
614 -- where there is always a default case, and where we don't know
615 -- the range of values for certain. For simplicity we always generate a tree.
616 --
617 -- ToDo: for integers we could do better here, perhaps by generalising
618 -- mk_switch and using that. --SDM 15/09/2004
619 emitLitSwitch _ [] deflt = emitCgStmts deflt
620 emitLitSwitch scrut branches deflt_blk
621 = do { scrut' <- assignTemp scrut
622 ; deflt_blk_id <- forkCgStmts deflt_blk
623 ; blk <- mk_lit_switch scrut' deflt_blk_id (sortBy (comparing fst) branches)
624 ; emitCgStmts blk }
625
626 mk_lit_switch :: CmmExpr -> BlockId
627 -> [(Literal,CgStmts)]
628 -> FCode CgStmts
629 mk_lit_switch scrut deflt_blk_id [(lit,blk)]
630 = return (consCgStmt if_stmt blk)
631 where
632 cmm_lit = mkSimpleLit lit
633 rep = cmmLitType cmm_lit
634 ne = if isFloatType rep then MO_F_Ne else MO_Ne
635 cond = CmmMachOp (ne (typeWidth rep)) [scrut, CmmLit cmm_lit]
636 if_stmt = CmmCondBranch cond deflt_blk_id
637
638 mk_lit_switch scrut deflt_blk_id branches
639 = do { hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
640 ; lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
641 ; lo_blk_id <- forkCgStmts lo_blk
642 ; let if_stmt = CmmCondBranch cond lo_blk_id
643 ; return (if_stmt `consCgStmt` hi_blk) }
644 where
645 n_branches = length branches
646 (mid_lit,_) = branches !! (n_branches `div` 2)
647 -- See notes above re mid_tag
648
649 (lo_branches, hi_branches) = span is_lo branches
650 is_lo (t,_) = t < mid_lit
651
652 cond = CmmMachOp (mkLtOp mid_lit)
653 [scrut, CmmLit (mkSimpleLit mid_lit)]
654
655 -------------------------------------------------------------------------
656 --
657 -- Simultaneous assignment
658 --
659 -------------------------------------------------------------------------
660
661
662 emitSimultaneously :: CmmStmts -> Code
663 -- Emit code to perform the assignments in the
664 -- input simultaneously, using temporary variables when necessary.
665 --
666 -- The Stmts must be:
667 -- CmmNop, CmmComment, CmmAssign, CmmStore
668 -- and nothing else
669
670
671 -- We use the strongly-connected component algorithm, in which
672 -- * the vertices are the statements
673 -- * an edge goes from s1 to s2 iff
674 -- s1 assigns to something s2 uses
675 -- that is, if s1 should *follow* s2 in the final order
676
677 type CVertex = (Int, CmmStmt) -- Give each vertex a unique number,
678 -- for fast comparison
679
680 emitSimultaneously stmts
681 = codeOnly $
682 case filterOut isNopStmt (stmtList stmts) of
683 -- Remove no-ops
684 [] -> nopC
685 [stmt] -> stmtC stmt -- It's often just one stmt
686 stmt_list -> doSimultaneously1 (zip [(1::Int)..] stmt_list)
687
688 doSimultaneously1 :: [CVertex] -> Code
689 doSimultaneously1 vertices
690 = let
691 edges = [ (vertex, key1, edges_from stmt1)
692 | vertex@(key1, stmt1) <- vertices
693 ]
694 edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
695 stmt1 `mustFollow` stmt2
696 ]
697 components = stronglyConnCompFromEdgedVertices edges
698
699 -- do_components deal with one strongly-connected component
700 -- Not cyclic, or singleton? Just do it
701 do_component (AcyclicSCC (_n, stmt)) = stmtC stmt
702 do_component (CyclicSCC [])
703 = panic "doSimultaneously1: do_component (CyclicSCC [])"
704 do_component (CyclicSCC [(_n, stmt)]) = stmtC stmt
705
706 -- Cyclic? Then go via temporaries. Pick one to
707 -- break the loop and try again with the rest.
708 do_component (CyclicSCC ((_n, first_stmt) : rest))
709 = do { from_temp <- go_via_temp first_stmt
710 ; doSimultaneously1 rest
711 ; stmtC from_temp }
712
713 go_via_temp (CmmAssign dest src)
714 = do { tmp <- newTemp (cmmRegType dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
715 ; stmtC (CmmAssign (CmmLocal tmp) src)
716 ; return (CmmAssign dest (CmmReg (CmmLocal tmp))) }
717 go_via_temp (CmmStore dest src)
718 = do { tmp <- newTemp (cmmExprType src) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
719 ; stmtC (CmmAssign (CmmLocal tmp) src)
720 ; return (CmmStore dest (CmmReg (CmmLocal tmp))) }
721 go_via_temp _ = panic "doSimultaneously1: go_via_temp"
722 in
723 mapCs do_component components
724
725 mustFollow :: CmmStmt -> CmmStmt -> Bool
726 CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt
727 CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprType e)) stmt
728 CmmNop `mustFollow` _ = False
729 CmmComment _ `mustFollow` _ = False
730 _ `mustFollow` _ = panic "mustFollow"
731
732
733 anySrc :: (CmmExpr -> Bool) -> CmmStmt -> Bool
734 -- True if the fn is true of any input of the stmt
735 anySrc p (CmmAssign _ e) = p e
736 anySrc p (CmmStore e1 e2) = p e1 || p e2 -- Might be used in either side
737 anySrc _ (CmmComment _) = False
738 anySrc _ CmmNop = False
739 anySrc _ _ = True -- Conservative
740
741 locUsedIn :: CmmExpr -> CmmType -> CmmExpr -> Bool
742 -- (locUsedIn a r e) checks whether writing to r[a] could affect the value of
743 -- 'e'. Returns True if it's not sure.
744 locUsedIn _ _ (CmmLit _) = False
745 locUsedIn loc rep (CmmLoad e ld_rep) = possiblySameLoc loc rep e ld_rep
746 locUsedIn _ _ (CmmReg _) = False
747 locUsedIn _ _ (CmmRegOff _ _) = False
748 locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es
749 locUsedIn _ _ (CmmStackSlot _ _) = panic "locUsedIn: CmmStackSlot"
750
751 possiblySameLoc :: CmmExpr -> CmmType -> CmmExpr -> CmmType -> Bool
752 -- Assumes that distinct registers (eg Hp, Sp) do not
753 -- point to the same location, nor any offset thereof.
754 possiblySameLoc (CmmReg r1) _ (CmmReg r2) _ = r1 == r2
755 possiblySameLoc (CmmReg r1) _ (CmmRegOff r2 0) _ = r1 == r2
756 possiblySameLoc (CmmRegOff r1 0) _ (CmmReg r2) _ = r1 == r2
757 possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2
758 = r1==r2 && end1 > start2 && end2 > start1
759 where
760 end1 = start1 + widthInBytes (typeWidth rep1)
761 end2 = start2 + widthInBytes (typeWidth rep2)
762
763 possiblySameLoc _ _ (CmmLit _) _ = False
764 possiblySameLoc _ _ _ _ = True -- Conservative
765
766 -------------------------------------------------------------------------
767 --
768 -- Static Reference Tables
769 --
770 -------------------------------------------------------------------------
771
772 -- There is just one SRT for each top level binding; all the nested
773 -- bindings use sub-sections of this SRT. The label is passed down to
774 -- the nested bindings via the monad.
775
776 getSRTInfo :: FCode C_SRT
777 getSRTInfo = do
778 srt_lbl <- getSRTLabel
779 srt <- getSRT
780 case srt of
781 -- TODO: Should we panic in this case?
782 -- Someone obviously thinks there should be an SRT
783 NoSRT -> return NoC_SRT
784 SRTEntries {} -> panic "getSRTInfo: SRTEntries. Perhaps you forgot to run SimplStg?"
785 SRT off len bmp
786 | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape]
787 -> do id <- newUnique
788 let srt_desc_lbl = mkLargeSRTLabel id
789 emitRODataLits "getSRTInfo" srt_desc_lbl
790 ( cmmLabelOffW srt_lbl off
791 : mkWordCLit (fromIntegral len)
792 : map mkWordCLit bmp)
793 return (C_SRT srt_desc_lbl 0 srt_escape)
794
795 | otherwise
796 -> return (C_SRT srt_lbl off (fromIntegral (head bmp)))
797 -- The fromIntegral converts to StgHalfWord
798
799 srt_escape :: StgHalfWord
800 srt_escape = -1
801
802 -- -----------------------------------------------------------------------------
803 --
804 -- STG/Cmm GlobalReg
805 --
806 -- -----------------------------------------------------------------------------
807
808 -- | Here is where the STG register map is defined for each target arch.
809 -- The order matters (for the llvm backend anyway)! We must make sure to
810 -- maintain the order here with the order used in the LLVM calling conventions.
811 -- Note that also, this isn't all registers, just the ones that are currently
812 -- possbily mapped to real registers.
813 activeStgRegs :: [GlobalReg]
814 activeStgRegs = [
815 #ifdef REG_Base
816 BaseReg
817 #endif
818 #ifdef REG_Sp
819 ,Sp
820 #endif
821 #ifdef REG_Hp
822 ,Hp
823 #endif
824 #ifdef REG_R1
825 ,VanillaReg 1 VGcPtr
826 #endif
827 #ifdef REG_R2
828 ,VanillaReg 2 VGcPtr
829 #endif
830 #ifdef REG_R3
831 ,VanillaReg 3 VGcPtr
832 #endif
833 #ifdef REG_R4
834 ,VanillaReg 4 VGcPtr
835 #endif
836 #ifdef REG_R5
837 ,VanillaReg 5 VGcPtr
838 #endif
839 #ifdef REG_R6
840 ,VanillaReg 6 VGcPtr
841 #endif
842 #ifdef REG_R7
843 ,VanillaReg 7 VGcPtr
844 #endif
845 #ifdef REG_R8
846 ,VanillaReg 8 VGcPtr
847 #endif
848 #ifdef REG_R9
849 ,VanillaReg 9 VGcPtr
850 #endif
851 #ifdef REG_R10
852 ,VanillaReg 10 VGcPtr
853 #endif
854 #ifdef REG_SpLim
855 ,SpLim
856 #endif
857 #ifdef REG_F1
858 ,FloatReg 1
859 #endif
860 #ifdef REG_F2
861 ,FloatReg 2
862 #endif
863 #ifdef REG_F3
864 ,FloatReg 3
865 #endif
866 #ifdef REG_F4
867 ,FloatReg 4
868 #endif
869 #ifdef REG_D1
870 ,DoubleReg 1
871 #endif
872 #ifdef REG_D2
873 ,DoubleReg 2
874 #endif
875 ]
876
877 -- | We map STG registers onto appropriate CmmExprs. Either they map
878 -- to real machine registers or stored as offsets from BaseReg. Given
879 -- a GlobalReg, get_GlobalReg_addr always produces the
880 -- register table address for it.
881 get_GlobalReg_addr :: GlobalReg -> CmmExpr
882 get_GlobalReg_addr BaseReg = regTableOffset 0
883 get_GlobalReg_addr mid = get_Regtable_addr_from_offset
884 (globalRegType mid) (baseRegOffset mid)
885
886 -- Calculate a literal representing an offset into the register table.
887 -- Used when we don't have an actual BaseReg to offset from.
888 regTableOffset :: Int -> CmmExpr
889 regTableOffset n =
890 CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n))
891
892 get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr
893 get_Regtable_addr_from_offset _ offset =
894 #ifdef REG_Base
895 CmmRegOff (CmmGlobal BaseReg) offset
896 #else
897 regTableOffset offset
898 #endif
899
900 -- | Fixup global registers so that they assign to locations within the
901 -- RegTable if they aren't pinned for the current target.
902 fixStgRegisters :: RawCmmDecl -> RawCmmDecl
903 fixStgRegisters top@(CmmData _ _) = top
904
905 fixStgRegisters (CmmProc info lbl (ListGraph blocks)) =
906 let blocks' = map fixStgRegBlock blocks
907 in CmmProc info lbl $ ListGraph blocks'
908
909 fixStgRegBlock :: CmmBasicBlock -> CmmBasicBlock
910 fixStgRegBlock (BasicBlock id stmts) =
911 let stmts' = map fixStgRegStmt stmts
912 in BasicBlock id stmts'
913
914 fixStgRegStmt :: CmmStmt -> CmmStmt
915 fixStgRegStmt stmt
916 = case stmt of
917 CmmAssign (CmmGlobal reg) src ->
918 let src' = fixStgRegExpr src
919 baseAddr = get_GlobalReg_addr reg
920 in case reg `elem` activeStgRegs of
921 True -> CmmAssign (CmmGlobal reg) src'
922 False -> CmmStore baseAddr src'
923
924 CmmAssign reg src ->
925 let src' = fixStgRegExpr src
926 in CmmAssign reg src'
927
928 CmmStore addr src -> CmmStore (fixStgRegExpr addr) (fixStgRegExpr src)
929
930 CmmCall target regs args returns ->
931 let target' = case target of
932 CmmCallee e conv -> CmmCallee (fixStgRegExpr e) conv
933 CmmPrim op mStmts ->
934 CmmPrim op (fmap (map fixStgRegStmt) mStmts)
935 args' = map (\(CmmHinted arg hint) ->
936 (CmmHinted (fixStgRegExpr arg) hint)) args
937 in CmmCall target' regs args' returns
938
939 CmmCondBranch test dest -> CmmCondBranch (fixStgRegExpr test) dest
940
941 CmmSwitch expr ids -> CmmSwitch (fixStgRegExpr expr) ids
942
943 CmmJump addr live -> CmmJump (fixStgRegExpr addr) live
944
945 -- CmmNop, CmmComment, CmmBranch, CmmReturn
946 _other -> stmt
947
948
949 fixStgRegExpr :: CmmExpr -> CmmExpr
950 fixStgRegExpr expr
951 = case expr of
952 CmmLoad addr ty -> CmmLoad (fixStgRegExpr addr) ty
953
954 CmmMachOp mop args -> CmmMachOp mop args'
955 where args' = map fixStgRegExpr args
956
957 CmmReg (CmmGlobal reg) ->
958 -- Replace register leaves with appropriate StixTrees for
959 -- the given target. MagicIds which map to a reg on this
960 -- arch are left unchanged. For the rest, BaseReg is taken
961 -- to mean the address of the reg table in MainCapability,
962 -- and for all others we generate an indirection to its
963 -- location in the register table.
964 case reg `elem` activeStgRegs of
965 True -> expr
966 False ->
967 let baseAddr = get_GlobalReg_addr reg
968 in case reg of
969 BaseReg -> fixStgRegExpr baseAddr
970 _other -> fixStgRegExpr
971 (CmmLoad baseAddr (globalRegType reg))
972
973 CmmRegOff (CmmGlobal reg) offset ->
974 -- RegOf leaves are just a shorthand form. If the reg maps
975 -- to a real reg, we keep the shorthand, otherwise, we just
976 -- expand it and defer to the above code.
977 case reg `elem` activeStgRegs of
978 True -> expr
979 False -> fixStgRegExpr (CmmMachOp (MO_Add wordWidth) [
980 CmmReg (CmmGlobal reg),
981 CmmLit (CmmInt (fromIntegral offset)
982 wordWidth)])
983
984 -- CmmLit, CmmReg (CmmLocal), CmmStackSlot
985 _other -> expr
986