Start separating out the RTS and Haskell imports of MachRegs.h
[ghc.git] / compiler / codeGen / CgUtils.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generator utilities; mostly monadic
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module CgUtils (
10 addIdReps,
11 cgLit,
12 emitDataLits, mkDataLits,
13 emitRODataLits, mkRODataLits,
14 emitIf, emitIfThenElse,
15 emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult,
16 emitRtsCallGen,
17 assignTemp, assignTemp_, newTemp,
18 emitSimultaneously,
19 emitSwitch, emitLitSwitch,
20 tagToClosure,
21
22 callerSaves, callerSaveVolatileRegs, get_GlobalReg_addr,
23 activeStgRegs, fixStgRegisters,
24
25 cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
26 cmmUGtWord, cmmSubWord, cmmMulWord, cmmAddWord, cmmUShrWord,
27 cmmOffsetExprW, cmmOffsetExprB,
28 cmmRegOffW, cmmRegOffB,
29 cmmLabelOffW, cmmLabelOffB,
30 cmmOffsetW, cmmOffsetB,
31 cmmOffsetLitW, cmmOffsetLitB,
32 cmmLoadIndexW,
33 cmmConstrTag, cmmConstrTag1,
34
35 tagForCon, tagCons, isSmallFamily,
36 cmmUntag, cmmIsTagged, cmmGetTag,
37
38 addToMem, addToMemE,
39 mkWordCLit,
40 newStringCLit, newByteStringCLit,
41 packHalfWordsCLit,
42 blankWord,
43
44 getSRTInfo
45 ) where
46
47 #include "HsVersions.h"
48 #include "../includes/stg/HaskellMachRegs.h"
49
50 import BlockId
51 import CgMonad
52 import TyCon
53 import DataCon
54 import Id
55 import IdInfo
56 import Constants
57 import SMRep
58 import OldCmm
59 import OldCmmUtils
60 import CLabel
61 import ForeignCall
62 import ClosureInfo
63 import StgSyn (SRT(..))
64 import Module
65 import Literal
66 import Digraph
67 import ListSetOps
68 import Util
69 import DynFlags
70 import FastString
71 import Outputable
72
73 import Data.Char
74 import Data.Word
75 import Data.List
76 import Data.Maybe
77 import Data.Ord
78
79 -------------------------------------------------------------------------
80 --
81 -- Random small functions
82 --
83 -------------------------------------------------------------------------
84
85 addIdReps :: [Id] -> [(CgRep, Id)]
86 addIdReps ids = [(idCgRep id, id) | id <- ids]
87
88 -------------------------------------------------------------------------
89 --
90 -- Literals
91 --
92 -------------------------------------------------------------------------
93
94 cgLit :: Literal -> FCode CmmLit
95 cgLit (MachStr s) = newByteStringCLit (bytesFB s)
96 cgLit other_lit = return (mkSimpleLit other_lit)
97
98 mkSimpleLit :: Literal -> CmmLit
99 mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth
100 mkSimpleLit MachNullAddr = zeroCLit
101 mkSimpleLit (MachInt i) = CmmInt i wordWidth
102 mkSimpleLit (MachInt64 i) = CmmInt i W64
103 mkSimpleLit (MachWord i) = CmmInt i wordWidth
104 mkSimpleLit (MachWord64 i) = CmmInt i W64
105 mkSimpleLit (MachFloat r) = CmmFloat r W32
106 mkSimpleLit (MachDouble r) = CmmFloat r W64
107 mkSimpleLit (MachLabel fs ms fod)
108 = CmmLabel (mkForeignLabel fs ms labelSrc fod)
109 where
110 -- TODO: Literal labels might not actually be in the current package...
111 labelSrc = ForeignLabelInThisPackage
112 mkSimpleLit (MachStr _) = panic "mkSimpleLit: MachStr"
113 -- No LitInteger's should be left by the time this is called. CorePrep
114 -- should have converted them all to a real core representation.
115 mkSimpleLit (LitInteger {}) = panic "mkSimpleLit: LitInteger"
116
117 mkLtOp :: Literal -> MachOp
118 -- On signed literals we must do a signed comparison
119 mkLtOp (MachInt _) = MO_S_Lt wordWidth
120 mkLtOp (MachFloat _) = MO_F_Lt W32
121 mkLtOp (MachDouble _) = MO_F_Lt W64
122 mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit)))
123
124
125 ---------------------------------------------------
126 --
127 -- Cmm data type functions
128 --
129 ---------------------------------------------------
130
131
132
133 {-
134 The family size of a data type (the number of constructors)
135 can be either:
136 * small, if the family size < 2**tag_bits
137 * big, otherwise.
138
139 Small families can have the constructor tag in the tag
140 bits.
141 Big families only use the tag value 1 to represent
142 evaluatedness.
143 -}
144 isSmallFamily :: Int -> Bool
145 isSmallFamily fam_size = fam_size <= mAX_PTR_TAG
146
147 tagForCon :: DataCon -> ConTagZ
148 tagForCon con = tag
149 where
150 con_tag = dataConTagZ con
151 fam_size = tyConFamilySize (dataConTyCon con)
152 tag | isSmallFamily fam_size = con_tag + 1
153 | otherwise = 1
154
155 --Tag an expression, to do: refactor, this appears in some other module.
156 tagCons :: DataCon -> CmmExpr -> CmmExpr
157 tagCons con expr = cmmOffsetB expr (tagForCon con)
158
159 --------------------------------------------------------------------------
160 --
161 -- Incrementing a memory location
162 --
163 --------------------------------------------------------------------------
164
165 addToMem :: Width -- rep of the counter
166 -> CmmExpr -- Address
167 -> Int -- What to add (a word)
168 -> CmmStmt
169 addToMem width ptr n = addToMemE width ptr (CmmLit (CmmInt (toInteger n) width))
170
171 addToMemE :: Width -- rep of the counter
172 -> CmmExpr -- Address
173 -> CmmExpr -- What to add (a word-typed expression)
174 -> CmmStmt
175 addToMemE width ptr n
176 = CmmStore ptr (CmmMachOp (MO_Add width) [CmmLoad ptr (cmmBits width), n])
177
178 -------------------------------------------------------------------------
179 --
180 -- Converting a closure tag to a closure for enumeration types
181 -- (this is the implementation of tagToEnum#).
182 --
183 -------------------------------------------------------------------------
184
185 tagToClosure :: TyCon -> CmmExpr -> CmmExpr
186 tagToClosure tycon tag
187 = CmmLoad (cmmOffsetExprW closure_tbl tag) gcWord
188 where closure_tbl = CmmLit (CmmLabel lbl)
189 lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs
190
191 -------------------------------------------------------------------------
192 --
193 -- Conditionals and rts calls
194 --
195 -------------------------------------------------------------------------
196
197 emitIf :: CmmExpr -- Boolean
198 -> Code -- Then part
199 -> Code
200 -- Emit (if e then x)
201 -- ToDo: reverse the condition to avoid the extra branch instruction if possible
202 -- (some conditionals aren't reversible. eg. floating point comparisons cannot
203 -- be inverted because there exist some values for which both comparisons
204 -- return False, such as NaN.)
205 emitIf cond then_part
206 = do { then_id <- newLabelC
207 ; join_id <- newLabelC
208 ; stmtC (CmmCondBranch cond then_id)
209 ; stmtC (CmmBranch join_id)
210 ; labelC then_id
211 ; then_part
212 ; labelC join_id
213 }
214
215 emitIfThenElse :: CmmExpr -- Boolean
216 -> Code -- Then part
217 -> Code -- Else part
218 -> Code
219 -- Emit (if e then x else y)
220 emitIfThenElse cond then_part else_part
221 = do { then_id <- newLabelC
222 ; join_id <- newLabelC
223 ; stmtC (CmmCondBranch cond then_id)
224 ; else_part
225 ; stmtC (CmmBranch join_id)
226 ; labelC then_id
227 ; then_part
228 ; labelC join_id
229 }
230
231
232 -- | Emit code to call a Cmm function.
233 emitRtsCall
234 :: PackageId -- ^ package the function is in
235 -> FastString -- ^ name of function
236 -> [CmmHinted CmmExpr] -- ^ function args
237 -> Code -- ^ cmm code
238
239 emitRtsCall pkg fun args = emitRtsCallGen [] pkg fun args Nothing
240 -- The 'Nothing' says "save all global registers"
241
242 emitRtsCallWithVols :: PackageId -> FastString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Code
243 emitRtsCallWithVols pkg fun args vols
244 = emitRtsCallGen [] pkg fun args (Just vols)
245
246 emitRtsCallWithResult
247 :: LocalReg -> ForeignHint
248 -> PackageId -> FastString
249 -> [CmmHinted CmmExpr] -> Code
250
251 emitRtsCallWithResult res hint pkg fun args
252 = emitRtsCallGen [CmmHinted res hint] pkg fun args Nothing
253
254 -- Make a call to an RTS C procedure
255 emitRtsCallGen
256 :: [CmmHinted LocalReg]
257 -> PackageId
258 -> FastString
259 -> [CmmHinted CmmExpr]
260 -> Maybe [GlobalReg]
261 -> Code
262 emitRtsCallGen res pkg fun args vols = do
263 stmtsC caller_save
264 stmtC (CmmCall target res args CmmMayReturn)
265 stmtsC caller_load
266 where
267 (caller_save, caller_load) = callerSaveVolatileRegs vols
268 target = CmmCallee fun_expr CCallConv
269 fun_expr = mkLblExpr (mkCmmCodeLabel pkg fun)
270
271 -----------------------------------------------------------------------------
272 --
273 -- Caller-Save Registers
274 --
275 -----------------------------------------------------------------------------
276
277 -- Here we generate the sequence of saves/restores required around a
278 -- foreign call instruction.
279
280 -- TODO: reconcile with includes/Regs.h
281 -- * Regs.h claims that BaseReg should be saved last and loaded first
282 -- * This might not have been tickled before since BaseReg is callee save
283 -- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim
284 callerSaveVolatileRegs :: Maybe [GlobalReg] -> ([CmmStmt], [CmmStmt])
285 callerSaveVolatileRegs vols = (caller_save, caller_load)
286 where
287 caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save)
288 caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save)
289
290 system_regs = [Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery,
291 {-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ]
292
293 regs_to_save = system_regs ++ vol_list
294
295 vol_list = case vols of Nothing -> all_of_em; Just regs -> regs
296
297 all_of_em = [ VanillaReg n VNonGcPtr | n <- [0..mAX_Vanilla_REG] ]
298 -- The VNonGcPtr is a lie, but I don't think it matters
299 ++ [ FloatReg n | n <- [0..mAX_Float_REG] ]
300 ++ [ DoubleReg n | n <- [0..mAX_Double_REG] ]
301 ++ [ LongReg n | n <- [0..mAX_Long_REG] ]
302
303 callerSaveGlobalReg reg next
304 | callerSaves reg =
305 CmmStore (get_GlobalReg_addr reg)
306 (CmmReg (CmmGlobal reg)) : next
307 | otherwise = next
308
309 callerRestoreGlobalReg reg next
310 | callerSaves reg =
311 CmmAssign (CmmGlobal reg)
312 (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg))
313 : next
314 | otherwise = next
315
316
317 -- | Returns @True@ if this global register is stored in a caller-saves
318 -- machine register.
319
320 callerSaves :: GlobalReg -> Bool
321
322 #ifdef CALLER_SAVES_Base
323 callerSaves BaseReg = True
324 #endif
325 #ifdef CALLER_SAVES_R1
326 callerSaves (VanillaReg 1 _) = True
327 #endif
328 #ifdef CALLER_SAVES_R2
329 callerSaves (VanillaReg 2 _) = True
330 #endif
331 #ifdef CALLER_SAVES_R3
332 callerSaves (VanillaReg 3 _) = True
333 #endif
334 #ifdef CALLER_SAVES_R4
335 callerSaves (VanillaReg 4 _) = True
336 #endif
337 #ifdef CALLER_SAVES_R5
338 callerSaves (VanillaReg 5 _) = True
339 #endif
340 #ifdef CALLER_SAVES_R6
341 callerSaves (VanillaReg 6 _) = True
342 #endif
343 #ifdef CALLER_SAVES_R7
344 callerSaves (VanillaReg 7 _) = True
345 #endif
346 #ifdef CALLER_SAVES_R8
347 callerSaves (VanillaReg 8 _) = True
348 #endif
349 #ifdef CALLER_SAVES_R9
350 callerSaves (VanillaReg 9 _) = True
351 #endif
352 #ifdef CALLER_SAVES_R10
353 callerSaves (VanillaReg 10 _) = True
354 #endif
355 #ifdef CALLER_SAVES_F1
356 callerSaves (FloatReg 1) = True
357 #endif
358 #ifdef CALLER_SAVES_F2
359 callerSaves (FloatReg 2) = True
360 #endif
361 #ifdef CALLER_SAVES_F3
362 callerSaves (FloatReg 3) = True
363 #endif
364 #ifdef CALLER_SAVES_F4
365 callerSaves (FloatReg 4) = True
366 #endif
367 #ifdef CALLER_SAVES_D1
368 callerSaves (DoubleReg 1) = True
369 #endif
370 #ifdef CALLER_SAVES_D2
371 callerSaves (DoubleReg 2) = True
372 #endif
373 #ifdef CALLER_SAVES_L1
374 callerSaves (LongReg 1) = True
375 #endif
376 #ifdef CALLER_SAVES_Sp
377 callerSaves Sp = True
378 #endif
379 #ifdef CALLER_SAVES_SpLim
380 callerSaves SpLim = True
381 #endif
382 #ifdef CALLER_SAVES_Hp
383 callerSaves Hp = True
384 #endif
385 #ifdef CALLER_SAVES_HpLim
386 callerSaves HpLim = True
387 #endif
388 #ifdef CALLER_SAVES_CCCS
389 callerSaves CCCS = True
390 #endif
391 #ifdef CALLER_SAVES_CurrentTSO
392 callerSaves CurrentTSO = True
393 #endif
394 #ifdef CALLER_SAVES_CurrentNursery
395 callerSaves CurrentNursery = True
396 #endif
397 callerSaves _ = False
398
399
400 -- -----------------------------------------------------------------------------
401 -- Information about global registers
402
403 baseRegOffset :: GlobalReg -> Int
404
405 baseRegOffset (VanillaReg 1 _) = oFFSET_StgRegTable_rR1
406 baseRegOffset (VanillaReg 2 _) = oFFSET_StgRegTable_rR2
407 baseRegOffset (VanillaReg 3 _) = oFFSET_StgRegTable_rR3
408 baseRegOffset (VanillaReg 4 _) = oFFSET_StgRegTable_rR4
409 baseRegOffset (VanillaReg 5 _) = oFFSET_StgRegTable_rR5
410 baseRegOffset (VanillaReg 6 _) = oFFSET_StgRegTable_rR6
411 baseRegOffset (VanillaReg 7 _) = oFFSET_StgRegTable_rR7
412 baseRegOffset (VanillaReg 8 _) = oFFSET_StgRegTable_rR8
413 baseRegOffset (VanillaReg 9 _) = oFFSET_StgRegTable_rR9
414 baseRegOffset (VanillaReg 10 _) = oFFSET_StgRegTable_rR10
415 baseRegOffset (VanillaReg n _) = panic ("Registers above R10 are not supported (tried to use R" ++ show n ++ ")")
416 baseRegOffset (FloatReg 1) = oFFSET_StgRegTable_rF1
417 baseRegOffset (FloatReg 2) = oFFSET_StgRegTable_rF2
418 baseRegOffset (FloatReg 3) = oFFSET_StgRegTable_rF3
419 baseRegOffset (FloatReg 4) = oFFSET_StgRegTable_rF4
420 baseRegOffset (FloatReg n) = panic ("Registers above F4 are not supported (tried to use F" ++ show n ++ ")")
421 baseRegOffset (DoubleReg 1) = oFFSET_StgRegTable_rD1
422 baseRegOffset (DoubleReg 2) = oFFSET_StgRegTable_rD2
423 baseRegOffset (DoubleReg n) = panic ("Registers above D2 are not supported (tried to use D" ++ show n ++ ")")
424 baseRegOffset Sp = oFFSET_StgRegTable_rSp
425 baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim
426 baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
427 baseRegOffset (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")")
428 baseRegOffset Hp = oFFSET_StgRegTable_rHp
429 baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
430 baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS
431 baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
432 baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
433 baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
434 baseRegOffset EagerBlackholeInfo = oFFSET_stgEagerBlackholeInfo
435 baseRegOffset GCEnter1 = oFFSET_stgGCEnter1
436 baseRegOffset GCFun = oFFSET_stgGCFun
437 baseRegOffset BaseReg = panic "baseRegOffset:BaseReg"
438 baseRegOffset PicBaseReg = panic "baseRegOffset:PicBaseReg"
439
440
441 -------------------------------------------------------------------------
442 --
443 -- Strings generate a top-level data block
444 --
445 -------------------------------------------------------------------------
446
447 emitDataLits :: CLabel -> [CmmLit] -> Code
448 -- Emit a data-segment data block
449 emitDataLits lbl lits = emitDecl (mkDataLits Data lbl lits)
450
451 emitRODataLits :: String -> CLabel -> [CmmLit] -> Code
452 -- Emit a read-only data block
453 emitRODataLits _caller lbl lits
454 = emitDecl (mkRODataLits lbl lits)
455
456 newStringCLit :: String -> FCode CmmLit
457 -- Make a global definition for the string,
458 -- and return its label
459 newStringCLit str = newByteStringCLit (map (fromIntegral.ord) str)
460
461 newByteStringCLit :: [Word8] -> FCode CmmLit
462 newByteStringCLit bytes
463 = do { uniq <- newUnique
464 ; let (lit, decl) = mkByteStringCLit uniq bytes
465 ; emitDecl decl
466 ; return lit }
467
468 -------------------------------------------------------------------------
469 --
470 -- Assigning expressions to temporaries
471 --
472 -------------------------------------------------------------------------
473
474 -- | If the expression is trivial, return it. Otherwise, assign the
475 -- expression to a temporary register and return an expression
476 -- referring to this register.
477 assignTemp :: CmmExpr -> FCode CmmExpr
478 -- For a non-trivial expression, e, create a local
479 -- variable and assign the expression to it
480 assignTemp e
481 | isTrivialCmmExpr e = return e
482 | otherwise = do { reg <- newTemp (cmmExprType e)
483 ; stmtC (CmmAssign (CmmLocal reg) e)
484 ; return (CmmReg (CmmLocal reg)) }
485
486 -- | If the expression is trivial and doesn't refer to a global
487 -- register, return it. Otherwise, assign the expression to a
488 -- temporary register and return an expression referring to this
489 -- register.
490 assignTemp_ :: CmmExpr -> FCode CmmExpr
491 assignTemp_ e
492 | isTrivialCmmExpr e && hasNoGlobalRegs e = return e
493 | otherwise = do
494 reg <- newTemp (cmmExprType e)
495 stmtC (CmmAssign (CmmLocal reg) e)
496 return (CmmReg (CmmLocal reg))
497
498 newTemp :: CmmType -> FCode LocalReg
499 newTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep) }
500
501 -------------------------------------------------------------------------
502 --
503 -- Building case analysis
504 --
505 -------------------------------------------------------------------------
506
507 emitSwitch
508 :: CmmExpr -- Tag to switch on
509 -> [(ConTagZ, CgStmts)] -- Tagged branches
510 -> Maybe CgStmts -- Default branch (if any)
511 -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour
512 -- outside this range is undefined
513 -> Code
514
515 -- ONLY A DEFAULT BRANCH: no case analysis to do
516 emitSwitch _ [] (Just stmts) _ _
517 = emitCgStmts stmts
518
519 -- Right, off we go
520 emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
521 = -- Just sort the branches before calling mk_sritch
522 do { mb_deflt_id <-
523 case mb_deflt of
524 Nothing -> return Nothing
525 Just stmts -> do id <- forkCgStmts stmts; return (Just id)
526
527 ; dflags <- getDynFlags
528 ; let via_C | HscC <- hscTarget dflags = True
529 | otherwise = False
530
531 ; stmts <- mk_switch tag_expr (sortBy (comparing fst) branches)
532 mb_deflt_id lo_tag hi_tag via_C
533 ; emitCgStmts stmts
534 }
535
536
537 mk_switch :: CmmExpr -> [(ConTagZ, CgStmts)]
538 -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool
539 -> FCode CgStmts
540
541 -- SINGLETON TAG RANGE: no case analysis to do
542 mk_switch _tag_expr [(tag,stmts)] _ lo_tag hi_tag _via_C
543 | lo_tag == hi_tag
544 = ASSERT( tag == lo_tag )
545 return stmts
546
547 -- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
548 mk_switch _tag_expr [(_tag,stmts)] Nothing _lo_tag _hi_tag _via_C
549 = return stmts
550 -- The simplifier might have eliminated a case
551 -- so we may have e.g. case xs of
552 -- [] -> e
553 -- In that situation we can be sure the (:) case
554 -- can't happen, so no need to test
555
556 -- SINGLETON BRANCH: one equality check to do
557 mk_switch tag_expr [(tag,stmts)] (Just deflt) _lo_tag _hi_tag _via_C
558 = return (CmmCondBranch cond deflt `consCgStmt` stmts)
559 where
560 cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
561 -- We have lo_tag < hi_tag, but there's only one branch,
562 -- so there must be a default
563
564 -- ToDo: we might want to check for the two branch case, where one of
565 -- the branches is the tag 0, because comparing '== 0' is likely to be
566 -- more efficient than other kinds of comparison.
567
568 -- DENSE TAG RANGE: use a switch statment.
569 --
570 -- We also use a switch uncoditionally when compiling via C, because
571 -- this will get emitted as a C switch statement and the C compiler
572 -- should do a good job of optimising it. Also, older GCC versions
573 -- (2.95 in particular) have problems compiling the complicated
574 -- if-trees generated by this code, so compiling to a switch every
575 -- time works around that problem.
576 --
577 mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
578 | use_switch -- Use a switch
579 = do { branch_ids <- mapM forkCgStmts (map snd branches)
580 ; let
581 tagged_blk_ids = zip (map fst branches) (map Just branch_ids)
582
583 find_branch :: ConTagZ -> Maybe BlockId
584 find_branch i = assocDefault mb_deflt tagged_blk_ids i
585
586 -- NB. we have eliminated impossible branches at
587 -- either end of the range (see below), so the first
588 -- tag of a real branch is real_lo_tag (not lo_tag).
589 arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]]
590
591 switch_stmt = CmmSwitch (cmmOffset tag_expr (- real_lo_tag)) arms
592
593 ; ASSERT(not (all isNothing arms))
594 return (oneCgStmt switch_stmt)
595 }
596
597 -- if we can knock off a bunch of default cases with one if, then do so
598 | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches
599 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
600 ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch))
601 branch = CmmCondBranch cond deflt
602 ; stmts <- mk_switch tag_expr' branches mb_deflt
603 lowest_branch hi_tag via_C
604 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
605 }
606
607 | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches
608 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
609 ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch))
610 branch = CmmCondBranch cond deflt
611 ; stmts <- mk_switch tag_expr' branches mb_deflt
612 lo_tag highest_branch via_C
613 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
614 }
615
616 | otherwise -- Use an if-tree
617 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
618 -- To avoid duplication
619 ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt
620 lo_tag (mid_tag-1) via_C
621 ; hi_stmts <- mk_switch tag_expr' hi_branches mb_deflt
622 mid_tag hi_tag via_C
623 ; hi_id <- forkCgStmts hi_stmts
624 ; let cond = cmmUGeWord tag_expr' (CmmLit (mkIntCLit mid_tag))
625 branch_stmt = CmmCondBranch cond hi_id
626 ; return (assign_tag `consCgStmt` (branch_stmt `consCgStmt` lo_stmts))
627 }
628 -- we test (e >= mid_tag) rather than (e < mid_tag), because
629 -- the former works better when e is a comparison, and there
630 -- are two tags 0 & 1 (mid_tag == 1). In this case, the code
631 -- generator can reduce the condition to e itself without
632 -- having to reverse the sense of the comparison: comparisons
633 -- can't always be easily reversed (eg. floating
634 -- pt. comparisons).
635 where
636 use_switch = {- pprTrace "mk_switch" (
637 ppr tag_expr <+> text "n_tags:" <+> int n_tags <+>
638 text "branches:" <+> ppr (map fst branches) <+>
639 text "n_branches:" <+> int n_branches <+>
640 text "lo_tag:" <+> int lo_tag <+>
641 text "hi_tag:" <+> int hi_tag <+>
642 text "real_lo_tag:" <+> int real_lo_tag <+>
643 text "real_hi_tag:" <+> int real_hi_tag) $ -}
644 ASSERT( n_branches > 1 && n_tags > 1 )
645 n_tags > 2 && (via_C || (dense && big_enough))
646 -- up to 4 branches we use a decision tree, otherwise
647 -- a switch (== jump table in the NCG). This seems to be
648 -- optimal, and corresponds with what gcc does.
649 big_enough = n_branches > 4
650 dense = n_branches > (n_tags `div` 2)
651 n_branches = length branches
652
653 -- ignore default slots at each end of the range if there's
654 -- no default branch defined.
655 lowest_branch = fst (head branches)
656 highest_branch = fst (last branches)
657
658 real_lo_tag
659 | isNothing mb_deflt = lowest_branch
660 | otherwise = lo_tag
661
662 real_hi_tag
663 | isNothing mb_deflt = highest_branch
664 | otherwise = hi_tag
665
666 n_tags = real_hi_tag - real_lo_tag + 1
667
668 -- INVARIANT: Provided hi_tag > lo_tag (which is true)
669 -- lo_tag <= mid_tag < hi_tag
670 -- lo_branches have tags < mid_tag
671 -- hi_branches have tags >= mid_tag
672
673 (mid_tag,_) = branches !! (n_branches `div` 2)
674 -- 2 branches => n_branches `div` 2 = 1
675 -- => branches !! 1 give the *second* tag
676 -- There are always at least 2 branches here
677
678 (lo_branches, hi_branches) = span is_lo branches
679 is_lo (t,_) = t < mid_tag
680
681 assignTemp' :: CmmExpr -> FCode (CmmStmt, CmmExpr)
682 assignTemp' e
683 | isTrivialCmmExpr e = return (CmmNop, e)
684 | otherwise = do { reg <- newTemp (cmmExprType e)
685 ; return (CmmAssign (CmmLocal reg) e, CmmReg (CmmLocal reg)) }
686
687 emitLitSwitch :: CmmExpr -- Tag to switch on
688 -> [(Literal, CgStmts)] -- Tagged branches
689 -> CgStmts -- Default branch (always)
690 -> Code -- Emit the code
691 -- Used for general literals, whose size might not be a word,
692 -- where there is always a default case, and where we don't know
693 -- the range of values for certain. For simplicity we always generate a tree.
694 --
695 -- ToDo: for integers we could do better here, perhaps by generalising
696 -- mk_switch and using that. --SDM 15/09/2004
697 emitLitSwitch _ [] deflt = emitCgStmts deflt
698 emitLitSwitch scrut branches deflt_blk
699 = do { scrut' <- assignTemp scrut
700 ; deflt_blk_id <- forkCgStmts deflt_blk
701 ; blk <- mk_lit_switch scrut' deflt_blk_id (sortBy (comparing fst) branches)
702 ; emitCgStmts blk }
703
704 mk_lit_switch :: CmmExpr -> BlockId
705 -> [(Literal,CgStmts)]
706 -> FCode CgStmts
707 mk_lit_switch scrut deflt_blk_id [(lit,blk)]
708 = return (consCgStmt if_stmt blk)
709 where
710 cmm_lit = mkSimpleLit lit
711 rep = cmmLitType cmm_lit
712 ne = if isFloatType rep then MO_F_Ne else MO_Ne
713 cond = CmmMachOp (ne (typeWidth rep)) [scrut, CmmLit cmm_lit]
714 if_stmt = CmmCondBranch cond deflt_blk_id
715
716 mk_lit_switch scrut deflt_blk_id branches
717 = do { hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
718 ; lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
719 ; lo_blk_id <- forkCgStmts lo_blk
720 ; let if_stmt = CmmCondBranch cond lo_blk_id
721 ; return (if_stmt `consCgStmt` hi_blk) }
722 where
723 n_branches = length branches
724 (mid_lit,_) = branches !! (n_branches `div` 2)
725 -- See notes above re mid_tag
726
727 (lo_branches, hi_branches) = span is_lo branches
728 is_lo (t,_) = t < mid_lit
729
730 cond = CmmMachOp (mkLtOp mid_lit)
731 [scrut, CmmLit (mkSimpleLit mid_lit)]
732
733 -------------------------------------------------------------------------
734 --
735 -- Simultaneous assignment
736 --
737 -------------------------------------------------------------------------
738
739
740 emitSimultaneously :: CmmStmts -> Code
741 -- Emit code to perform the assignments in the
742 -- input simultaneously, using temporary variables when necessary.
743 --
744 -- The Stmts must be:
745 -- CmmNop, CmmComment, CmmAssign, CmmStore
746 -- and nothing else
747
748
749 -- We use the strongly-connected component algorithm, in which
750 -- * the vertices are the statements
751 -- * an edge goes from s1 to s2 iff
752 -- s1 assigns to something s2 uses
753 -- that is, if s1 should *follow* s2 in the final order
754
755 type CVertex = (Int, CmmStmt) -- Give each vertex a unique number,
756 -- for fast comparison
757
758 emitSimultaneously stmts
759 = codeOnly $
760 case filterOut isNopStmt (stmtList stmts) of
761 -- Remove no-ops
762 [] -> nopC
763 [stmt] -> stmtC stmt -- It's often just one stmt
764 stmt_list -> doSimultaneously1 (zip [(1::Int)..] stmt_list)
765
766 doSimultaneously1 :: [CVertex] -> Code
767 doSimultaneously1 vertices
768 = let
769 edges = [ (vertex, key1, edges_from stmt1)
770 | vertex@(key1, stmt1) <- vertices
771 ]
772 edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
773 stmt1 `mustFollow` stmt2
774 ]
775 components = stronglyConnCompFromEdgedVertices edges
776
777 -- do_components deal with one strongly-connected component
778 -- Not cyclic, or singleton? Just do it
779 do_component (AcyclicSCC (_n, stmt)) = stmtC stmt
780 do_component (CyclicSCC [])
781 = panic "doSimultaneously1: do_component (CyclicSCC [])"
782 do_component (CyclicSCC [(_n, stmt)]) = stmtC stmt
783
784 -- Cyclic? Then go via temporaries. Pick one to
785 -- break the loop and try again with the rest.
786 do_component (CyclicSCC ((_n, first_stmt) : rest))
787 = do { from_temp <- go_via_temp first_stmt
788 ; doSimultaneously1 rest
789 ; stmtC from_temp }
790
791 go_via_temp (CmmAssign dest src)
792 = do { tmp <- newTemp (cmmRegType dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
793 ; stmtC (CmmAssign (CmmLocal tmp) src)
794 ; return (CmmAssign dest (CmmReg (CmmLocal tmp))) }
795 go_via_temp (CmmStore dest src)
796 = do { tmp <- newTemp (cmmExprType src) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
797 ; stmtC (CmmAssign (CmmLocal tmp) src)
798 ; return (CmmStore dest (CmmReg (CmmLocal tmp))) }
799 go_via_temp _ = panic "doSimultaneously1: go_via_temp"
800 in
801 mapCs do_component components
802
803 mustFollow :: CmmStmt -> CmmStmt -> Bool
804 CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt
805 CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprType e)) stmt
806 CmmNop `mustFollow` _ = False
807 CmmComment _ `mustFollow` _ = False
808 _ `mustFollow` _ = panic "mustFollow"
809
810
811 anySrc :: (CmmExpr -> Bool) -> CmmStmt -> Bool
812 -- True if the fn is true of any input of the stmt
813 anySrc p (CmmAssign _ e) = p e
814 anySrc p (CmmStore e1 e2) = p e1 || p e2 -- Might be used in either side
815 anySrc _ (CmmComment _) = False
816 anySrc _ CmmNop = False
817 anySrc _ _ = True -- Conservative
818
819 locUsedIn :: CmmExpr -> CmmType -> CmmExpr -> Bool
820 -- (locUsedIn a r e) checks whether writing to r[a] could affect the value of
821 -- 'e'. Returns True if it's not sure.
822 locUsedIn _ _ (CmmLit _) = False
823 locUsedIn loc rep (CmmLoad e ld_rep) = possiblySameLoc loc rep e ld_rep
824 locUsedIn _ _ (CmmReg _) = False
825 locUsedIn _ _ (CmmRegOff _ _) = False
826 locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es
827 locUsedIn _ _ (CmmStackSlot _ _) = panic "locUsedIn: CmmStackSlot"
828
829 possiblySameLoc :: CmmExpr -> CmmType -> CmmExpr -> CmmType -> Bool
830 -- Assumes that distinct registers (eg Hp, Sp) do not
831 -- point to the same location, nor any offset thereof.
832 possiblySameLoc (CmmReg r1) _ (CmmReg r2) _ = r1 == r2
833 possiblySameLoc (CmmReg r1) _ (CmmRegOff r2 0) _ = r1 == r2
834 possiblySameLoc (CmmRegOff r1 0) _ (CmmReg r2) _ = r1 == r2
835 possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2
836 = r1==r2 && end1 > start2 && end2 > start1
837 where
838 end1 = start1 + widthInBytes (typeWidth rep1)
839 end2 = start2 + widthInBytes (typeWidth rep2)
840
841 possiblySameLoc _ _ (CmmLit _) _ = False
842 possiblySameLoc _ _ _ _ = True -- Conservative
843
844 -------------------------------------------------------------------------
845 --
846 -- Static Reference Tables
847 --
848 -------------------------------------------------------------------------
849
850 -- There is just one SRT for each top level binding; all the nested
851 -- bindings use sub-sections of this SRT. The label is passed down to
852 -- the nested bindings via the monad.
853
854 getSRTInfo :: FCode C_SRT
855 getSRTInfo = do
856 srt_lbl <- getSRTLabel
857 srt <- getSRT
858 case srt of
859 -- TODO: Should we panic in this case?
860 -- Someone obviously thinks there should be an SRT
861 NoSRT -> return NoC_SRT
862 SRTEntries {} -> panic "getSRTInfo: SRTEntries. Perhaps you forgot to run SimplStg?"
863 SRT off len bmp
864 | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape]
865 -> do id <- newUnique
866 let srt_desc_lbl = mkLargeSRTLabel id
867 emitRODataLits "getSRTInfo" srt_desc_lbl
868 ( cmmLabelOffW srt_lbl off
869 : mkWordCLit (fromIntegral len)
870 : map mkWordCLit bmp)
871 return (C_SRT srt_desc_lbl 0 srt_escape)
872
873 | otherwise
874 -> return (C_SRT srt_lbl off (fromIntegral (head bmp)))
875 -- The fromIntegral converts to StgHalfWord
876
877 srt_escape :: StgHalfWord
878 srt_escape = -1
879
880 -- -----------------------------------------------------------------------------
881 --
882 -- STG/Cmm GlobalReg
883 --
884 -- -----------------------------------------------------------------------------
885
886 -- | Here is where the STG register map is defined for each target arch.
887 -- The order matters (for the llvm backend anyway)! We must make sure to
888 -- maintain the order here with the order used in the LLVM calling conventions.
889 -- Note that also, this isn't all registers, just the ones that are currently
890 -- possbily mapped to real registers.
891 activeStgRegs :: [GlobalReg]
892 activeStgRegs = [
893 #ifdef REG_Base
894 BaseReg
895 #endif
896 #ifdef REG_Sp
897 ,Sp
898 #endif
899 #ifdef REG_Hp
900 ,Hp
901 #endif
902 #ifdef REG_R1
903 ,VanillaReg 1 VGcPtr
904 #endif
905 #ifdef REG_R2
906 ,VanillaReg 2 VGcPtr
907 #endif
908 #ifdef REG_R3
909 ,VanillaReg 3 VGcPtr
910 #endif
911 #ifdef REG_R4
912 ,VanillaReg 4 VGcPtr
913 #endif
914 #ifdef REG_R5
915 ,VanillaReg 5 VGcPtr
916 #endif
917 #ifdef REG_R6
918 ,VanillaReg 6 VGcPtr
919 #endif
920 #ifdef REG_R7
921 ,VanillaReg 7 VGcPtr
922 #endif
923 #ifdef REG_R8
924 ,VanillaReg 8 VGcPtr
925 #endif
926 #ifdef REG_R9
927 ,VanillaReg 9 VGcPtr
928 #endif
929 #ifdef REG_R10
930 ,VanillaReg 10 VGcPtr
931 #endif
932 #ifdef REG_SpLim
933 ,SpLim
934 #endif
935 #ifdef REG_F1
936 ,FloatReg 1
937 #endif
938 #ifdef REG_F2
939 ,FloatReg 2
940 #endif
941 #ifdef REG_F3
942 ,FloatReg 3
943 #endif
944 #ifdef REG_F4
945 ,FloatReg 4
946 #endif
947 #ifdef REG_D1
948 ,DoubleReg 1
949 #endif
950 #ifdef REG_D2
951 ,DoubleReg 2
952 #endif
953 ]
954
955 -- | We map STG registers onto appropriate CmmExprs. Either they map
956 -- to real machine registers or stored as offsets from BaseReg. Given
957 -- a GlobalReg, get_GlobalReg_addr always produces the
958 -- register table address for it.
959 get_GlobalReg_addr :: GlobalReg -> CmmExpr
960 get_GlobalReg_addr BaseReg = regTableOffset 0
961 get_GlobalReg_addr mid = get_Regtable_addr_from_offset
962 (globalRegType mid) (baseRegOffset mid)
963
964 -- Calculate a literal representing an offset into the register table.
965 -- Used when we don't have an actual BaseReg to offset from.
966 regTableOffset :: Int -> CmmExpr
967 regTableOffset n =
968 CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n))
969
970 get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr
971 get_Regtable_addr_from_offset _ offset =
972 #ifdef REG_Base
973 CmmRegOff (CmmGlobal BaseReg) offset
974 #else
975 regTableOffset offset
976 #endif
977
978 -- | Fixup global registers so that they assign to locations within the
979 -- RegTable if they aren't pinned for the current target.
980 fixStgRegisters :: RawCmmDecl -> RawCmmDecl
981 fixStgRegisters top@(CmmData _ _) = top
982
983 fixStgRegisters (CmmProc info lbl (ListGraph blocks)) =
984 let blocks' = map fixStgRegBlock blocks
985 in CmmProc info lbl $ ListGraph blocks'
986
987 fixStgRegBlock :: CmmBasicBlock -> CmmBasicBlock
988 fixStgRegBlock (BasicBlock id stmts) =
989 let stmts' = map fixStgRegStmt stmts
990 in BasicBlock id stmts'
991
992 fixStgRegStmt :: CmmStmt -> CmmStmt
993 fixStgRegStmt stmt
994 = case stmt of
995 CmmAssign (CmmGlobal reg) src ->
996 let src' = fixStgRegExpr src
997 baseAddr = get_GlobalReg_addr reg
998 in case reg `elem` activeStgRegs of
999 True -> CmmAssign (CmmGlobal reg) src'
1000 False -> CmmStore baseAddr src'
1001
1002 CmmAssign reg src ->
1003 let src' = fixStgRegExpr src
1004 in CmmAssign reg src'
1005
1006 CmmStore addr src -> CmmStore (fixStgRegExpr addr) (fixStgRegExpr src)
1007
1008 CmmCall target regs args returns ->
1009 let target' = case target of
1010 CmmCallee e conv -> CmmCallee (fixStgRegExpr e) conv
1011 CmmPrim op mStmts ->
1012 CmmPrim op (fmap (map fixStgRegStmt) mStmts)
1013 args' = map (\(CmmHinted arg hint) ->
1014 (CmmHinted (fixStgRegExpr arg) hint)) args
1015 in CmmCall target' regs args' returns
1016
1017 CmmCondBranch test dest -> CmmCondBranch (fixStgRegExpr test) dest
1018
1019 CmmSwitch expr ids -> CmmSwitch (fixStgRegExpr expr) ids
1020
1021 CmmJump addr live -> CmmJump (fixStgRegExpr addr) live
1022
1023 -- CmmNop, CmmComment, CmmBranch, CmmReturn
1024 _other -> stmt
1025
1026
1027 fixStgRegExpr :: CmmExpr -> CmmExpr
1028 fixStgRegExpr expr
1029 = case expr of
1030 CmmLoad addr ty -> CmmLoad (fixStgRegExpr addr) ty
1031
1032 CmmMachOp mop args -> CmmMachOp mop args'
1033 where args' = map fixStgRegExpr args
1034
1035 CmmReg (CmmGlobal reg) ->
1036 -- Replace register leaves with appropriate StixTrees for
1037 -- the given target. MagicIds which map to a reg on this
1038 -- arch are left unchanged. For the rest, BaseReg is taken
1039 -- to mean the address of the reg table in MainCapability,
1040 -- and for all others we generate an indirection to its
1041 -- location in the register table.
1042 case reg `elem` activeStgRegs of
1043 True -> expr
1044 False ->
1045 let baseAddr = get_GlobalReg_addr reg
1046 in case reg of
1047 BaseReg -> fixStgRegExpr baseAddr
1048 _other -> fixStgRegExpr
1049 (CmmLoad baseAddr (globalRegType reg))
1050
1051 CmmRegOff (CmmGlobal reg) offset ->
1052 -- RegOf leaves are just a shorthand form. If the reg maps
1053 -- to a real reg, we keep the shorthand, otherwise, we just
1054 -- expand it and defer to the above code.
1055 case reg `elem` activeStgRegs of
1056 True -> expr
1057 False -> fixStgRegExpr (CmmMachOp (MO_Add wordWidth) [
1058 CmmReg (CmmGlobal reg),
1059 CmmLit (CmmInt (fromIntegral offset)
1060 wordWidth)])
1061
1062 -- CmmLit, CmmReg (CmmLocal), CmmStackSlot
1063 _other -> expr
1064