Allow the use of R9 and R10 in primops; fixes trac #5423
[ghc.git] / compiler / codeGen / CgUtils.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generator utilities; mostly monadic
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module CgUtils (
10 addIdReps,
11 cgLit,
12 emitDataLits, mkDataLits,
13 emitRODataLits, mkRODataLits,
14 emitIf, emitIfThenElse,
15 emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult,
16 emitRtsCallGen,
17 assignTemp, assignTemp_, newTemp,
18 emitSimultaneously,
19 emitSwitch, emitLitSwitch,
20 tagToClosure,
21
22 callerSaves, callerSaveVolatileRegs, get_GlobalReg_addr,
23 activeStgRegs, fixStgRegisters,
24
25 cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
26 cmmUGtWord, cmmSubWord, cmmMulWord, cmmAddWord, cmmUShrWord,
27 cmmOffsetExprW, cmmOffsetExprB,
28 cmmRegOffW, cmmRegOffB,
29 cmmLabelOffW, cmmLabelOffB,
30 cmmOffsetW, cmmOffsetB,
31 cmmOffsetLitW, cmmOffsetLitB,
32 cmmLoadIndexW,
33 cmmConstrTag, cmmConstrTag1,
34
35 tagForCon, tagCons, isSmallFamily,
36 cmmUntag, cmmIsTagged, cmmGetTag,
37
38 addToMem, addToMemE,
39 mkWordCLit,
40 newStringCLit, newByteStringCLit,
41 packHalfWordsCLit,
42 blankWord,
43
44 getSRTInfo
45 ) where
46
47 #include "HsVersions.h"
48 #include "../includes/stg/MachRegs.h"
49
50 import BlockId
51 import CgMonad
52 import TyCon
53 import DataCon
54 import Id
55 import IdInfo
56 import Constants
57 import SMRep
58 import OldCmm
59 import OldCmmUtils
60 import CLabel
61 import ForeignCall
62 import ClosureInfo
63 import StgSyn (SRT(..))
64 import Module
65 import Literal
66 import Digraph
67 import ListSetOps
68 import Util
69 import DynFlags
70 import FastString
71 import Outputable
72
73 import Data.Char
74 import Data.Word
75 import Data.Maybe
76
77 -------------------------------------------------------------------------
78 --
79 -- Random small functions
80 --
81 -------------------------------------------------------------------------
82
83 addIdReps :: [Id] -> [(CgRep, Id)]
84 addIdReps ids = [(idCgRep id, id) | id <- ids]
85
86 -------------------------------------------------------------------------
87 --
88 -- Literals
89 --
90 -------------------------------------------------------------------------
91
92 cgLit :: Literal -> FCode CmmLit
93 cgLit (MachStr s) = newByteStringCLit (bytesFS s)
94 -- not unpackFS; we want the UTF-8 byte stream.
95 cgLit other_lit = return (mkSimpleLit other_lit)
96
97 mkSimpleLit :: Literal -> CmmLit
98 mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth
99 mkSimpleLit MachNullAddr = zeroCLit
100 mkSimpleLit (MachInt i) = CmmInt i wordWidth
101 mkSimpleLit (MachInt64 i) = CmmInt i W64
102 mkSimpleLit (MachWord i) = CmmInt i wordWidth
103 mkSimpleLit (MachWord64 i) = CmmInt i W64
104 mkSimpleLit (MachFloat r) = CmmFloat r W32
105 mkSimpleLit (MachDouble r) = CmmFloat r W64
106 mkSimpleLit (MachLabel fs ms fod)
107 = CmmLabel (mkForeignLabel fs ms labelSrc fod)
108 where
109 -- TODO: Literal labels might not actually be in the current package...
110 labelSrc = ForeignLabelInThisPackage
111 mkSimpleLit (MachStr _) = panic "mkSimpleLit: MachStr"
112 -- No LitInteger's should be left by the time this is called. CorePrep
113 -- should have converted them all to a real core representation.
114 mkSimpleLit (LitInteger {}) = panic "mkSimpleLit: LitInteger"
115
116 mkLtOp :: Literal -> MachOp
117 -- On signed literals we must do a signed comparison
118 mkLtOp (MachInt _) = MO_S_Lt wordWidth
119 mkLtOp (MachFloat _) = MO_F_Lt W32
120 mkLtOp (MachDouble _) = MO_F_Lt W64
121 mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit)))
122
123
124 ---------------------------------------------------
125 --
126 -- Cmm data type functions
127 --
128 ---------------------------------------------------
129
130
131
132 {-
133 The family size of a data type (the number of constructors)
134 can be either:
135 * small, if the family size < 2**tag_bits
136 * big, otherwise.
137
138 Small families can have the constructor tag in the tag
139 bits.
140 Big families only use the tag value 1 to represent
141 evaluatedness.
142 -}
143 isSmallFamily :: Int -> Bool
144 isSmallFamily fam_size = fam_size <= mAX_PTR_TAG
145
146 tagForCon :: DataCon -> ConTagZ
147 tagForCon con = tag
148 where
149 con_tag = dataConTagZ con
150 fam_size = tyConFamilySize (dataConTyCon con)
151 tag | isSmallFamily fam_size = con_tag + 1
152 | otherwise = 1
153
154 --Tag an expression, to do: refactor, this appears in some other module.
155 tagCons :: DataCon -> CmmExpr -> CmmExpr
156 tagCons con expr = cmmOffsetB expr (tagForCon con)
157
158 --------------------------------------------------------------------------
159 --
160 -- Incrementing a memory location
161 --
162 --------------------------------------------------------------------------
163
164 addToMem :: Width -- rep of the counter
165 -> CmmExpr -- Address
166 -> Int -- What to add (a word)
167 -> CmmStmt
168 addToMem width ptr n = addToMemE width ptr (CmmLit (CmmInt (toInteger n) width))
169
170 addToMemE :: Width -- rep of the counter
171 -> CmmExpr -- Address
172 -> CmmExpr -- What to add (a word-typed expression)
173 -> CmmStmt
174 addToMemE width ptr n
175 = CmmStore ptr (CmmMachOp (MO_Add width) [CmmLoad ptr (cmmBits width), n])
176
177 -------------------------------------------------------------------------
178 --
179 -- Converting a closure tag to a closure for enumeration types
180 -- (this is the implementation of tagToEnum#).
181 --
182 -------------------------------------------------------------------------
183
184 tagToClosure :: TyCon -> CmmExpr -> CmmExpr
185 tagToClosure tycon tag
186 = CmmLoad (cmmOffsetExprW closure_tbl tag) gcWord
187 where closure_tbl = CmmLit (CmmLabel lbl)
188 lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs
189
190 -------------------------------------------------------------------------
191 --
192 -- Conditionals and rts calls
193 --
194 -------------------------------------------------------------------------
195
196 emitIf :: CmmExpr -- Boolean
197 -> Code -- Then part
198 -> Code
199 -- Emit (if e then x)
200 -- ToDo: reverse the condition to avoid the extra branch instruction if possible
201 -- (some conditionals aren't reversible. eg. floating point comparisons cannot
202 -- be inverted because there exist some values for which both comparisons
203 -- return False, such as NaN.)
204 emitIf cond then_part
205 = do { then_id <- newLabelC
206 ; join_id <- newLabelC
207 ; stmtC (CmmCondBranch cond then_id)
208 ; stmtC (CmmBranch join_id)
209 ; labelC then_id
210 ; then_part
211 ; labelC join_id
212 }
213
214 emitIfThenElse :: CmmExpr -- Boolean
215 -> Code -- Then part
216 -> Code -- Else part
217 -> Code
218 -- Emit (if e then x else y)
219 emitIfThenElse cond then_part else_part
220 = do { then_id <- newLabelC
221 ; join_id <- newLabelC
222 ; stmtC (CmmCondBranch cond then_id)
223 ; else_part
224 ; stmtC (CmmBranch join_id)
225 ; labelC then_id
226 ; then_part
227 ; labelC join_id
228 }
229
230
231 -- | Emit code to call a Cmm function.
232 emitRtsCall
233 :: PackageId -- ^ package the function is in
234 -> FastString -- ^ name of function
235 -> [CmmHinted CmmExpr] -- ^ function args
236 -> Bool -- ^ whether this is a safe call
237 -> Code -- ^ cmm code
238
239 emitRtsCall pkg fun args safe = emitRtsCallGen [] pkg fun args Nothing safe
240 -- The 'Nothing' says "save all global registers"
241
242 emitRtsCallWithVols :: PackageId -> FastString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Bool -> Code
243 emitRtsCallWithVols pkg fun args vols safe
244 = emitRtsCallGen [] pkg fun args (Just vols) safe
245
246 emitRtsCallWithResult
247 :: LocalReg -> ForeignHint
248 -> PackageId -> FastString
249 -> [CmmHinted CmmExpr] -> Bool -> Code
250
251 emitRtsCallWithResult res hint pkg fun args safe
252 = emitRtsCallGen [CmmHinted res hint] pkg fun args Nothing safe
253
254 -- Make a call to an RTS C procedure
255 emitRtsCallGen
256 :: [CmmHinted LocalReg]
257 -> PackageId
258 -> FastString
259 -> [CmmHinted CmmExpr]
260 -> Maybe [GlobalReg]
261 -> Bool -- True <=> CmmSafe call
262 -> Code
263 emitRtsCallGen res pkg fun args vols safe = do
264 safety <- if safe
265 then getSRTInfo >>= (return . CmmSafe)
266 else return CmmUnsafe
267 stmtsC caller_save
268 stmtC (CmmCall target res args safety CmmMayReturn)
269 stmtsC caller_load
270 where
271 (caller_save, caller_load) = callerSaveVolatileRegs vols
272 target = CmmCallee fun_expr CCallConv
273 fun_expr = mkLblExpr (mkCmmCodeLabel pkg fun)
274
275 -----------------------------------------------------------------------------
276 --
277 -- Caller-Save Registers
278 --
279 -----------------------------------------------------------------------------
280
281 -- Here we generate the sequence of saves/restores required around a
282 -- foreign call instruction.
283
284 -- TODO: reconcile with includes/Regs.h
285 -- * Regs.h claims that BaseReg should be saved last and loaded first
286 -- * This might not have been tickled before since BaseReg is callee save
287 -- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim
288 callerSaveVolatileRegs :: Maybe [GlobalReg] -> ([CmmStmt], [CmmStmt])
289 callerSaveVolatileRegs vols = (caller_save, caller_load)
290 where
291 caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save)
292 caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save)
293
294 system_regs = [Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery,
295 {-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ]
296
297 regs_to_save = system_regs ++ vol_list
298
299 vol_list = case vols of Nothing -> all_of_em; Just regs -> regs
300
301 all_of_em = [ VanillaReg n VNonGcPtr | n <- [0..mAX_Vanilla_REG] ]
302 -- The VNonGcPtr is a lie, but I don't think it matters
303 ++ [ FloatReg n | n <- [0..mAX_Float_REG] ]
304 ++ [ DoubleReg n | n <- [0..mAX_Double_REG] ]
305 ++ [ LongReg n | n <- [0..mAX_Long_REG] ]
306
307 callerSaveGlobalReg reg next
308 | callerSaves reg =
309 CmmStore (get_GlobalReg_addr reg)
310 (CmmReg (CmmGlobal reg)) : next
311 | otherwise = next
312
313 callerRestoreGlobalReg reg next
314 | callerSaves reg =
315 CmmAssign (CmmGlobal reg)
316 (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg))
317 : next
318 | otherwise = next
319
320
321 -- | Returns @True@ if this global register is stored in a caller-saves
322 -- machine register.
323
324 callerSaves :: GlobalReg -> Bool
325
326 #ifdef CALLER_SAVES_Base
327 callerSaves BaseReg = True
328 #endif
329 #ifdef CALLER_SAVES_R1
330 callerSaves (VanillaReg 1 _) = True
331 #endif
332 #ifdef CALLER_SAVES_R2
333 callerSaves (VanillaReg 2 _) = True
334 #endif
335 #ifdef CALLER_SAVES_R3
336 callerSaves (VanillaReg 3 _) = True
337 #endif
338 #ifdef CALLER_SAVES_R4
339 callerSaves (VanillaReg 4 _) = True
340 #endif
341 #ifdef CALLER_SAVES_R5
342 callerSaves (VanillaReg 5 _) = True
343 #endif
344 #ifdef CALLER_SAVES_R6
345 callerSaves (VanillaReg 6 _) = True
346 #endif
347 #ifdef CALLER_SAVES_R7
348 callerSaves (VanillaReg 7 _) = True
349 #endif
350 #ifdef CALLER_SAVES_R8
351 callerSaves (VanillaReg 8 _) = True
352 #endif
353 #ifdef CALLER_SAVES_R9
354 callerSaves (VanillaReg 9 _) = True
355 #endif
356 #ifdef CALLER_SAVES_R10
357 callerSaves (VanillaReg 10 _) = True
358 #endif
359 #ifdef CALLER_SAVES_F1
360 callerSaves (FloatReg 1) = True
361 #endif
362 #ifdef CALLER_SAVES_F2
363 callerSaves (FloatReg 2) = True
364 #endif
365 #ifdef CALLER_SAVES_F3
366 callerSaves (FloatReg 3) = True
367 #endif
368 #ifdef CALLER_SAVES_F4
369 callerSaves (FloatReg 4) = True
370 #endif
371 #ifdef CALLER_SAVES_D1
372 callerSaves (DoubleReg 1) = True
373 #endif
374 #ifdef CALLER_SAVES_D2
375 callerSaves (DoubleReg 2) = True
376 #endif
377 #ifdef CALLER_SAVES_L1
378 callerSaves (LongReg 1) = True
379 #endif
380 #ifdef CALLER_SAVES_Sp
381 callerSaves Sp = True
382 #endif
383 #ifdef CALLER_SAVES_SpLim
384 callerSaves SpLim = True
385 #endif
386 #ifdef CALLER_SAVES_Hp
387 callerSaves Hp = True
388 #endif
389 #ifdef CALLER_SAVES_HpLim
390 callerSaves HpLim = True
391 #endif
392 #ifdef CALLER_SAVES_CurrentTSO
393 callerSaves CurrentTSO = True
394 #endif
395 #ifdef CALLER_SAVES_CurrentNursery
396 callerSaves CurrentNursery = True
397 #endif
398 callerSaves _ = False
399
400
401 -- -----------------------------------------------------------------------------
402 -- Information about global registers
403
404 baseRegOffset :: GlobalReg -> Int
405
406 baseRegOffset (VanillaReg 1 _) = oFFSET_StgRegTable_rR1
407 baseRegOffset (VanillaReg 2 _) = oFFSET_StgRegTable_rR2
408 baseRegOffset (VanillaReg 3 _) = oFFSET_StgRegTable_rR3
409 baseRegOffset (VanillaReg 4 _) = oFFSET_StgRegTable_rR4
410 baseRegOffset (VanillaReg 5 _) = oFFSET_StgRegTable_rR5
411 baseRegOffset (VanillaReg 6 _) = oFFSET_StgRegTable_rR6
412 baseRegOffset (VanillaReg 7 _) = oFFSET_StgRegTable_rR7
413 baseRegOffset (VanillaReg 8 _) = oFFSET_StgRegTable_rR8
414 baseRegOffset (VanillaReg 9 _) = oFFSET_StgRegTable_rR9
415 baseRegOffset (VanillaReg 10 _) = oFFSET_StgRegTable_rR10
416 baseRegOffset (VanillaReg n _) = panic ("Registers above R10 are not supported (tried to use R" ++ show n ++ ")")
417 baseRegOffset (FloatReg 1) = oFFSET_StgRegTable_rF1
418 baseRegOffset (FloatReg 2) = oFFSET_StgRegTable_rF2
419 baseRegOffset (FloatReg 3) = oFFSET_StgRegTable_rF3
420 baseRegOffset (FloatReg 4) = oFFSET_StgRegTable_rF4
421 baseRegOffset (FloatReg n) = panic ("Registers above F4 are not supported (tried to use F" ++ show n ++ ")")
422 baseRegOffset (DoubleReg 1) = oFFSET_StgRegTable_rD1
423 baseRegOffset (DoubleReg 2) = oFFSET_StgRegTable_rD2
424 baseRegOffset (DoubleReg n) = panic ("Registers above D2 are not supported (tried to use D" ++ show n ++ ")")
425 baseRegOffset Sp = oFFSET_StgRegTable_rSp
426 baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim
427 baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
428 baseRegOffset (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")")
429 baseRegOffset Hp = oFFSET_StgRegTable_rHp
430 baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
431 baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
432 baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
433 baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
434 baseRegOffset EagerBlackholeInfo = oFFSET_stgEagerBlackholeInfo
435 baseRegOffset GCEnter1 = oFFSET_stgGCEnter1
436 baseRegOffset GCFun = oFFSET_stgGCFun
437 baseRegOffset BaseReg = panic "baseRegOffset:BaseReg"
438 baseRegOffset PicBaseReg = panic "baseRegOffset:PicBaseReg"
439
440
441 -------------------------------------------------------------------------
442 --
443 -- Strings generate a top-level data block
444 --
445 -------------------------------------------------------------------------
446
447 emitDataLits :: CLabel -> [CmmLit] -> Code
448 -- Emit a data-segment data block
449 emitDataLits lbl lits = emitDecl (mkDataLits Data lbl lits)
450
451 emitRODataLits :: String -> CLabel -> [CmmLit] -> Code
452 -- Emit a read-only data block
453 emitRODataLits _caller lbl lits
454 = emitDecl (mkRODataLits lbl lits)
455
456 newStringCLit :: String -> FCode CmmLit
457 -- Make a global definition for the string,
458 -- and return its label
459 newStringCLit str = newByteStringCLit (map (fromIntegral.ord) str)
460
461 newByteStringCLit :: [Word8] -> FCode CmmLit
462 newByteStringCLit bytes
463 = do { uniq <- newUnique
464 ; let (lit, decl) = mkByteStringCLit uniq bytes
465 ; emitDecl decl
466 ; return lit }
467
468 -------------------------------------------------------------------------
469 --
470 -- Assigning expressions to temporaries
471 --
472 -------------------------------------------------------------------------
473
474 -- | If the expression is trivial, return it. Otherwise, assign the
475 -- expression to a temporary register and return an expression
476 -- referring to this register.
477 assignTemp :: CmmExpr -> FCode CmmExpr
478 -- For a non-trivial expression, e, create a local
479 -- variable and assign the expression to it
480 assignTemp e
481 | isTrivialCmmExpr e = return e
482 | otherwise = do { reg <- newTemp (cmmExprType e)
483 ; stmtC (CmmAssign (CmmLocal reg) e)
484 ; return (CmmReg (CmmLocal reg)) }
485
486 -- | If the expression is trivial and doesn't refer to a global
487 -- register, return it. Otherwise, assign the expression to a
488 -- temporary register and return an expression referring to this
489 -- register.
490 assignTemp_ :: CmmExpr -> FCode CmmExpr
491 assignTemp_ e
492 | isTrivialCmmExpr e && hasNoGlobalRegs e = return e
493 | otherwise = do
494 reg <- newTemp (cmmExprType e)
495 stmtC (CmmAssign (CmmLocal reg) e)
496 return (CmmReg (CmmLocal reg))
497
498 newTemp :: CmmType -> FCode LocalReg
499 newTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep) }
500
501 -------------------------------------------------------------------------
502 --
503 -- Building case analysis
504 --
505 -------------------------------------------------------------------------
506
507 emitSwitch
508 :: CmmExpr -- Tag to switch on
509 -> [(ConTagZ, CgStmts)] -- Tagged branches
510 -> Maybe CgStmts -- Default branch (if any)
511 -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour
512 -- outside this range is undefined
513 -> Code
514
515 -- ONLY A DEFAULT BRANCH: no case analysis to do
516 emitSwitch _ [] (Just stmts) _ _
517 = emitCgStmts stmts
518
519 -- Right, off we go
520 emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
521 = -- Just sort the branches before calling mk_sritch
522 do { mb_deflt_id <-
523 case mb_deflt of
524 Nothing -> return Nothing
525 Just stmts -> do id <- forkCgStmts stmts; return (Just id)
526
527 ; dflags <- getDynFlags
528 ; let via_C | HscC <- hscTarget dflags = True
529 | otherwise = False
530
531 ; stmts <- mk_switch tag_expr (sortLe le branches)
532 mb_deflt_id lo_tag hi_tag via_C
533 ; emitCgStmts stmts
534 }
535 where
536 (t1,_) `le` (t2,_) = t1 <= t2
537
538
539 mk_switch :: CmmExpr -> [(ConTagZ, CgStmts)]
540 -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool
541 -> FCode CgStmts
542
543 -- SINGLETON TAG RANGE: no case analysis to do
544 mk_switch _tag_expr [(tag,stmts)] _ lo_tag hi_tag _via_C
545 | lo_tag == hi_tag
546 = ASSERT( tag == lo_tag )
547 return stmts
548
549 -- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
550 mk_switch _tag_expr [(_tag,stmts)] Nothing _lo_tag _hi_tag _via_C
551 = return stmts
552 -- The simplifier might have eliminated a case
553 -- so we may have e.g. case xs of
554 -- [] -> e
555 -- In that situation we can be sure the (:) case
556 -- can't happen, so no need to test
557
558 -- SINGLETON BRANCH: one equality check to do
559 mk_switch tag_expr [(tag,stmts)] (Just deflt) _lo_tag _hi_tag _via_C
560 = return (CmmCondBranch cond deflt `consCgStmt` stmts)
561 where
562 cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
563 -- We have lo_tag < hi_tag, but there's only one branch,
564 -- so there must be a default
565
566 -- ToDo: we might want to check for the two branch case, where one of
567 -- the branches is the tag 0, because comparing '== 0' is likely to be
568 -- more efficient than other kinds of comparison.
569
570 -- DENSE TAG RANGE: use a switch statment.
571 --
572 -- We also use a switch uncoditionally when compiling via C, because
573 -- this will get emitted as a C switch statement and the C compiler
574 -- should do a good job of optimising it. Also, older GCC versions
575 -- (2.95 in particular) have problems compiling the complicated
576 -- if-trees generated by this code, so compiling to a switch every
577 -- time works around that problem.
578 --
579 mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
580 | use_switch -- Use a switch
581 = do { branch_ids <- mapM forkCgStmts (map snd branches)
582 ; let
583 tagged_blk_ids = zip (map fst branches) (map Just branch_ids)
584
585 find_branch :: ConTagZ -> Maybe BlockId
586 find_branch i = assocDefault mb_deflt tagged_blk_ids i
587
588 -- NB. we have eliminated impossible branches at
589 -- either end of the range (see below), so the first
590 -- tag of a real branch is real_lo_tag (not lo_tag).
591 arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]]
592
593 switch_stmt = CmmSwitch (cmmOffset tag_expr (- real_lo_tag)) arms
594
595 ; ASSERT(not (all isNothing arms))
596 return (oneCgStmt switch_stmt)
597 }
598
599 -- if we can knock off a bunch of default cases with one if, then do so
600 | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches
601 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
602 ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch))
603 branch = CmmCondBranch cond deflt
604 ; stmts <- mk_switch tag_expr' branches mb_deflt
605 lowest_branch hi_tag via_C
606 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
607 }
608
609 | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches
610 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
611 ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch))
612 branch = CmmCondBranch cond deflt
613 ; stmts <- mk_switch tag_expr' branches mb_deflt
614 lo_tag highest_branch via_C
615 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
616 }
617
618 | otherwise -- Use an if-tree
619 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
620 -- To avoid duplication
621 ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt
622 lo_tag (mid_tag-1) via_C
623 ; hi_stmts <- mk_switch tag_expr' hi_branches mb_deflt
624 mid_tag hi_tag via_C
625 ; hi_id <- forkCgStmts hi_stmts
626 ; let cond = cmmUGeWord tag_expr' (CmmLit (mkIntCLit mid_tag))
627 branch_stmt = CmmCondBranch cond hi_id
628 ; return (assign_tag `consCgStmt` (branch_stmt `consCgStmt` lo_stmts))
629 }
630 -- we test (e >= mid_tag) rather than (e < mid_tag), because
631 -- the former works better when e is a comparison, and there
632 -- are two tags 0 & 1 (mid_tag == 1). In this case, the code
633 -- generator can reduce the condition to e itself without
634 -- having to reverse the sense of the comparison: comparisons
635 -- can't always be easily reversed (eg. floating
636 -- pt. comparisons).
637 where
638 use_switch = {- pprTrace "mk_switch" (
639 ppr tag_expr <+> text "n_tags:" <+> int n_tags <+>
640 text "branches:" <+> ppr (map fst branches) <+>
641 text "n_branches:" <+> int n_branches <+>
642 text "lo_tag:" <+> int lo_tag <+>
643 text "hi_tag:" <+> int hi_tag <+>
644 text "real_lo_tag:" <+> int real_lo_tag <+>
645 text "real_hi_tag:" <+> int real_hi_tag) $ -}
646 ASSERT( n_branches > 1 && n_tags > 1 )
647 n_tags > 2 && (via_C || (dense && big_enough))
648 -- up to 4 branches we use a decision tree, otherwise
649 -- a switch (== jump table in the NCG). This seems to be
650 -- optimal, and corresponds with what gcc does.
651 big_enough = n_branches > 4
652 dense = n_branches > (n_tags `div` 2)
653 n_branches = length branches
654
655 -- ignore default slots at each end of the range if there's
656 -- no default branch defined.
657 lowest_branch = fst (head branches)
658 highest_branch = fst (last branches)
659
660 real_lo_tag
661 | isNothing mb_deflt = lowest_branch
662 | otherwise = lo_tag
663
664 real_hi_tag
665 | isNothing mb_deflt = highest_branch
666 | otherwise = hi_tag
667
668 n_tags = real_hi_tag - real_lo_tag + 1
669
670 -- INVARIANT: Provided hi_tag > lo_tag (which is true)
671 -- lo_tag <= mid_tag < hi_tag
672 -- lo_branches have tags < mid_tag
673 -- hi_branches have tags >= mid_tag
674
675 (mid_tag,_) = branches !! (n_branches `div` 2)
676 -- 2 branches => n_branches `div` 2 = 1
677 -- => branches !! 1 give the *second* tag
678 -- There are always at least 2 branches here
679
680 (lo_branches, hi_branches) = span is_lo branches
681 is_lo (t,_) = t < mid_tag
682
683 assignTemp' :: CmmExpr -> FCode (CmmStmt, CmmExpr)
684 assignTemp' e
685 | isTrivialCmmExpr e = return (CmmNop, e)
686 | otherwise = do { reg <- newTemp (cmmExprType e)
687 ; return (CmmAssign (CmmLocal reg) e, CmmReg (CmmLocal reg)) }
688
689 emitLitSwitch :: CmmExpr -- Tag to switch on
690 -> [(Literal, CgStmts)] -- Tagged branches
691 -> CgStmts -- Default branch (always)
692 -> Code -- Emit the code
693 -- Used for general literals, whose size might not be a word,
694 -- where there is always a default case, and where we don't know
695 -- the range of values for certain. For simplicity we always generate a tree.
696 --
697 -- ToDo: for integers we could do better here, perhaps by generalising
698 -- mk_switch and using that. --SDM 15/09/2004
699 emitLitSwitch _ [] deflt = emitCgStmts deflt
700 emitLitSwitch scrut branches deflt_blk
701 = do { scrut' <- assignTemp scrut
702 ; deflt_blk_id <- forkCgStmts deflt_blk
703 ; blk <- mk_lit_switch scrut' deflt_blk_id (sortLe le branches)
704 ; emitCgStmts blk }
705 where
706 le (t1,_) (t2,_) = t1 <= t2
707
708 mk_lit_switch :: CmmExpr -> BlockId
709 -> [(Literal,CgStmts)]
710 -> FCode CgStmts
711 mk_lit_switch scrut deflt_blk_id [(lit,blk)]
712 = return (consCgStmt if_stmt blk)
713 where
714 cmm_lit = mkSimpleLit lit
715 rep = cmmLitType cmm_lit
716 ne = if isFloatType rep then MO_F_Ne else MO_Ne
717 cond = CmmMachOp (ne (typeWidth rep)) [scrut, CmmLit cmm_lit]
718 if_stmt = CmmCondBranch cond deflt_blk_id
719
720 mk_lit_switch scrut deflt_blk_id branches
721 = do { hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
722 ; lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
723 ; lo_blk_id <- forkCgStmts lo_blk
724 ; let if_stmt = CmmCondBranch cond lo_blk_id
725 ; return (if_stmt `consCgStmt` hi_blk) }
726 where
727 n_branches = length branches
728 (mid_lit,_) = branches !! (n_branches `div` 2)
729 -- See notes above re mid_tag
730
731 (lo_branches, hi_branches) = span is_lo branches
732 is_lo (t,_) = t < mid_lit
733
734 cond = CmmMachOp (mkLtOp mid_lit)
735 [scrut, CmmLit (mkSimpleLit mid_lit)]
736
737 -------------------------------------------------------------------------
738 --
739 -- Simultaneous assignment
740 --
741 -------------------------------------------------------------------------
742
743
744 emitSimultaneously :: CmmStmts -> Code
745 -- Emit code to perform the assignments in the
746 -- input simultaneously, using temporary variables when necessary.
747 --
748 -- The Stmts must be:
749 -- CmmNop, CmmComment, CmmAssign, CmmStore
750 -- and nothing else
751
752
753 -- We use the strongly-connected component algorithm, in which
754 -- * the vertices are the statements
755 -- * an edge goes from s1 to s2 iff
756 -- s1 assigns to something s2 uses
757 -- that is, if s1 should *follow* s2 in the final order
758
759 type CVertex = (Int, CmmStmt) -- Give each vertex a unique number,
760 -- for fast comparison
761
762 emitSimultaneously stmts
763 = codeOnly $
764 case filterOut isNopStmt (stmtList stmts) of
765 -- Remove no-ops
766 [] -> nopC
767 [stmt] -> stmtC stmt -- It's often just one stmt
768 stmt_list -> doSimultaneously1 (zip [(1::Int)..] stmt_list)
769
770 doSimultaneously1 :: [CVertex] -> Code
771 doSimultaneously1 vertices
772 = let
773 edges = [ (vertex, key1, edges_from stmt1)
774 | vertex@(key1, stmt1) <- vertices
775 ]
776 edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
777 stmt1 `mustFollow` stmt2
778 ]
779 components = stronglyConnCompFromEdgedVertices edges
780
781 -- do_components deal with one strongly-connected component
782 -- Not cyclic, or singleton? Just do it
783 do_component (AcyclicSCC (_n, stmt)) = stmtC stmt
784 do_component (CyclicSCC [])
785 = panic "doSimultaneously1: do_component (CyclicSCC [])"
786 do_component (CyclicSCC [(_n, stmt)]) = stmtC stmt
787
788 -- Cyclic? Then go via temporaries. Pick one to
789 -- break the loop and try again with the rest.
790 do_component (CyclicSCC ((_n, first_stmt) : rest))
791 = do { from_temp <- go_via_temp first_stmt
792 ; doSimultaneously1 rest
793 ; stmtC from_temp }
794
795 go_via_temp (CmmAssign dest src)
796 = do { tmp <- newTemp (cmmRegType dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
797 ; stmtC (CmmAssign (CmmLocal tmp) src)
798 ; return (CmmAssign dest (CmmReg (CmmLocal tmp))) }
799 go_via_temp (CmmStore dest src)
800 = do { tmp <- newTemp (cmmExprType src) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
801 ; stmtC (CmmAssign (CmmLocal tmp) src)
802 ; return (CmmStore dest (CmmReg (CmmLocal tmp))) }
803 go_via_temp _ = panic "doSimultaneously1: go_via_temp"
804 in
805 mapCs do_component components
806
807 mustFollow :: CmmStmt -> CmmStmt -> Bool
808 CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt
809 CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprType e)) stmt
810 CmmNop `mustFollow` _ = False
811 CmmComment _ `mustFollow` _ = False
812 _ `mustFollow` _ = panic "mustFollow"
813
814
815 anySrc :: (CmmExpr -> Bool) -> CmmStmt -> Bool
816 -- True if the fn is true of any input of the stmt
817 anySrc p (CmmAssign _ e) = p e
818 anySrc p (CmmStore e1 e2) = p e1 || p e2 -- Might be used in either side
819 anySrc _ (CmmComment _) = False
820 anySrc _ CmmNop = False
821 anySrc _ _ = True -- Conservative
822
823 locUsedIn :: CmmExpr -> CmmType -> CmmExpr -> Bool
824 -- (locUsedIn a r e) checks whether writing to r[a] could affect the value of
825 -- 'e'. Returns True if it's not sure.
826 locUsedIn _ _ (CmmLit _) = False
827 locUsedIn loc rep (CmmLoad e ld_rep) = possiblySameLoc loc rep e ld_rep
828 locUsedIn _ _ (CmmReg _) = False
829 locUsedIn _ _ (CmmRegOff _ _) = False
830 locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es
831 locUsedIn _ _ (CmmStackSlot _ _) = panic "locUsedIn: CmmStackSlot"
832
833 possiblySameLoc :: CmmExpr -> CmmType -> CmmExpr -> CmmType -> Bool
834 -- Assumes that distinct registers (eg Hp, Sp) do not
835 -- point to the same location, nor any offset thereof.
836 possiblySameLoc (CmmReg r1) _ (CmmReg r2) _ = r1 == r2
837 possiblySameLoc (CmmReg r1) _ (CmmRegOff r2 0) _ = r1 == r2
838 possiblySameLoc (CmmRegOff r1 0) _ (CmmReg r2) _ = r1 == r2
839 possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2
840 = r1==r2 && end1 > start2 && end2 > start1
841 where
842 end1 = start1 + widthInBytes (typeWidth rep1)
843 end2 = start2 + widthInBytes (typeWidth rep2)
844
845 possiblySameLoc _ _ (CmmLit _) _ = False
846 possiblySameLoc _ _ _ _ = True -- Conservative
847
848 -------------------------------------------------------------------------
849 --
850 -- Static Reference Tables
851 --
852 -------------------------------------------------------------------------
853
854 -- There is just one SRT for each top level binding; all the nested
855 -- bindings use sub-sections of this SRT. The label is passed down to
856 -- the nested bindings via the monad.
857
858 getSRTInfo :: FCode C_SRT
859 getSRTInfo = do
860 srt_lbl <- getSRTLabel
861 srt <- getSRT
862 case srt of
863 -- TODO: Should we panic in this case?
864 -- Someone obviously thinks there should be an SRT
865 NoSRT -> return NoC_SRT
866 SRTEntries {} -> panic "getSRTInfo: SRTEntries. Perhaps you forgot to run SimplStg?"
867 SRT off len bmp
868 | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape]
869 -> do id <- newUnique
870 let srt_desc_lbl = mkLargeSRTLabel id
871 emitRODataLits "getSRTInfo" srt_desc_lbl
872 ( cmmLabelOffW srt_lbl off
873 : mkWordCLit (fromIntegral len)
874 : map mkWordCLit bmp)
875 return (C_SRT srt_desc_lbl 0 srt_escape)
876
877 | otherwise
878 -> return (C_SRT srt_lbl off (fromIntegral (head bmp)))
879 -- The fromIntegral converts to StgHalfWord
880
881 srt_escape :: StgHalfWord
882 srt_escape = -1
883
884 -- -----------------------------------------------------------------------------
885 --
886 -- STG/Cmm GlobalReg
887 --
888 -- -----------------------------------------------------------------------------
889
890 -- | Here is where the STG register map is defined for each target arch.
891 -- The order matters (for the llvm backend anyway)! We must make sure to
892 -- maintain the order here with the order used in the LLVM calling conventions.
893 -- Note that also, this isn't all registers, just the ones that are currently
894 -- possbily mapped to real registers.
895 activeStgRegs :: [GlobalReg]
896 activeStgRegs = [
897 #ifdef REG_Base
898 BaseReg
899 #endif
900 #ifdef REG_Sp
901 ,Sp
902 #endif
903 #ifdef REG_Hp
904 ,Hp
905 #endif
906 #ifdef REG_R1
907 ,VanillaReg 1 VGcPtr
908 #endif
909 #ifdef REG_R2
910 ,VanillaReg 2 VGcPtr
911 #endif
912 #ifdef REG_R3
913 ,VanillaReg 3 VGcPtr
914 #endif
915 #ifdef REG_R4
916 ,VanillaReg 4 VGcPtr
917 #endif
918 #ifdef REG_R5
919 ,VanillaReg 5 VGcPtr
920 #endif
921 #ifdef REG_R6
922 ,VanillaReg 6 VGcPtr
923 #endif
924 #ifdef REG_R7
925 ,VanillaReg 7 VGcPtr
926 #endif
927 #ifdef REG_R8
928 ,VanillaReg 8 VGcPtr
929 #endif
930 #ifdef REG_R9
931 ,VanillaReg 9 VGcPtr
932 #endif
933 #ifdef REG_R10
934 ,VanillaReg 10 VGcPtr
935 #endif
936 #ifdef REG_SpLim
937 ,SpLim
938 #endif
939 #ifdef REG_F1
940 ,FloatReg 1
941 #endif
942 #ifdef REG_F2
943 ,FloatReg 2
944 #endif
945 #ifdef REG_F3
946 ,FloatReg 3
947 #endif
948 #ifdef REG_F4
949 ,FloatReg 4
950 #endif
951 #ifdef REG_D1
952 ,DoubleReg 1
953 #endif
954 #ifdef REG_D2
955 ,DoubleReg 2
956 #endif
957 ]
958
959 -- | We map STG registers onto appropriate CmmExprs. Either they map
960 -- to real machine registers or stored as offsets from BaseReg. Given
961 -- a GlobalReg, get_GlobalReg_addr always produces the
962 -- register table address for it.
963 get_GlobalReg_addr :: GlobalReg -> CmmExpr
964 get_GlobalReg_addr BaseReg = regTableOffset 0
965 get_GlobalReg_addr mid = get_Regtable_addr_from_offset
966 (globalRegType mid) (baseRegOffset mid)
967
968 -- Calculate a literal representing an offset into the register table.
969 -- Used when we don't have an actual BaseReg to offset from.
970 regTableOffset :: Int -> CmmExpr
971 regTableOffset n =
972 CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n))
973
974 get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr
975 get_Regtable_addr_from_offset _ offset =
976 #ifdef REG_Base
977 CmmRegOff (CmmGlobal BaseReg) offset
978 #else
979 regTableOffset offset
980 #endif
981
982 -- | Fixup global registers so that they assign to locations within the
983 -- RegTable if they aren't pinned for the current target.
984 fixStgRegisters :: RawCmmDecl -> RawCmmDecl
985 fixStgRegisters top@(CmmData _ _) = top
986
987 fixStgRegisters (CmmProc info lbl (ListGraph blocks)) =
988 let blocks' = map fixStgRegBlock blocks
989 in CmmProc info lbl $ ListGraph blocks'
990
991 fixStgRegBlock :: CmmBasicBlock -> CmmBasicBlock
992 fixStgRegBlock (BasicBlock id stmts) =
993 let stmts' = map fixStgRegStmt stmts
994 in BasicBlock id stmts'
995
996 fixStgRegStmt :: CmmStmt -> CmmStmt
997 fixStgRegStmt stmt
998 = case stmt of
999 CmmAssign (CmmGlobal reg) src ->
1000 let src' = fixStgRegExpr src
1001 baseAddr = get_GlobalReg_addr reg
1002 in case reg `elem` activeStgRegs of
1003 True -> CmmAssign (CmmGlobal reg) src'
1004 False -> CmmStore baseAddr src'
1005
1006 CmmAssign reg src ->
1007 let src' = fixStgRegExpr src
1008 in CmmAssign reg src'
1009
1010 CmmStore addr src -> CmmStore (fixStgRegExpr addr) (fixStgRegExpr src)
1011
1012 CmmCall target regs args srt returns ->
1013 let target' = case target of
1014 CmmCallee e conv -> CmmCallee (fixStgRegExpr e) conv
1015 other -> other
1016 args' = map (\(CmmHinted arg hint) ->
1017 (CmmHinted (fixStgRegExpr arg) hint)) args
1018 in CmmCall target' regs args' srt returns
1019
1020 CmmCondBranch test dest -> CmmCondBranch (fixStgRegExpr test) dest
1021
1022 CmmSwitch expr ids -> CmmSwitch (fixStgRegExpr expr) ids
1023
1024 CmmJump addr regs -> CmmJump (fixStgRegExpr addr) regs
1025
1026 -- CmmNop, CmmComment, CmmBranch, CmmReturn
1027 _other -> stmt
1028
1029
1030 fixStgRegExpr :: CmmExpr -> CmmExpr
1031 fixStgRegExpr expr
1032 = case expr of
1033 CmmLoad addr ty -> CmmLoad (fixStgRegExpr addr) ty
1034
1035 CmmMachOp mop args -> CmmMachOp mop args'
1036 where args' = map fixStgRegExpr args
1037
1038 CmmReg (CmmGlobal reg) ->
1039 -- Replace register leaves with appropriate StixTrees for
1040 -- the given target. MagicIds which map to a reg on this
1041 -- arch are left unchanged. For the rest, BaseReg is taken
1042 -- to mean the address of the reg table in MainCapability,
1043 -- and for all others we generate an indirection to its
1044 -- location in the register table.
1045 case reg `elem` activeStgRegs of
1046 True -> expr
1047 False ->
1048 let baseAddr = get_GlobalReg_addr reg
1049 in case reg of
1050 BaseReg -> fixStgRegExpr baseAddr
1051 _other -> fixStgRegExpr
1052 (CmmLoad baseAddr (globalRegType reg))
1053
1054 CmmRegOff (CmmGlobal reg) offset ->
1055 -- RegOf leaves are just a shorthand form. If the reg maps
1056 -- to a real reg, we keep the shorthand, otherwise, we just
1057 -- expand it and defer to the above code.
1058 case reg `elem` activeStgRegs of
1059 True -> expr
1060 False -> fixStgRegExpr (CmmMachOp (MO_Add wordWidth) [
1061 CmmReg (CmmGlobal reg),
1062 CmmLit (CmmInt (fromIntegral offset)
1063 wordWidth)])
1064
1065 -- CmmLit, CmmReg (CmmLocal), CmmStackSlot
1066 _other -> expr
1067