Merge branch 'master' of http://darcs.haskell.org/ghc
[ghc.git] / compiler / codeGen / CgUtils.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generator utilities; mostly monadic
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module CgUtils (
10 addIdReps,
11 cgLit,
12 emitDataLits, mkDataLits,
13 emitRODataLits, mkRODataLits,
14 emitIf, emitIfThenElse,
15 emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult,
16 emitRtsCallGen,
17 assignTemp, assignTemp_, newTemp,
18 emitSimultaneously,
19 emitSwitch, emitLitSwitch,
20 tagToClosure,
21
22 callerSaves, callerSaveVolatileRegs, get_GlobalReg_addr,
23 activeStgRegs, fixStgRegisters,
24
25 cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
26 cmmUGtWord, cmmSubWord, cmmMulWord, cmmAddWord, cmmUShrWord,
27 cmmOffsetExprW, cmmOffsetExprB,
28 cmmRegOffW, cmmRegOffB,
29 cmmLabelOffW, cmmLabelOffB,
30 cmmOffsetW, cmmOffsetB,
31 cmmOffsetLitW, cmmOffsetLitB,
32 cmmLoadIndexW,
33 cmmConstrTag, cmmConstrTag1,
34
35 tagForCon, tagCons, isSmallFamily,
36 cmmUntag, cmmIsTagged, cmmGetTag,
37
38 addToMem, addToMemE,
39 mkWordCLit,
40 newStringCLit, newByteStringCLit,
41 packHalfWordsCLit,
42 blankWord,
43
44 getSRTInfo
45 ) where
46
47 #include "HsVersions.h"
48 #include "../includes/stg/MachRegs.h"
49
50 import BlockId
51 import CgMonad
52 import TyCon
53 import DataCon
54 import Id
55 import IdInfo
56 import Constants
57 import SMRep
58 import OldCmm
59 import OldCmmUtils
60 import CLabel
61 import ForeignCall
62 import ClosureInfo
63 import StgSyn (SRT(..))
64 import Module
65 import Literal
66 import Digraph
67 import ListSetOps
68 import Util
69 import DynFlags
70 import FastString
71 import Outputable
72
73 import Data.Char
74 import Data.Word
75 import Data.Maybe
76
77 -------------------------------------------------------------------------
78 --
79 -- Random small functions
80 --
81 -------------------------------------------------------------------------
82
83 addIdReps :: [Id] -> [(CgRep, Id)]
84 addIdReps ids = [(idCgRep id, id) | id <- ids]
85
86 -------------------------------------------------------------------------
87 --
88 -- Literals
89 --
90 -------------------------------------------------------------------------
91
92 cgLit :: Literal -> FCode CmmLit
93 cgLit (MachStr s) = newByteStringCLit (bytesFS s)
94 -- not unpackFS; we want the UTF-8 byte stream.
95 cgLit other_lit = return (mkSimpleLit other_lit)
96
97 mkSimpleLit :: Literal -> CmmLit
98 mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth
99 mkSimpleLit MachNullAddr = zeroCLit
100 mkSimpleLit (MachInt i) = CmmInt i wordWidth
101 mkSimpleLit (MachInt64 i) = CmmInt i W64
102 mkSimpleLit (MachWord i) = CmmInt i wordWidth
103 mkSimpleLit (MachWord64 i) = CmmInt i W64
104 mkSimpleLit (MachFloat r) = CmmFloat r W32
105 mkSimpleLit (MachDouble r) = CmmFloat r W64
106 mkSimpleLit (MachLabel fs ms fod)
107 = CmmLabel (mkForeignLabel fs ms labelSrc fod)
108 where
109 -- TODO: Literal labels might not actually be in the current package...
110 labelSrc = ForeignLabelInThisPackage
111 mkSimpleLit (MachStr _) = panic "mkSimpleLit: MachStr"
112 -- No LitInteger's should be left by the time this is called. CorePrep
113 -- should have converted them all to a real core representation.
114 mkSimpleLit (LitInteger {}) = panic "mkSimpleLit: LitInteger"
115
116 mkLtOp :: Literal -> MachOp
117 -- On signed literals we must do a signed comparison
118 mkLtOp (MachInt _) = MO_S_Lt wordWidth
119 mkLtOp (MachFloat _) = MO_F_Lt W32
120 mkLtOp (MachDouble _) = MO_F_Lt W64
121 mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit)))
122
123
124 ---------------------------------------------------
125 --
126 -- Cmm data type functions
127 --
128 ---------------------------------------------------
129
130
131
132 {-
133 The family size of a data type (the number of constructors)
134 can be either:
135 * small, if the family size < 2**tag_bits
136 * big, otherwise.
137
138 Small families can have the constructor tag in the tag
139 bits.
140 Big families only use the tag value 1 to represent
141 evaluatedness.
142 -}
143 isSmallFamily :: Int -> Bool
144 isSmallFamily fam_size = fam_size <= mAX_PTR_TAG
145
146 tagForCon :: DataCon -> ConTagZ
147 tagForCon con = tag
148 where
149 con_tag = dataConTagZ con
150 fam_size = tyConFamilySize (dataConTyCon con)
151 tag | isSmallFamily fam_size = con_tag + 1
152 | otherwise = 1
153
154 --Tag an expression, to do: refactor, this appears in some other module.
155 tagCons :: DataCon -> CmmExpr -> CmmExpr
156 tagCons con expr = cmmOffsetB expr (tagForCon con)
157
158 --------------------------------------------------------------------------
159 --
160 -- Incrementing a memory location
161 --
162 --------------------------------------------------------------------------
163
164 addToMem :: Width -- rep of the counter
165 -> CmmExpr -- Address
166 -> Int -- What to add (a word)
167 -> CmmStmt
168 addToMem width ptr n = addToMemE width ptr (CmmLit (CmmInt (toInteger n) width))
169
170 addToMemE :: Width -- rep of the counter
171 -> CmmExpr -- Address
172 -> CmmExpr -- What to add (a word-typed expression)
173 -> CmmStmt
174 addToMemE width ptr n
175 = CmmStore ptr (CmmMachOp (MO_Add width) [CmmLoad ptr (cmmBits width), n])
176
177 -------------------------------------------------------------------------
178 --
179 -- Converting a closure tag to a closure for enumeration types
180 -- (this is the implementation of tagToEnum#).
181 --
182 -------------------------------------------------------------------------
183
184 tagToClosure :: TyCon -> CmmExpr -> CmmExpr
185 tagToClosure tycon tag
186 = CmmLoad (cmmOffsetExprW closure_tbl tag) gcWord
187 where closure_tbl = CmmLit (CmmLabel lbl)
188 lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs
189
190 -------------------------------------------------------------------------
191 --
192 -- Conditionals and rts calls
193 --
194 -------------------------------------------------------------------------
195
196 emitIf :: CmmExpr -- Boolean
197 -> Code -- Then part
198 -> Code
199 -- Emit (if e then x)
200 -- ToDo: reverse the condition to avoid the extra branch instruction if possible
201 -- (some conditionals aren't reversible. eg. floating point comparisons cannot
202 -- be inverted because there exist some values for which both comparisons
203 -- return False, such as NaN.)
204 emitIf cond then_part
205 = do { then_id <- newLabelC
206 ; join_id <- newLabelC
207 ; stmtC (CmmCondBranch cond then_id)
208 ; stmtC (CmmBranch join_id)
209 ; labelC then_id
210 ; then_part
211 ; labelC join_id
212 }
213
214 emitIfThenElse :: CmmExpr -- Boolean
215 -> Code -- Then part
216 -> Code -- Else part
217 -> Code
218 -- Emit (if e then x else y)
219 emitIfThenElse cond then_part else_part
220 = do { then_id <- newLabelC
221 ; join_id <- newLabelC
222 ; stmtC (CmmCondBranch cond then_id)
223 ; else_part
224 ; stmtC (CmmBranch join_id)
225 ; labelC then_id
226 ; then_part
227 ; labelC join_id
228 }
229
230
231 -- | Emit code to call a Cmm function.
232 emitRtsCall
233 :: PackageId -- ^ package the function is in
234 -> FastString -- ^ name of function
235 -> [CmmHinted CmmExpr] -- ^ function args
236 -> Bool -- ^ whether this is a safe call
237 -> Code -- ^ cmm code
238
239 emitRtsCall pkg fun args safe = emitRtsCallGen [] pkg fun args Nothing safe
240 -- The 'Nothing' says "save all global registers"
241
242 emitRtsCallWithVols :: PackageId -> FastString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Bool -> Code
243 emitRtsCallWithVols pkg fun args vols safe
244 = emitRtsCallGen [] pkg fun args (Just vols) safe
245
246 emitRtsCallWithResult
247 :: LocalReg -> ForeignHint
248 -> PackageId -> FastString
249 -> [CmmHinted CmmExpr] -> Bool -> Code
250
251 emitRtsCallWithResult res hint pkg fun args safe
252 = emitRtsCallGen [CmmHinted res hint] pkg fun args Nothing safe
253
254 -- Make a call to an RTS C procedure
255 emitRtsCallGen
256 :: [CmmHinted LocalReg]
257 -> PackageId
258 -> FastString
259 -> [CmmHinted CmmExpr]
260 -> Maybe [GlobalReg]
261 -> Bool -- True <=> CmmSafe call
262 -> Code
263 emitRtsCallGen res pkg fun args vols safe = do
264 safety <- if safe
265 then getSRTInfo >>= (return . CmmSafe)
266 else return CmmUnsafe
267 stmtsC caller_save
268 stmtC (CmmCall target res args safety CmmMayReturn)
269 stmtsC caller_load
270 where
271 (caller_save, caller_load) = callerSaveVolatileRegs vols
272 target = CmmCallee fun_expr CCallConv
273 fun_expr = mkLblExpr (mkCmmCodeLabel pkg fun)
274
275 -----------------------------------------------------------------------------
276 --
277 -- Caller-Save Registers
278 --
279 -----------------------------------------------------------------------------
280
281 -- Here we generate the sequence of saves/restores required around a
282 -- foreign call instruction.
283
284 -- TODO: reconcile with includes/Regs.h
285 -- * Regs.h claims that BaseReg should be saved last and loaded first
286 -- * This might not have been tickled before since BaseReg is callee save
287 -- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim
288 callerSaveVolatileRegs :: Maybe [GlobalReg] -> ([CmmStmt], [CmmStmt])
289 callerSaveVolatileRegs vols = (caller_save, caller_load)
290 where
291 caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save)
292 caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save)
293
294 system_regs = [Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery,
295 {-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ]
296
297 regs_to_save = system_regs ++ vol_list
298
299 vol_list = case vols of Nothing -> all_of_em; Just regs -> regs
300
301 all_of_em = [ VanillaReg n VNonGcPtr | n <- [0..mAX_Vanilla_REG] ]
302 -- The VNonGcPtr is a lie, but I don't think it matters
303 ++ [ FloatReg n | n <- [0..mAX_Float_REG] ]
304 ++ [ DoubleReg n | n <- [0..mAX_Double_REG] ]
305 ++ [ LongReg n | n <- [0..mAX_Long_REG] ]
306
307 callerSaveGlobalReg reg next
308 | callerSaves reg =
309 CmmStore (get_GlobalReg_addr reg)
310 (CmmReg (CmmGlobal reg)) : next
311 | otherwise = next
312
313 callerRestoreGlobalReg reg next
314 | callerSaves reg =
315 CmmAssign (CmmGlobal reg)
316 (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg))
317 : next
318 | otherwise = next
319
320
321 -- | Returns @True@ if this global register is stored in a caller-saves
322 -- machine register.
323
324 callerSaves :: GlobalReg -> Bool
325
326 #ifdef CALLER_SAVES_Base
327 callerSaves BaseReg = True
328 #endif
329 #ifdef CALLER_SAVES_R1
330 callerSaves (VanillaReg 1 _) = True
331 #endif
332 #ifdef CALLER_SAVES_R2
333 callerSaves (VanillaReg 2 _) = True
334 #endif
335 #ifdef CALLER_SAVES_R3
336 callerSaves (VanillaReg 3 _) = True
337 #endif
338 #ifdef CALLER_SAVES_R4
339 callerSaves (VanillaReg 4 _) = True
340 #endif
341 #ifdef CALLER_SAVES_R5
342 callerSaves (VanillaReg 5 _) = True
343 #endif
344 #ifdef CALLER_SAVES_R6
345 callerSaves (VanillaReg 6 _) = True
346 #endif
347 #ifdef CALLER_SAVES_R7
348 callerSaves (VanillaReg 7 _) = True
349 #endif
350 #ifdef CALLER_SAVES_R8
351 callerSaves (VanillaReg 8 _) = True
352 #endif
353 #ifdef CALLER_SAVES_F1
354 callerSaves (FloatReg 1) = True
355 #endif
356 #ifdef CALLER_SAVES_F2
357 callerSaves (FloatReg 2) = True
358 #endif
359 #ifdef CALLER_SAVES_F3
360 callerSaves (FloatReg 3) = True
361 #endif
362 #ifdef CALLER_SAVES_F4
363 callerSaves (FloatReg 4) = True
364 #endif
365 #ifdef CALLER_SAVES_D1
366 callerSaves (DoubleReg 1) = True
367 #endif
368 #ifdef CALLER_SAVES_D2
369 callerSaves (DoubleReg 2) = True
370 #endif
371 #ifdef CALLER_SAVES_L1
372 callerSaves (LongReg 1) = True
373 #endif
374 #ifdef CALLER_SAVES_Sp
375 callerSaves Sp = True
376 #endif
377 #ifdef CALLER_SAVES_SpLim
378 callerSaves SpLim = True
379 #endif
380 #ifdef CALLER_SAVES_Hp
381 callerSaves Hp = True
382 #endif
383 #ifdef CALLER_SAVES_HpLim
384 callerSaves HpLim = True
385 #endif
386 #ifdef CALLER_SAVES_CurrentTSO
387 callerSaves CurrentTSO = True
388 #endif
389 #ifdef CALLER_SAVES_CurrentNursery
390 callerSaves CurrentNursery = True
391 #endif
392 callerSaves _ = False
393
394
395 -- -----------------------------------------------------------------------------
396 -- Information about global registers
397
398 baseRegOffset :: GlobalReg -> Int
399
400 baseRegOffset (VanillaReg 1 _) = oFFSET_StgRegTable_rR1
401 baseRegOffset (VanillaReg 2 _) = oFFSET_StgRegTable_rR2
402 baseRegOffset (VanillaReg 3 _) = oFFSET_StgRegTable_rR3
403 baseRegOffset (VanillaReg 4 _) = oFFSET_StgRegTable_rR4
404 baseRegOffset (VanillaReg 5 _) = oFFSET_StgRegTable_rR5
405 baseRegOffset (VanillaReg 6 _) = oFFSET_StgRegTable_rR6
406 baseRegOffset (VanillaReg 7 _) = oFFSET_StgRegTable_rR7
407 baseRegOffset (VanillaReg 8 _) = oFFSET_StgRegTable_rR8
408 baseRegOffset (VanillaReg 9 _) = oFFSET_StgRegTable_rR9
409 baseRegOffset (VanillaReg 10 _) = oFFSET_StgRegTable_rR10
410 baseRegOffset (FloatReg 1) = oFFSET_StgRegTable_rF1
411 baseRegOffset (FloatReg 2) = oFFSET_StgRegTable_rF2
412 baseRegOffset (FloatReg 3) = oFFSET_StgRegTable_rF3
413 baseRegOffset (FloatReg 4) = oFFSET_StgRegTable_rF4
414 baseRegOffset (DoubleReg 1) = oFFSET_StgRegTable_rD1
415 baseRegOffset (DoubleReg 2) = oFFSET_StgRegTable_rD2
416 baseRegOffset Sp = oFFSET_StgRegTable_rSp
417 baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim
418 baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
419 baseRegOffset Hp = oFFSET_StgRegTable_rHp
420 baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
421 baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
422 baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
423 baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
424 baseRegOffset EagerBlackholeInfo = oFFSET_stgEagerBlackholeInfo
425 baseRegOffset GCEnter1 = oFFSET_stgGCEnter1
426 baseRegOffset GCFun = oFFSET_stgGCFun
427 baseRegOffset BaseReg = panic "baseRegOffset:BaseReg"
428 baseRegOffset _ = panic "baseRegOffset:other"
429
430
431 -------------------------------------------------------------------------
432 --
433 -- Strings generate a top-level data block
434 --
435 -------------------------------------------------------------------------
436
437 emitDataLits :: CLabel -> [CmmLit] -> Code
438 -- Emit a data-segment data block
439 emitDataLits lbl lits = emitDecl (mkDataLits Data lbl lits)
440
441 emitRODataLits :: String -> CLabel -> [CmmLit] -> Code
442 -- Emit a read-only data block
443 emitRODataLits _caller lbl lits
444 = emitDecl (mkRODataLits lbl lits)
445
446 newStringCLit :: String -> FCode CmmLit
447 -- Make a global definition for the string,
448 -- and return its label
449 newStringCLit str = newByteStringCLit (map (fromIntegral.ord) str)
450
451 newByteStringCLit :: [Word8] -> FCode CmmLit
452 newByteStringCLit bytes
453 = do { uniq <- newUnique
454 ; let (lit, decl) = mkByteStringCLit uniq bytes
455 ; emitDecl decl
456 ; return lit }
457
458 -------------------------------------------------------------------------
459 --
460 -- Assigning expressions to temporaries
461 --
462 -------------------------------------------------------------------------
463
464 -- | If the expression is trivial, return it. Otherwise, assign the
465 -- expression to a temporary register and return an expression
466 -- referring to this register.
467 assignTemp :: CmmExpr -> FCode CmmExpr
468 -- For a non-trivial expression, e, create a local
469 -- variable and assign the expression to it
470 assignTemp e
471 | isTrivialCmmExpr e = return e
472 | otherwise = do { reg <- newTemp (cmmExprType e)
473 ; stmtC (CmmAssign (CmmLocal reg) e)
474 ; return (CmmReg (CmmLocal reg)) }
475
476 -- | If the expression is trivial and doesn't refer to a global
477 -- register, return it. Otherwise, assign the expression to a
478 -- temporary register and return an expression referring to this
479 -- register.
480 assignTemp_ :: CmmExpr -> FCode CmmExpr
481 assignTemp_ e
482 | isTrivialCmmExpr e && hasNoGlobalRegs e = return e
483 | otherwise = do
484 reg <- newTemp (cmmExprType e)
485 stmtC (CmmAssign (CmmLocal reg) e)
486 return (CmmReg (CmmLocal reg))
487
488 newTemp :: CmmType -> FCode LocalReg
489 newTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep) }
490
491 -------------------------------------------------------------------------
492 --
493 -- Building case analysis
494 --
495 -------------------------------------------------------------------------
496
497 emitSwitch
498 :: CmmExpr -- Tag to switch on
499 -> [(ConTagZ, CgStmts)] -- Tagged branches
500 -> Maybe CgStmts -- Default branch (if any)
501 -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour
502 -- outside this range is undefined
503 -> Code
504
505 -- ONLY A DEFAULT BRANCH: no case analysis to do
506 emitSwitch _ [] (Just stmts) _ _
507 = emitCgStmts stmts
508
509 -- Right, off we go
510 emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
511 = -- Just sort the branches before calling mk_sritch
512 do { mb_deflt_id <-
513 case mb_deflt of
514 Nothing -> return Nothing
515 Just stmts -> do id <- forkCgStmts stmts; return (Just id)
516
517 ; dflags <- getDynFlags
518 ; let via_C | HscC <- hscTarget dflags = True
519 | otherwise = False
520
521 ; stmts <- mk_switch tag_expr (sortLe le branches)
522 mb_deflt_id lo_tag hi_tag via_C
523 ; emitCgStmts stmts
524 }
525 where
526 (t1,_) `le` (t2,_) = t1 <= t2
527
528
529 mk_switch :: CmmExpr -> [(ConTagZ, CgStmts)]
530 -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool
531 -> FCode CgStmts
532
533 -- SINGLETON TAG RANGE: no case analysis to do
534 mk_switch _tag_expr [(tag,stmts)] _ lo_tag hi_tag _via_C
535 | lo_tag == hi_tag
536 = ASSERT( tag == lo_tag )
537 return stmts
538
539 -- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
540 mk_switch _tag_expr [(_tag,stmts)] Nothing _lo_tag _hi_tag _via_C
541 = return stmts
542 -- The simplifier might have eliminated a case
543 -- so we may have e.g. case xs of
544 -- [] -> e
545 -- In that situation we can be sure the (:) case
546 -- can't happen, so no need to test
547
548 -- SINGLETON BRANCH: one equality check to do
549 mk_switch tag_expr [(tag,stmts)] (Just deflt) _lo_tag _hi_tag _via_C
550 = return (CmmCondBranch cond deflt `consCgStmt` stmts)
551 where
552 cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
553 -- We have lo_tag < hi_tag, but there's only one branch,
554 -- so there must be a default
555
556 -- ToDo: we might want to check for the two branch case, where one of
557 -- the branches is the tag 0, because comparing '== 0' is likely to be
558 -- more efficient than other kinds of comparison.
559
560 -- DENSE TAG RANGE: use a switch statment.
561 --
562 -- We also use a switch uncoditionally when compiling via C, because
563 -- this will get emitted as a C switch statement and the C compiler
564 -- should do a good job of optimising it. Also, older GCC versions
565 -- (2.95 in particular) have problems compiling the complicated
566 -- if-trees generated by this code, so compiling to a switch every
567 -- time works around that problem.
568 --
569 mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
570 | use_switch -- Use a switch
571 = do { branch_ids <- mapM forkCgStmts (map snd branches)
572 ; let
573 tagged_blk_ids = zip (map fst branches) (map Just branch_ids)
574
575 find_branch :: ConTagZ -> Maybe BlockId
576 find_branch i = assocDefault mb_deflt tagged_blk_ids i
577
578 -- NB. we have eliminated impossible branches at
579 -- either end of the range (see below), so the first
580 -- tag of a real branch is real_lo_tag (not lo_tag).
581 arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]]
582
583 switch_stmt = CmmSwitch (cmmOffset tag_expr (- real_lo_tag)) arms
584
585 ; ASSERT(not (all isNothing arms))
586 return (oneCgStmt switch_stmt)
587 }
588
589 -- if we can knock off a bunch of default cases with one if, then do so
590 | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches
591 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
592 ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch))
593 branch = CmmCondBranch cond deflt
594 ; stmts <- mk_switch tag_expr' branches mb_deflt
595 lowest_branch hi_tag via_C
596 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
597 }
598
599 | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches
600 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
601 ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch))
602 branch = CmmCondBranch cond deflt
603 ; stmts <- mk_switch tag_expr' branches mb_deflt
604 lo_tag highest_branch via_C
605 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
606 }
607
608 | otherwise -- Use an if-tree
609 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
610 -- To avoid duplication
611 ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt
612 lo_tag (mid_tag-1) via_C
613 ; hi_stmts <- mk_switch tag_expr' hi_branches mb_deflt
614 mid_tag hi_tag via_C
615 ; hi_id <- forkCgStmts hi_stmts
616 ; let cond = cmmUGeWord tag_expr' (CmmLit (mkIntCLit mid_tag))
617 branch_stmt = CmmCondBranch cond hi_id
618 ; return (assign_tag `consCgStmt` (branch_stmt `consCgStmt` lo_stmts))
619 }
620 -- we test (e >= mid_tag) rather than (e < mid_tag), because
621 -- the former works better when e is a comparison, and there
622 -- are two tags 0 & 1 (mid_tag == 1). In this case, the code
623 -- generator can reduce the condition to e itself without
624 -- having to reverse the sense of the comparison: comparisons
625 -- can't always be easily reversed (eg. floating
626 -- pt. comparisons).
627 where
628 use_switch = {- pprTrace "mk_switch" (
629 ppr tag_expr <+> text "n_tags:" <+> int n_tags <+>
630 text "branches:" <+> ppr (map fst branches) <+>
631 text "n_branches:" <+> int n_branches <+>
632 text "lo_tag:" <+> int lo_tag <+>
633 text "hi_tag:" <+> int hi_tag <+>
634 text "real_lo_tag:" <+> int real_lo_tag <+>
635 text "real_hi_tag:" <+> int real_hi_tag) $ -}
636 ASSERT( n_branches > 1 && n_tags > 1 )
637 n_tags > 2 && (via_C || (dense && big_enough))
638 -- up to 4 branches we use a decision tree, otherwise
639 -- a switch (== jump table in the NCG). This seems to be
640 -- optimal, and corresponds with what gcc does.
641 big_enough = n_branches > 4
642 dense = n_branches > (n_tags `div` 2)
643 n_branches = length branches
644
645 -- ignore default slots at each end of the range if there's
646 -- no default branch defined.
647 lowest_branch = fst (head branches)
648 highest_branch = fst (last branches)
649
650 real_lo_tag
651 | isNothing mb_deflt = lowest_branch
652 | otherwise = lo_tag
653
654 real_hi_tag
655 | isNothing mb_deflt = highest_branch
656 | otherwise = hi_tag
657
658 n_tags = real_hi_tag - real_lo_tag + 1
659
660 -- INVARIANT: Provided hi_tag > lo_tag (which is true)
661 -- lo_tag <= mid_tag < hi_tag
662 -- lo_branches have tags < mid_tag
663 -- hi_branches have tags >= mid_tag
664
665 (mid_tag,_) = branches !! (n_branches `div` 2)
666 -- 2 branches => n_branches `div` 2 = 1
667 -- => branches !! 1 give the *second* tag
668 -- There are always at least 2 branches here
669
670 (lo_branches, hi_branches) = span is_lo branches
671 is_lo (t,_) = t < mid_tag
672
673 assignTemp' :: CmmExpr -> FCode (CmmStmt, CmmExpr)
674 assignTemp' e
675 | isTrivialCmmExpr e = return (CmmNop, e)
676 | otherwise = do { reg <- newTemp (cmmExprType e)
677 ; return (CmmAssign (CmmLocal reg) e, CmmReg (CmmLocal reg)) }
678
679 emitLitSwitch :: CmmExpr -- Tag to switch on
680 -> [(Literal, CgStmts)] -- Tagged branches
681 -> CgStmts -- Default branch (always)
682 -> Code -- Emit the code
683 -- Used for general literals, whose size might not be a word,
684 -- where there is always a default case, and where we don't know
685 -- the range of values for certain. For simplicity we always generate a tree.
686 --
687 -- ToDo: for integers we could do better here, perhaps by generalising
688 -- mk_switch and using that. --SDM 15/09/2004
689 emitLitSwitch _ [] deflt = emitCgStmts deflt
690 emitLitSwitch scrut branches deflt_blk
691 = do { scrut' <- assignTemp scrut
692 ; deflt_blk_id <- forkCgStmts deflt_blk
693 ; blk <- mk_lit_switch scrut' deflt_blk_id (sortLe le branches)
694 ; emitCgStmts blk }
695 where
696 le (t1,_) (t2,_) = t1 <= t2
697
698 mk_lit_switch :: CmmExpr -> BlockId
699 -> [(Literal,CgStmts)]
700 -> FCode CgStmts
701 mk_lit_switch scrut deflt_blk_id [(lit,blk)]
702 = return (consCgStmt if_stmt blk)
703 where
704 cmm_lit = mkSimpleLit lit
705 rep = cmmLitType cmm_lit
706 ne = if isFloatType rep then MO_F_Ne else MO_Ne
707 cond = CmmMachOp (ne (typeWidth rep)) [scrut, CmmLit cmm_lit]
708 if_stmt = CmmCondBranch cond deflt_blk_id
709
710 mk_lit_switch scrut deflt_blk_id branches
711 = do { hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
712 ; lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
713 ; lo_blk_id <- forkCgStmts lo_blk
714 ; let if_stmt = CmmCondBranch cond lo_blk_id
715 ; return (if_stmt `consCgStmt` hi_blk) }
716 where
717 n_branches = length branches
718 (mid_lit,_) = branches !! (n_branches `div` 2)
719 -- See notes above re mid_tag
720
721 (lo_branches, hi_branches) = span is_lo branches
722 is_lo (t,_) = t < mid_lit
723
724 cond = CmmMachOp (mkLtOp mid_lit)
725 [scrut, CmmLit (mkSimpleLit mid_lit)]
726
727 -------------------------------------------------------------------------
728 --
729 -- Simultaneous assignment
730 --
731 -------------------------------------------------------------------------
732
733
734 emitSimultaneously :: CmmStmts -> Code
735 -- Emit code to perform the assignments in the
736 -- input simultaneously, using temporary variables when necessary.
737 --
738 -- The Stmts must be:
739 -- CmmNop, CmmComment, CmmAssign, CmmStore
740 -- and nothing else
741
742
743 -- We use the strongly-connected component algorithm, in which
744 -- * the vertices are the statements
745 -- * an edge goes from s1 to s2 iff
746 -- s1 assigns to something s2 uses
747 -- that is, if s1 should *follow* s2 in the final order
748
749 type CVertex = (Int, CmmStmt) -- Give each vertex a unique number,
750 -- for fast comparison
751
752 emitSimultaneously stmts
753 = codeOnly $
754 case filterOut isNopStmt (stmtList stmts) of
755 -- Remove no-ops
756 [] -> nopC
757 [stmt] -> stmtC stmt -- It's often just one stmt
758 stmt_list -> doSimultaneously1 (zip [(1::Int)..] stmt_list)
759
760 doSimultaneously1 :: [CVertex] -> Code
761 doSimultaneously1 vertices
762 = let
763 edges = [ (vertex, key1, edges_from stmt1)
764 | vertex@(key1, stmt1) <- vertices
765 ]
766 edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
767 stmt1 `mustFollow` stmt2
768 ]
769 components = stronglyConnCompFromEdgedVertices edges
770
771 -- do_components deal with one strongly-connected component
772 -- Not cyclic, or singleton? Just do it
773 do_component (AcyclicSCC (_n, stmt)) = stmtC stmt
774 do_component (CyclicSCC [])
775 = panic "doSimultaneously1: do_component (CyclicSCC [])"
776 do_component (CyclicSCC [(_n, stmt)]) = stmtC stmt
777
778 -- Cyclic? Then go via temporaries. Pick one to
779 -- break the loop and try again with the rest.
780 do_component (CyclicSCC ((_n, first_stmt) : rest))
781 = do { from_temp <- go_via_temp first_stmt
782 ; doSimultaneously1 rest
783 ; stmtC from_temp }
784
785 go_via_temp (CmmAssign dest src)
786 = do { tmp <- newTemp (cmmRegType dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
787 ; stmtC (CmmAssign (CmmLocal tmp) src)
788 ; return (CmmAssign dest (CmmReg (CmmLocal tmp))) }
789 go_via_temp (CmmStore dest src)
790 = do { tmp <- newTemp (cmmExprType src) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
791 ; stmtC (CmmAssign (CmmLocal tmp) src)
792 ; return (CmmStore dest (CmmReg (CmmLocal tmp))) }
793 go_via_temp _ = panic "doSimultaneously1: go_via_temp"
794 in
795 mapCs do_component components
796
797 mustFollow :: CmmStmt -> CmmStmt -> Bool
798 CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt
799 CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprType e)) stmt
800 CmmNop `mustFollow` _ = False
801 CmmComment _ `mustFollow` _ = False
802 _ `mustFollow` _ = panic "mustFollow"
803
804
805 anySrc :: (CmmExpr -> Bool) -> CmmStmt -> Bool
806 -- True if the fn is true of any input of the stmt
807 anySrc p (CmmAssign _ e) = p e
808 anySrc p (CmmStore e1 e2) = p e1 || p e2 -- Might be used in either side
809 anySrc _ (CmmComment _) = False
810 anySrc _ CmmNop = False
811 anySrc _ _ = True -- Conservative
812
813 locUsedIn :: CmmExpr -> CmmType -> CmmExpr -> Bool
814 -- (locUsedIn a r e) checks whether writing to r[a] could affect the value of
815 -- 'e'. Returns True if it's not sure.
816 locUsedIn _ _ (CmmLit _) = False
817 locUsedIn loc rep (CmmLoad e ld_rep) = possiblySameLoc loc rep e ld_rep
818 locUsedIn _ _ (CmmReg _) = False
819 locUsedIn _ _ (CmmRegOff _ _) = False
820 locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es
821 locUsedIn _ _ (CmmStackSlot _ _) = panic "locUsedIn: CmmStackSlot"
822
823 possiblySameLoc :: CmmExpr -> CmmType -> CmmExpr -> CmmType -> Bool
824 -- Assumes that distinct registers (eg Hp, Sp) do not
825 -- point to the same location, nor any offset thereof.
826 possiblySameLoc (CmmReg r1) _ (CmmReg r2) _ = r1 == r2
827 possiblySameLoc (CmmReg r1) _ (CmmRegOff r2 0) _ = r1 == r2
828 possiblySameLoc (CmmRegOff r1 0) _ (CmmReg r2) _ = r1 == r2
829 possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2
830 = r1==r2 && end1 > start2 && end2 > start1
831 where
832 end1 = start1 + widthInBytes (typeWidth rep1)
833 end2 = start2 + widthInBytes (typeWidth rep2)
834
835 possiblySameLoc _ _ (CmmLit _) _ = False
836 possiblySameLoc _ _ _ _ = True -- Conservative
837
838 -------------------------------------------------------------------------
839 --
840 -- Static Reference Tables
841 --
842 -------------------------------------------------------------------------
843
844 -- There is just one SRT for each top level binding; all the nested
845 -- bindings use sub-sections of this SRT. The label is passed down to
846 -- the nested bindings via the monad.
847
848 getSRTInfo :: FCode C_SRT
849 getSRTInfo = do
850 srt_lbl <- getSRTLabel
851 srt <- getSRT
852 case srt of
853 -- TODO: Should we panic in this case?
854 -- Someone obviously thinks there should be an SRT
855 NoSRT -> return NoC_SRT
856 SRTEntries {} -> panic "getSRTInfo: SRTEntries. Perhaps you forgot to run SimplStg?"
857 SRT off len bmp
858 | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape]
859 -> do id <- newUnique
860 let srt_desc_lbl = mkLargeSRTLabel id
861 emitRODataLits "getSRTInfo" srt_desc_lbl
862 ( cmmLabelOffW srt_lbl off
863 : mkWordCLit (fromIntegral len)
864 : map mkWordCLit bmp)
865 return (C_SRT srt_desc_lbl 0 srt_escape)
866
867 | otherwise
868 -> return (C_SRT srt_lbl off (fromIntegral (head bmp)))
869 -- The fromIntegral converts to StgHalfWord
870
871 srt_escape :: StgHalfWord
872 srt_escape = -1
873
874 -- -----------------------------------------------------------------------------
875 --
876 -- STG/Cmm GlobalReg
877 --
878 -- -----------------------------------------------------------------------------
879
880 -- | Here is where the STG register map is defined for each target arch.
881 -- The order matters (for the llvm backend anyway)! We must make sure to
882 -- maintain the order here with the order used in the LLVM calling conventions.
883 -- Note that also, this isn't all registers, just the ones that are currently
884 -- possbily mapped to real registers.
885 activeStgRegs :: [GlobalReg]
886 activeStgRegs = [
887 #ifdef REG_Base
888 BaseReg
889 #endif
890 #ifdef REG_Sp
891 ,Sp
892 #endif
893 #ifdef REG_Hp
894 ,Hp
895 #endif
896 #ifdef REG_R1
897 ,VanillaReg 1 VGcPtr
898 #endif
899 #ifdef REG_R2
900 ,VanillaReg 2 VGcPtr
901 #endif
902 #ifdef REG_R3
903 ,VanillaReg 3 VGcPtr
904 #endif
905 #ifdef REG_R4
906 ,VanillaReg 4 VGcPtr
907 #endif
908 #ifdef REG_R5
909 ,VanillaReg 5 VGcPtr
910 #endif
911 #ifdef REG_R6
912 ,VanillaReg 6 VGcPtr
913 #endif
914 #ifdef REG_R7
915 ,VanillaReg 7 VGcPtr
916 #endif
917 #ifdef REG_R8
918 ,VanillaReg 8 VGcPtr
919 #endif
920 #ifdef REG_SpLim
921 ,SpLim
922 #endif
923 #ifdef REG_F1
924 ,FloatReg 1
925 #endif
926 #ifdef REG_F2
927 ,FloatReg 2
928 #endif
929 #ifdef REG_F3
930 ,FloatReg 3
931 #endif
932 #ifdef REG_F4
933 ,FloatReg 4
934 #endif
935 #ifdef REG_D1
936 ,DoubleReg 1
937 #endif
938 #ifdef REG_D2
939 ,DoubleReg 2
940 #endif
941 ]
942
943 -- | We map STG registers onto appropriate CmmExprs. Either they map
944 -- to real machine registers or stored as offsets from BaseReg. Given
945 -- a GlobalReg, get_GlobalReg_addr always produces the
946 -- register table address for it.
947 get_GlobalReg_addr :: GlobalReg -> CmmExpr
948 get_GlobalReg_addr BaseReg = regTableOffset 0
949 get_GlobalReg_addr mid = get_Regtable_addr_from_offset
950 (globalRegType mid) (baseRegOffset mid)
951
952 -- Calculate a literal representing an offset into the register table.
953 -- Used when we don't have an actual BaseReg to offset from.
954 regTableOffset :: Int -> CmmExpr
955 regTableOffset n =
956 CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n))
957
958 get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr
959 get_Regtable_addr_from_offset _ offset =
960 #ifdef REG_Base
961 CmmRegOff (CmmGlobal BaseReg) offset
962 #else
963 regTableOffset offset
964 #endif
965
966 -- | Fixup global registers so that they assign to locations within the
967 -- RegTable if they aren't pinned for the current target.
968 fixStgRegisters :: RawCmmDecl -> RawCmmDecl
969 fixStgRegisters top@(CmmData _ _) = top
970
971 fixStgRegisters (CmmProc info lbl (ListGraph blocks)) =
972 let blocks' = map fixStgRegBlock blocks
973 in CmmProc info lbl $ ListGraph blocks'
974
975 fixStgRegBlock :: CmmBasicBlock -> CmmBasicBlock
976 fixStgRegBlock (BasicBlock id stmts) =
977 let stmts' = map fixStgRegStmt stmts
978 in BasicBlock id stmts'
979
980 fixStgRegStmt :: CmmStmt -> CmmStmt
981 fixStgRegStmt stmt
982 = case stmt of
983 CmmAssign (CmmGlobal reg) src ->
984 let src' = fixStgRegExpr src
985 baseAddr = get_GlobalReg_addr reg
986 in case reg `elem` activeStgRegs of
987 True -> CmmAssign (CmmGlobal reg) src'
988 False -> CmmStore baseAddr src'
989
990 CmmAssign reg src ->
991 let src' = fixStgRegExpr src
992 in CmmAssign reg src'
993
994 CmmStore addr src -> CmmStore (fixStgRegExpr addr) (fixStgRegExpr src)
995
996 CmmCall target regs args srt returns ->
997 let target' = case target of
998 CmmCallee e conv -> CmmCallee (fixStgRegExpr e) conv
999 other -> other
1000 args' = map (\(CmmHinted arg hint) ->
1001 (CmmHinted (fixStgRegExpr arg) hint)) args
1002 in CmmCall target' regs args' srt returns
1003
1004 CmmCondBranch test dest -> CmmCondBranch (fixStgRegExpr test) dest
1005
1006 CmmSwitch expr ids -> CmmSwitch (fixStgRegExpr expr) ids
1007
1008 CmmJump addr regs -> CmmJump (fixStgRegExpr addr) regs
1009
1010 -- CmmNop, CmmComment, CmmBranch, CmmReturn
1011 _other -> stmt
1012
1013
1014 fixStgRegExpr :: CmmExpr -> CmmExpr
1015 fixStgRegExpr expr
1016 = case expr of
1017 CmmLoad addr ty -> CmmLoad (fixStgRegExpr addr) ty
1018
1019 CmmMachOp mop args -> CmmMachOp mop args'
1020 where args' = map fixStgRegExpr args
1021
1022 CmmReg (CmmGlobal reg) ->
1023 -- Replace register leaves with appropriate StixTrees for
1024 -- the given target. MagicIds which map to a reg on this
1025 -- arch are left unchanged. For the rest, BaseReg is taken
1026 -- to mean the address of the reg table in MainCapability,
1027 -- and for all others we generate an indirection to its
1028 -- location in the register table.
1029 case reg `elem` activeStgRegs of
1030 True -> expr
1031 False ->
1032 let baseAddr = get_GlobalReg_addr reg
1033 in case reg of
1034 BaseReg -> fixStgRegExpr baseAddr
1035 _other -> fixStgRegExpr
1036 (CmmLoad baseAddr (globalRegType reg))
1037
1038 CmmRegOff (CmmGlobal reg) offset ->
1039 -- RegOf leaves are just a shorthand form. If the reg maps
1040 -- to a real reg, we keep the shorthand, otherwise, we just
1041 -- expand it and defer to the above code.
1042 case reg `elem` activeStgRegs of
1043 True -> expr
1044 False -> fixStgRegExpr (CmmMachOp (MO_Add wordWidth) [
1045 CmmReg (CmmGlobal reg),
1046 CmmLit (CmmInt (fromIntegral offset)
1047 wordWidth)])
1048
1049 -- CmmLit, CmmReg (CmmLocal), CmmStackSlot
1050 _other -> expr
1051