Merge remote-tracking branch 'origin/type-nats' into type-nats-merge
[ghc.git] / compiler / codeGen / CgUtils.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generator utilities; mostly monadic
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module CgUtils (
10 addIdReps,
11 cgLit,
12 emitDataLits, mkDataLits,
13 emitRODataLits, mkRODataLits,
14 emitIf, emitIfThenElse,
15 emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult,
16 emitRtsCallGen,
17 assignTemp, assignTemp_, newTemp,
18 emitSimultaneously,
19 emitSwitch, emitLitSwitch,
20 tagToClosure,
21
22 callerSaves, callerSaveVolatileRegs, get_GlobalReg_addr,
23 activeStgRegs, fixStgRegisters,
24
25 cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
26 cmmUGtWord, cmmSubWord, cmmMulWord, cmmAddWord, cmmUShrWord,
27 cmmOffsetExprW, cmmOffsetExprB,
28 cmmRegOffW, cmmRegOffB,
29 cmmLabelOffW, cmmLabelOffB,
30 cmmOffsetW, cmmOffsetB,
31 cmmOffsetLitW, cmmOffsetLitB,
32 cmmLoadIndexW,
33 cmmConstrTag, cmmConstrTag1,
34
35 tagForCon, tagCons, isSmallFamily,
36 cmmUntag, cmmIsTagged, cmmGetTag,
37
38 addToMem, addToMemE,
39 mkWordCLit,
40 newStringCLit, newByteStringCLit,
41 packHalfWordsCLit,
42 blankWord,
43
44 getSRTInfo
45 ) where
46
47 #include "HsVersions.h"
48 #include "../includes/stg/MachRegs.h"
49
50 import BlockId
51 import CgMonad
52 import TyCon
53 import DataCon
54 import Id
55 import IdInfo
56 import Constants
57 import SMRep
58 import OldCmm
59 import OldCmmUtils
60 import CLabel
61 import ForeignCall
62 import ClosureInfo
63 import StgSyn (SRT(..))
64 import Module
65 import Literal
66 import Digraph
67 import ListSetOps
68 import Util
69 import DynFlags
70 import FastString
71 import Outputable
72
73 import Data.Char
74 import Data.Word
75 import Data.Maybe
76
77 -------------------------------------------------------------------------
78 --
79 -- Random small functions
80 --
81 -------------------------------------------------------------------------
82
83 addIdReps :: [Id] -> [(CgRep, Id)]
84 addIdReps ids = [(idCgRep id, id) | id <- ids]
85
86 -------------------------------------------------------------------------
87 --
88 -- Literals
89 --
90 -------------------------------------------------------------------------
91
92 cgLit :: Literal -> FCode CmmLit
93 cgLit (MachStr s) = newByteStringCLit (bytesFS s)
94 -- not unpackFS; we want the UTF-8 byte stream.
95 cgLit other_lit = return (mkSimpleLit other_lit)
96
97 mkSimpleLit :: Literal -> CmmLit
98 mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth
99 mkSimpleLit MachNullAddr = zeroCLit
100 mkSimpleLit (MachInt i) = CmmInt i wordWidth
101 mkSimpleLit (MachInt64 i) = CmmInt i W64
102 mkSimpleLit (MachWord i) = CmmInt i wordWidth
103 mkSimpleLit (MachWord64 i) = CmmInt i W64
104 mkSimpleLit (MachFloat r) = CmmFloat r W32
105 mkSimpleLit (MachDouble r) = CmmFloat r W64
106 mkSimpleLit (MachLabel fs ms fod)
107 = CmmLabel (mkForeignLabel fs ms labelSrc fod)
108 where
109 -- TODO: Literal labels might not actually be in the current package...
110 labelSrc = ForeignLabelInThisPackage
111 mkSimpleLit (MachStr _) = panic "mkSimpleLit: MachStr"
112 -- No LitInteger's should be left by the time this is called. CorePrep
113 -- should have converted them all to a real core representation.
114 mkSimpleLit (LitInteger {}) = panic "mkSimpleLit: LitInteger"
115
116 mkLtOp :: Literal -> MachOp
117 -- On signed literals we must do a signed comparison
118 mkLtOp (MachInt _) = MO_S_Lt wordWidth
119 mkLtOp (MachFloat _) = MO_F_Lt W32
120 mkLtOp (MachDouble _) = MO_F_Lt W64
121 mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit)))
122
123
124 ---------------------------------------------------
125 --
126 -- Cmm data type functions
127 --
128 ---------------------------------------------------
129
130
131
132 {-
133 The family size of a data type (the number of constructors)
134 can be either:
135 * small, if the family size < 2**tag_bits
136 * big, otherwise.
137
138 Small families can have the constructor tag in the tag
139 bits.
140 Big families only use the tag value 1 to represent
141 evaluatedness.
142 -}
143 isSmallFamily :: Int -> Bool
144 isSmallFamily fam_size = fam_size <= mAX_PTR_TAG
145
146 tagForCon :: DataCon -> ConTagZ
147 tagForCon con = tag
148 where
149 con_tag = dataConTagZ con
150 fam_size = tyConFamilySize (dataConTyCon con)
151 tag | isSmallFamily fam_size = con_tag + 1
152 | otherwise = 1
153
154 --Tag an expression, to do: refactor, this appears in some other module.
155 tagCons :: DataCon -> CmmExpr -> CmmExpr
156 tagCons con expr = cmmOffsetB expr (tagForCon con)
157
158 --------------------------------------------------------------------------
159 --
160 -- Incrementing a memory location
161 --
162 --------------------------------------------------------------------------
163
164 addToMem :: Width -- rep of the counter
165 -> CmmExpr -- Address
166 -> Int -- What to add (a word)
167 -> CmmStmt
168 addToMem width ptr n = addToMemE width ptr (CmmLit (CmmInt (toInteger n) width))
169
170 addToMemE :: Width -- rep of the counter
171 -> CmmExpr -- Address
172 -> CmmExpr -- What to add (a word-typed expression)
173 -> CmmStmt
174 addToMemE width ptr n
175 = CmmStore ptr (CmmMachOp (MO_Add width) [CmmLoad ptr (cmmBits width), n])
176
177 -------------------------------------------------------------------------
178 --
179 -- Converting a closure tag to a closure for enumeration types
180 -- (this is the implementation of tagToEnum#).
181 --
182 -------------------------------------------------------------------------
183
184 tagToClosure :: TyCon -> CmmExpr -> CmmExpr
185 tagToClosure tycon tag
186 = CmmLoad (cmmOffsetExprW closure_tbl tag) gcWord
187 where closure_tbl = CmmLit (CmmLabel lbl)
188 lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs
189
190 -------------------------------------------------------------------------
191 --
192 -- Conditionals and rts calls
193 --
194 -------------------------------------------------------------------------
195
196 emitIf :: CmmExpr -- Boolean
197 -> Code -- Then part
198 -> Code
199 -- Emit (if e then x)
200 -- ToDo: reverse the condition to avoid the extra branch instruction if possible
201 -- (some conditionals aren't reversible. eg. floating point comparisons cannot
202 -- be inverted because there exist some values for which both comparisons
203 -- return False, such as NaN.)
204 emitIf cond then_part
205 = do { then_id <- newLabelC
206 ; join_id <- newLabelC
207 ; stmtC (CmmCondBranch cond then_id)
208 ; stmtC (CmmBranch join_id)
209 ; labelC then_id
210 ; then_part
211 ; labelC join_id
212 }
213
214 emitIfThenElse :: CmmExpr -- Boolean
215 -> Code -- Then part
216 -> Code -- Else part
217 -> Code
218 -- Emit (if e then x else y)
219 emitIfThenElse cond then_part else_part
220 = do { then_id <- newLabelC
221 ; join_id <- newLabelC
222 ; stmtC (CmmCondBranch cond then_id)
223 ; else_part
224 ; stmtC (CmmBranch join_id)
225 ; labelC then_id
226 ; then_part
227 ; labelC join_id
228 }
229
230
231 -- | Emit code to call a Cmm function.
232 emitRtsCall
233 :: PackageId -- ^ package the function is in
234 -> FastString -- ^ name of function
235 -> [CmmHinted CmmExpr] -- ^ function args
236 -> Code -- ^ cmm code
237
238 emitRtsCall pkg fun args = emitRtsCallGen [] pkg fun args Nothing
239 -- The 'Nothing' says "save all global registers"
240
241 emitRtsCallWithVols :: PackageId -> FastString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Code
242 emitRtsCallWithVols pkg fun args vols
243 = emitRtsCallGen [] pkg fun args (Just vols)
244
245 emitRtsCallWithResult
246 :: LocalReg -> ForeignHint
247 -> PackageId -> FastString
248 -> [CmmHinted CmmExpr] -> Code
249
250 emitRtsCallWithResult res hint pkg fun args
251 = emitRtsCallGen [CmmHinted res hint] pkg fun args Nothing
252
253 -- Make a call to an RTS C procedure
254 emitRtsCallGen
255 :: [CmmHinted LocalReg]
256 -> PackageId
257 -> FastString
258 -> [CmmHinted CmmExpr]
259 -> Maybe [GlobalReg]
260 -> Code
261 emitRtsCallGen res pkg fun args vols = do
262 stmtsC caller_save
263 stmtC (CmmCall target res args CmmMayReturn)
264 stmtsC caller_load
265 where
266 (caller_save, caller_load) = callerSaveVolatileRegs vols
267 target = CmmCallee fun_expr CCallConv
268 fun_expr = mkLblExpr (mkCmmCodeLabel pkg fun)
269
270 -----------------------------------------------------------------------------
271 --
272 -- Caller-Save Registers
273 --
274 -----------------------------------------------------------------------------
275
276 -- Here we generate the sequence of saves/restores required around a
277 -- foreign call instruction.
278
279 -- TODO: reconcile with includes/Regs.h
280 -- * Regs.h claims that BaseReg should be saved last and loaded first
281 -- * This might not have been tickled before since BaseReg is callee save
282 -- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim
283 callerSaveVolatileRegs :: Maybe [GlobalReg] -> ([CmmStmt], [CmmStmt])
284 callerSaveVolatileRegs vols = (caller_save, caller_load)
285 where
286 caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save)
287 caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save)
288
289 system_regs = [Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery,
290 {-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ]
291
292 regs_to_save = system_regs ++ vol_list
293
294 vol_list = case vols of Nothing -> all_of_em; Just regs -> regs
295
296 all_of_em = [ VanillaReg n VNonGcPtr | n <- [0..mAX_Vanilla_REG] ]
297 -- The VNonGcPtr is a lie, but I don't think it matters
298 ++ [ FloatReg n | n <- [0..mAX_Float_REG] ]
299 ++ [ DoubleReg n | n <- [0..mAX_Double_REG] ]
300 ++ [ LongReg n | n <- [0..mAX_Long_REG] ]
301
302 callerSaveGlobalReg reg next
303 | callerSaves reg =
304 CmmStore (get_GlobalReg_addr reg)
305 (CmmReg (CmmGlobal reg)) : next
306 | otherwise = next
307
308 callerRestoreGlobalReg reg next
309 | callerSaves reg =
310 CmmAssign (CmmGlobal reg)
311 (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg))
312 : next
313 | otherwise = next
314
315
316 -- | Returns @True@ if this global register is stored in a caller-saves
317 -- machine register.
318
319 callerSaves :: GlobalReg -> Bool
320
321 #ifdef CALLER_SAVES_Base
322 callerSaves BaseReg = True
323 #endif
324 #ifdef CALLER_SAVES_R1
325 callerSaves (VanillaReg 1 _) = True
326 #endif
327 #ifdef CALLER_SAVES_R2
328 callerSaves (VanillaReg 2 _) = True
329 #endif
330 #ifdef CALLER_SAVES_R3
331 callerSaves (VanillaReg 3 _) = True
332 #endif
333 #ifdef CALLER_SAVES_R4
334 callerSaves (VanillaReg 4 _) = True
335 #endif
336 #ifdef CALLER_SAVES_R5
337 callerSaves (VanillaReg 5 _) = True
338 #endif
339 #ifdef CALLER_SAVES_R6
340 callerSaves (VanillaReg 6 _) = True
341 #endif
342 #ifdef CALLER_SAVES_R7
343 callerSaves (VanillaReg 7 _) = True
344 #endif
345 #ifdef CALLER_SAVES_R8
346 callerSaves (VanillaReg 8 _) = True
347 #endif
348 #ifdef CALLER_SAVES_R9
349 callerSaves (VanillaReg 9 _) = True
350 #endif
351 #ifdef CALLER_SAVES_R10
352 callerSaves (VanillaReg 10 _) = True
353 #endif
354 #ifdef CALLER_SAVES_F1
355 callerSaves (FloatReg 1) = True
356 #endif
357 #ifdef CALLER_SAVES_F2
358 callerSaves (FloatReg 2) = True
359 #endif
360 #ifdef CALLER_SAVES_F3
361 callerSaves (FloatReg 3) = True
362 #endif
363 #ifdef CALLER_SAVES_F4
364 callerSaves (FloatReg 4) = True
365 #endif
366 #ifdef CALLER_SAVES_D1
367 callerSaves (DoubleReg 1) = True
368 #endif
369 #ifdef CALLER_SAVES_D2
370 callerSaves (DoubleReg 2) = True
371 #endif
372 #ifdef CALLER_SAVES_L1
373 callerSaves (LongReg 1) = True
374 #endif
375 #ifdef CALLER_SAVES_Sp
376 callerSaves Sp = True
377 #endif
378 #ifdef CALLER_SAVES_SpLim
379 callerSaves SpLim = True
380 #endif
381 #ifdef CALLER_SAVES_Hp
382 callerSaves Hp = True
383 #endif
384 #ifdef CALLER_SAVES_HpLim
385 callerSaves HpLim = True
386 #endif
387 #ifdef CALLER_SAVES_CCCS
388 callerSaves CCCS = True
389 #endif
390 #ifdef CALLER_SAVES_CurrentTSO
391 callerSaves CurrentTSO = True
392 #endif
393 #ifdef CALLER_SAVES_CurrentNursery
394 callerSaves CurrentNursery = True
395 #endif
396 callerSaves _ = False
397
398
399 -- -----------------------------------------------------------------------------
400 -- Information about global registers
401
402 baseRegOffset :: GlobalReg -> Int
403
404 baseRegOffset (VanillaReg 1 _) = oFFSET_StgRegTable_rR1
405 baseRegOffset (VanillaReg 2 _) = oFFSET_StgRegTable_rR2
406 baseRegOffset (VanillaReg 3 _) = oFFSET_StgRegTable_rR3
407 baseRegOffset (VanillaReg 4 _) = oFFSET_StgRegTable_rR4
408 baseRegOffset (VanillaReg 5 _) = oFFSET_StgRegTable_rR5
409 baseRegOffset (VanillaReg 6 _) = oFFSET_StgRegTable_rR6
410 baseRegOffset (VanillaReg 7 _) = oFFSET_StgRegTable_rR7
411 baseRegOffset (VanillaReg 8 _) = oFFSET_StgRegTable_rR8
412 baseRegOffset (VanillaReg 9 _) = oFFSET_StgRegTable_rR9
413 baseRegOffset (VanillaReg 10 _) = oFFSET_StgRegTable_rR10
414 baseRegOffset (VanillaReg n _) = panic ("Registers above R10 are not supported (tried to use R" ++ show n ++ ")")
415 baseRegOffset (FloatReg 1) = oFFSET_StgRegTable_rF1
416 baseRegOffset (FloatReg 2) = oFFSET_StgRegTable_rF2
417 baseRegOffset (FloatReg 3) = oFFSET_StgRegTable_rF3
418 baseRegOffset (FloatReg 4) = oFFSET_StgRegTable_rF4
419 baseRegOffset (FloatReg n) = panic ("Registers above F4 are not supported (tried to use F" ++ show n ++ ")")
420 baseRegOffset (DoubleReg 1) = oFFSET_StgRegTable_rD1
421 baseRegOffset (DoubleReg 2) = oFFSET_StgRegTable_rD2
422 baseRegOffset (DoubleReg n) = panic ("Registers above D2 are not supported (tried to use D" ++ show n ++ ")")
423 baseRegOffset Sp = oFFSET_StgRegTable_rSp
424 baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim
425 baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
426 baseRegOffset (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")")
427 baseRegOffset Hp = oFFSET_StgRegTable_rHp
428 baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
429 baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS
430 baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
431 baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
432 baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
433 baseRegOffset EagerBlackholeInfo = oFFSET_stgEagerBlackholeInfo
434 baseRegOffset GCEnter1 = oFFSET_stgGCEnter1
435 baseRegOffset GCFun = oFFSET_stgGCFun
436 baseRegOffset BaseReg = panic "baseRegOffset:BaseReg"
437 baseRegOffset PicBaseReg = panic "baseRegOffset:PicBaseReg"
438
439
440 -------------------------------------------------------------------------
441 --
442 -- Strings generate a top-level data block
443 --
444 -------------------------------------------------------------------------
445
446 emitDataLits :: CLabel -> [CmmLit] -> Code
447 -- Emit a data-segment data block
448 emitDataLits lbl lits = emitDecl (mkDataLits Data lbl lits)
449
450 emitRODataLits :: String -> CLabel -> [CmmLit] -> Code
451 -- Emit a read-only data block
452 emitRODataLits _caller lbl lits
453 = emitDecl (mkRODataLits lbl lits)
454
455 newStringCLit :: String -> FCode CmmLit
456 -- Make a global definition for the string,
457 -- and return its label
458 newStringCLit str = newByteStringCLit (map (fromIntegral.ord) str)
459
460 newByteStringCLit :: [Word8] -> FCode CmmLit
461 newByteStringCLit bytes
462 = do { uniq <- newUnique
463 ; let (lit, decl) = mkByteStringCLit uniq bytes
464 ; emitDecl decl
465 ; return lit }
466
467 -------------------------------------------------------------------------
468 --
469 -- Assigning expressions to temporaries
470 --
471 -------------------------------------------------------------------------
472
473 -- | If the expression is trivial, return it. Otherwise, assign the
474 -- expression to a temporary register and return an expression
475 -- referring to this register.
476 assignTemp :: CmmExpr -> FCode CmmExpr
477 -- For a non-trivial expression, e, create a local
478 -- variable and assign the expression to it
479 assignTemp e
480 | isTrivialCmmExpr e = return e
481 | otherwise = do { reg <- newTemp (cmmExprType e)
482 ; stmtC (CmmAssign (CmmLocal reg) e)
483 ; return (CmmReg (CmmLocal reg)) }
484
485 -- | If the expression is trivial and doesn't refer to a global
486 -- register, return it. Otherwise, assign the expression to a
487 -- temporary register and return an expression referring to this
488 -- register.
489 assignTemp_ :: CmmExpr -> FCode CmmExpr
490 assignTemp_ e
491 | isTrivialCmmExpr e && hasNoGlobalRegs e = return e
492 | otherwise = do
493 reg <- newTemp (cmmExprType e)
494 stmtC (CmmAssign (CmmLocal reg) e)
495 return (CmmReg (CmmLocal reg))
496
497 newTemp :: CmmType -> FCode LocalReg
498 newTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep) }
499
500 -------------------------------------------------------------------------
501 --
502 -- Building case analysis
503 --
504 -------------------------------------------------------------------------
505
506 emitSwitch
507 :: CmmExpr -- Tag to switch on
508 -> [(ConTagZ, CgStmts)] -- Tagged branches
509 -> Maybe CgStmts -- Default branch (if any)
510 -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour
511 -- outside this range is undefined
512 -> Code
513
514 -- ONLY A DEFAULT BRANCH: no case analysis to do
515 emitSwitch _ [] (Just stmts) _ _
516 = emitCgStmts stmts
517
518 -- Right, off we go
519 emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
520 = -- Just sort the branches before calling mk_sritch
521 do { mb_deflt_id <-
522 case mb_deflt of
523 Nothing -> return Nothing
524 Just stmts -> do id <- forkCgStmts stmts; return (Just id)
525
526 ; dflags <- getDynFlags
527 ; let via_C | HscC <- hscTarget dflags = True
528 | otherwise = False
529
530 ; stmts <- mk_switch tag_expr (sortLe le branches)
531 mb_deflt_id lo_tag hi_tag via_C
532 ; emitCgStmts stmts
533 }
534 where
535 (t1,_) `le` (t2,_) = t1 <= t2
536
537
538 mk_switch :: CmmExpr -> [(ConTagZ, CgStmts)]
539 -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool
540 -> FCode CgStmts
541
542 -- SINGLETON TAG RANGE: no case analysis to do
543 mk_switch _tag_expr [(tag,stmts)] _ lo_tag hi_tag _via_C
544 | lo_tag == hi_tag
545 = ASSERT( tag == lo_tag )
546 return stmts
547
548 -- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
549 mk_switch _tag_expr [(_tag,stmts)] Nothing _lo_tag _hi_tag _via_C
550 = return stmts
551 -- The simplifier might have eliminated a case
552 -- so we may have e.g. case xs of
553 -- [] -> e
554 -- In that situation we can be sure the (:) case
555 -- can't happen, so no need to test
556
557 -- SINGLETON BRANCH: one equality check to do
558 mk_switch tag_expr [(tag,stmts)] (Just deflt) _lo_tag _hi_tag _via_C
559 = return (CmmCondBranch cond deflt `consCgStmt` stmts)
560 where
561 cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
562 -- We have lo_tag < hi_tag, but there's only one branch,
563 -- so there must be a default
564
565 -- ToDo: we might want to check for the two branch case, where one of
566 -- the branches is the tag 0, because comparing '== 0' is likely to be
567 -- more efficient than other kinds of comparison.
568
569 -- DENSE TAG RANGE: use a switch statment.
570 --
571 -- We also use a switch uncoditionally when compiling via C, because
572 -- this will get emitted as a C switch statement and the C compiler
573 -- should do a good job of optimising it. Also, older GCC versions
574 -- (2.95 in particular) have problems compiling the complicated
575 -- if-trees generated by this code, so compiling to a switch every
576 -- time works around that problem.
577 --
578 mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
579 | use_switch -- Use a switch
580 = do { branch_ids <- mapM forkCgStmts (map snd branches)
581 ; let
582 tagged_blk_ids = zip (map fst branches) (map Just branch_ids)
583
584 find_branch :: ConTagZ -> Maybe BlockId
585 find_branch i = assocDefault mb_deflt tagged_blk_ids i
586
587 -- NB. we have eliminated impossible branches at
588 -- either end of the range (see below), so the first
589 -- tag of a real branch is real_lo_tag (not lo_tag).
590 arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]]
591
592 switch_stmt = CmmSwitch (cmmOffset tag_expr (- real_lo_tag)) arms
593
594 ; ASSERT(not (all isNothing arms))
595 return (oneCgStmt switch_stmt)
596 }
597
598 -- if we can knock off a bunch of default cases with one if, then do so
599 | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches
600 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
601 ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch))
602 branch = CmmCondBranch cond deflt
603 ; stmts <- mk_switch tag_expr' branches mb_deflt
604 lowest_branch hi_tag via_C
605 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
606 }
607
608 | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches
609 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
610 ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch))
611 branch = CmmCondBranch cond deflt
612 ; stmts <- mk_switch tag_expr' branches mb_deflt
613 lo_tag highest_branch via_C
614 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
615 }
616
617 | otherwise -- Use an if-tree
618 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
619 -- To avoid duplication
620 ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt
621 lo_tag (mid_tag-1) via_C
622 ; hi_stmts <- mk_switch tag_expr' hi_branches mb_deflt
623 mid_tag hi_tag via_C
624 ; hi_id <- forkCgStmts hi_stmts
625 ; let cond = cmmUGeWord tag_expr' (CmmLit (mkIntCLit mid_tag))
626 branch_stmt = CmmCondBranch cond hi_id
627 ; return (assign_tag `consCgStmt` (branch_stmt `consCgStmt` lo_stmts))
628 }
629 -- we test (e >= mid_tag) rather than (e < mid_tag), because
630 -- the former works better when e is a comparison, and there
631 -- are two tags 0 & 1 (mid_tag == 1). In this case, the code
632 -- generator can reduce the condition to e itself without
633 -- having to reverse the sense of the comparison: comparisons
634 -- can't always be easily reversed (eg. floating
635 -- pt. comparisons).
636 where
637 use_switch = {- pprTrace "mk_switch" (
638 ppr tag_expr <+> text "n_tags:" <+> int n_tags <+>
639 text "branches:" <+> ppr (map fst branches) <+>
640 text "n_branches:" <+> int n_branches <+>
641 text "lo_tag:" <+> int lo_tag <+>
642 text "hi_tag:" <+> int hi_tag <+>
643 text "real_lo_tag:" <+> int real_lo_tag <+>
644 text "real_hi_tag:" <+> int real_hi_tag) $ -}
645 ASSERT( n_branches > 1 && n_tags > 1 )
646 n_tags > 2 && (via_C || (dense && big_enough))
647 -- up to 4 branches we use a decision tree, otherwise
648 -- a switch (== jump table in the NCG). This seems to be
649 -- optimal, and corresponds with what gcc does.
650 big_enough = n_branches > 4
651 dense = n_branches > (n_tags `div` 2)
652 n_branches = length branches
653
654 -- ignore default slots at each end of the range if there's
655 -- no default branch defined.
656 lowest_branch = fst (head branches)
657 highest_branch = fst (last branches)
658
659 real_lo_tag
660 | isNothing mb_deflt = lowest_branch
661 | otherwise = lo_tag
662
663 real_hi_tag
664 | isNothing mb_deflt = highest_branch
665 | otherwise = hi_tag
666
667 n_tags = real_hi_tag - real_lo_tag + 1
668
669 -- INVARIANT: Provided hi_tag > lo_tag (which is true)
670 -- lo_tag <= mid_tag < hi_tag
671 -- lo_branches have tags < mid_tag
672 -- hi_branches have tags >= mid_tag
673
674 (mid_tag,_) = branches !! (n_branches `div` 2)
675 -- 2 branches => n_branches `div` 2 = 1
676 -- => branches !! 1 give the *second* tag
677 -- There are always at least 2 branches here
678
679 (lo_branches, hi_branches) = span is_lo branches
680 is_lo (t,_) = t < mid_tag
681
682 assignTemp' :: CmmExpr -> FCode (CmmStmt, CmmExpr)
683 assignTemp' e
684 | isTrivialCmmExpr e = return (CmmNop, e)
685 | otherwise = do { reg <- newTemp (cmmExprType e)
686 ; return (CmmAssign (CmmLocal reg) e, CmmReg (CmmLocal reg)) }
687
688 emitLitSwitch :: CmmExpr -- Tag to switch on
689 -> [(Literal, CgStmts)] -- Tagged branches
690 -> CgStmts -- Default branch (always)
691 -> Code -- Emit the code
692 -- Used for general literals, whose size might not be a word,
693 -- where there is always a default case, and where we don't know
694 -- the range of values for certain. For simplicity we always generate a tree.
695 --
696 -- ToDo: for integers we could do better here, perhaps by generalising
697 -- mk_switch and using that. --SDM 15/09/2004
698 emitLitSwitch _ [] deflt = emitCgStmts deflt
699 emitLitSwitch scrut branches deflt_blk
700 = do { scrut' <- assignTemp scrut
701 ; deflt_blk_id <- forkCgStmts deflt_blk
702 ; blk <- mk_lit_switch scrut' deflt_blk_id (sortLe le branches)
703 ; emitCgStmts blk }
704 where
705 le (t1,_) (t2,_) = t1 <= t2
706
707 mk_lit_switch :: CmmExpr -> BlockId
708 -> [(Literal,CgStmts)]
709 -> FCode CgStmts
710 mk_lit_switch scrut deflt_blk_id [(lit,blk)]
711 = return (consCgStmt if_stmt blk)
712 where
713 cmm_lit = mkSimpleLit lit
714 rep = cmmLitType cmm_lit
715 ne = if isFloatType rep then MO_F_Ne else MO_Ne
716 cond = CmmMachOp (ne (typeWidth rep)) [scrut, CmmLit cmm_lit]
717 if_stmt = CmmCondBranch cond deflt_blk_id
718
719 mk_lit_switch scrut deflt_blk_id branches
720 = do { hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
721 ; lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
722 ; lo_blk_id <- forkCgStmts lo_blk
723 ; let if_stmt = CmmCondBranch cond lo_blk_id
724 ; return (if_stmt `consCgStmt` hi_blk) }
725 where
726 n_branches = length branches
727 (mid_lit,_) = branches !! (n_branches `div` 2)
728 -- See notes above re mid_tag
729
730 (lo_branches, hi_branches) = span is_lo branches
731 is_lo (t,_) = t < mid_lit
732
733 cond = CmmMachOp (mkLtOp mid_lit)
734 [scrut, CmmLit (mkSimpleLit mid_lit)]
735
736 -------------------------------------------------------------------------
737 --
738 -- Simultaneous assignment
739 --
740 -------------------------------------------------------------------------
741
742
743 emitSimultaneously :: CmmStmts -> Code
744 -- Emit code to perform the assignments in the
745 -- input simultaneously, using temporary variables when necessary.
746 --
747 -- The Stmts must be:
748 -- CmmNop, CmmComment, CmmAssign, CmmStore
749 -- and nothing else
750
751
752 -- We use the strongly-connected component algorithm, in which
753 -- * the vertices are the statements
754 -- * an edge goes from s1 to s2 iff
755 -- s1 assigns to something s2 uses
756 -- that is, if s1 should *follow* s2 in the final order
757
758 type CVertex = (Int, CmmStmt) -- Give each vertex a unique number,
759 -- for fast comparison
760
761 emitSimultaneously stmts
762 = codeOnly $
763 case filterOut isNopStmt (stmtList stmts) of
764 -- Remove no-ops
765 [] -> nopC
766 [stmt] -> stmtC stmt -- It's often just one stmt
767 stmt_list -> doSimultaneously1 (zip [(1::Int)..] stmt_list)
768
769 doSimultaneously1 :: [CVertex] -> Code
770 doSimultaneously1 vertices
771 = let
772 edges = [ (vertex, key1, edges_from stmt1)
773 | vertex@(key1, stmt1) <- vertices
774 ]
775 edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
776 stmt1 `mustFollow` stmt2
777 ]
778 components = stronglyConnCompFromEdgedVertices edges
779
780 -- do_components deal with one strongly-connected component
781 -- Not cyclic, or singleton? Just do it
782 do_component (AcyclicSCC (_n, stmt)) = stmtC stmt
783 do_component (CyclicSCC [])
784 = panic "doSimultaneously1: do_component (CyclicSCC [])"
785 do_component (CyclicSCC [(_n, stmt)]) = stmtC stmt
786
787 -- Cyclic? Then go via temporaries. Pick one to
788 -- break the loop and try again with the rest.
789 do_component (CyclicSCC ((_n, first_stmt) : rest))
790 = do { from_temp <- go_via_temp first_stmt
791 ; doSimultaneously1 rest
792 ; stmtC from_temp }
793
794 go_via_temp (CmmAssign dest src)
795 = do { tmp <- newTemp (cmmRegType dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
796 ; stmtC (CmmAssign (CmmLocal tmp) src)
797 ; return (CmmAssign dest (CmmReg (CmmLocal tmp))) }
798 go_via_temp (CmmStore dest src)
799 = do { tmp <- newTemp (cmmExprType src) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
800 ; stmtC (CmmAssign (CmmLocal tmp) src)
801 ; return (CmmStore dest (CmmReg (CmmLocal tmp))) }
802 go_via_temp _ = panic "doSimultaneously1: go_via_temp"
803 in
804 mapCs do_component components
805
806 mustFollow :: CmmStmt -> CmmStmt -> Bool
807 CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt
808 CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprType e)) stmt
809 CmmNop `mustFollow` _ = False
810 CmmComment _ `mustFollow` _ = False
811 _ `mustFollow` _ = panic "mustFollow"
812
813
814 anySrc :: (CmmExpr -> Bool) -> CmmStmt -> Bool
815 -- True if the fn is true of any input of the stmt
816 anySrc p (CmmAssign _ e) = p e
817 anySrc p (CmmStore e1 e2) = p e1 || p e2 -- Might be used in either side
818 anySrc _ (CmmComment _) = False
819 anySrc _ CmmNop = False
820 anySrc _ _ = True -- Conservative
821
822 locUsedIn :: CmmExpr -> CmmType -> CmmExpr -> Bool
823 -- (locUsedIn a r e) checks whether writing to r[a] could affect the value of
824 -- 'e'. Returns True if it's not sure.
825 locUsedIn _ _ (CmmLit _) = False
826 locUsedIn loc rep (CmmLoad e ld_rep) = possiblySameLoc loc rep e ld_rep
827 locUsedIn _ _ (CmmReg _) = False
828 locUsedIn _ _ (CmmRegOff _ _) = False
829 locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es
830 locUsedIn _ _ (CmmStackSlot _ _) = panic "locUsedIn: CmmStackSlot"
831
832 possiblySameLoc :: CmmExpr -> CmmType -> CmmExpr -> CmmType -> Bool
833 -- Assumes that distinct registers (eg Hp, Sp) do not
834 -- point to the same location, nor any offset thereof.
835 possiblySameLoc (CmmReg r1) _ (CmmReg r2) _ = r1 == r2
836 possiblySameLoc (CmmReg r1) _ (CmmRegOff r2 0) _ = r1 == r2
837 possiblySameLoc (CmmRegOff r1 0) _ (CmmReg r2) _ = r1 == r2
838 possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2
839 = r1==r2 && end1 > start2 && end2 > start1
840 where
841 end1 = start1 + widthInBytes (typeWidth rep1)
842 end2 = start2 + widthInBytes (typeWidth rep2)
843
844 possiblySameLoc _ _ (CmmLit _) _ = False
845 possiblySameLoc _ _ _ _ = True -- Conservative
846
847 -------------------------------------------------------------------------
848 --
849 -- Static Reference Tables
850 --
851 -------------------------------------------------------------------------
852
853 -- There is just one SRT for each top level binding; all the nested
854 -- bindings use sub-sections of this SRT. The label is passed down to
855 -- the nested bindings via the monad.
856
857 getSRTInfo :: FCode C_SRT
858 getSRTInfo = do
859 srt_lbl <- getSRTLabel
860 srt <- getSRT
861 case srt of
862 -- TODO: Should we panic in this case?
863 -- Someone obviously thinks there should be an SRT
864 NoSRT -> return NoC_SRT
865 SRTEntries {} -> panic "getSRTInfo: SRTEntries. Perhaps you forgot to run SimplStg?"
866 SRT off len bmp
867 | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape]
868 -> do id <- newUnique
869 let srt_desc_lbl = mkLargeSRTLabel id
870 emitRODataLits "getSRTInfo" srt_desc_lbl
871 ( cmmLabelOffW srt_lbl off
872 : mkWordCLit (fromIntegral len)
873 : map mkWordCLit bmp)
874 return (C_SRT srt_desc_lbl 0 srt_escape)
875
876 | otherwise
877 -> return (C_SRT srt_lbl off (fromIntegral (head bmp)))
878 -- The fromIntegral converts to StgHalfWord
879
880 srt_escape :: StgHalfWord
881 srt_escape = -1
882
883 -- -----------------------------------------------------------------------------
884 --
885 -- STG/Cmm GlobalReg
886 --
887 -- -----------------------------------------------------------------------------
888
889 -- | Here is where the STG register map is defined for each target arch.
890 -- The order matters (for the llvm backend anyway)! We must make sure to
891 -- maintain the order here with the order used in the LLVM calling conventions.
892 -- Note that also, this isn't all registers, just the ones that are currently
893 -- possbily mapped to real registers.
894 activeStgRegs :: [GlobalReg]
895 activeStgRegs = [
896 #ifdef REG_Base
897 BaseReg
898 #endif
899 #ifdef REG_Sp
900 ,Sp
901 #endif
902 #ifdef REG_Hp
903 ,Hp
904 #endif
905 #ifdef REG_R1
906 ,VanillaReg 1 VGcPtr
907 #endif
908 #ifdef REG_R2
909 ,VanillaReg 2 VGcPtr
910 #endif
911 #ifdef REG_R3
912 ,VanillaReg 3 VGcPtr
913 #endif
914 #ifdef REG_R4
915 ,VanillaReg 4 VGcPtr
916 #endif
917 #ifdef REG_R5
918 ,VanillaReg 5 VGcPtr
919 #endif
920 #ifdef REG_R6
921 ,VanillaReg 6 VGcPtr
922 #endif
923 #ifdef REG_R7
924 ,VanillaReg 7 VGcPtr
925 #endif
926 #ifdef REG_R8
927 ,VanillaReg 8 VGcPtr
928 #endif
929 #ifdef REG_R9
930 ,VanillaReg 9 VGcPtr
931 #endif
932 #ifdef REG_R10
933 ,VanillaReg 10 VGcPtr
934 #endif
935 #ifdef REG_SpLim
936 ,SpLim
937 #endif
938 #ifdef REG_F1
939 ,FloatReg 1
940 #endif
941 #ifdef REG_F2
942 ,FloatReg 2
943 #endif
944 #ifdef REG_F3
945 ,FloatReg 3
946 #endif
947 #ifdef REG_F4
948 ,FloatReg 4
949 #endif
950 #ifdef REG_D1
951 ,DoubleReg 1
952 #endif
953 #ifdef REG_D2
954 ,DoubleReg 2
955 #endif
956 ]
957
958 -- | We map STG registers onto appropriate CmmExprs. Either they map
959 -- to real machine registers or stored as offsets from BaseReg. Given
960 -- a GlobalReg, get_GlobalReg_addr always produces the
961 -- register table address for it.
962 get_GlobalReg_addr :: GlobalReg -> CmmExpr
963 get_GlobalReg_addr BaseReg = regTableOffset 0
964 get_GlobalReg_addr mid = get_Regtable_addr_from_offset
965 (globalRegType mid) (baseRegOffset mid)
966
967 -- Calculate a literal representing an offset into the register table.
968 -- Used when we don't have an actual BaseReg to offset from.
969 regTableOffset :: Int -> CmmExpr
970 regTableOffset n =
971 CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n))
972
973 get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr
974 get_Regtable_addr_from_offset _ offset =
975 #ifdef REG_Base
976 CmmRegOff (CmmGlobal BaseReg) offset
977 #else
978 regTableOffset offset
979 #endif
980
981 -- | Fixup global registers so that they assign to locations within the
982 -- RegTable if they aren't pinned for the current target.
983 fixStgRegisters :: RawCmmDecl -> RawCmmDecl
984 fixStgRegisters top@(CmmData _ _) = top
985
986 fixStgRegisters (CmmProc info lbl (ListGraph blocks)) =
987 let blocks' = map fixStgRegBlock blocks
988 in CmmProc info lbl $ ListGraph blocks'
989
990 fixStgRegBlock :: CmmBasicBlock -> CmmBasicBlock
991 fixStgRegBlock (BasicBlock id stmts) =
992 let stmts' = map fixStgRegStmt stmts
993 in BasicBlock id stmts'
994
995 fixStgRegStmt :: CmmStmt -> CmmStmt
996 fixStgRegStmt stmt
997 = case stmt of
998 CmmAssign (CmmGlobal reg) src ->
999 let src' = fixStgRegExpr src
1000 baseAddr = get_GlobalReg_addr reg
1001 in case reg `elem` activeStgRegs of
1002 True -> CmmAssign (CmmGlobal reg) src'
1003 False -> CmmStore baseAddr src'
1004
1005 CmmAssign reg src ->
1006 let src' = fixStgRegExpr src
1007 in CmmAssign reg src'
1008
1009 CmmStore addr src -> CmmStore (fixStgRegExpr addr) (fixStgRegExpr src)
1010
1011 CmmCall target regs args returns ->
1012 let target' = case target of
1013 CmmCallee e conv -> CmmCallee (fixStgRegExpr e) conv
1014 other -> other
1015 args' = map (\(CmmHinted arg hint) ->
1016 (CmmHinted (fixStgRegExpr arg) hint)) args
1017 in CmmCall target' regs args' returns
1018
1019 CmmCondBranch test dest -> CmmCondBranch (fixStgRegExpr test) dest
1020
1021 CmmSwitch expr ids -> CmmSwitch (fixStgRegExpr expr) ids
1022
1023 CmmJump addr regs -> CmmJump (fixStgRegExpr addr) regs
1024
1025 -- CmmNop, CmmComment, CmmBranch, CmmReturn
1026 _other -> stmt
1027
1028
1029 fixStgRegExpr :: CmmExpr -> CmmExpr
1030 fixStgRegExpr expr
1031 = case expr of
1032 CmmLoad addr ty -> CmmLoad (fixStgRegExpr addr) ty
1033
1034 CmmMachOp mop args -> CmmMachOp mop args'
1035 where args' = map fixStgRegExpr args
1036
1037 CmmReg (CmmGlobal reg) ->
1038 -- Replace register leaves with appropriate StixTrees for
1039 -- the given target. MagicIds which map to a reg on this
1040 -- arch are left unchanged. For the rest, BaseReg is taken
1041 -- to mean the address of the reg table in MainCapability,
1042 -- and for all others we generate an indirection to its
1043 -- location in the register table.
1044 case reg `elem` activeStgRegs of
1045 True -> expr
1046 False ->
1047 let baseAddr = get_GlobalReg_addr reg
1048 in case reg of
1049 BaseReg -> fixStgRegExpr baseAddr
1050 _other -> fixStgRegExpr
1051 (CmmLoad baseAddr (globalRegType reg))
1052
1053 CmmRegOff (CmmGlobal reg) offset ->
1054 -- RegOf leaves are just a shorthand form. If the reg maps
1055 -- to a real reg, we keep the shorthand, otherwise, we just
1056 -- expand it and defer to the above code.
1057 case reg `elem` activeStgRegs of
1058 True -> expr
1059 False -> fixStgRegExpr (CmmMachOp (MO_Add wordWidth) [
1060 CmmReg (CmmGlobal reg),
1061 CmmLit (CmmInt (fromIntegral offset)
1062 wordWidth)])
1063
1064 -- CmmLit, CmmReg (CmmLocal), CmmStackSlot
1065 _other -> expr
1066