Change more uses of sortLe to sortBy
[ghc.git] / compiler / codeGen / CgUtils.hs
1 -----------------------------------------------------------------------------
2 --
3 -- Code generator utilities; mostly monadic
4 --
5 -- (c) The University of Glasgow 2004-2006
6 --
7 -----------------------------------------------------------------------------
8
9 module CgUtils (
10 addIdReps,
11 cgLit,
12 emitDataLits, mkDataLits,
13 emitRODataLits, mkRODataLits,
14 emitIf, emitIfThenElse,
15 emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult,
16 emitRtsCallGen,
17 assignTemp, assignTemp_, newTemp,
18 emitSimultaneously,
19 emitSwitch, emitLitSwitch,
20 tagToClosure,
21
22 callerSaves, callerSaveVolatileRegs, get_GlobalReg_addr,
23 activeStgRegs, fixStgRegisters,
24
25 cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
26 cmmUGtWord, cmmSubWord, cmmMulWord, cmmAddWord, cmmUShrWord,
27 cmmOffsetExprW, cmmOffsetExprB,
28 cmmRegOffW, cmmRegOffB,
29 cmmLabelOffW, cmmLabelOffB,
30 cmmOffsetW, cmmOffsetB,
31 cmmOffsetLitW, cmmOffsetLitB,
32 cmmLoadIndexW,
33 cmmConstrTag, cmmConstrTag1,
34
35 tagForCon, tagCons, isSmallFamily,
36 cmmUntag, cmmIsTagged, cmmGetTag,
37
38 addToMem, addToMemE,
39 mkWordCLit,
40 newStringCLit, newByteStringCLit,
41 packHalfWordsCLit,
42 blankWord,
43
44 getSRTInfo
45 ) where
46
47 #include "HsVersions.h"
48 #include "../includes/stg/MachRegs.h"
49
50 import BlockId
51 import CgMonad
52 import TyCon
53 import DataCon
54 import Id
55 import IdInfo
56 import Constants
57 import SMRep
58 import OldCmm
59 import OldCmmUtils
60 import CLabel
61 import ForeignCall
62 import ClosureInfo
63 import StgSyn (SRT(..))
64 import Module
65 import Literal
66 import Digraph
67 import ListSetOps
68 import Util
69 import DynFlags
70 import FastString
71 import Outputable
72
73 import Data.Char
74 import Data.Word
75 import Data.List
76 import Data.Maybe
77 import Data.Ord
78
79 -------------------------------------------------------------------------
80 --
81 -- Random small functions
82 --
83 -------------------------------------------------------------------------
84
85 addIdReps :: [Id] -> [(CgRep, Id)]
86 addIdReps ids = [(idCgRep id, id) | id <- ids]
87
88 -------------------------------------------------------------------------
89 --
90 -- Literals
91 --
92 -------------------------------------------------------------------------
93
94 cgLit :: Literal -> FCode CmmLit
95 cgLit (MachStr s) = newByteStringCLit (bytesFS s)
96 -- not unpackFS; we want the UTF-8 byte stream.
97 cgLit other_lit = return (mkSimpleLit other_lit)
98
99 mkSimpleLit :: Literal -> CmmLit
100 mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth
101 mkSimpleLit MachNullAddr = zeroCLit
102 mkSimpleLit (MachInt i) = CmmInt i wordWidth
103 mkSimpleLit (MachInt64 i) = CmmInt i W64
104 mkSimpleLit (MachWord i) = CmmInt i wordWidth
105 mkSimpleLit (MachWord64 i) = CmmInt i W64
106 mkSimpleLit (MachFloat r) = CmmFloat r W32
107 mkSimpleLit (MachDouble r) = CmmFloat r W64
108 mkSimpleLit (MachLabel fs ms fod)
109 = CmmLabel (mkForeignLabel fs ms labelSrc fod)
110 where
111 -- TODO: Literal labels might not actually be in the current package...
112 labelSrc = ForeignLabelInThisPackage
113 mkSimpleLit (MachStr _) = panic "mkSimpleLit: MachStr"
114 -- No LitInteger's should be left by the time this is called. CorePrep
115 -- should have converted them all to a real core representation.
116 mkSimpleLit (LitInteger {}) = panic "mkSimpleLit: LitInteger"
117
118 mkLtOp :: Literal -> MachOp
119 -- On signed literals we must do a signed comparison
120 mkLtOp (MachInt _) = MO_S_Lt wordWidth
121 mkLtOp (MachFloat _) = MO_F_Lt W32
122 mkLtOp (MachDouble _) = MO_F_Lt W64
123 mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit)))
124
125
126 ---------------------------------------------------
127 --
128 -- Cmm data type functions
129 --
130 ---------------------------------------------------
131
132
133
134 {-
135 The family size of a data type (the number of constructors)
136 can be either:
137 * small, if the family size < 2**tag_bits
138 * big, otherwise.
139
140 Small families can have the constructor tag in the tag
141 bits.
142 Big families only use the tag value 1 to represent
143 evaluatedness.
144 -}
145 isSmallFamily :: Int -> Bool
146 isSmallFamily fam_size = fam_size <= mAX_PTR_TAG
147
148 tagForCon :: DataCon -> ConTagZ
149 tagForCon con = tag
150 where
151 con_tag = dataConTagZ con
152 fam_size = tyConFamilySize (dataConTyCon con)
153 tag | isSmallFamily fam_size = con_tag + 1
154 | otherwise = 1
155
156 --Tag an expression, to do: refactor, this appears in some other module.
157 tagCons :: DataCon -> CmmExpr -> CmmExpr
158 tagCons con expr = cmmOffsetB expr (tagForCon con)
159
160 --------------------------------------------------------------------------
161 --
162 -- Incrementing a memory location
163 --
164 --------------------------------------------------------------------------
165
166 addToMem :: Width -- rep of the counter
167 -> CmmExpr -- Address
168 -> Int -- What to add (a word)
169 -> CmmStmt
170 addToMem width ptr n = addToMemE width ptr (CmmLit (CmmInt (toInteger n) width))
171
172 addToMemE :: Width -- rep of the counter
173 -> CmmExpr -- Address
174 -> CmmExpr -- What to add (a word-typed expression)
175 -> CmmStmt
176 addToMemE width ptr n
177 = CmmStore ptr (CmmMachOp (MO_Add width) [CmmLoad ptr (cmmBits width), n])
178
179 -------------------------------------------------------------------------
180 --
181 -- Converting a closure tag to a closure for enumeration types
182 -- (this is the implementation of tagToEnum#).
183 --
184 -------------------------------------------------------------------------
185
186 tagToClosure :: TyCon -> CmmExpr -> CmmExpr
187 tagToClosure tycon tag
188 = CmmLoad (cmmOffsetExprW closure_tbl tag) gcWord
189 where closure_tbl = CmmLit (CmmLabel lbl)
190 lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs
191
192 -------------------------------------------------------------------------
193 --
194 -- Conditionals and rts calls
195 --
196 -------------------------------------------------------------------------
197
198 emitIf :: CmmExpr -- Boolean
199 -> Code -- Then part
200 -> Code
201 -- Emit (if e then x)
202 -- ToDo: reverse the condition to avoid the extra branch instruction if possible
203 -- (some conditionals aren't reversible. eg. floating point comparisons cannot
204 -- be inverted because there exist some values for which both comparisons
205 -- return False, such as NaN.)
206 emitIf cond then_part
207 = do { then_id <- newLabelC
208 ; join_id <- newLabelC
209 ; stmtC (CmmCondBranch cond then_id)
210 ; stmtC (CmmBranch join_id)
211 ; labelC then_id
212 ; then_part
213 ; labelC join_id
214 }
215
216 emitIfThenElse :: CmmExpr -- Boolean
217 -> Code -- Then part
218 -> Code -- Else part
219 -> Code
220 -- Emit (if e then x else y)
221 emitIfThenElse cond then_part else_part
222 = do { then_id <- newLabelC
223 ; join_id <- newLabelC
224 ; stmtC (CmmCondBranch cond then_id)
225 ; else_part
226 ; stmtC (CmmBranch join_id)
227 ; labelC then_id
228 ; then_part
229 ; labelC join_id
230 }
231
232
233 -- | Emit code to call a Cmm function.
234 emitRtsCall
235 :: PackageId -- ^ package the function is in
236 -> FastString -- ^ name of function
237 -> [CmmHinted CmmExpr] -- ^ function args
238 -> Code -- ^ cmm code
239
240 emitRtsCall pkg fun args = emitRtsCallGen [] pkg fun args Nothing
241 -- The 'Nothing' says "save all global registers"
242
243 emitRtsCallWithVols :: PackageId -> FastString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Code
244 emitRtsCallWithVols pkg fun args vols
245 = emitRtsCallGen [] pkg fun args (Just vols)
246
247 emitRtsCallWithResult
248 :: LocalReg -> ForeignHint
249 -> PackageId -> FastString
250 -> [CmmHinted CmmExpr] -> Code
251
252 emitRtsCallWithResult res hint pkg fun args
253 = emitRtsCallGen [CmmHinted res hint] pkg fun args Nothing
254
255 -- Make a call to an RTS C procedure
256 emitRtsCallGen
257 :: [CmmHinted LocalReg]
258 -> PackageId
259 -> FastString
260 -> [CmmHinted CmmExpr]
261 -> Maybe [GlobalReg]
262 -> Code
263 emitRtsCallGen res pkg fun args vols = do
264 stmtsC caller_save
265 stmtC (CmmCall target res args CmmMayReturn)
266 stmtsC caller_load
267 where
268 (caller_save, caller_load) = callerSaveVolatileRegs vols
269 target = CmmCallee fun_expr CCallConv
270 fun_expr = mkLblExpr (mkCmmCodeLabel pkg fun)
271
272 -----------------------------------------------------------------------------
273 --
274 -- Caller-Save Registers
275 --
276 -----------------------------------------------------------------------------
277
278 -- Here we generate the sequence of saves/restores required around a
279 -- foreign call instruction.
280
281 -- TODO: reconcile with includes/Regs.h
282 -- * Regs.h claims that BaseReg should be saved last and loaded first
283 -- * This might not have been tickled before since BaseReg is callee save
284 -- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim
285 callerSaveVolatileRegs :: Maybe [GlobalReg] -> ([CmmStmt], [CmmStmt])
286 callerSaveVolatileRegs vols = (caller_save, caller_load)
287 where
288 caller_save = foldr ($!) [] (map callerSaveGlobalReg regs_to_save)
289 caller_load = foldr ($!) [] (map callerRestoreGlobalReg regs_to_save)
290
291 system_regs = [Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery,
292 {-SparkHd,SparkTl,SparkBase,SparkLim,-}BaseReg ]
293
294 regs_to_save = system_regs ++ vol_list
295
296 vol_list = case vols of Nothing -> all_of_em; Just regs -> regs
297
298 all_of_em = [ VanillaReg n VNonGcPtr | n <- [0..mAX_Vanilla_REG] ]
299 -- The VNonGcPtr is a lie, but I don't think it matters
300 ++ [ FloatReg n | n <- [0..mAX_Float_REG] ]
301 ++ [ DoubleReg n | n <- [0..mAX_Double_REG] ]
302 ++ [ LongReg n | n <- [0..mAX_Long_REG] ]
303
304 callerSaveGlobalReg reg next
305 | callerSaves reg =
306 CmmStore (get_GlobalReg_addr reg)
307 (CmmReg (CmmGlobal reg)) : next
308 | otherwise = next
309
310 callerRestoreGlobalReg reg next
311 | callerSaves reg =
312 CmmAssign (CmmGlobal reg)
313 (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg))
314 : next
315 | otherwise = next
316
317
318 -- | Returns @True@ if this global register is stored in a caller-saves
319 -- machine register.
320
321 callerSaves :: GlobalReg -> Bool
322
323 #ifdef CALLER_SAVES_Base
324 callerSaves BaseReg = True
325 #endif
326 #ifdef CALLER_SAVES_R1
327 callerSaves (VanillaReg 1 _) = True
328 #endif
329 #ifdef CALLER_SAVES_R2
330 callerSaves (VanillaReg 2 _) = True
331 #endif
332 #ifdef CALLER_SAVES_R3
333 callerSaves (VanillaReg 3 _) = True
334 #endif
335 #ifdef CALLER_SAVES_R4
336 callerSaves (VanillaReg 4 _) = True
337 #endif
338 #ifdef CALLER_SAVES_R5
339 callerSaves (VanillaReg 5 _) = True
340 #endif
341 #ifdef CALLER_SAVES_R6
342 callerSaves (VanillaReg 6 _) = True
343 #endif
344 #ifdef CALLER_SAVES_R7
345 callerSaves (VanillaReg 7 _) = True
346 #endif
347 #ifdef CALLER_SAVES_R8
348 callerSaves (VanillaReg 8 _) = True
349 #endif
350 #ifdef CALLER_SAVES_R9
351 callerSaves (VanillaReg 9 _) = True
352 #endif
353 #ifdef CALLER_SAVES_R10
354 callerSaves (VanillaReg 10 _) = True
355 #endif
356 #ifdef CALLER_SAVES_F1
357 callerSaves (FloatReg 1) = True
358 #endif
359 #ifdef CALLER_SAVES_F2
360 callerSaves (FloatReg 2) = True
361 #endif
362 #ifdef CALLER_SAVES_F3
363 callerSaves (FloatReg 3) = True
364 #endif
365 #ifdef CALLER_SAVES_F4
366 callerSaves (FloatReg 4) = True
367 #endif
368 #ifdef CALLER_SAVES_D1
369 callerSaves (DoubleReg 1) = True
370 #endif
371 #ifdef CALLER_SAVES_D2
372 callerSaves (DoubleReg 2) = True
373 #endif
374 #ifdef CALLER_SAVES_L1
375 callerSaves (LongReg 1) = True
376 #endif
377 #ifdef CALLER_SAVES_Sp
378 callerSaves Sp = True
379 #endif
380 #ifdef CALLER_SAVES_SpLim
381 callerSaves SpLim = True
382 #endif
383 #ifdef CALLER_SAVES_Hp
384 callerSaves Hp = True
385 #endif
386 #ifdef CALLER_SAVES_HpLim
387 callerSaves HpLim = True
388 #endif
389 #ifdef CALLER_SAVES_CCCS
390 callerSaves CCCS = True
391 #endif
392 #ifdef CALLER_SAVES_CurrentTSO
393 callerSaves CurrentTSO = True
394 #endif
395 #ifdef CALLER_SAVES_CurrentNursery
396 callerSaves CurrentNursery = True
397 #endif
398 callerSaves _ = False
399
400
401 -- -----------------------------------------------------------------------------
402 -- Information about global registers
403
404 baseRegOffset :: GlobalReg -> Int
405
406 baseRegOffset (VanillaReg 1 _) = oFFSET_StgRegTable_rR1
407 baseRegOffset (VanillaReg 2 _) = oFFSET_StgRegTable_rR2
408 baseRegOffset (VanillaReg 3 _) = oFFSET_StgRegTable_rR3
409 baseRegOffset (VanillaReg 4 _) = oFFSET_StgRegTable_rR4
410 baseRegOffset (VanillaReg 5 _) = oFFSET_StgRegTable_rR5
411 baseRegOffset (VanillaReg 6 _) = oFFSET_StgRegTable_rR6
412 baseRegOffset (VanillaReg 7 _) = oFFSET_StgRegTable_rR7
413 baseRegOffset (VanillaReg 8 _) = oFFSET_StgRegTable_rR8
414 baseRegOffset (VanillaReg 9 _) = oFFSET_StgRegTable_rR9
415 baseRegOffset (VanillaReg 10 _) = oFFSET_StgRegTable_rR10
416 baseRegOffset (VanillaReg n _) = panic ("Registers above R10 are not supported (tried to use R" ++ show n ++ ")")
417 baseRegOffset (FloatReg 1) = oFFSET_StgRegTable_rF1
418 baseRegOffset (FloatReg 2) = oFFSET_StgRegTable_rF2
419 baseRegOffset (FloatReg 3) = oFFSET_StgRegTable_rF3
420 baseRegOffset (FloatReg 4) = oFFSET_StgRegTable_rF4
421 baseRegOffset (FloatReg n) = panic ("Registers above F4 are not supported (tried to use F" ++ show n ++ ")")
422 baseRegOffset (DoubleReg 1) = oFFSET_StgRegTable_rD1
423 baseRegOffset (DoubleReg 2) = oFFSET_StgRegTable_rD2
424 baseRegOffset (DoubleReg n) = panic ("Registers above D2 are not supported (tried to use D" ++ show n ++ ")")
425 baseRegOffset Sp = oFFSET_StgRegTable_rSp
426 baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim
427 baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1
428 baseRegOffset (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")")
429 baseRegOffset Hp = oFFSET_StgRegTable_rHp
430 baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim
431 baseRegOffset CCCS = oFFSET_StgRegTable_rCCCS
432 baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO
433 baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery
434 baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc
435 baseRegOffset EagerBlackholeInfo = oFFSET_stgEagerBlackholeInfo
436 baseRegOffset GCEnter1 = oFFSET_stgGCEnter1
437 baseRegOffset GCFun = oFFSET_stgGCFun
438 baseRegOffset BaseReg = panic "baseRegOffset:BaseReg"
439 baseRegOffset PicBaseReg = panic "baseRegOffset:PicBaseReg"
440
441
442 -------------------------------------------------------------------------
443 --
444 -- Strings generate a top-level data block
445 --
446 -------------------------------------------------------------------------
447
448 emitDataLits :: CLabel -> [CmmLit] -> Code
449 -- Emit a data-segment data block
450 emitDataLits lbl lits = emitDecl (mkDataLits Data lbl lits)
451
452 emitRODataLits :: String -> CLabel -> [CmmLit] -> Code
453 -- Emit a read-only data block
454 emitRODataLits _caller lbl lits
455 = emitDecl (mkRODataLits lbl lits)
456
457 newStringCLit :: String -> FCode CmmLit
458 -- Make a global definition for the string,
459 -- and return its label
460 newStringCLit str = newByteStringCLit (map (fromIntegral.ord) str)
461
462 newByteStringCLit :: [Word8] -> FCode CmmLit
463 newByteStringCLit bytes
464 = do { uniq <- newUnique
465 ; let (lit, decl) = mkByteStringCLit uniq bytes
466 ; emitDecl decl
467 ; return lit }
468
469 -------------------------------------------------------------------------
470 --
471 -- Assigning expressions to temporaries
472 --
473 -------------------------------------------------------------------------
474
475 -- | If the expression is trivial, return it. Otherwise, assign the
476 -- expression to a temporary register and return an expression
477 -- referring to this register.
478 assignTemp :: CmmExpr -> FCode CmmExpr
479 -- For a non-trivial expression, e, create a local
480 -- variable and assign the expression to it
481 assignTemp e
482 | isTrivialCmmExpr e = return e
483 | otherwise = do { reg <- newTemp (cmmExprType e)
484 ; stmtC (CmmAssign (CmmLocal reg) e)
485 ; return (CmmReg (CmmLocal reg)) }
486
487 -- | If the expression is trivial and doesn't refer to a global
488 -- register, return it. Otherwise, assign the expression to a
489 -- temporary register and return an expression referring to this
490 -- register.
491 assignTemp_ :: CmmExpr -> FCode CmmExpr
492 assignTemp_ e
493 | isTrivialCmmExpr e && hasNoGlobalRegs e = return e
494 | otherwise = do
495 reg <- newTemp (cmmExprType e)
496 stmtC (CmmAssign (CmmLocal reg) e)
497 return (CmmReg (CmmLocal reg))
498
499 newTemp :: CmmType -> FCode LocalReg
500 newTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep) }
501
502 -------------------------------------------------------------------------
503 --
504 -- Building case analysis
505 --
506 -------------------------------------------------------------------------
507
508 emitSwitch
509 :: CmmExpr -- Tag to switch on
510 -> [(ConTagZ, CgStmts)] -- Tagged branches
511 -> Maybe CgStmts -- Default branch (if any)
512 -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour
513 -- outside this range is undefined
514 -> Code
515
516 -- ONLY A DEFAULT BRANCH: no case analysis to do
517 emitSwitch _ [] (Just stmts) _ _
518 = emitCgStmts stmts
519
520 -- Right, off we go
521 emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
522 = -- Just sort the branches before calling mk_sritch
523 do { mb_deflt_id <-
524 case mb_deflt of
525 Nothing -> return Nothing
526 Just stmts -> do id <- forkCgStmts stmts; return (Just id)
527
528 ; dflags <- getDynFlags
529 ; let via_C | HscC <- hscTarget dflags = True
530 | otherwise = False
531
532 ; stmts <- mk_switch tag_expr (sortBy (comparing fst) branches)
533 mb_deflt_id lo_tag hi_tag via_C
534 ; emitCgStmts stmts
535 }
536
537
538 mk_switch :: CmmExpr -> [(ConTagZ, CgStmts)]
539 -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool
540 -> FCode CgStmts
541
542 -- SINGLETON TAG RANGE: no case analysis to do
543 mk_switch _tag_expr [(tag,stmts)] _ lo_tag hi_tag _via_C
544 | lo_tag == hi_tag
545 = ASSERT( tag == lo_tag )
546 return stmts
547
548 -- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
549 mk_switch _tag_expr [(_tag,stmts)] Nothing _lo_tag _hi_tag _via_C
550 = return stmts
551 -- The simplifier might have eliminated a case
552 -- so we may have e.g. case xs of
553 -- [] -> e
554 -- In that situation we can be sure the (:) case
555 -- can't happen, so no need to test
556
557 -- SINGLETON BRANCH: one equality check to do
558 mk_switch tag_expr [(tag,stmts)] (Just deflt) _lo_tag _hi_tag _via_C
559 = return (CmmCondBranch cond deflt `consCgStmt` stmts)
560 where
561 cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
562 -- We have lo_tag < hi_tag, but there's only one branch,
563 -- so there must be a default
564
565 -- ToDo: we might want to check for the two branch case, where one of
566 -- the branches is the tag 0, because comparing '== 0' is likely to be
567 -- more efficient than other kinds of comparison.
568
569 -- DENSE TAG RANGE: use a switch statment.
570 --
571 -- We also use a switch uncoditionally when compiling via C, because
572 -- this will get emitted as a C switch statement and the C compiler
573 -- should do a good job of optimising it. Also, older GCC versions
574 -- (2.95 in particular) have problems compiling the complicated
575 -- if-trees generated by this code, so compiling to a switch every
576 -- time works around that problem.
577 --
578 mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
579 | use_switch -- Use a switch
580 = do { branch_ids <- mapM forkCgStmts (map snd branches)
581 ; let
582 tagged_blk_ids = zip (map fst branches) (map Just branch_ids)
583
584 find_branch :: ConTagZ -> Maybe BlockId
585 find_branch i = assocDefault mb_deflt tagged_blk_ids i
586
587 -- NB. we have eliminated impossible branches at
588 -- either end of the range (see below), so the first
589 -- tag of a real branch is real_lo_tag (not lo_tag).
590 arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]]
591
592 switch_stmt = CmmSwitch (cmmOffset tag_expr (- real_lo_tag)) arms
593
594 ; ASSERT(not (all isNothing arms))
595 return (oneCgStmt switch_stmt)
596 }
597
598 -- if we can knock off a bunch of default cases with one if, then do so
599 | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches
600 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
601 ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch))
602 branch = CmmCondBranch cond deflt
603 ; stmts <- mk_switch tag_expr' branches mb_deflt
604 lowest_branch hi_tag via_C
605 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
606 }
607
608 | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches
609 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
610 ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch))
611 branch = CmmCondBranch cond deflt
612 ; stmts <- mk_switch tag_expr' branches mb_deflt
613 lo_tag highest_branch via_C
614 ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
615 }
616
617 | otherwise -- Use an if-tree
618 = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
619 -- To avoid duplication
620 ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt
621 lo_tag (mid_tag-1) via_C
622 ; hi_stmts <- mk_switch tag_expr' hi_branches mb_deflt
623 mid_tag hi_tag via_C
624 ; hi_id <- forkCgStmts hi_stmts
625 ; let cond = cmmUGeWord tag_expr' (CmmLit (mkIntCLit mid_tag))
626 branch_stmt = CmmCondBranch cond hi_id
627 ; return (assign_tag `consCgStmt` (branch_stmt `consCgStmt` lo_stmts))
628 }
629 -- we test (e >= mid_tag) rather than (e < mid_tag), because
630 -- the former works better when e is a comparison, and there
631 -- are two tags 0 & 1 (mid_tag == 1). In this case, the code
632 -- generator can reduce the condition to e itself without
633 -- having to reverse the sense of the comparison: comparisons
634 -- can't always be easily reversed (eg. floating
635 -- pt. comparisons).
636 where
637 use_switch = {- pprTrace "mk_switch" (
638 ppr tag_expr <+> text "n_tags:" <+> int n_tags <+>
639 text "branches:" <+> ppr (map fst branches) <+>
640 text "n_branches:" <+> int n_branches <+>
641 text "lo_tag:" <+> int lo_tag <+>
642 text "hi_tag:" <+> int hi_tag <+>
643 text "real_lo_tag:" <+> int real_lo_tag <+>
644 text "real_hi_tag:" <+> int real_hi_tag) $ -}
645 ASSERT( n_branches > 1 && n_tags > 1 )
646 n_tags > 2 && (via_C || (dense && big_enough))
647 -- up to 4 branches we use a decision tree, otherwise
648 -- a switch (== jump table in the NCG). This seems to be
649 -- optimal, and corresponds with what gcc does.
650 big_enough = n_branches > 4
651 dense = n_branches > (n_tags `div` 2)
652 n_branches = length branches
653
654 -- ignore default slots at each end of the range if there's
655 -- no default branch defined.
656 lowest_branch = fst (head branches)
657 highest_branch = fst (last branches)
658
659 real_lo_tag
660 | isNothing mb_deflt = lowest_branch
661 | otherwise = lo_tag
662
663 real_hi_tag
664 | isNothing mb_deflt = highest_branch
665 | otherwise = hi_tag
666
667 n_tags = real_hi_tag - real_lo_tag + 1
668
669 -- INVARIANT: Provided hi_tag > lo_tag (which is true)
670 -- lo_tag <= mid_tag < hi_tag
671 -- lo_branches have tags < mid_tag
672 -- hi_branches have tags >= mid_tag
673
674 (mid_tag,_) = branches !! (n_branches `div` 2)
675 -- 2 branches => n_branches `div` 2 = 1
676 -- => branches !! 1 give the *second* tag
677 -- There are always at least 2 branches here
678
679 (lo_branches, hi_branches) = span is_lo branches
680 is_lo (t,_) = t < mid_tag
681
682 assignTemp' :: CmmExpr -> FCode (CmmStmt, CmmExpr)
683 assignTemp' e
684 | isTrivialCmmExpr e = return (CmmNop, e)
685 | otherwise = do { reg <- newTemp (cmmExprType e)
686 ; return (CmmAssign (CmmLocal reg) e, CmmReg (CmmLocal reg)) }
687
688 emitLitSwitch :: CmmExpr -- Tag to switch on
689 -> [(Literal, CgStmts)] -- Tagged branches
690 -> CgStmts -- Default branch (always)
691 -> Code -- Emit the code
692 -- Used for general literals, whose size might not be a word,
693 -- where there is always a default case, and where we don't know
694 -- the range of values for certain. For simplicity we always generate a tree.
695 --
696 -- ToDo: for integers we could do better here, perhaps by generalising
697 -- mk_switch and using that. --SDM 15/09/2004
698 emitLitSwitch _ [] deflt = emitCgStmts deflt
699 emitLitSwitch scrut branches deflt_blk
700 = do { scrut' <- assignTemp scrut
701 ; deflt_blk_id <- forkCgStmts deflt_blk
702 ; blk <- mk_lit_switch scrut' deflt_blk_id (sortBy (comparing fst) branches)
703 ; emitCgStmts blk }
704
705 mk_lit_switch :: CmmExpr -> BlockId
706 -> [(Literal,CgStmts)]
707 -> FCode CgStmts
708 mk_lit_switch scrut deflt_blk_id [(lit,blk)]
709 = return (consCgStmt if_stmt blk)
710 where
711 cmm_lit = mkSimpleLit lit
712 rep = cmmLitType cmm_lit
713 ne = if isFloatType rep then MO_F_Ne else MO_Ne
714 cond = CmmMachOp (ne (typeWidth rep)) [scrut, CmmLit cmm_lit]
715 if_stmt = CmmCondBranch cond deflt_blk_id
716
717 mk_lit_switch scrut deflt_blk_id branches
718 = do { hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
719 ; lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
720 ; lo_blk_id <- forkCgStmts lo_blk
721 ; let if_stmt = CmmCondBranch cond lo_blk_id
722 ; return (if_stmt `consCgStmt` hi_blk) }
723 where
724 n_branches = length branches
725 (mid_lit,_) = branches !! (n_branches `div` 2)
726 -- See notes above re mid_tag
727
728 (lo_branches, hi_branches) = span is_lo branches
729 is_lo (t,_) = t < mid_lit
730
731 cond = CmmMachOp (mkLtOp mid_lit)
732 [scrut, CmmLit (mkSimpleLit mid_lit)]
733
734 -------------------------------------------------------------------------
735 --
736 -- Simultaneous assignment
737 --
738 -------------------------------------------------------------------------
739
740
741 emitSimultaneously :: CmmStmts -> Code
742 -- Emit code to perform the assignments in the
743 -- input simultaneously, using temporary variables when necessary.
744 --
745 -- The Stmts must be:
746 -- CmmNop, CmmComment, CmmAssign, CmmStore
747 -- and nothing else
748
749
750 -- We use the strongly-connected component algorithm, in which
751 -- * the vertices are the statements
752 -- * an edge goes from s1 to s2 iff
753 -- s1 assigns to something s2 uses
754 -- that is, if s1 should *follow* s2 in the final order
755
756 type CVertex = (Int, CmmStmt) -- Give each vertex a unique number,
757 -- for fast comparison
758
759 emitSimultaneously stmts
760 = codeOnly $
761 case filterOut isNopStmt (stmtList stmts) of
762 -- Remove no-ops
763 [] -> nopC
764 [stmt] -> stmtC stmt -- It's often just one stmt
765 stmt_list -> doSimultaneously1 (zip [(1::Int)..] stmt_list)
766
767 doSimultaneously1 :: [CVertex] -> Code
768 doSimultaneously1 vertices
769 = let
770 edges = [ (vertex, key1, edges_from stmt1)
771 | vertex@(key1, stmt1) <- vertices
772 ]
773 edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
774 stmt1 `mustFollow` stmt2
775 ]
776 components = stronglyConnCompFromEdgedVertices edges
777
778 -- do_components deal with one strongly-connected component
779 -- Not cyclic, or singleton? Just do it
780 do_component (AcyclicSCC (_n, stmt)) = stmtC stmt
781 do_component (CyclicSCC [])
782 = panic "doSimultaneously1: do_component (CyclicSCC [])"
783 do_component (CyclicSCC [(_n, stmt)]) = stmtC stmt
784
785 -- Cyclic? Then go via temporaries. Pick one to
786 -- break the loop and try again with the rest.
787 do_component (CyclicSCC ((_n, first_stmt) : rest))
788 = do { from_temp <- go_via_temp first_stmt
789 ; doSimultaneously1 rest
790 ; stmtC from_temp }
791
792 go_via_temp (CmmAssign dest src)
793 = do { tmp <- newTemp (cmmRegType dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
794 ; stmtC (CmmAssign (CmmLocal tmp) src)
795 ; return (CmmAssign dest (CmmReg (CmmLocal tmp))) }
796 go_via_temp (CmmStore dest src)
797 = do { tmp <- newTemp (cmmExprType src) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong
798 ; stmtC (CmmAssign (CmmLocal tmp) src)
799 ; return (CmmStore dest (CmmReg (CmmLocal tmp))) }
800 go_via_temp _ = panic "doSimultaneously1: go_via_temp"
801 in
802 mapCs do_component components
803
804 mustFollow :: CmmStmt -> CmmStmt -> Bool
805 CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt
806 CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprType e)) stmt
807 CmmNop `mustFollow` _ = False
808 CmmComment _ `mustFollow` _ = False
809 _ `mustFollow` _ = panic "mustFollow"
810
811
812 anySrc :: (CmmExpr -> Bool) -> CmmStmt -> Bool
813 -- True if the fn is true of any input of the stmt
814 anySrc p (CmmAssign _ e) = p e
815 anySrc p (CmmStore e1 e2) = p e1 || p e2 -- Might be used in either side
816 anySrc _ (CmmComment _) = False
817 anySrc _ CmmNop = False
818 anySrc _ _ = True -- Conservative
819
820 locUsedIn :: CmmExpr -> CmmType -> CmmExpr -> Bool
821 -- (locUsedIn a r e) checks whether writing to r[a] could affect the value of
822 -- 'e'. Returns True if it's not sure.
823 locUsedIn _ _ (CmmLit _) = False
824 locUsedIn loc rep (CmmLoad e ld_rep) = possiblySameLoc loc rep e ld_rep
825 locUsedIn _ _ (CmmReg _) = False
826 locUsedIn _ _ (CmmRegOff _ _) = False
827 locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es
828 locUsedIn _ _ (CmmStackSlot _ _) = panic "locUsedIn: CmmStackSlot"
829
830 possiblySameLoc :: CmmExpr -> CmmType -> CmmExpr -> CmmType -> Bool
831 -- Assumes that distinct registers (eg Hp, Sp) do not
832 -- point to the same location, nor any offset thereof.
833 possiblySameLoc (CmmReg r1) _ (CmmReg r2) _ = r1 == r2
834 possiblySameLoc (CmmReg r1) _ (CmmRegOff r2 0) _ = r1 == r2
835 possiblySameLoc (CmmRegOff r1 0) _ (CmmReg r2) _ = r1 == r2
836 possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2
837 = r1==r2 && end1 > start2 && end2 > start1
838 where
839 end1 = start1 + widthInBytes (typeWidth rep1)
840 end2 = start2 + widthInBytes (typeWidth rep2)
841
842 possiblySameLoc _ _ (CmmLit _) _ = False
843 possiblySameLoc _ _ _ _ = True -- Conservative
844
845 -------------------------------------------------------------------------
846 --
847 -- Static Reference Tables
848 --
849 -------------------------------------------------------------------------
850
851 -- There is just one SRT for each top level binding; all the nested
852 -- bindings use sub-sections of this SRT. The label is passed down to
853 -- the nested bindings via the monad.
854
855 getSRTInfo :: FCode C_SRT
856 getSRTInfo = do
857 srt_lbl <- getSRTLabel
858 srt <- getSRT
859 case srt of
860 -- TODO: Should we panic in this case?
861 -- Someone obviously thinks there should be an SRT
862 NoSRT -> return NoC_SRT
863 SRTEntries {} -> panic "getSRTInfo: SRTEntries. Perhaps you forgot to run SimplStg?"
864 SRT off len bmp
865 | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape]
866 -> do id <- newUnique
867 let srt_desc_lbl = mkLargeSRTLabel id
868 emitRODataLits "getSRTInfo" srt_desc_lbl
869 ( cmmLabelOffW srt_lbl off
870 : mkWordCLit (fromIntegral len)
871 : map mkWordCLit bmp)
872 return (C_SRT srt_desc_lbl 0 srt_escape)
873
874 | otherwise
875 -> return (C_SRT srt_lbl off (fromIntegral (head bmp)))
876 -- The fromIntegral converts to StgHalfWord
877
878 srt_escape :: StgHalfWord
879 srt_escape = -1
880
881 -- -----------------------------------------------------------------------------
882 --
883 -- STG/Cmm GlobalReg
884 --
885 -- -----------------------------------------------------------------------------
886
887 -- | Here is where the STG register map is defined for each target arch.
888 -- The order matters (for the llvm backend anyway)! We must make sure to
889 -- maintain the order here with the order used in the LLVM calling conventions.
890 -- Note that also, this isn't all registers, just the ones that are currently
891 -- possbily mapped to real registers.
892 activeStgRegs :: [GlobalReg]
893 activeStgRegs = [
894 #ifdef REG_Base
895 BaseReg
896 #endif
897 #ifdef REG_Sp
898 ,Sp
899 #endif
900 #ifdef REG_Hp
901 ,Hp
902 #endif
903 #ifdef REG_R1
904 ,VanillaReg 1 VGcPtr
905 #endif
906 #ifdef REG_R2
907 ,VanillaReg 2 VGcPtr
908 #endif
909 #ifdef REG_R3
910 ,VanillaReg 3 VGcPtr
911 #endif
912 #ifdef REG_R4
913 ,VanillaReg 4 VGcPtr
914 #endif
915 #ifdef REG_R5
916 ,VanillaReg 5 VGcPtr
917 #endif
918 #ifdef REG_R6
919 ,VanillaReg 6 VGcPtr
920 #endif
921 #ifdef REG_R7
922 ,VanillaReg 7 VGcPtr
923 #endif
924 #ifdef REG_R8
925 ,VanillaReg 8 VGcPtr
926 #endif
927 #ifdef REG_R9
928 ,VanillaReg 9 VGcPtr
929 #endif
930 #ifdef REG_R10
931 ,VanillaReg 10 VGcPtr
932 #endif
933 #ifdef REG_SpLim
934 ,SpLim
935 #endif
936 #ifdef REG_F1
937 ,FloatReg 1
938 #endif
939 #ifdef REG_F2
940 ,FloatReg 2
941 #endif
942 #ifdef REG_F3
943 ,FloatReg 3
944 #endif
945 #ifdef REG_F4
946 ,FloatReg 4
947 #endif
948 #ifdef REG_D1
949 ,DoubleReg 1
950 #endif
951 #ifdef REG_D2
952 ,DoubleReg 2
953 #endif
954 ]
955
956 -- | We map STG registers onto appropriate CmmExprs. Either they map
957 -- to real machine registers or stored as offsets from BaseReg. Given
958 -- a GlobalReg, get_GlobalReg_addr always produces the
959 -- register table address for it.
960 get_GlobalReg_addr :: GlobalReg -> CmmExpr
961 get_GlobalReg_addr BaseReg = regTableOffset 0
962 get_GlobalReg_addr mid = get_Regtable_addr_from_offset
963 (globalRegType mid) (baseRegOffset mid)
964
965 -- Calculate a literal representing an offset into the register table.
966 -- Used when we don't have an actual BaseReg to offset from.
967 regTableOffset :: Int -> CmmExpr
968 regTableOffset n =
969 CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n))
970
971 get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr
972 get_Regtable_addr_from_offset _ offset =
973 #ifdef REG_Base
974 CmmRegOff (CmmGlobal BaseReg) offset
975 #else
976 regTableOffset offset
977 #endif
978
979 -- | Fixup global registers so that they assign to locations within the
980 -- RegTable if they aren't pinned for the current target.
981 fixStgRegisters :: RawCmmDecl -> RawCmmDecl
982 fixStgRegisters top@(CmmData _ _) = top
983
984 fixStgRegisters (CmmProc info lbl (ListGraph blocks)) =
985 let blocks' = map fixStgRegBlock blocks
986 in CmmProc info lbl $ ListGraph blocks'
987
988 fixStgRegBlock :: CmmBasicBlock -> CmmBasicBlock
989 fixStgRegBlock (BasicBlock id stmts) =
990 let stmts' = map fixStgRegStmt stmts
991 in BasicBlock id stmts'
992
993 fixStgRegStmt :: CmmStmt -> CmmStmt
994 fixStgRegStmt stmt
995 = case stmt of
996 CmmAssign (CmmGlobal reg) src ->
997 let src' = fixStgRegExpr src
998 baseAddr = get_GlobalReg_addr reg
999 in case reg `elem` activeStgRegs of
1000 True -> CmmAssign (CmmGlobal reg) src'
1001 False -> CmmStore baseAddr src'
1002
1003 CmmAssign reg src ->
1004 let src' = fixStgRegExpr src
1005 in CmmAssign reg src'
1006
1007 CmmStore addr src -> CmmStore (fixStgRegExpr addr) (fixStgRegExpr src)
1008
1009 CmmCall target regs args returns ->
1010 let target' = case target of
1011 CmmCallee e conv -> CmmCallee (fixStgRegExpr e) conv
1012 CmmPrim op mStmts ->
1013 CmmPrim op (fmap (map fixStgRegStmt) mStmts)
1014 args' = map (\(CmmHinted arg hint) ->
1015 (CmmHinted (fixStgRegExpr arg) hint)) args
1016 in CmmCall target' regs args' returns
1017
1018 CmmCondBranch test dest -> CmmCondBranch (fixStgRegExpr test) dest
1019
1020 CmmSwitch expr ids -> CmmSwitch (fixStgRegExpr expr) ids
1021
1022 CmmJump addr live -> CmmJump (fixStgRegExpr addr) live
1023
1024 -- CmmNop, CmmComment, CmmBranch, CmmReturn
1025 _other -> stmt
1026
1027
1028 fixStgRegExpr :: CmmExpr -> CmmExpr
1029 fixStgRegExpr expr
1030 = case expr of
1031 CmmLoad addr ty -> CmmLoad (fixStgRegExpr addr) ty
1032
1033 CmmMachOp mop args -> CmmMachOp mop args'
1034 where args' = map fixStgRegExpr args
1035
1036 CmmReg (CmmGlobal reg) ->
1037 -- Replace register leaves with appropriate StixTrees for
1038 -- the given target. MagicIds which map to a reg on this
1039 -- arch are left unchanged. For the rest, BaseReg is taken
1040 -- to mean the address of the reg table in MainCapability,
1041 -- and for all others we generate an indirection to its
1042 -- location in the register table.
1043 case reg `elem` activeStgRegs of
1044 True -> expr
1045 False ->
1046 let baseAddr = get_GlobalReg_addr reg
1047 in case reg of
1048 BaseReg -> fixStgRegExpr baseAddr
1049 _other -> fixStgRegExpr
1050 (CmmLoad baseAddr (globalRegType reg))
1051
1052 CmmRegOff (CmmGlobal reg) offset ->
1053 -- RegOf leaves are just a shorthand form. If the reg maps
1054 -- to a real reg, we keep the shorthand, otherwise, we just
1055 -- expand it and defer to the above code.
1056 case reg `elem` activeStgRegs of
1057 True -> expr
1058 False -> fixStgRegExpr (CmmMachOp (MO_Add wordWidth) [
1059 CmmReg (CmmGlobal reg),
1060 CmmLit (CmmInt (fromIntegral offset)
1061 wordWidth)])
1062
1063 -- CmmLit, CmmReg (CmmLocal), CmmStackSlot
1064 _other -> expr
1065