Revert "Remove ArchUnknown"
[ghc.git] / compiler / nativeGen / RegAlloc / Graph / TrivColorable.hs
1 {-# LANGUAGE BangPatterns #-}
2
3 module RegAlloc.Graph.TrivColorable (
4 trivColorable,
5 )
6
7 where
8
9 #include "HsVersions.h"
10
11 import RegClass
12 import Reg
13
14 import GraphBase
15
16 import UniqFM
17 import FastTypes
18 import Platform
19 import Panic
20
21
22 -- trivColorable ---------------------------------------------------------------
23
24 -- trivColorable function for the graph coloring allocator
25 --
26 -- This gets hammered by scanGraph during register allocation,
27 -- so needs to be fairly efficient.
28 --
29 -- NOTE: This only works for arcitectures with just RcInteger and RcDouble
30 -- (which are disjoint) ie. x86, x86_64 and ppc
31 --
32 -- The number of allocatable regs is hard coded in here so we can do
33 -- a fast comparision in trivColorable.
34 --
35 -- It's ok if these numbers are _less_ than the actual number of free
36 -- regs, but they can't be more or the register conflict
37 -- graph won't color.
38 --
39 -- If the graph doesn't color then the allocator will panic, but it won't
40 -- generate bad object code or anything nasty like that.
41 --
42 -- There is an allocatableRegsInClass :: RegClass -> Int, but doing
43 -- the unboxing is too slow for us here.
44 -- TODO: Is that still true? Could we use allocatableRegsInClass
45 -- without losing performance now?
46 --
47 -- Look at includes/stg/MachRegs.h to get the numbers.
48 --
49
50
51 -- Disjoint registers ----------------------------------------------------------
52 --
53 -- The definition has been unfolded into individual cases for speed.
54 -- Each architecture has a different register setup, so we use a
55 -- different regSqueeze function for each.
56 --
57 accSqueeze
58 :: FastInt
59 -> FastInt
60 -> (reg -> FastInt)
61 -> UniqFM reg
62 -> FastInt
63
64 accSqueeze count maxCount squeeze ufm = acc count (eltsUFM ufm)
65 where acc count [] = count
66 acc count _ | count >=# maxCount = count
67 acc count (r:rs) = acc (count +# squeeze r) rs
68
69 {- Note [accSqueeze]
70 ~~~~~~~~~~~~~~~~~~~~
71 BL 2007/09
72 Doing a nice fold over the UniqSet makes trivColorable use
73 32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs.
74 Therefore the UniqFM is made non-abstract and we use custom fold.
75
76 MS 2010/04
77 When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal
78 representation any more. But it is imperative that the assSqueeze stops
79 the folding if the count gets greater or equal to maxCount. We thus convert
80 UniqFM to a (lazy) list, do the fold and stops if necessary, which was
81 the most efficient variant tried. Benchmark compiling 10-times SHA1.lhs follows.
82 (original = previous implementation, folding = fold of the whole UFM,
83 lazyFold = the current implementation,
84 hackFold = using internal representation of Data.IntMap)
85
86 original folding hackFold lazyFold
87 -O -fasm (used everywhere) 31.509s 30.387s 30.791s 30.603s
88 100.00% 96.44% 97.72% 97.12%
89 -fregs-graph 67.938s 74.875s 62.673s 64.679s
90 100.00% 110.21% 92.25% 95.20%
91 -fregs-iterative 89.761s 143.913s 81.075s 86.912s
92 100.00% 160.33% 90.32% 96.83%
93 -fnew-codegen 38.225s 37.142s 37.551s 37.119s
94 100.00% 97.17% 98.24% 97.11%
95 -fnew-codegen -fregs-graph 91.786s 91.51s 87.368s 86.88s
96 100.00% 99.70% 95.19% 94.65%
97 -fnew-codegen -fregs-iterative 206.72s 343.632s 194.694s 208.677s
98 100.00% 166.23% 94.18% 100.95%
99 -}
100
101 trivColorable
102 :: Platform
103 -> (RegClass -> VirtualReg -> FastInt)
104 -> (RegClass -> RealReg -> FastInt)
105 -> Triv VirtualReg RegClass RealReg
106
107 trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
108 | let !cALLOCATABLE_REGS_INTEGER
109 = iUnbox (case platformArch platform of
110 ArchX86 -> 3
111 ArchX86_64 -> 5
112 ArchPPC -> 16
113 ArchSPARC -> 14
114 ArchPPC_64 -> panic "trivColorable ArchPPC_64"
115 ArchARM _ _ -> panic "trivColorable ArchARM"
116 ArchUnknown -> panic "trivColorable ArchUnknown")
117 , count2 <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_INTEGER
118 (virtualRegSqueeze RcInteger)
119 conflicts
120
121 , count3 <- accSqueeze count2 cALLOCATABLE_REGS_INTEGER
122 (realRegSqueeze RcInteger)
123 exclusions
124
125 = count3 <# cALLOCATABLE_REGS_INTEGER
126
127 trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
128 | let !cALLOCATABLE_REGS_FLOAT
129 = iUnbox (case platformArch platform of
130 ArchX86 -> 0
131 ArchX86_64 -> 0
132 ArchPPC -> 0
133 ArchSPARC -> 22
134 ArchPPC_64 -> panic "trivColorable ArchPPC_64"
135 ArchARM _ _ -> panic "trivColorable ArchARM"
136 ArchUnknown -> panic "trivColorable ArchUnknown")
137 , count2 <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_FLOAT
138 (virtualRegSqueeze RcFloat)
139 conflicts
140
141 , count3 <- accSqueeze count2 cALLOCATABLE_REGS_FLOAT
142 (realRegSqueeze RcFloat)
143 exclusions
144
145 = count3 <# cALLOCATABLE_REGS_FLOAT
146
147 trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
148 | let !cALLOCATABLE_REGS_DOUBLE
149 = iUnbox (case platformArch platform of
150 ArchX86 -> 6
151 ArchX86_64 -> 0
152 ArchPPC -> 26
153 ArchSPARC -> 11
154 ArchPPC_64 -> panic "trivColorable ArchPPC_64"
155 ArchARM _ _ -> panic "trivColorable ArchARM"
156 ArchUnknown -> panic "trivColorable ArchUnknown")
157 , count2 <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_DOUBLE
158 (virtualRegSqueeze RcDouble)
159 conflicts
160
161 , count3 <- accSqueeze count2 cALLOCATABLE_REGS_DOUBLE
162 (realRegSqueeze RcDouble)
163 exclusions
164
165 = count3 <# cALLOCATABLE_REGS_DOUBLE
166
167 trivColorable platform virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions
168 | let !cALLOCATABLE_REGS_SSE
169 = iUnbox (case platformArch platform of
170 ArchX86 -> 8
171 ArchX86_64 -> 10
172 ArchPPC -> 0
173 ArchSPARC -> 0
174 ArchPPC_64 -> panic "trivColorable ArchPPC_64"
175 ArchARM _ _ -> panic "trivColorable ArchARM"
176 ArchUnknown -> panic "trivColorable ArchUnknown")
177 , count2 <- accSqueeze (_ILIT(0)) cALLOCATABLE_REGS_SSE
178 (virtualRegSqueeze RcDoubleSSE)
179 conflicts
180
181 , count3 <- accSqueeze count2 cALLOCATABLE_REGS_SSE
182 (realRegSqueeze RcDoubleSSE)
183 exclusions
184
185 = count3 <# cALLOCATABLE_REGS_SSE
186
187
188 -- Specification Code ----------------------------------------------------------
189 --
190 -- The trivColorable function for each particular architecture should
191 -- implement the following function, but faster.
192 --
193
194 {-
195 trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
196 trivColorable classN conflicts exclusions
197 = let
198
199 acc :: Reg -> (Int, Int) -> (Int, Int)
200 acc r (cd, cf)
201 = case regClass r of
202 RcInteger -> (cd+1, cf)
203 RcFloat -> (cd, cf+1)
204 _ -> panic "Regs.trivColorable: reg class not handled"
205
206 tmp = foldUniqSet acc (0, 0) conflicts
207 (countInt, countFloat) = foldUniqSet acc tmp exclusions
208
209 squeese = worst countInt classN RcInteger
210 + worst countFloat classN RcFloat
211
212 in squeese < allocatableRegsInClass classN
213
214 -- | Worst case displacement
215 -- node N of classN has n neighbors of class C.
216 --
217 -- We currently only have RcInteger and RcDouble, which don't conflict at all.
218 -- This is a bit boring compared to what's in RegArchX86.
219 --
220 worst :: Int -> RegClass -> RegClass -> Int
221 worst n classN classC
222 = case classN of
223 RcInteger
224 -> case classC of
225 RcInteger -> min n (allocatableRegsInClass RcInteger)
226 RcFloat -> 0
227
228 RcDouble
229 -> case classC of
230 RcFloat -> min n (allocatableRegsInClass RcFloat)
231 RcInteger -> 0
232
233 -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
234 -- i.e., these are the regs for which we are prepared to allow the
235 -- register allocator to attempt to map VRegs to.
236 allocatableRegs :: [RegNo]
237 allocatableRegs
238 = let isFree i = isFastTrue (freeReg i)
239 in filter isFree allMachRegNos
240
241
242 -- | The number of regs in each class.
243 -- We go via top level CAFs to ensure that we're not recomputing
244 -- the length of these lists each time the fn is called.
245 allocatableRegsInClass :: RegClass -> Int
246 allocatableRegsInClass cls
247 = case cls of
248 RcInteger -> allocatableRegsInteger
249 RcFloat -> allocatableRegsDouble
250
251 allocatableRegsInteger :: Int
252 allocatableRegsInteger
253 = length $ filter (\r -> regClass r == RcInteger)
254 $ map RealReg allocatableRegs
255
256 allocatableRegsFloat :: Int
257 allocatableRegsFloat
258 = length $ filter (\r -> regClass r == RcFloat
259 $ map RealReg allocatableRegs
260 -}