Upgrade UniqSet to a newtype
[ghc.git] / compiler / nativeGen / RegAlloc / Graph / TrivColorable.hs
1 {-# LANGUAGE CPP #-}
2
3 module RegAlloc.Graph.TrivColorable (
4 trivColorable,
5 )
6
7 where
8
9 #include "HsVersions.h"
10
11 import RegClass
12 import Reg
13
14 import GraphBase
15
16 import UniqSet
17 import Platform
18 import Panic
19
20 -- trivColorable ---------------------------------------------------------------
21
22 -- trivColorable function for the graph coloring allocator
23 --
24 -- This gets hammered by scanGraph during register allocation,
25 -- so needs to be fairly efficient.
26 --
27 -- NOTE: This only works for arcitectures with just RcInteger and RcDouble
28 -- (which are disjoint) ie. x86, x86_64 and ppc
29 --
30 -- The number of allocatable regs is hard coded in here so we can do
31 -- a fast comparison in trivColorable.
32 --
33 -- It's ok if these numbers are _less_ than the actual number of free
34 -- regs, but they can't be more or the register conflict
35 -- graph won't color.
36 --
37 -- If the graph doesn't color then the allocator will panic, but it won't
38 -- generate bad object code or anything nasty like that.
39 --
40 -- There is an allocatableRegsInClass :: RegClass -> Int, but doing
41 -- the unboxing is too slow for us here.
42 -- TODO: Is that still true? Could we use allocatableRegsInClass
43 -- without losing performance now?
44 --
45 -- Look at includes/stg/MachRegs.h to get the numbers.
46 --
47
48
49 -- Disjoint registers ----------------------------------------------------------
50 --
51 -- The definition has been unfolded into individual cases for speed.
52 -- Each architecture has a different register setup, so we use a
53 -- different regSqueeze function for each.
54 --
55 accSqueeze
56 :: Int
57 -> Int
58 -> (reg -> Int)
59 -> UniqSet reg
60 -> Int
61
62 accSqueeze count maxCount squeeze us = acc count (nonDetEltsUniqSet us)
63 -- See Note [Unique Determinism and code generation]
64 where acc count [] = count
65 acc count _ | count >= maxCount = count
66 acc count (r:rs) = acc (count + squeeze r) rs
67
68 {- Note [accSqueeze]
69 ~~~~~~~~~~~~~~~~~~~~
70 BL 2007/09
71 Doing a nice fold over the UniqSet makes trivColorable use
72 32% of total compile time and 42% of total alloc when compiling SHA1.hs from darcs.
73 Therefore the UniqFM is made non-abstract and we use custom fold.
74
75 MS 2010/04
76 When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal
77 representation any more. But it is imperative that the accSqueeze stops
78 the folding if the count gets greater or equal to maxCount. We thus convert
79 UniqFM to a (lazy) list, do the fold and stops if necessary, which was
80 the most efficient variant tried. Benchmark compiling 10-times SHA1.hs follows.
81 (original = previous implementation, folding = fold of the whole UFM,
82 lazyFold = the current implementation,
83 hackFold = using internal representation of Data.IntMap)
84
85 original folding hackFold lazyFold
86 -O -fasm (used everywhere) 31.509s 30.387s 30.791s 30.603s
87 100.00% 96.44% 97.72% 97.12%
88 -fregs-graph 67.938s 74.875s 62.673s 64.679s
89 100.00% 110.21% 92.25% 95.20%
90 -fregs-iterative 89.761s 143.913s 81.075s 86.912s
91 100.00% 160.33% 90.32% 96.83%
92 -fnew-codegen 38.225s 37.142s 37.551s 37.119s
93 100.00% 97.17% 98.24% 97.11%
94 -fnew-codegen -fregs-graph 91.786s 91.51s 87.368s 86.88s
95 100.00% 99.70% 95.19% 94.65%
96 -fnew-codegen -fregs-iterative 206.72s 343.632s 194.694s 208.677s
97 100.00% 166.23% 94.18% 100.95%
98 -}
99
100 trivColorable
101 :: Platform
102 -> (RegClass -> VirtualReg -> Int)
103 -> (RegClass -> RealReg -> Int)
104 -> Triv VirtualReg RegClass RealReg
105
106 trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
107 | let cALLOCATABLE_REGS_INTEGER
108 = (case platformArch platform of
109 ArchX86 -> 3
110 ArchX86_64 -> 5
111 ArchPPC -> 16
112 ArchSPARC -> 14
113 ArchSPARC64 -> panic "trivColorable ArchSPARC64"
114 ArchPPC_64 _ -> 15
115 ArchARM _ _ _ -> panic "trivColorable ArchARM"
116 ArchARM64 -> panic "trivColorable ArchARM64"
117 ArchAlpha -> panic "trivColorable ArchAlpha"
118 ArchMipseb -> panic "trivColorable ArchMipseb"
119 ArchMipsel -> panic "trivColorable ArchMipsel"
120 ArchJavaScript-> panic "trivColorable ArchJavaScript"
121 ArchUnknown -> panic "trivColorable ArchUnknown")
122 , count2 <- accSqueeze 0 cALLOCATABLE_REGS_INTEGER
123 (virtualRegSqueeze RcInteger)
124 conflicts
125
126 , count3 <- accSqueeze count2 cALLOCATABLE_REGS_INTEGER
127 (realRegSqueeze RcInteger)
128 exclusions
129
130 = count3 < cALLOCATABLE_REGS_INTEGER
131
132 trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
133 | let cALLOCATABLE_REGS_FLOAT
134 = (case platformArch platform of
135 ArchX86 -> 0
136 ArchX86_64 -> 0
137 ArchPPC -> 0
138 ArchSPARC -> 22
139 ArchSPARC64 -> panic "trivColorable ArchSPARC64"
140 ArchPPC_64 _ -> 0
141 ArchARM _ _ _ -> panic "trivColorable ArchARM"
142 ArchARM64 -> panic "trivColorable ArchARM64"
143 ArchAlpha -> panic "trivColorable ArchAlpha"
144 ArchMipseb -> panic "trivColorable ArchMipseb"
145 ArchMipsel -> panic "trivColorable ArchMipsel"
146 ArchJavaScript-> panic "trivColorable ArchJavaScript"
147 ArchUnknown -> panic "trivColorable ArchUnknown")
148 , count2 <- accSqueeze 0 cALLOCATABLE_REGS_FLOAT
149 (virtualRegSqueeze RcFloat)
150 conflicts
151
152 , count3 <- accSqueeze count2 cALLOCATABLE_REGS_FLOAT
153 (realRegSqueeze RcFloat)
154 exclusions
155
156 = count3 < cALLOCATABLE_REGS_FLOAT
157
158 trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
159 | let cALLOCATABLE_REGS_DOUBLE
160 = (case platformArch platform of
161 ArchX86 -> 6
162 ArchX86_64 -> 0
163 ArchPPC -> 26
164 ArchSPARC -> 11
165 ArchSPARC64 -> panic "trivColorable ArchSPARC64"
166 ArchPPC_64 _ -> 20
167 ArchARM _ _ _ -> panic "trivColorable ArchARM"
168 ArchARM64 -> panic "trivColorable ArchARM64"
169 ArchAlpha -> panic "trivColorable ArchAlpha"
170 ArchMipseb -> panic "trivColorable ArchMipseb"
171 ArchMipsel -> panic "trivColorable ArchMipsel"
172 ArchJavaScript-> panic "trivColorable ArchJavaScript"
173 ArchUnknown -> panic "trivColorable ArchUnknown")
174 , count2 <- accSqueeze 0 cALLOCATABLE_REGS_DOUBLE
175 (virtualRegSqueeze RcDouble)
176 conflicts
177
178 , count3 <- accSqueeze count2 cALLOCATABLE_REGS_DOUBLE
179 (realRegSqueeze RcDouble)
180 exclusions
181
182 = count3 < cALLOCATABLE_REGS_DOUBLE
183
184 trivColorable platform virtualRegSqueeze realRegSqueeze RcDoubleSSE conflicts exclusions
185 | let cALLOCATABLE_REGS_SSE
186 = (case platformArch platform of
187 ArchX86 -> 8
188 ArchX86_64 -> 10
189 ArchPPC -> 0
190 ArchSPARC -> 0
191 ArchSPARC64 -> panic "trivColorable ArchSPARC64"
192 ArchPPC_64 _ -> 0
193 ArchARM _ _ _ -> panic "trivColorable ArchARM"
194 ArchARM64 -> panic "trivColorable ArchARM64"
195 ArchAlpha -> panic "trivColorable ArchAlpha"
196 ArchMipseb -> panic "trivColorable ArchMipseb"
197 ArchMipsel -> panic "trivColorable ArchMipsel"
198 ArchJavaScript-> panic "trivColorable ArchJavaScript"
199 ArchUnknown -> panic "trivColorable ArchUnknown")
200 , count2 <- accSqueeze 0 cALLOCATABLE_REGS_SSE
201 (virtualRegSqueeze RcDoubleSSE)
202 conflicts
203
204 , count3 <- accSqueeze count2 cALLOCATABLE_REGS_SSE
205 (realRegSqueeze RcDoubleSSE)
206 exclusions
207
208 = count3 < cALLOCATABLE_REGS_SSE
209
210
211 -- Specification Code ----------------------------------------------------------
212 --
213 -- The trivColorable function for each particular architecture should
214 -- implement the following function, but faster.
215 --
216
217 {-
218 trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
219 trivColorable classN conflicts exclusions
220 = let
221
222 acc :: Reg -> (Int, Int) -> (Int, Int)
223 acc r (cd, cf)
224 = case regClass r of
225 RcInteger -> (cd+1, cf)
226 RcFloat -> (cd, cf+1)
227 _ -> panic "Regs.trivColorable: reg class not handled"
228
229 tmp = nonDetFoldUFM acc (0, 0) conflicts
230 (countInt, countFloat) = nonDetFoldUFM acc tmp exclusions
231
232 squeese = worst countInt classN RcInteger
233 + worst countFloat classN RcFloat
234
235 in squeese < allocatableRegsInClass classN
236
237 -- | Worst case displacement
238 -- node N of classN has n neighbors of class C.
239 --
240 -- We currently only have RcInteger and RcDouble, which don't conflict at all.
241 -- This is a bit boring compared to what's in RegArchX86.
242 --
243 worst :: Int -> RegClass -> RegClass -> Int
244 worst n classN classC
245 = case classN of
246 RcInteger
247 -> case classC of
248 RcInteger -> min n (allocatableRegsInClass RcInteger)
249 RcFloat -> 0
250
251 RcDouble
252 -> case classC of
253 RcFloat -> min n (allocatableRegsInClass RcFloat)
254 RcInteger -> 0
255
256 -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
257 -- i.e., these are the regs for which we are prepared to allow the
258 -- register allocator to attempt to map VRegs to.
259 allocatableRegs :: [RegNo]
260 allocatableRegs
261 = let isFree i = freeReg i
262 in filter isFree allMachRegNos
263
264
265 -- | The number of regs in each class.
266 -- We go via top level CAFs to ensure that we're not recomputing
267 -- the length of these lists each time the fn is called.
268 allocatableRegsInClass :: RegClass -> Int
269 allocatableRegsInClass cls
270 = case cls of
271 RcInteger -> allocatableRegsInteger
272 RcFloat -> allocatableRegsDouble
273
274 allocatableRegsInteger :: Int
275 allocatableRegsInteger
276 = length $ filter (\r -> regClass r == RcInteger)
277 $ map RealReg allocatableRegs
278
279 allocatableRegsFloat :: Int
280 allocatableRegsFloat
281 = length $ filter (\r -> regClass r == RcFloat
282 $ map RealReg allocatableRegs
283 -}