Be more selective in which conditionals we invert
[ghc.git] / compiler / cmm / CmmSwitch.hs
1 {-# LANGUAGE GADTs #-}
2 module CmmSwitch (
3 SwitchTargets,
4 mkSwitchTargets,
5 switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
6 mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
7 switchTargetsToList, eqSwitchTargetWith,
8
9 SwitchPlan(..),
10 targetSupportsSwitch,
11 createSwitchPlan,
12 ) where
13
14 import GhcPrelude
15
16 import Outputable
17 import DynFlags
18 import Hoopl.Label (Label)
19
20 import Data.Maybe
21 import Data.List (groupBy)
22 import Data.Function (on)
23 import qualified Data.Map as M
24
25 -- Note [Cmm Switches, the general plan]
26 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
27 --
28 -- Compiling a high-level switch statement, as it comes out of a STG case
29 -- expression, for example, allows for a surprising amount of design decisions.
30 -- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
31 -- well as from the actual code generation.
32 --
33 -- The overall plan is:
34 -- * The Stg → Cmm transformation creates a single `SwitchTargets` in
35 -- emitSwitch and emitCmmLitSwitch in StgCmmUtils.hs.
36 -- At this stage, they are unsuitable for code generation.
37 -- * A dedicated Cmm transformation (CmmImplementSwitchPlans) replaces these
38 -- switch statements with code that is suitable for code generation, i.e.
39 -- a nice balanced tree of decisions with dense jump tables in the leafs.
40 -- The actual planning of this tree is performed in pure code in createSwitchPlan
41 -- in this module. See Note [createSwitchPlan].
42 -- * The actual code generation will not do any further processing and
43 -- implement each CmmSwitch with a jump tables.
44 --
45 -- When compiling to LLVM or C, CmmImplementSwitchPlans leaves the switch
46 -- statements alone, as we can turn a SwitchTargets value into a nice
47 -- switch-statement in LLVM resp. C, and leave the rest to the compiler.
48 --
49 -- See Note [CmmSwitch vs. CmmImplementSwitchPlans] why the two module are
50 -- separated.
51
52 -----------------------------------------------------------------------------
53 -- Note [Magic Constants in CmmSwitch]
54 --
55 -- There are a lot of heuristics here that depend on magic values where it is
56 -- hard to determine the "best" value (for whatever that means). These are the
57 -- magic values:
58
59 -- | Number of consecutive default values allowed in a jump table. If there are
60 -- more of them, the jump tables are split.
61 --
62 -- Currently 7, as it costs 7 words of additional code when a jump table is
63 -- split (at least on x64, determined experimentally).
64 maxJumpTableHole :: Integer
65 maxJumpTableHole = 7
66
67 -- | Minimum size of a jump table. If the number is smaller, the switch is
68 -- implemented using conditionals.
69 -- Currently 5, because an if-then-else tree of 4 values is nice and compact.
70 minJumpTableSize :: Int
71 minJumpTableSize = 5
72
73 -- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset].
74 minJumpTableOffset :: Integer
75 minJumpTableOffset = 2
76
77
78 -----------------------------------------------------------------------------
79 -- Switch Targets
80
81 -- Note [SwitchTargets]:
82 -- ~~~~~~~~~~~~~~~~~~~~~
83 --
84 -- The branches of a switch are stored in a SwitchTargets, which consists of an
85 -- (optional) default jump target, and a map from values to jump targets.
86 --
87 -- If the default jump target is absent, the behaviour of the switch outside the
88 -- values of the map is undefined.
89 --
90 -- We use an Integer for the keys the map so that it can be used in switches on
91 -- unsigned as well as signed integers.
92 --
93 -- The map may be empty (we prune out-of-range branches here, so it could be us
94 -- emptying it).
95 --
96 -- Before code generation, the table needs to be brought into a form where all
97 -- entries are non-negative, so that it can be compiled into a jump table.
98 -- See switchTargetsToTable.
99
100
101 -- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch'
102 -- value, and knows whether the value is signed, the possible range, an
103 -- optional default value and a map from values to jump labels.
104 data SwitchTargets =
105 SwitchTargets
106 Bool -- Signed values
107 (Integer, Integer) -- Range
108 (Maybe Label) -- Default value
109 (M.Map Integer Label) -- The branches
110 deriving (Show, Eq)
111
112 -- | The smart constructor mkSwitchTargets normalises the map a bit:
113 -- * No entries outside the range
114 -- * No entries equal to the default
115 -- * No default if all elements have explicit values
116 mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
117 mkSwitchTargets signed range@(lo,hi) mbdef ids
118 = SwitchTargets signed range mbdef' ids'
119 where
120 ids' = dropDefault $ restrict ids
121 mbdef' | defaultNeeded = mbdef
122 | otherwise = Nothing
123
124 -- Drop entries outside the range, if there is a range
125 restrict = restrictMap (lo,hi)
126
127 -- Drop entries that equal the default, if there is a default
128 dropDefault | Just l <- mbdef = M.filter (/= l)
129 | otherwise = id
130
131 -- Check if the default is still needed
132 defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1
133
134
135 -- | Changes all labels mentioned in the SwitchTargets value
136 mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
137 mapSwitchTargets f (SwitchTargets signed range mbdef branches)
138 = SwitchTargets signed range (fmap f mbdef) (fmap f branches)
139
140 -- | Returns the list of non-default branches of the SwitchTargets value
141 switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
142 switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches
143
144 -- | Return the default label of the SwitchTargets value
145 switchTargetsDefault :: SwitchTargets -> Maybe Label
146 switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef
147
148 -- | Return the range of the SwitchTargets value
149 switchTargetsRange :: SwitchTargets -> (Integer, Integer)
150 switchTargetsRange (SwitchTargets _ range _ _) = range
151
152 -- | Return whether this is used for a signed value
153 switchTargetsSigned :: SwitchTargets -> Bool
154 switchTargetsSigned (SwitchTargets signed _ _ _) = signed
155
156 -- | switchTargetsToTable creates a dense jump table, usable for code generation.
157 --
158 -- Also returns an offset to add to the value; the list is 0-based on the
159 -- result of that addition.
160 --
161 -- The conversion from Integer to Int is a bit of a wart, as the actual
162 -- scrutinee might be an unsigned word, but it just works, due to wrap-around
163 -- arithmetic (as verified by the CmmSwitchTest test case).
164 switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label])
165 switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
166 = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ])
167 where
168 labelFor i = case M.lookup i branches of Just l -> Just l
169 Nothing -> mbdef
170 start | lo >= 0 && lo < minJumpTableOffset = 0 -- See Note [Jump Table Offset]
171 | otherwise = lo
172
173 -- Note [Jump Table Offset]
174 -- ~~~~~~~~~~~~~~~~~~~~~~~~
175 --
176 -- Usually, the code for a jump table starting at x will first subtract x from
177 -- the value, to avoid a large amount of empty entries. But if x is very small,
178 -- the extra entries are no worse than the subtraction in terms of code size, and
179 -- not having to do the subtraction is quicker.
180 --
181 -- I.e. instead of
182 -- _u20N:
183 -- leaq -1(%r14),%rax
184 -- jmp *_n20R(,%rax,8)
185 -- _n20R:
186 -- .quad _c20p
187 -- .quad _c20q
188 -- do
189 -- _u20N:
190 -- jmp *_n20Q(,%r14,8)
191 --
192 -- _n20Q:
193 -- .quad 0
194 -- .quad _c20p
195 -- .quad _c20q
196 -- .quad _c20r
197
198 -- | The list of all labels occuring in the SwitchTargets value.
199 switchTargetsToList :: SwitchTargets -> [Label]
200 switchTargetsToList (SwitchTargets _ _ mbdef branches)
201 = maybeToList mbdef ++ M.elems branches
202
203 -- | Groups cases with equal targets, suitable for pretty-printing to a
204 -- c-like switch statement with fall-through semantics.
205 switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label)
206 switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef)
207 where
208 groups = map (\xs -> (map fst xs, snd (head xs))) $
209 groupBy ((==) `on` snd) $
210 M.toList branches
211
212 -- | Custom equality helper, needed for "CmmCommonBlockElim"
213 eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool
214 eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) =
215 signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2)
216 where
217 goMB Nothing Nothing = True
218 goMB (Just l1) (Just l2) = l1 `eq` l2
219 goMB _ _ = False
220 goList [] [] = True
221 goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2
222 goList _ _ = False
223
224 -----------------------------------------------------------------------------
225 -- Code generation for Switches
226
227
228 -- | A SwitchPlan abstractly describes how a Switch statement ought to be
229 -- implemented. See Note [createSwitchPlan]
230 data SwitchPlan
231 = Unconditionally Label
232 | IfEqual Integer Label SwitchPlan
233 | IfLT Bool Integer SwitchPlan SwitchPlan
234 | JumpTable SwitchTargets
235 deriving Show
236 --
237 -- Note [createSwitchPlan]
238 -- ~~~~~~~~~~~~~~~~~~~~~~~
239 --
240 -- A SwitchPlan describes how a Switch statement is to be broken down into
241 -- smaller pieces suitable for code generation.
242 --
243 -- createSwitchPlan creates such a switch plan, in these steps:
244 -- 1. It splits the switch statement at segments of non-default values that
245 -- are too large. See splitAtHoles and Note [Magic Constants in CmmSwitch]
246 -- 2. Too small jump tables should be avoided, so we break up smaller pieces
247 -- in breakTooSmall.
248 -- 3. We fill in the segments between those pieces with a jump to the default
249 -- label (if there is one), returning a SeparatedList in mkFlatSwitchPlan
250 -- 4. We find and replace two less-than branches by a single equal-to-test in
251 -- findSingleValues
252 -- 5. The thus collected pieces are assembled to a balanced binary tree.
253
254 {-
255 Note [Two alts + default]
256 ~~~~~~~~~~~~~~~~~~~~~~~~~
257
258 Discussion and a bit more info at #14644
259
260 When dealing with a switch of the form:
261 switch(e) {
262 case 1: goto l1;
263 case 3000: goto l2;
264 default: goto ldef;
265 }
266
267 If we treat it as a sparse jump table we would generate:
268
269 if (e > 3000) //Check if value is outside of the jump table.
270 goto ldef;
271 else {
272 if (e < 3000) { //Compare to upper value
273 if(e != 1) //Compare to remaining value
274 goto ldef;
275 else
276 goto l2;
277 }
278 else
279 goto l1;
280 }
281
282 Instead we special case this to :
283
284 if (e==1) goto l1;
285 else if (e==3000) goto l2;
286 else goto l3;
287
288 This means we have:
289 * Less comparisons for: 1,<3000
290 * Unchanged for 3000
291 * One more for >3000
292
293 This improves code in a few ways:
294 * One comparison less means smaller code which helps with cache.
295 * It exchanges a taken jump for two jumps no taken in the >range case.
296 Jumps not taken are cheaper (See Agner guides) making this about as fast.
297 * For all other cases the first range check is removed making it faster.
298
299 The end result is that the change is not measurably slower for the case
300 >3000 and faster for the other cases.
301
302 This makes running this kind of match in an inner loop cheaper by 10-20%
303 depending on the data.
304 In nofib this improves wheel-sieve1 by 4-9% depending on problem
305 size.
306
307 We could also add a second conditional jump after the comparison to
308 keep the range check like this:
309 cmp 3000, rArgument
310 jg <default>
311 je <branch 2>
312 While this is fairly cheap it made no big difference for the >3000 case
313 and slowed down all other cases making it not worthwhile.
314 -}
315
316
317 -- | Does the target support switch out of the box? Then leave this to the
318 -- target!
319 targetSupportsSwitch :: HscTarget -> Bool
320 targetSupportsSwitch HscC = True
321 targetSupportsSwitch HscLlvm = True
322 targetSupportsSwitch _ = False
323
324 -- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
325 -- down into smaller pieces suitable for code generation.
326 createSwitchPlan :: SwitchTargets -> SwitchPlan
327 -- Lets do the common case of a singleton map quicky and efficiently (#10677)
328 createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
329 | [(x, l)] <- M.toList m
330 = IfEqual x l (Unconditionally defLabel)
331 -- And another common case, matching "booleans"
332 createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
333 | [(x1, l1), (_x2,l2)] <- M.toAscList m
334 --Checking If |range| = 2 is enough if we have two unique literals
335 , hi - lo == 1
336 = IfEqual x1 l1 (Unconditionally l2)
337 -- See Note [Two alts + default]
338 createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
339 | [(x1, l1), (x2,l2)] <- M.toAscList m
340 = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
341 createSwitchPlan (SwitchTargets signed range mbdef m) =
342 -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
343 plan
344 where
345 pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m
346 flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces
347 plan = buildTree signed $ flatPlan
348
349
350 ---
351 --- Step 1: Splitting at large holes
352 ---
353 splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a]
354 splitAtHoles _ m | M.null m = []
355 splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles
356 where
357 holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m))
358 nonHoles = reassocTuples lo holes hi
359
360 (lo,_) = M.findMin m
361 (hi,_) = M.findMax m
362
363 ---
364 --- Step 2: Avoid small jump tables
365 ---
366 -- We do not want jump tables below a certain size. This breaks them up
367 -- (into singleton maps, for now).
368 breakTooSmall :: M.Map Integer a -> [M.Map Integer a]
369 breakTooSmall m
370 | M.size m > minJumpTableSize = [m]
371 | otherwise = [M.singleton k v | (k,v) <- M.toList m]
372
373 ---
374 --- Step 3: Fill in the blanks
375 ---
376
377 -- | A FlatSwitchPlan is a list of SwitchPlans, with an integer inbetween every
378 -- two entries, dividing the range.
379 -- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if
380 -- the expression is < n, and plan2 otherwise.
381
382 type FlatSwitchPlan = SeparatedList Integer SwitchPlan
383
384 mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan
385
386 -- If we have no default (i.e. undefined where there is no entry), we can
387 -- branch at the minimum of each map
388 mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty
389 mkFlatSwitchPlan signed Nothing _ (m:ms)
390 = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ])
391
392 -- If we have a default, we have to interleave segments that jump
393 -- to the default between the maps
394 mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps)
395 where
396 go (lo,hi) []
397 | lo > hi = []
398 | otherwise = [(lo, Unconditionally l)]
399 go (lo,hi) (m:ms)
400 | lo < min
401 = (lo, Unconditionally l) : go (min,hi) (m:ms)
402 | lo == min
403 = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms
404 | otherwise
405 = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min)
406 where
407 min = fst (M.findMin m)
408 max = fst (M.findMax m)
409
410
411 mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan
412 mkLeafPlan signed mbdef m
413 | [(_,l)] <- M.toList m -- singleton map
414 = Unconditionally l
415 | otherwise
416 = JumpTable $ mkSwitchTargets signed (min,max) mbdef m
417 where
418 min = fst (M.findMin m)
419 max = fst (M.findMax m)
420
421 ---
422 --- Step 4: Reduce the number of branches using ==
423 ---
424
425 -- A sequence of three unconditional jumps, with the outer two pointing to the
426 -- same value and the bounds off by exactly one can be improved
427 findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan
428 findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs)
429 | l == l3 && i + 1 == i'
430 = findSingleValues (IfEqual i l2 (Unconditionally l), xs)
431 findSingleValues (p, (i,p'):xs)
432 = (p,i) `consSL` findSingleValues (p', xs)
433 findSingleValues (p, [])
434 = (p, [])
435
436 ---
437 --- Step 5: Actually build the tree
438 ---
439
440 -- Build a balanced tree from a separated list
441 buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan
442 buildTree _ (p,[]) = p
443 buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2)
444 where
445 (sl1, m, sl2) = divideSL sl
446
447
448
449 --
450 -- Utility data type: Non-empty lists with extra markers in between each
451 -- element:
452 --
453
454 type SeparatedList b a = (a, [(b,a)])
455
456 consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a
457 consSL (a, b) (a', xs) = (a, (b,a'):xs)
458
459 divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a)
460 divideSL (_,[]) = error "divideSL: Singleton SeparatedList"
461 divideSL (p,xs) = ((p, xs1), m, (p', xs2))
462 where
463 (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs
464
465 --
466 -- Other Utilities
467 --
468
469 restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b
470 restrictMap (lo,hi) m = mid
471 where (_, mid_hi) = M.split (lo-1) m
472 (mid, _) = M.split (hi+1) mid_hi
473
474 -- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)]
475 reassocTuples :: a -> [(a,a)] -> a -> [(a,a)]
476 reassocTuples initial [] last
477 = [(initial,last)]
478 reassocTuples initial ((a,b):tuples) last
479 = (initial,a) : reassocTuples b tuples last
480
481 -- Note [CmmSwitch vs. CmmImplementSwitchPlans]
482 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
483 -- I (Joachim) separated the two somewhat closely related modules
484 --
485 -- - CmmSwitch, which provides the CmmSwitchTargets type and contains the strategy
486 -- for implementing a Cmm switch (createSwitchPlan), and
487 -- - CmmImplementSwitchPlans, which contains the actuall Cmm graph modification,
488 --
489 -- for these reasons:
490 --
491 -- * CmmSwitch is very low in the dependency tree, i.e. does not depend on any
492 -- GHC specific modules at all (with the exception of Output and Hoople
493 -- (Literal)). CmmImplementSwitchPlans is the Cmm transformation and hence very
494 -- high in the dependency tree.
495 -- * CmmSwitch provides the CmmSwitchTargets data type, which is abstract, but
496 -- used in CmmNodes.
497 -- * Because CmmSwitch is low in the dependency tree, the separation allows
498 -- for more parallelism when building GHC.
499 -- * The interaction between the modules is very explicit and easy to
500 -- understand, due to the small and simple interface.